コレクション内のすべてのキーの名前を取得します

https://stackoverflow.com/questions/2298870

21-09-2019
|

質問

MongoDB コレクション内のすべてのキーの名前を取得したいと考えています。

たとえば、次のようになります。

db.things.insert( { type : ['dog', 'cat'] } );
db.things.insert( { egg : ['cat'] } );
db.things.insert( { type : [] } );
db.things.insert( { hello : []  } );

一意のキーを取得したいと思います。

type, egg, hello

解決

あなたはMapReduceのでこれを行うことができます：

mr = db.runCommand({
  "mapreduce" : "my_collection",
  "map" : function() {
    for (var key in this) { emit(key, null); }
  },
  "reduce" : function(key, stuff) { return null; }, 
  "out": "my_collection" + "_keys"
})

そして、すべてのキーを見つけるように結果のコレクション上の個別実行します：

db[mr.result].distinct("_id")
["foo", "bar", "baz", "_id", ...]

他のヒント

とクリスティーナの答えインスピレーションとして、私はまさにこれを行う Variety というオープンソースツールを作成しました。 https://github.com/variety/variety

新しいもので集計を使用できます $objectToArrray で 3.4.4 すべての最上位のキーと値のペアをドキュメント配列に変換し、その後に続くバージョン $unwind & $group と $addToSet コレクション全体にわたって個別のキーを取得します。

$$ROOT 最上位ドキュメントを参照するため。

db.things.aggregate([
  {"$project":{"arrayofkeyvalue":{"$objectToArray":"$$ROOT"}}},
  {"$unwind":"$arrayofkeyvalue"},
  {"$group":{"_id":null,"allkeys":{"$addToSet":"$arrayofkeyvalue.k"}}}
])

単一のドキュメント内のキーを取得するには、以下のクエリを使用できます。

db.things.aggregate([
  {"$project":{"arrayofkeyvalue":{"$objectToArray":"$$ROOT"}}},
  {"$project":{"keys":"$arrayofkeyvalue.k"}}
])

これを試してみてください

doc=db.thinks.findOne();
for (key in doc) print(key);

あなたのターゲットコレクションがあまり大きくない場合には、

、あなたはモンゴシェルクライアントの下でこれを試すことができます：

var allKeys = {};

db.YOURCOLLECTION.find().forEach(function(doc){Object.keys(doc).forEach(function(key){allKeys[key]=1})});

allKeys;

のpythonを使用。戻り値コレクション内のすべてのトップレベルのキーのセット：

#Using pymongo and connection named 'db'

reduce(
    lambda all_keys, rec_keys: all_keys | set(rec_keys), 
    map(lambda d: d.keys(), db.things.find()), 
    set()
)

一例を簡単にご紹介いたしますたPython:このサンプルを返します結果をインライン.

from pymongo import MongoClient
from bson.code import Code

mapper = Code("""
    function() {
                  for (var key in this) { emit(key, null); }
               }
""")
reducer = Code("""
    function(key, stuff) { return null; }
""")

distinctThingFields = db.things.map_reduce(mapper, reducer
    , out = {'inline' : 1}
    , full_response = True)
## do something with distinctThingFields['results']

Aはpymongoを使用し、再利用可能なソリューションをクリーンアップします：

from pymongo import MongoClient
from bson import Code

def get_keys(db, collection):
    client = MongoClient()
    db = client[db]
    map = Code("function() { for (var key in this) { emit(key, null); } }")
    reduce = Code("function(key, stuff) { return null; }")
    result = db[collection].map_reduce(map, reduce, "myresults")
    return result.distinct('_id')

使用方法：

get_keys('dbname', 'collection')
>> ['key1', 'key2', ... ]

ご利用の場合はpythonの3.4.4以上を利用することができ下記の凝集を利用 $objectToArray や $group 凝集

db.collection.aggregate([
  { "$project": {
    "data": { "$objectToArray": "$$ROOT" }
  }},
  { "$project": { "data": "$data.k" }},
  { "$unwind": "$data" },
  { "$group": {
    "_id": null,
    "keys": { "$addToSet": "$data" }
  }}
])

こちらではの例

これは私のために罰金を動作します：

var arrayOfFieldNames = [];

var items = db.NAMECOLLECTION.find();

while(items.hasNext()) {
  var item = items.next();
  for(var index in item) {
    arrayOfFieldNames[index] = index;
   }
}

for (var index in arrayOfFieldNames) {
  print(index);
}

私は驚きだ、誰もここに重複値、上の簡単な例をフィルタリングし、自動的にシンプルjavascriptとSetロジックを使用して、ANSを持っていないののmongoシェルを以下のように：

var allKeys = new Set()
db.collectionName.find().forEach( function (o) {for (key in o ) allKeys.add(key)})
for(let key of allKeys) print(key)

この印刷するすべての可能な独特のキーコレクション名での：のCOLLECTIONNAMEの

次集計パイプラインの実行を検討し、すべてのキーマイナス_idのリストを取得するには
var keys = db.collection.aggregate([ { "$project": { "hashmaps": { "$objectToArray": "$$ROOT" } } }, { "$project": { "fields": "$hashmaps.k" } }, { "$group": { "_id": null, "fields": { "$addToSet": "$fields" } } }, { "$project": { "keys": { "$setDifference": [ { "$reduce": { "input": "$fields", "initialValue": [], "in": { "$setUnion" : ["$$value", "$$this"] } } }, ["_id"] ] } } } ]).toArray()[0]["keys"];

前述したようにこれを行うのが最善の方法だと思いますここ mongod 3.4.4+ にありますが、 $unwind オペレーターを使用し、パイプラインで 2 つのステージのみを使用します。代わりに、 $mergeObjects そして $objectToArray オペレーター。

の中に $group ステージでは、 $mergeObjects 演算子は、キー/値がコレクション内のすべてのドキュメントからの単一のドキュメントを返します。

次に、 $project 私たちが使う場所 $map そして $objectToArray 鍵を返却するため。

let allTopLevelKeys = [ { "$group": { "_id": null, "array": { "$mergeObjects": "$$ROOT" } } }, { "$project": { "keys": { "$map": { "input": { "$objectToArray": "$array" }, "in": "$$this.k" } } } } ];

これで、ネストされたドキュメントがあり、キーも取得したい場合、これが可能になります。わかりやすくするために、次のような単純な埋め込みドキュメントを含むドキュメントを考えてみましょう。

{field1: {field2: "abc"}, field3: "def"} {field1: {field3: "abc"}, field4: "def"}

次のパイプラインは、すべてのキー (field1、field2、field3、field4) を生成します。

let allFistSecondLevelKeys = [ { "$group": { "_id": null, "array": { "$mergeObjects": "$$ROOT" } } }, { "$project": { "keys": { "$setUnion": [ { "$map": { "input": { "$reduce": { "input": { "$map": { "input": { "$objectToArray": "$array" }, "in": { "$cond": [ { "$eq": [ { "$type": "$$this.v" }, "object" ] }, { "$objectToArray": "$$this.v" }, [ "$$this" ] ] } } }, "initialValue": [ ], "in": { "$concatArrays": [ "$$this", "$$value" ] } } }, "in": "$$this.k" } } ] } } } ]

少し努力するだけで、要素がオブジェクトである配列フィールド内のすべてのサブドキュメントのキーを取得できます。

は、私はnodejsに書き込もうとし、最後にこの思い付きました
db.collection('collectionName').mapReduce( function() { for (var key in this) { emit(key, null); } }, function(key, stuff) { return null; }, { "out": "allFieldNames" }, function(err, results) { var fields = db.collection('allFieldNames').distinct('_id'); fields .then(function(data) { var finalData = { "status": "success", "fields": data }; res.send(finalData); delteCollection(db, 'allFieldNames'); }) .catch(function(err) { res.send(err); delteCollection(db, 'allFieldNames'); }); });

は、新しく作成されたコレクション "allFieldNames" を読んだ後、それを削除します。

db.collection("allFieldNames").remove({}, function (err,result) { db.close(); return; });

mongoldb ドキュメントを1として、distinctの組み合わせ

は、単一のコレクションまたはビューを横切って指定されたフィールドの異なる値を検索し、アレイに結果を返します。

とインデックスのコレクション操作は戻ってくるものです与えられたキー、またはインデックスのためのすべての可能な値：

は、コレクションに既存のインデックスを特定し、記述した文書のリストを保持している配列を返します。

だから、すべてそれがインデックスを登録し、リターンだのためのコレクションを照会するために、次のようなメソッドを使用し行うことができます与えられた方法の一つに、この例では、のために非同期/のawaitを使用しています（キーのインデックスを持つオブジェクトを言いますNodeJSが、明らかにあなたは）他の非同期的なアプローチを使用することができます：

async function GetFor(collection, index) { let currentIndexes; let indexNames = []; let final = {}; let vals = []; try { currentIndexes = await collection.indexes(); await ParseIndexes(); //Check if a specific index was queried, otherwise, iterate for all existing indexes if (index && typeof index === "string") return await ParseFor(index, indexNames); await ParseDoc(indexNames); await Promise.all(vals); return final; } catch (e) { throw e; } function ParseIndexes() { return new Promise(function (result) { let err; for (let ind in currentIndexes) { let index = currentIndexes[ind]; if (!index) { err = "No Key For Index "+index; break; } let Name = Object.keys(index.key); if (Name.length === 0) { err = "No Name For Index"; break; } indexNames.push(Name[0]); } return result(err ? Promise.reject(err) : Promise.resolve()); }) } async function ParseFor(index, inDoc) { if (inDoc.indexOf(index) === -1) throw "No Such Index In Collection"; try { await DistinctFor(index); return final; } catch (e) { throw e } } function ParseDoc(doc) { return new Promise(function (result) { let err; for (let index in doc) { let key = doc[index]; if (!key) { err = "No Key For Index "+index; break; } vals.push(new Promise(function (pushed) { DistinctFor(key) .then(pushed) .catch(function (err) { return pushed(Promise.resolve()); }) })) } return result(err ? Promise.reject(err) : Promise.resolve()); }) } async function DistinctFor(key) { if (!key) throw "Key Is Undefined"; try { final[key] = await collection.distinct(key); } catch (e) { final[key] = 'failed'; throw e; } } }
だから、基本的な_idインデックスでコレクションを照会、以下の（テストコレクションが唯一のテストの時に1つの文書を持っている）を返します。

Mongo.MongoClient.connect(url, function (err, client) { assert.equal(null, err); let collection = client.db('my db').collection('the targeted collection'); GetFor(collection, '_id') .then(function () { //returns // { _id: [ 5ae901e77e322342de1fb701 ] } }) .catch(function (err) { //manage your error.. }) });

、NodeJSドライバーにネイティブ、この用途の方法を断っておきます。いくつかの他の回答を示唆しているように、このような集約フレームワークなどの他のアプローチがあります。あなたが簡単に作成し、どのような結果を返すように微調整することができますように私は個人的に、より柔軟なこのアプローチを見つけます。明らかに、これはアドレスのみ、トップレベルの属性ではなく、ネストされたもの。また、すべての文書が表現されている（メイン_id以外の）セカンダリインデックスが存在すべきであることを保証するために、これらの指標はrequiredとして設定する必要があります。

たぶん少しオフトピックがありますが、再帰的にオブジェクトのすべてのキー/フィールドをプリティプリントすることができます：

function _printFields(item, level) { if ((typeof item) != "object") { return } for (var index in item) { print(" ".repeat(level * 4) + index) if ((typeof item[index]) == "object") { _printFields(item[index], level + 1) } } } function printFields(item) { _printFields(item, 0) }

コレクション内のすべてのオブジェクトが同じ構造を有している場合に便利です。

まきする"という目標の達成に向けての使用もんごういかjsファイルです。以下の内容を追加しますコード getCollectionName.js ファイルを実行jsファイルのコンソールのLinuxされています。:

もんごういかのス192.168.1.135getCollectionName.js

db_set = connect("192.168.1.135:27017/database_set_name"); // for Local testing // db_set.auth("username_of_db", "password_of_db"); // if required db_set.getMongo().setSlaveOk(); var collectionArray = db_set.getCollectionNames(); collectionArray.forEach(function(collectionName){ if ( collectionName == 'system.indexes' || collectionName == 'system.profile' || collectionName == 'system.users' ) { return; } print("\nCollection Name = "+collectionName); print("All Fields :\n"); var arrayOfFieldNames = []; var items = db_set[collectionName].find(); // var items = db_set[collectionName].find().sort({'_id':-1}).limit(100); // if you want fast & scan only last 100 records of each collection while(items.hasNext()) { var item = items.next(); for(var index in item) { arrayOfFieldNames[index] = index; } } for (var index in arrayOfFieldNames) { print(index); } }); quit();

ん@ackuser

@James Cropchoの答えからスレッドに続いて、私は私が超簡単に使用することが判明し、次に上陸しました。それはまさに私が探していたものであるバイナリツール、次のとおりです。 mongoeyeするます。

私のスキーマは、コマンドラインからエクスポート得るためにそれが2分程度かかりました。このツールを使用します。

それは、より詳細ですので、
私はカルロス・LMのソリューションビットを拡張します。

スキーマの例

var schema = { _id: 123, id: 12, t: 'title', p: 4.5, ls: [{ l: 'lemma', p: { pp: 8.9 } }, { l: 'lemma2', p: { pp: 8.3 } } ] };

コンソールに入力します：

var schemafy = function(schema, i, limit) { var i = (typeof i !== 'undefined') ? i : 1; var limit = (typeof limit !== 'undefined') ? limit : false; var type = ''; var array = false; for (key in schema) { type = typeof schema[key]; array = (schema[key] instanceof Array) ? true : false; if (type === 'object') { print(Array(i).join(' ') + key+' <'+((array) ? 'array' : type)+'>:'); schemafy(schema[key], i+1, array); } else { print(Array(i).join(' ') + key+' <'+type+'>'); } if (limit) { break; } } }

ファイル名を指定して実行ます：

schemafy(db.collection.findOne());

出力

_id <number> id <number> t <string> p <number> ls <object>: 0 <object>: l <string> p <object>: pp <number>

私は...
の周りに1つの単純な仕事を持っています
何ができることは、あなたのメインコレクション「モノ」へのデータ/文書を挿入している間、あなたは1つの別のコレクションに属性を挿入する必要がありますが、「things_attributes」を言うことができます。
任意の新しいキー存在は、その文書にして、もう一度それを追加した場合、
あなたが「もの」に挿入するたびので、あなたはそれを再挿入「things_attributes」あなたの新しいドキュメントのキーと、その文書の比較値から入手できます。
things_attributesは、あなたが使用して必要とする場合、これまであなたが簡単に得ることができるユニークなキーの1つだけのドキュメントを持つことになります。
だからfindOne（）

ライセンス： CC-BY-SA と帰属

所属していません StackOverflow