Ottieni i nomi di tutte le chiavi della collezione

https://stackoverflow.com/questions/2298870

21-09-2019
|

Domanda

Mi piacerebbe ottenere i nomi di tutte le chiavi in una raccolta MongoDB.

Ad esempio, da questo:

// Sample data from the question: four documents whose top-level fields are
// "type" (twice), "egg" and "hello".
db.things.insert( { type : ['dog', 'cat'] } );
db.things.insert( { egg : ['cat'] } );
db.things.insert( { type : [] } );
db.things.insert( { hello : []  } );

Vorrei ottenere le chiavi univoche:

type, egg, hello

Soluzione

Potresti farlo con MapReduce:

// Run a map-reduce job over "my_collection" that emits every top-level field
// name as a key; the output goes to the "my_collection_keys" collection.
mr = db.runCommand({
  "mapreduce" : "my_collection",
  // map: `this` is the current document; emit one (fieldName, null) pair per field.
  "map" : function() {
    for (var key in this) { emit(key, null); }
  },
  // reduce: the values are irrelevant, only the emitted keys matter.
  "reduce" : function(key, stuff) { return null; }, 
  "out": "my_collection" + "_keys"
})

Quindi esegui distinct sulla raccolta risultante per trovare tutte le chiavi:

db[mr.result].distinct("_id")
["foo", "bar", "baz", "_id", ...]

Altri suggerimenti

Prendendo ispirazione dalla risposta di Kristina, ho creato uno strumento open source chiamato Variety che fa esattamente questo: https://github.com/variety/variety

A partire dalla versione 3.4.4 è possibile utilizzare l'aggregazione con il nuovo operatore $objectToArray per convertire tutte le coppie chiave/valore di primo livello in un array di documenti, seguito da $unwind e $group con $addToSet per ottenere le chiavi distinte dell'intera raccolta.

$$ROOT per fare riferimento al documento di livello superiore.

db.things.aggregate([
  // Turn each document into an array of { k, v } pairs, one per top-level field.
  {"$project":{"arrayofkeyvalue":{"$objectToArray":"$$ROOT"}}},
  // Emit one document per k/v pair.
  {"$unwind":"$arrayofkeyvalue"},
  // Gather the distinct field names (k) of the whole collection into "allkeys".
  {"$group":{"_id":null,"allkeys":{"$addToSet":"$arrayofkeyvalue.k"}}}
])

Puoi utilizzare la query seguente per ottenere le chiavi in un singolo documento.

db.things.aggregate([
  // Turn each document into an array of { k, v } pairs, one per top-level field.
  {"$project":{"arrayofkeyvalue":{"$objectToArray":"$$ROOT"}}},
  // Keep only the field names; the result has one "keys" array per input document.
  {"$project":{"keys":"$arrayofkeyvalue.k"}}
])

Prova questo:

// Print the top-level field names of a single sample document from "things".
// Only one document is inspected, so keys that appear solely in other
// documents are not listed.
var doc = db.things.findOne();
for (var key in doc) {
  print(key);
}

Se la tua raccolta di destinazione non è troppo grande, puoi provare questo con il client mongo shell:

// Record every top-level field name seen in the collection as a property of allKeys.
var allKeys = {};

db.YOURCOLLECTION.find().forEach(function (doc) {
  var fieldNames = Object.keys(doc);
  for (var i = 0; i < fieldNames.length; i++) {
    allKeys[fieldNames[i]] = 1;
  }
});

// Evaluate the object so the shell displays the collected keys.
allKeys;

Utilizzando Python. Restituisce l'insieme di tutte le chiavi di livello superiore nella raccolta:

# Using pymongo and a database handle named 'db'.
# Accumulate the top-level keys of every document one at a time: this needs no
# functools.reduce import (reduce is not a builtin in Python 3) and never holds
# more than one document's keys besides the result set.
all_keys = set()
for doc in db.things.find():
    all_keys.update(doc.keys())
all_keys

Ecco l'esempio svolto in Python. Questo esempio restituisce i risultati in linea.

from pymongo import MongoClient
from bson.code import Code

# Map function (JavaScript source): emits every top-level field name of the
# current document as a key with a null value.
mapper = Code("""
    function() {
                  for (var key in this) { emit(key, null); }
               }
""")
# Reduce function (JavaScript source): the values are irrelevant, so it always
# returns null; only the emitted keys matter.
reducer = Code("""
    function(key, stuff) { return null; }
""")

# Run the job on the "things" collection with inline output and the full
# response requested. NOTE(review): `db` is not defined in this snippet and
# must be created by the caller (e.g. from a MongoClient).
distinctThingFields = db.things.map_reduce(mapper, reducer
    , out = {'inline' : 1}
    , full_response = True)
## do something with distinctThingFields['results']

Una soluzione pulita e riutilizzabile utilizzando pymongo:

from pymongo import MongoClient
from bson import Code

def get_keys(db, collection):
    """Return the distinct top-level field names used in a collection.

    Args:
        db: name of the database to open on a default ``MongoClient()``.
        collection: name of the collection to inspect.

    Returns:
        The distinct ``_id`` values of the map-reduce output, i.e. the field
        names emitted by the map function.

    The map-reduce result is written to the "myresults" collection of the
    same database.
    """
    mongo_client = MongoClient()
    database = mongo_client[db]
    # Emit each top-level field name; the reducer discards the values.
    emit_field_names = Code("function() { for (var key in this) { emit(key, null); } }")
    discard_values = Code("function(key, stuff) { return null; }")
    keys_collection = database[collection].map_reduce(
        emit_field_names, discard_values, "myresults"
    )
    return keys_collection.distinct('_id')

Utilizzo:

get_keys('dbname', 'collection')
>> ['key1', 'key2', ... ]

Se stai utilizzando MongoDB 3.4.4 o versioni successive, puoi utilizzare l'aggregazione seguente con gli operatori $objectToArray e $group:

db.collection.aggregate([
  // Turn each document into an array of { k, v } pairs, one per top-level field.
  { "$project": {
    "data": { "$objectToArray": "$$ROOT" }
  }},
  // Keep only the field names of each document.
  { "$project": { "data": "$data.k" }},
  // Emit one document per field name.
  { "$unwind": "$data" },
  // Gather the distinct field names of the whole collection into "keys".
  { "$group": {
    "_id": null,
    "keys": { "$addToSet": "$data" }
  }}
])

Ecco un esempio funzionante.

Questo funziona bene per me:

// Collect the distinct top-level field names of every document in the collection.
// A prototype-less object serves as the set: assigning string keys to an Array
// fails on a field called "length" (RangeError: Invalid array length).
var seenFields = Object.create(null);

var items = db.NAMECOLLECTION.find();

while (items.hasNext()) {
  var item = items.next();
  for (var field in item) {
    seenFields[field] = true;
  }
}

// Real array of the distinct names, in first-seen order.
var arrayOfFieldNames = Object.keys(seenFields);

arrayOfFieldNames.forEach(function (name) {
  print(name);
});

Sono sorpreso che nessuno qui abbia proposto un metodo semplice che usi JavaScript e la logica di Set per filtrare automaticamente i valori duplicati. Ecco un semplice esempio nella shell di mongo:

// Collect the distinct top-level field names of every document in a Set,
// which discards duplicates automatically.
var allKeys = new Set();
db.collectionName.find().forEach(function (o) {
  // `var` keeps the loop variable local instead of creating an implicit global.
  for (var key in o) {
    allKeys.add(key);
  }
});
for (let key of allKeys) print(key);

Questo stamperà tutte le possibili chiavi univoche della raccolta collectionName.

Per ottenere un elenco di tutte le chiavi tranne _id, prendi in considerazione l'esecuzione della seguente pipeline di aggregazione:

// Distinct top-level field names of the collection, excluding "_id".
var keys = db.collection.aggregate([
    // Turn each document into an array of { k, v } pairs.
    { "$project": {
       "hashmaps": { "$objectToArray": "$$ROOT" } 
    } }, 
    // Keep only the field names of each document.
    { "$project": {
       "fields": "$hashmaps.k"
    } },
    // Collect the distinct per-document name arrays into one array of arrays.
    { "$group": {
        "_id": null,
        "fields": { "$addToSet": "$fields" }
    } },
    // Union the name arrays into one set, then remove "_id" from it.
    { "$project": {
            "keys": {
                "$setDifference": [
                    {
                        "$reduce": {
                            "input": "$fields",
                            "initialValue": [],
                            "in": { "$setUnion" : ["$$value", "$$this"] }
                        }
                    },
                    ["_id"]
                ]
            }
        }
    }
// The pipeline yields a single document; take its "keys" array.
]).toArray()[0]["keys"];

Penso che il modo migliore per farlo sia quello menzionato qui, disponibile in MongoDB 3.4.4+, ma senza usare l'operatore $unwind e utilizzando solo due fasi nella pipeline. Possiamo invece usare gli operatori $mergeObjects e $objectToArray.

Nel $group fase, usiamo il $mergeObjects operatore per restituire un singolo documento in cui chiave/valore provengono da tutti i documenti nella raccolta.

Poi arriva il $project dove usiamo $map E $objectToArray restituire le chiavi.

// Stage 1: fold every document of the collection into a single merged document.
const mergeEveryDocument = {
    $group: {
        _id: null,
        array: { $mergeObjects: "$$ROOT" }
    }
};

// Stage 2: turn the merged document into k/v pairs and keep only the key names.
const extractKeyNames = {
    $project: {
        keys: {
            $map: {
                input: { $objectToArray: "$array" },
                in: "$$this.k"
            }
        }
    }
};

// Two-stage pipeline returning the top-level keys of the collection.
let allTopLevelKeys = [mergeEveryDocument, extractKeyNames];

Ora, se abbiamo documenti annidati e vogliamo ottenere anche le loro chiavi, questo è fattibile. Per semplicità, consideriamo dei documenti con un semplice documento incorporato, simili a questi:

{field1: {field2: "abc"}, field3: "def"}
{field1: {field3: "abc"}, field4: "def"}

La pipeline seguente produce tutte le chiavi (field1, field2, field3, field4).

// Pipeline returning the distinct keys of the first and second nesting levels.
// Keys are collected per document and only combined afterwards: $mergeObjects
// is shallow, so merging whole documents first lets a later document's embedded
// value replace an earlier one and its sub-keys are lost.
let allFistSecondLevelKeys = [
    {
        // For every top-level k/v pair keep the key itself and, when the value
        // is an embedded document, the keys of that document as well.
        "$project": {
            "keys": {
                "$reduce": {
                    "input": { "$objectToArray": "$$ROOT" },
                    "initialValue": [],
                    "in": {
                        "$concatArrays": [
                            "$$value",
                            [ "$$this.k" ],
                            {
                                "$cond": [
                                    { "$eq": [ { "$type": "$$this.v" }, "object" ] },
                                    {
                                        "$map": {
                                            "input": { "$objectToArray": "$$this.v" },
                                            // Named variable so the outer $$this is not shadowed.
                                            "as": "nested",
                                            "in": "$$nested.k"
                                        }
                                    },
                                    []
                                ]
                            }
                        ]
                    }
                }
            }
        }
    },
    // One document per collected key ...
    { "$unwind": "$keys" },
    // ... folded back into a single document holding the distinct keys.
    { "$group": { "_id": null, "keys": { "$addToSet": "$keys" } } }
]

Con un piccolo sforzo, possiamo ottenere la chiave per tutti i documenti secondari in un campo array in cui anche gli elementi sono oggetti.

Stavo cercando di scrivere in nodejs e alla fine mi è venuto in mente questo:

// Map-reduce every top-level field name into the temporary "allFieldNames"
// collection, then answer the request with the distinct names.
// `res` and `delteCollection` are provided by the surrounding code (not shown).
db.collection('collectionName').mapReduce(
function() {
    // Runs per document: emit each top-level field name as a key.
    for (var key in this) {
        emit(key, null);
    }
},
function(key, stuff) {
    // Values are irrelevant; only the emitted keys matter.
    return null;
}, {
    "out": "allFieldNames"
},
function(err, results) {
    // Do not report "success" when the map-reduce job itself failed.
    if (err) {
        res.send(err);
        return;
    }
    db.collection('allFieldNames').distinct('_id')
        .then(function(data) {
            var finalData = {
                "status": "success",
                "fields": data
            };
            res.send(finalData);
            delteCollection(db, 'allFieldNames');
        })
        .catch(function(err) {
            res.send(err);
            delteCollection(db, 'allFieldNames');
        });
 });

Dopo aver letto la raccolta appena creata "allFieldNames", eliminala.

// Remove every document from the temporary "allFieldNames" collection and
// close the connection once the removal has completed.
db.collection("allFieldNames").remove({}, function onRemoved(err, result) {
    db.close();
});

Secondo la documentazione di MongoDB, una combinazione di distinct

Trova i valori distinti per un campo specificato in una singola raccolta o vista e restituisce i risultati in una matrice.

e dell'operazione di raccolta indexes è ciò che restituirebbe tutti i valori possibili per una determinata chiave o indice:

Restituisce un array che contiene un elenco di documenti che identificano e descrivono gli indici esistenti nella raccolta

Quindi in un dato metodo si potrebbe usare un metodo come il seguente, per interrogare una raccolta per tutti i suoi indici registrati e restituire, diciamo un oggetto con gli indici per le chiavi (questo esempio usa async/await per NodeJS, ma ovviamente potresti usare qualsiasi altro approccio asincrono):

/**
 * Collect the distinct values of the fields covered by a collection's indexes.
 *
 * Only the first field of each index key is considered. When two indexes start
 * with the same field, that field is queried once.
 *
 * Rejections carry plain strings, as before, so callers that compare the
 * rejection value keep working.
 *
 * @param {object} collection - Object exposing async `indexes()` and `distinct(field)`.
 * @param {string} [index] - When a non-empty string, only this indexed field is queried.
 * @returns {Promise<object>} Map of field name to its distinct values. In the
 *   all-fields mode a field whose `distinct` call failed maps to 'failed'.
 * @throws {string} "No Key For Index ...", "No Name For Index",
 *   "No Such Index In Collection", or the error raised by `distinct` when a
 *   specific `index` was requested.
 */
async function GetFor(collection, index) {
    const final = {};

    // Query one field and record the outcome in `final`; the error is rethrown
    // so the caller decides whether a failure is fatal.
    async function DistinctFor(key) {
        if (!key) throw "Key Is Undefined";
        try {
            final[key] = await collection.distinct(key);
        } catch (e) {
            final[key] = 'failed';
            throw e;
        }
    }

    // First field of every index, without duplicates.
    const indexNames = [];
    const currentIndexes = await collection.indexes();
    for (const spec of currentIndexes) {
        if (!spec) throw "No Key For Index " + spec;
        const fields = Object.keys(spec.key);
        if (fields.length === 0) throw "No Name For Index";
        if (!indexNames.includes(fields[0])) indexNames.push(fields[0]);
    }

    // Check if a specific index was queried, otherwise iterate over all existing indexes.
    if (index && typeof index === "string") {
        if (!indexNames.includes(index)) throw "No Such Index In Collection";
        await DistinctFor(index);
        return final;
    }

    const pending = [];
    for (const [position, key] of indexNames.entries()) {
        if (!key) throw "No Key For Index " + position;
        // Best effort: a failed field is already marked 'failed' in `final`,
        // so its rejection must not abort the other queries.
        pending.push(DistinctFor(key).catch(function () {}));
    }
    await Promise.all(pending);
    return final;
}

Quindi interrogando una raccolta con basic _id indice, restituirebbe quanto segue (la raccolta di test ha solo un documento al momento del test):

// Example: query the distinct values of the default "_id" index.
Mongo.MongoClient.connect(url, function (err, client) {
    assert.equal(null, err);

    let collection = client.db('my db').collection('the targeted collection');

    GetFor(collection, '_id')
        .then(function (result) {
            // result is, for example:
            // { _id: [ 5ae901e77e322342de1fb701 ] }
        })
        .catch(function (err) {
            //manage your error..
        })
        .then(function () {
            // Release the connection once the lookup has settled either way.
            return client.close();
        });
});

Intendiamoci, questo utilizza metodi nativi del driver NodeJS. Come suggerito da altre risposte, esistono altri approcci, come il framework di aggregazione. Personalmente trovo questo approccio più flessibile, poiché puoi facilmente creare e ottimizzare il modo in cui restituire i risultati. Ovviamente, questo riguarda solo gli attributi di livello superiore, non quelli nidificati. Inoltre, per garantire che tutti i documenti siano rappresentati nel caso in cui siano presenti indici secondari (diversi da quello principale _id), tali indici dovrebbero essere impostati come required.

Forse leggermente fuori tema, ma puoi stampare ricorsivamente tutte le chiavi/campi di un oggetto:

/**
 * Print the property names of `item`, one per line, indenting each nesting
 * level by four spaces. Values that are not objects are ignored.
 */
function _printFields(item, level) {
    // This guard also ends the recursion for scalar property values.
    if (typeof item !== "object") {
        return;
    }
    for (const field in item) {
        print(" ".repeat(level * 4) + field);
        _printFields(item[field], level + 1);
    }
}

/** Print all (nested) property names of `item`, starting at indent level 0. */
function printFields(item) {
    _printFields(item, 0);
}

Utile quando tutti gli oggetti in una raccolta hanno la stessa struttura.

Possiamo raggiungere questo obiettivo utilizzando un file JavaScript per mongo. Aggiungi il codice seguente nel tuo file getCollectionName.js ed esegui il file nella console di Linux come indicato di seguito:

mongo --host 192.168.1.135 getCollectionName.js

// Print the distinct top-level field names of every collection in the database.
var db_set = connect("192.168.1.135:27017/database_set_name"); // for Local testing
// db_set.auth("username_of_db", "password_of_db"); // if required

db_set.getMongo().setSlaveOk();

var collectionArray = db_set.getCollectionNames();

collectionArray.forEach(function(collectionName){

    // Skip the system collections.
    if ( collectionName == 'system.indexes' || collectionName == 'system.profile' || collectionName == 'system.users' ) {
        return;
    }

    print("\nCollection Name = "+collectionName);
    print("All Fields :\n");

    // A prototype-less object serves as the set of names: assigning string keys
    // to an Array fails on a field called "length" (RangeError: Invalid array length).
    var seenFields = Object.create(null);
    // getCollection() also works for names that clash with database methods.
    var items = db_set.getCollection(collectionName).find();
    // var items = db_set.getCollection(collectionName).find().sort({'_id':-1}).limit(100); // if you want fast & scan only last 100 records of each collection
    while(items.hasNext()) {
        var item = items.next();
        for(var field in item) {
            seenFields[field] = true;
        }
    }
    Object.keys(seenFields).forEach(function(name){
        print(name);
    });

});

quit();

Grazie @ackuser

Seguendo il thread della risposta di @James Cropcho, sono arrivato a quanto segue, che ho trovato facilissimo da usare. È uno strumento binario, che è esattamente quello che stavo cercando: mongoeye.

Utilizzando questo strumento ci sono voluti circa 2 minuti per esportare il mio schema dalla riga di comando.

Ho esteso un po' la soluzione di Carlos LM in modo che sia più dettagliata.

Esempio di schema:

// Example document: scalar fields plus an array ("ls") of objects that each
// carry a nested sub-document ("p").
var schema = {
    _id: 123,
    id: 12,
    t: 'title',
    p: 4.5,
    ls: [
        { l: 'lemma', p: { pp: 8.9 } },
        { l: 'lemma2', p: { pp: 8.3 } }
    ]
};

Digita nella console:

/**
 * Print the structure of a document: one line per field, as `name <type>`,
 * with nested fields indented by four spaces per level.
 *
 * @param {object} schema - Document (or nested value) to describe.
 * @param {number} [i=1] - Nesting level, 1 for the top level; sets the indent.
 * @param {boolean} [limit=false] - When true only the first entry is printed;
 *   used for arrays, where the first element stands in for all of them.
 */
var schemafy = function(schema, i, limit) {
    var depth = (typeof i !== 'undefined') ? i : 1;
    var firstOnly = (typeof limit !== 'undefined') ? limit : false;
    var indent = Array(depth).join('    ');

    // `var key` keeps the loop variable local; an undeclared `key` is an
    // implicit global and a ReferenceError in strict mode.
    for (var key in schema) {
        var value = schema[key];
        var isArray = Array.isArray(value);
        // typeof null is 'object'; report it as 'null' and do not descend.
        var type = (value === null) ? 'null' : typeof value;

        if (type === 'object') {
            print(indent + key + ' <' + (isArray ? 'array' : type) + '>:');
            schemafy(value, depth + 1, isArray);
        } else {
            print(indent + key + ' <' + type + '>');
        }

        if (firstOnly) {
            break;
        }
    }
};

Esegui:

schemafy(db.collection.findOne());

Output

_id <number>
id <number>
t <string>
p <number>
ls <array>:
    0 <object>:
        l <string>
        p <object>:
            pp <number>

Ho 1 soluzione più semplice...

Quello che puoi fare è che quando inserisci dati/documenti nella tua raccolta principale "cose" devi inserire gli attributi in 1 raccolta separata, diciamo "things_attributes".

quindi ogni volta che inserisci "cose", ottieni da "things_attributes" confronta i valori di quel documento con le tue nuove chiavi del documento se qualche nuova chiave presente la aggiunge a quel documento e la reinserisce.

Quindi things_attributes avrà solo 1 documento di chiavi univoche che puoi facilmente ottenere quando necessario utilizzando findOne()

Licenza: CC-BY-SA con attribuzione

Non affiliato a StackOverflow