Question

I'm wondering how to perform a kind of union in an aggregation in MongoDB. Let's imagine the following document in a collection (the structure is just for the sake of the example):

{
  linkedIn: {
    people : [
    {
      name : 'Fred'
     },
     {
       name : 'Matilda'
     }
   ]
  },
  twitter: {
    people : [
    {
       name : 'Hanna'
    },
    {
       name : 'Walter'
    }
   ]
  }
 }

How can I write an aggregation that returns the union of the people in twitter and linkedIn?

// Desired result: one entry per person, tagged with the network it came from
[
  { name: 'Fred', source: 'LinkedIn' },
  { name: 'Matilda', source: 'LinkedIn' },
  { name: 'Hanna', source: 'Twitter' },
  { name: 'Walter', source: 'Twitter' }
]
Was it helpful?

Solution

There are a couple of ways to do this with the aggregate method:

db.collection.aggregate([

    // Stage 1: keep both sub-documents and attach the constant array
    // ["linkedIn", "twitter"] to every document. The always-true "$cond"
    // is only there to carry the array of constants into "$project".
    {
        "$project": {
            "linkedIn": 1,
            "twitter": 1,
            "source": {
                "$cond": [ 1, [ "linkedIn", "twitter" ], 0 ]
            }
        }
    },

    // Stage 2: one copy of each document per source name
    {
        "$unwind": "$source"
    },

    // Stage 3: fold the copies back together per document, pushing the
    // people list that belongs to the source name carried by each copy
    {
        "$group": {
            "_id": "$_id",
            "data": {
                "$push": {
                    "$cond": [
                        { "$eq": [ "$source", "linkedIn" ] },
                        { "source": "$source", "people": "$linkedIn.people" },
                        { "source": "$source", "people": "$twitter.people" }
                    ]
                }
            }
        }
    },

    // Stage 4: one document per { source, people } pair
    {
        "$unwind": "$data"
    },

    // Stage 5: one document per individual person
    {
        "$unwind": "$data.people"
    },

    // Stage 6: flatten to the { name, source } output shape
    {
        "$project": {
            "_id": 0,
            "name": "$data.people.name",
            "source": "$data.source"
        }
    }

])

Or with a different approach using some operators from MongoDB 2.6:

db.collection.aggregate([
    // Tag every person with its source and merge both lists in one stage.
    // "$map" rewrites each array in place, so no "$unwind"/"$group" round
    // trip is needed. Unwinding "linkedIn.people" first would discard the
    // whole document (and its twitter people) whenever that array is empty
    // or missing; mapping keeps the other list intact.
    { "$project": {
        "data": { "$setUnion": [
            { "$map": {
                // "$ifNull" substitutes an empty array when the list is absent
                "input": { "$ifNull": [ "$twitter.people", { "$literal": [] } ] },
                "as": "person",
                "in": {
                    "name": "$$person.name",
                    "source": { "$literal": "twitter" }
                }
            }},
            { "$map": {
                "input": { "$ifNull": [ "$linkedIn.people", { "$literal": [] } ] },
                "as": "person",
                "in": {
                    "name": "$$person.name",
                    "source": { "$literal": "linkedIn" }
                }
            }}
        ]}
    }},

    // Unwind the union array: one document per tagged person
    { "$unwind": "$data" },

    // Project the fields
    { "$project": {
        "_id": 0,
        "name": "$data.name",
        "source": "$data.source"
    }}
])

And of course if you simply did not care what the source was:

db.collection.aggregate([
    // Union the two arrays. "$setUnion" treats its inputs as sets, so
    // identical person documents collapse into one entry and the order of
    // the result is not guaranteed. "$ifNull" substitutes an empty array for
    // an absent list; without it a document that lacks one of the two lists
    // would yield a null union and produce no output at all.
    { "$project": {
        "data": { "$setUnion": [
            { "$ifNull": [ "$linkedIn.people", { "$literal": [] } ] },
            { "$ifNull": [ "$twitter.people", { "$literal": [] } ] }
        ]}
    }},

    // Unwind the union array: one document per person
    { "$unwind": "$data" },

    // Project the fields
    { "$project": {
        "_id": 0,
        "name": "$data.name"
    }}
])

OTHER TIPS

I'm not sure whether aggregate is preferable to map-reduce for this kind of operation, but the following does what you're asking for (I don't know whether $const can be used safely in the .aggregate() function):

// aggregate() is a collection method, so it must be called on a collection.
// NOTE(review): "$const" does not appear in the public operator reference;
// "$literal" (MongoDB 2.6+) looks like the documented equivalent - confirm
// before relying on it.
db.collection.aggregate([
    // Attach the constant index array [0, 1] to every document
    { $project: { linkedIn: '$linkedIn', twitter: '$twitter', idx: { $const: [0, 1] } } },

    // One copy of each document per index
    { $unwind: '$idx' },

    // Regroup per document: index 0 carries the LinkedIn people, the other
    // index carries the Twitter people, each tagged with a constant label
    { $group: {
        _id: '$_id',
        data: { $push: { $cond: [
            { $eq: ['$idx', 0] },
            { source: { $const: 'LinkedIn' }, people: '$linkedIn.people' },
            { source: { $const: 'Twitter' }, people: '$twitter.people' }
        ] } }
    } },

    // One document per { source, people } pair
    { $unwind: '$data' },

    // One document per individual person
    { $unwind: '$data.people' },

    // Flatten to the { name, source } output shape
    { $project: { _id: 0, name: '$data.people.name', source: '$data.source' } }
])
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top