Question

I got a collection in mongodb called users. It looks like this:

db.users.find() =>
{
    {
        _id: 1,

        products: [1, 2, 3, 4, 5]
    },

    {
        _id: 2,

        products: [4, 5, 6, 7, 8]
    },

    {
        _id: 3,

        products: [10, 11, 12]
    }
}

Each products array contains the ids of the products that the user has bought. I want to do some kind of query/mapreduce/something to get "recommended products" for each user, like this:

// result = some kind of query/mapreduce/... on the users collection

print(result) =>
[
    { key: 1, values: [6, 7, 8]},
    { key: 2, values: [1, 2, 3]}
]

I would like the logic to work like this: User 1 has bought products 4 and 5. The same is true for user 2. Thus the other products (1, 2, 3) that user 1 has bought are a good recommendation for user 2.

Likewise, products (6, 7, 8) are a good recommendation for user 1. No other user has bought the products that user 3 bought, thus there are no recommendations for user 3.

How could I do this? Does anyone have an example you can show me?

Was it helpful?

Solution

You will never get the full results across all users, as you demonstrate, in a single operation. The simple reason is that neither mapReduce nor the aggregation framework works that way: you cannot compare across documents like that.

But you can do this on a per user basis, or if you want these results sitting around in another collection, then you would need to iterate per user in order to do the comparisons.

My favorite approach to this, and the fastest, would be with the aggregation framework. But it would require MongoDB 2.6 or upwards in order to work:

 // Products already bought by the user we want recommendations for.
 var compare = [1, 2, 3, 4, 5];

 // Runs against the "users" collection described in the question.
 db.users.aggregate([

     // For every user, work out which of their products overlap with
     // `compare` ("matched") and which do not ("difference"), together
     // with the size of each set so the next stage can filter on it.
     { "$project": {
         "matched": { 
             "$setIntersection": [
                "$products",
                compare
             ]
         },
         "matchedSize": { "$size": {
             "$setIntersection": [
                "$products",
                compare
             ]
         }},
         "difference": {
             "$setDifference": [
                "$products",
                compare
             ]
         },
         "differenceSize": { "$size": {
             "$setDifference": [
                "$products",
                compare
             ]
         }}
     }},

     // Keep only users that share at least one product with `compare`
     // AND own at least one product that is not in `compare`. A user whose
     // products are exactly `compare` has an empty difference and is
     // dropped here.
     { "$match": {
         "matchedSize": {"$gt": 0 },
         "differenceSize": { "$gt": 0 } 
     }},

     // One document per candidate product
     { "$unwind": "$difference" },

     // Collapse all candidates into a single de-duplicated set
     { "$group": {
         "_id": null,
         "recommend": { "$addToSet": "$difference" }
     }}
 ])

So it is nice and self-explanatory. The same result would be possible in versions earlier than 2.6, but the process is fairly involved.

Alternately you can do this with mapReduce, but you need to define some of the functions;

So first a mapper:

// Map function: `this` is the current user document and `compare` is the
// product list of the user we are recommending for (supplied through the
// mapReduce "scope" option). Emits, under the single key `null`, the products
// this user owns that are not in `compare`, but only when the user shares at
// least one product with `compare`.
var mapper = function () {

  // Elements of `a` that also occur in `b`. Does not mutate either argument
  // and does not require the arrays to be sorted. `compare` is shared by
  // every map invocation, so it must never be consumed here.
  function intersection(a, b) {
    return a.filter(function (x) { return b.indexOf(x) >= 0; });
  }

  // Elements of `a` that do not occur in `b`.
  function difference(a, b) {
    return a.filter(function (x) { return b.indexOf(x) < 0; });
  }

  // Documents without a products array cannot contribute anything.
  if ( !Array.isArray(this.products) )
    return;

  var result = {
    intersect: intersection( this.products, compare ),
    diff: difference( this.products, compare )
  };

  // Only users with some overlap AND something new to offer are useful.
  if ( result.intersect.length > 0 && result.diff.length > 0 )
    emit( null, result.diff );

};

Then a reducer:

// Reduce function: merges every list of candidate products emitted for `key`
// into one de-duplicated list, returned wrapped as { value: [...] }.
//
// MongoDB may feed the output of an earlier reduce call back in as one of
// `values`, so each entry can be either a bare array (straight from the
// mapper) or a { value: [...] } wrapper (from a previous reduce pass). Both
// shapes are accepted.
var reducer = function (key,values) {

  var reduced = [];

  values.forEach(function(value) {
    // Unwrap the result of a previous reduce pass if necessary.
    var items = Array.isArray(value) ? value : value.value;

    items.forEach(function(el) {
      if ( reduced.indexOf(el) < 0 )
        reduced.push(el);
    });
  });

  return { value: reduced };

};

And a finalize function as well:

// Finalize function: the reducer wraps its list as { value: [...] }, while a
// key with a single emit never passes through the reducer and is still a bare
// array. Unwrap the former and pass the latter through untouched so the
// output shape is uniform.
var finalize = function (key,value) {

  return value.hasOwnProperty('value') ? value.value : value;

};

And call the mapReduce:

// Run the map-reduce over the "users" collection from the question.
// "scope" exposes `compare` (the current user's purchases) as a global to the
// map function, and "inline" returns the result directly instead of writing
// it to a collection.
db.users.mapReduce(
    mapper,
    reducer,
    { 
        "scope": { "compare": [ 1, 2, 3, 4, 5 ] },
        "finalize": finalize,
        "out": { "inline": 1 }
    }
)

So there are a couple of approaches, once you get the products list for a given user you can get the compared recommended items. Do that individually for each user or iterate and store the lot somewhere if that is going to suit your needs.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top