使用 Javascript 数组计算集合差异的最快或最优雅的方法是什么？

https://stackoverflow.com/questions/1723168

19-09-2019
|

题

让 A 和 B 是两套。我在找真的计算集合差值的快速或优雅的方法（A - B 或者 A \B, ，取决于您的偏好）它们之间。正如标题所示，这两个集合作为 Javascript 数组进行存储和操作。

笔记：

Gecko 特有的技巧还可以
我更喜欢坚持使用本机函数（但如果它更快的话，我愿意使用轻量级库）
我见过，但没测试过 JS.Set （见上一点）

编辑： 我注意到关于包含重复元素的集合的评论。当我说“集合”时，我指的是数学定义，这意味着（除其他外）它们不包含重复元素。

解决方案

如果不知道这是否最有效，但也许是最短的

A = [1, 2, 3, 4];
B = [1, 3, 4, 7];

diff = A.filter(function(x) { return B.indexOf(x) < 0 })

console.log(diff);

更新到ES6：

A = [1, 2, 3, 4];
B = [1, 3, 4, 7];

diff = A.filter(x => !B.includes(x) );

console.log(diff);

其他提示

嗯，7年后，与 ES6 的套装 object 它非常简单（但仍然不如 python A - B 那么紧凑），并且据说比 indexOf 对于大型数组：

console.clear();
let a = new Set([1, 2, 3, 4]);
let b = new Set([5, 4, 3, 2]);


let a_minus_b = new Set([...a].filter(x => !b.has(x)));
let b_minus_a = new Set([...b].filter(x => !a.has(x)));
let a_intersect_b = new Set([...a].filter(x => b.has(x))); 

console.log([...a_minus_b]) // {1}
console.log([...b_minus_a]) // {5}
console.log([...a_intersect_b]) // {2,3,4}

您可以使用对象作为地图以避免线性扫描 B 对于每个元素 A 如用户187291的回答:

function setMinus(A, B) {
    var map = {}, C = [];

    for(var i = B.length; i--; )
        map[B[i].toSource()] = null; // any other value would do

    for(var i = A.length; i--; ) {
        if(!map.hasOwnProperty(A[i].toSource()))
            C.push(A[i]);
    }

    return C;
}

非标的 toSource() 方法用于获取唯一的属性名称；如果所有元素已经具有唯一的字符串表示形式（与数字的情况一样），您可以通过删除 toSource() 调用。

使用 jQuery 最短的是：

var A = [1, 2, 3, 4];
var B = [1, 3, 4, 7];

var diff = $(A).not(B);

console.log(diff.toArray());

<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>

我会对数组 B 进行哈希处理，然后保留数组 A 中不存在于 B 中的值：

function getHash(array){
  // Hash an array into a set of properties
  //
  // params:
  //   array - (array) (!nil) the array to hash
  //
  // return: (object)
  //   hash object with one property set to true for each value in the array

  var hash = {};
  for (var i=0; i<array.length; i++){
    hash[ array[i] ] = true;
  }
  return hash;
}

function getDifference(a, b){
  // compute the difference a\b
  //
  // params:
  //   a - (array) (!nil) first array as a set of values (no duplicates)
  //   b - (array) (!nil) second array as a set of values (no duplicates)
  //
  // return: (array)
  //   the set of values (no duplicates) in array a and not in b, 
  //   listed in the same order as in array a.

  var hash = getHash(b);
  var diff = [];
  for (var i=0; i<a.length; i++){
    var value = a[i];
    if ( !hash[value]){
      diff.push(value);
    }
  }
  return diff;
}

结合 Christoph 的想法并假设数组和对象/哈希上的一些非标准迭代方法（each 和朋友），我们可以在线性时间内得到集合差、并集和交集，总共大约 20 行：

var setOPs = {
  minusAB : function (a, b) {
    var h = {};
    b.each(function (v) { h[v] = true; });
    return a.filter(function (v) { return !h.hasOwnProperty(v); });
  },
  unionAB : function (a, b) {
    var h = {}, f = function (v) { h[v] = true; };
    a.each(f);
    b.each(f);
    return myUtils.keys(h);
  },
  intersectAB : function (a, b) {
    var h = {};
    a.each(function (v) { h[v] = 1; });
    b.each(function (v) { h[v] = (h[v] || 0) + 1; });
    var fnSel = function (v, count) { return count > 1; };
    var fnVal = function (v, c) { return v; };
    return myUtils.select(h, fnSel, fnVal);
  }
};

这假设 each 和 filter 是为数组定义的，并且我们有两个实用方法：

myUtils.keys(hash):返回带有哈希键的数组
myUtils.select(hash, fnSelector, fnEvaluator):返回一个与呼叫结果的数组 fnEvaluator在键/值对上fnSelector 返回真。

这 select() 松散地受到 Common Lisp 的启发，并且仅仅是 filter() 和 map() 卷成一团。（最好将它们定义在 Object.prototype, ，但这样做会对 jQuery 造成严重破坏，所以我选择了静态实用方法。）

表现：测试用

var a = [], b = [];
for (var i = 100000; i--; ) {
  if (i % 2 !== 0) a.push(i);
  if (i % 3 !== 0) b.push(i);
}

给出两个包含 50,000 和 66,666 个元素的集合.对于这些值，A-B 大约需要 75 毫秒，而并集和交集各大约需要 150 毫秒。（Mac Safari 4.0，使用 Javascript Date 进行计时。）

我认为这对于 20 行代码来说已经是不错的回报了。

使用下划线.js （函数式 JS 库）

>>> var foo = [1,2,3]
>>> var bar = [1,2,4]
>>> _.difference(foo, bar);
[4]

一些简单的功能，借用@milan的答案：

const setDifference = (a, b) => new Set([...a].filter(x => !b.has(x)));
const setIntersection = (a, b) => new Set([...a].filter(x => b.has(x)));
const setUnion = (a, b) => new Set([...a, ...b]);

用法：

const a = new Set([1, 2]);
const b = new Set([2, 3]);

setDifference(a, b); // Set { 1 }
setIntersection(a, b); // Set { 2 }
setUnion(a, b); // Set { 1, 2, 3 }

至于禁食方式，这不是那么优雅，但我已经进行了一些测试来确定。将一个数组作为对象加载可以更快地进行大量处理：

var t, a, b, c, objA;

    // Fill some arrays to compare
a = Array(30000).fill(0).map(function(v,i) {
    return i.toFixed();
});
b = Array(20000).fill(0).map(function(v,i) {
    return (i*2).toFixed();
});

    // Simple indexOf inside filter
t = Date.now();
c = b.filter(function(v) { return a.indexOf(v) < 0; });
console.log('completed indexOf in %j ms with result %j length', Date.now() - t, c.length);

    // Load `a` as Object `A` first to avoid indexOf in filter
t = Date.now();
objA = {};
a.forEach(function(v) { objA[v] = true; });
c = b.filter(function(v) { return !objA[v]; });
console.log('completed Object in %j ms with result %j length', Date.now() - t, c.length);

结果：

completed indexOf in 1219 ms with result 5000 length
completed Object in 8 ms with result 5000 length

然而，这适用于 仅字符串. 。如果您打算比较编号集，您需要将结果映射到 解析浮点型.

这个可行，但我认为另一个更短，也更优雅

A = [1, 'a', 'b', 12];
B = ['a', 3, 4, 'b'];

diff_set = {
    ar : {},
    diff : Array(),
    remove_set : function(a) { ar = a; return this; },
    remove: function (el) {
        if(ar.indexOf(el)<0) this.diff.push(el);
    }
}

A.forEach(diff_set.remove_set(B).remove,diff_set);
C = diff_set.diff;

许可以下： CC-BY-SA 和归因

不隶属于 StackOverflow