Question

I have a list of objects (undirected edges) like below:

pairs = [

 pair:["a2", "a5"],
 pair:["a3", "a6"],
 pair:["a4", "a5"],
 pair:["a7", "a9"]

];

I need to find all components (connected nodes) in separate groups. So from given pairs I need to get:

groups = [
  group1: ["a2", "a5", "a4"],
  group2: ["a3", "a6"],
  group3: ["a7", "a9"]
];

I actually read some answers on here and googled this and that's how I learned this is called "finding connected components in a graph" but, could't find any sample code. I use JavaScript on Node.js but any sample with other languages would be very helpful. Thanks.

Was it helpful?

Solution

This can be solved using a Breadth First Search.

The idea is to traverse all reachable vertices from a source vertex, by hopping to adjacent vertices. Vertices right next to the source vertex are first visited, followed by vertices that are 2 hops away, etc.

The code here is not very efficient due to the graph representation used, which is an edge list . To obtain better performance, you might want to use an adjacency list.

Here's some working code in JavaScript. I used node.js to run this:

// Breadth First Search function
// v is the source vertex
// all_pairs is the input array, which contains length 2 arrays
// visited is a dictionary for keeping track of whether a node is visited
var bfs = function(v, all_pairs, visited) {
  var q = [];
  var current_group = [];
  var i, nextVertex, pair;
  var length_all_pairs = all_pairs.length;
  q.push(v);
  while (q.length > 0) {
    v = q.shift();
    if (!visited[v]) {
      visited[v] = true;
      current_group.push(v);
      // go through the input array to find vertices that are
      // directly adjacent to the current vertex, and put them
      // onto the queue
      for (i = 0; i < length_all_pairs; i += 1) {
        pair = all_pairs[i];
        if (pair[0] === v && !visited[pair[1]]) {
          q.push(pair[1]);
        } else if (pair[1] === v && !visited[pair[0]]) {
          q.push(pair[0]);
        }
      }
    }
  }
  // return everything in the current "group"
  return current_group;
};

var pairs = [
  ["a2", "a5"],
  ["a3", "a6"],
  ["a4", "a5"],
  ["a7", "a9"]
];

var groups = [];
var i, k, length, u, v, src, current_pair;
var visited = {};

// main loop - find any unvisited vertex from the input array and
// treat it as the source, then perform a breadth first search from
// it. All vertices visited from this search belong to the same group
for (i = 0, length = pairs.length; i < length; i += 1) {
  current_pair = pairs[i];
  u = current_pair[0];
  v = current_pair[1];
  src = null;
  if (!visited[u]) {
    src = u;
  } else if (!visited[v]) {
    src = v;
  }
  if (src) {
    // there is an unvisited vertex in this pair.
    // perform a breadth first search, and push the resulting
    // group onto the list of all groups
    groups.push(bfs(src, pairs, visited));
  }
}

// show groups
console.log(groups);

UPDATE: I have updated my answer to demonstrate how to convert an edge list to an adjacency list. The code is commented and should illustrate the concept rather well. The Breadth First Search function is modified to make use of an adjacency list, along with another slight modification (regarding marking vertices as visited).

// Converts an edgelist to an adjacency list representation
// In this program, we use a dictionary as an adjacency list,
// where each key is a vertex, and each value is a list of all
// vertices adjacent to that vertex
var convert_edgelist_to_adjlist = function(edgelist) {
  var adjlist = {};
  var i, len, pair, u, v;
  for (i = 0, len = edgelist.length; i < len; i += 1) {
    pair = edgelist[i];
    u = pair[0];
    v = pair[1];
    if (adjlist[u]) {
      // append vertex v to edgelist of vertex u
      adjlist[u].push(v);
    } else {
      // vertex u is not in adjlist, create new adjacency list for it
      adjlist[u] = [v];
    }
    if (adjlist[v]) {
      adjlist[v].push(u);
    } else {
      adjlist[v] = [u];
    }
  }
  return adjlist;
};

// Breadth First Search using adjacency list
var bfs = function(v, adjlist, visited) {
  var q = [];
  var current_group = [];
  var i, len, adjV, nextVertex;
  q.push(v);
  visited[v] = true;
  while (q.length > 0) {
    v = q.shift();
    current_group.push(v);
    // Go through adjacency list of vertex v, and push any unvisited
    // vertex onto the queue.
    // This is more efficient than our earlier approach of going
    // through an edge list.
    adjV = adjlist[v];
    for (i = 0, len = adjV.length; i < len; i += 1) {
      nextVertex = adjV[i];
      if (!visited[nextVertex]) {
        q.push(nextVertex);
        visited[nextVertex] = true;
      }
    }
  }
  return current_group;
};

var pairs = [
  ["a2", "a5"],
  ["a3", "a6"],
  ["a4", "a5"],
  ["a7", "a9"]
];

var groups = [];
var visited = {};
var v;

// this should look like:
// {
//   "a2": ["a5"],
//   "a3": ["a6"],
//   "a4": ["a5"],
//   "a5": ["a2", "a4"],
//   "a6": ["a3"],
//   "a7": ["a9"],
//   "a9": ["a7"]
// }
var adjlist = convert_edgelist_to_adjlist(pairs);

for (v in adjlist) {
  if (adjlist.hasOwnProperty(v) && !visited[v]) {
    groups.push(bfs(v, adjlist, visited));
  }
}

console.log(groups);

OTHER TIPS

The task you want to solve is best sovled with the algorithm of Disjoint Set Forest.

Basically it is meant to solve exactly the problem you describe in an optimal manner.

  • assign every vertex the set of vertices in which it belong. Let us denote it as root(v). Every such set will be considered as planted tree throughout the whole algorithm. Thus we will associate the root(v) with the root of the tree. So let's assume root(v) returns a vertex index.
  • The algorithm will go easiest if you keep throughout the algorithm one auxiliary array - the parent vertex of each vertex in its planted tree. If no such vertex exists we will store -1 instead to denote that. Thus you start the algorithm with parent[v] = -1 for each v as all vertices are initially in their own trees and you have no parents
  • We will also need one additional array that stores something that is called the rank of a vertex. It is basically equal to the depth of the subtree having the vertex as root. You need not worry too much about it. The array is used for performance optimizations. Let's name it rank. We initialize all ranks to 1s
  • You start processing the edges one by one and each edge might possibly trigger merge of trees. That is the interesting part in which you can optimise.

Here is a pseudo code:

parent[v] = -1 for v in Vertices
rank[v] = 1 for v in Vertices
root (v):
   processed = []
   while parent[v] != -1
      processed << v
      v = parent[v]
   for vertex : processed
      parent = v // optimisation: here we move the assoc. trees to be directly connected the root
   return v

join (v1, v2):
  if rank[v1] < rank[v2]:
     parent[v1] = v2
  if rank[v1] > rank[v2]:
     parent[v2] = v1
  parent[v2] = v1
  rank[v1]++
   
merge_trees (v1, v2)
  root1 = root(v1)
  root2 = root(v2)
  if root1 == root2:
    // already in same tree nothing else to be done here
    return true
  else
    // join trees
    join (v1, v2)
    return false

main:
   numberTrees = size(Vertives)
   for edge: edges
      if merge_trees(edge.begin, edge.end):
         numberTrees--
   print numberTrees // this is the number you are interested in.

NOTE If you are not interested too much in performance you can omit the rank thing. Without it your algorithm might run slower, but maybe it will be easier for you to understand and maintain. In this scenario you can join vertices in any direction. I should warn you that then there will exist scenarios that will trigger your algorithm to run slower.

You want to do Graph Traversal

In your specific example, you have no # of nodes, and it may even be difficult to traverse the graph so first we'll get a "graph"

// It will return an object like Vertices{node: EdgesTo{node,node}, node:...}
function toGraph(arr) {
    var graph = {}; // this will hold the node "IDs"
    for (var i = 0; i < arr.length; i++) {
        // "create node" if the it's not added in the graph yet
        graph[arr[i][0]] = graph[arr[i][0]] || {};
        graph[arr[i][1]] = graph[arr[i][1]] || {};
        // add bidirectional "edges" to the "vertices"
        // Yes, we set the value to null, but what's important is to add the key.
        graph[arr[i][0]][arr[i][1]] = null;
        graph[arr[i][1]][arr[i][0]] = null;
    }
    return graph;
}

Then it is very easy to traverse the graph using any method that you choose (DFS, BFS)

I will make an example using DFS:

// to be called after getting the result from toGraph(arr)
function getSubGraphs(graph) {
    var subGraphs = []; // array of connected vertices
    var visited = {};
    for (var i in graph) { // for every node...
        var subGraph = dfs(graph, i, visited); // ... we call dfs
        if (subGraph != null) // if vertex is not added yet in another graph
        subGraphs.push(subGraph);
    }
    return subGraphs;
}

// it will return an array of all connected nodes in a subgraph
function dfs(graph, node, visited) {
    if (visited[node]) return null; // node is already visited, get out of here.
    var subGraph = [];
    visited[node] = true;
    subGraph.push(node);
    for (var i in graph[node]) {
        var result = dfs(graph, i, visited);
        if (result == null) continue;
        subGraph = subGraph.concat(result);
    }
    return subGraph;
}

And you would end up calling it like getSubGraphs(toGraph(myArray)); and do whatever you need with that.

Fiddle Here

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top