I'd go with a map from each word to the set it is currently in. A map (a JavaScript object), with its nearly O(1) key access, should help performance. Start with the same format as proposed by @matt3141:
// Sample input: every pair links two words that must end up in the same set.
var pairs = [
  ["car", "wheel"],
  ["wheel", "tyre"],
  ["bed", "sheets"],
  ["guitar", "strings"],
  ["guitar", "pickup"],
  ["rubber", "tyre"],
  ["truck", "wheel"],
  ["pickup", "car"]
];

/**
 * Groups words into sets of (directly or indirectly) linked words.
 *
 * @param {Array<Array<string>>} pairs - two-element arrays; both words of a
 *   pair are placed in the same set.
 * @returns {Object<string, Array<string>>} a prototype-less object mapping
 *   every word to the array holding its set. All words of one set map to the
 *   very same array object, so sets can be told apart by identity.
 */
function groupWordsIntoSets(pairs) {
  // Created without a prototype so that the `in` checks below only see words
  // that were really added; with a plain {} a word such as "constructor" or
  // "toString" would be treated as already known.
  var setsByWord = Object.create(null);
  for (var i = 0; i < pairs.length; i++) {
    var first = pairs[i][0],
        second = pairs[i][1];
    if (first in setsByWord && second in setsByWord) {
      // both words are already known
      var firstSet = setsByWord[first],
          secondSet = setsByWord[second];
      if (firstSet === secondSet)
        continue; // they already share a set, nothing to do
      // Merge the smaller set into the larger one so fewer words have to be
      // re-pointed; on a tie the set of the first word is kept.
      var larger = secondSet.length > firstSet.length ? secondSet : firstSet,
          smaller = larger === firstSet ? secondSet : firstSet;
      for (var j = 0; j < smaller.length; j++)
        setsByWord[smaller[j]] = larger;
      Array.prototype.push.apply(larger, smaller);
    } else {
      // add the missing word(s) to the existing set, or start a new set
      var set = setsByWord[first] || setsByWord[second] || [];
      if (!(first in setsByWord)) {
        set.push(first);
        setsByWord[first] = set;
      }
      // Checked again after the insertion above, so a pair that names the
      // same word twice adds it only once.
      if (!(second in setsByWord)) {
        set.push(second);
        setsByWord[second] = set;
      }
    }
  }
  return setsByWord;
}

var setsByWord = groupWordsIntoSets(pairs);
This will split your graph into its connected components (in the setsByWord object, these component arrays are indexed by their nodes):
> var results = []; // distinct component arrays, in the order they are first encountered
> for (var word in setsByWord) // visit every word that was assigned to a set
> if (results.indexOf(setsByWord[word])<0) // identity comparison: all words of one component reference the same array
> results.push(setsByWord[word]);
> return results; // the listing that follows shows the value for the sample pairs
[
["car","wheel","tyre","rubber","truck","guitar","strings","pickup"],
["bed","sheets"]
]
If you have a directed graph, and want arrays of all successors by word, you could use this:
// Directed edges [from, to]; the same sample data as used for the undirected case.
var pairs = [
  ["car", "wheel"],
  ["wheel", "tyre"],
  ["bed", "sheets"],
  ["guitar", "strings"],
  ["guitar", "pickup"],
  ["rubber", "tyre"],
  ["truck", "wheel"],
  ["pickup", "car"]
];

/**
 * Builds an adjacency list from directed pairs.
 *
 * @param {Array<Array<string>>} pairs - [from, to] edges.
 * @returns {Object<string, Array<string>>} a prototype-less object mapping
 *   each "from" word to the array of its direct successors, in input order.
 */
function buildGraph(pairs) {
  // Object.create(null): a plain {} would report inherited members (e.g.
  // "constructor") as existing entries, and those have no push().
  return pairs.reduce(function(map, pair) {
    (map[pair[0]] || (map[pair[0]] = [])).push(pair[1]);
    return map;
  }, Object.create(null));
}

/**
 * Collects, for every word of the graph, all words reachable from it.
 *
 * Each list starts with the direct successors, followed by the lists of those
 * successors; a word reachable along several paths appears more than once.
 * If the graph contains cycles the traversal still terminates, but lists of
 * words on a cycle can be incomplete.
 *
 * @param {Object<string, Array<string>>} graph - adjacency list as returned
 *   by buildGraph; it is not modified.
 * @returns {Object<string, Array<string>>} a prototype-less object mapping
 *   every word (sources and targets alike) to its list of successors.
 */
function collectSuccessors(graph) {
  var successors = Object.create(null);
  function getSuccessors(word) {
    if (word in successors)
      return successors[word];
    // Placeholder while this word is being expanded: a cycle leading back
    // here gets an empty list instead of recursing forever. It is empty so
    // that nothing but words can ever end up in a result list.
    successors[word] = [];
    return successors[word] = word in graph
      ? [].concat.apply(graph[word], graph[word].map(getSuccessors))
      : [];
  }
  for (var word in graph)
    getSuccessors(word);
  return successors;
}

var graph = buildGraph(pairs);
var successors = collectSuccessors(graph);
If you are sure there are no cycles in the graph and only want lists for the words that start a path, you might add this:
var results = [];
for (var word in successors)
for (var i=0; word in successors && i<successors[word].length; i++)
delete successors[successors[word][i]];
for (var word in successors)
results.push([word].concat(successors[word]));
return results;
// becomes:
[
["bed","sheets"],
["guitar","strings","pickup","car","wheel","tyre"],
["rubber","tyre"],
["truck","wheel","tyre"]
]