(I'm writing this in the context of JavaScript, but will accept an algorithmically correct answer in any language)
How do you find the shortest substring of each element in an array of strings where the substring is NOT contained within any of the other elements, ignoring case?
Suppose I have an input array such as:
var names = ["Anne", "Anthony", "LouAnn", "Kant", "Louise", "ark"];
The output should be something like:
var uniqueNames = ["ne", "h", "ua", "ka", "i", "r"];
For my purposes, you can safely assume that no element will be wholly contained within another element.
My Thoughts:
It seems that one could probably brute force this, along the lines of:
var names = ["Anne", "Anthony", "LouAnn", "Kant", "Louise", "ark"];
var uniqueNames = [], nameInd, windowSize, substrInd, substr, otherNameInd, foundMatch;
// For each name
for (nameInd = 0; nameInd < names.length; nameInd++)
{
var name = names[nameInd];
// For each possible substring length
windowLoop:
for (windowSize = 1; windowSize <= name.length; windowSize++)
{
// For each starting index of a substring
for (substrInd = 0; substrInd <= name.length-windowSize; substrInd++)
{
substr = name.substring(substrInd,substrInd+windowSize).toLowerCase();
foundMatch = false;
// For each other name
for (otherNameInd = 0; otherNameInd < names.length; otherNameInd++)
{
if (nameInd != otherNameInd && names[otherNameInd].toLowerCase().indexOf(substr) > -1)
{
foundMatch = true;
break;
}
}
if (!foundMatch)
{
// This substr works!
uniqueNames[nameInd] = substr;
break windowLoop;
}
}
}
}
But I have to imagine there's a more elegant solution using tries/prefix trees, suffix arrays, or something interesting like that.
Edit: I believe this is the form the selected answer would take programmatically in JavaScript:
var names = ["Anne", "Anthony", "LouAnn", "Kant", "Louise", "ark"];
var uniqueNames = [], permutations = {}, permutation, nameInd, windowSize, substrInd, substr;
// For each name
for (nameInd = 0; nameInd < names.length; nameInd++)
{
var name = names[nameInd];
// For each possible substring length
windowLoop:
for (windowSize = 1; windowSize <= name.length; windowSize++)
{
// For each starting index of a substring
for (substrInd = 0; substrInd <= name.length-windowSize; substrInd++)
{
substr = name.substring(substrInd,substrInd+windowSize).toLowerCase();
permutations[substr] = (typeof permutations[substr] === "undefined")?nameInd:-1;
}
}
}
for (substr in permutations)
{
permutation = permutations[substr];
if (permutation !== -1 && ((typeof uniqueNames[permutation] === "string" && substr.length < uniqueNames[permutation].length) || typeof uniqueNames[permutation] === "undefined"))
{
uniqueNames[permutation] = substr;
}
}
sandyin there whereas is seei, handr...sandyare not present only because I'm not looking for all smallest substrings that fit the criteria, rather any one is good enough. I would accept an answer that returned a two dimensional array of all of them, but I don't really need that level of detail. An equally valid output could bevar uniqueNames = ["ne", "y", "ua", "ka", "i", "s"];