Matching only characters in sequence of a word from a given string
-
14-06-2021 - |
Question
I am trying to find the closest match for a word, given a specific string. For example, I would expect:
"jonston" x "john" => "jo" //only "jo" is the part that matches
"joshua" x "john" => "jo"
"mark" x "marta" => "mar"
As you can see, I only want to retrieve the characters that match in sequence from the start of both words. That is why joshua and john would only have jo in common, and not joh, even though both contain the letter h.
I've tried that with regular expression by using the following:
"john".match(/["joshua"]+/) //=> outputs ["joh"] and not ["jo"]
Is there any way I could match only the leading characters that both strings share?
I will be using JavaScript for the implementation.
I hope that makes sense
Thanks in advance
Solution
/**
 * Returns the longest prefix shared by two strings.
 *
 * The strings are compared index by index (UTF-16 code units) starting at 0,
 * and the scan stops at the first position where they differ or where either
 * string ends.
 *
 * @param {string} a - String the prefix is taken from.
 * @param {string} b - String to compare against.
 * @returns {string} The leading characters of `a` that also lead `b`,
 *     or "" when the first characters already differ.
 */
function initLCS(a, b) {
  var end = 0;
  // b[end] is undefined once `b` is exhausted; that never strictly equals a
  // character of `a`, so no separate bound check on `b` is needed.
  while (end < a.length && a[end] === b[end]) {
    end++;
  }
  // slice() replaces the deprecated substr().
  return a.slice(0, end);
}

initLCS("jonston", "john"); // "jo"
initLCS("jonston", "j111"); // "j"
initLCS("xx", "yy"); // ""
If you insist on using regular expressions, it goes like this:
/**
 * Regular-expression variant: returns the longest prefix shared by two strings.
 *
 * Builds a pattern of nested optional groups from `b`, e.g. "john" becomes
 * /^(j(o(h(n)?)?)?)?/g, and applies it to `a`. Each character of `b` is
 * escaped first so that regex metacharacters (".", "(", "[", "*", ...) are
 * matched literally instead of being interpreted or breaking the pattern.
 *
 * @param {string} a - String the pattern is applied to.
 * @param {string} b - String the pattern is built from.
 * @returns {string} The leading characters common to `a` and `b`, or "".
 */
function initLCS(a, b) {
  // Escape a single character for literal use inside a RegExp source.
  function escapeChar(ch) {
    return ch.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  }

  var chars = b.split("");
  var opening = "";
  // Built iteratively: avoids the repeated Array#shift and the recursion of a
  // recursive builder while producing the same nested-group pattern.
  for (var i = 0; i < chars.length; i++) {
    opening += "(" + escapeChar(chars[i]);
  }
  // One ")?" per opened group.
  var closing = new Array(chars.length + 1).join(")?");

  var re = new RegExp("^" + opening + closing, "g");
  // Every group is optional, so the pattern always matches (possibly the
  // empty string) and match() never returns null here.
  return a.match(re)[0];
}
This creates an expression like /^(j(o(h(n)?)?)?)?/g
from the second string and applies it to the first one. Not that it makes much sense, just for the heck of it.
OTHER TIPS
// Collect into `x` the run of characters that `a` and `b` share from index 0.
var a = "john";
var b = "joshua";
var x = "";
var pos = 0;
// Walk both strings in lockstep and stop at the first index where they
// disagree; b[pos] is undefined past the end of `b`, which also ends the scan.
while (pos < a.length && a[pos] === b[pos]) {
  x += a[pos];
  pos++;
}
console.log(x);
Yet another solution:
// Install String#commonFirstChars only if nothing with that name exists yet.
// NOTE(review): extending a native prototype affects every string in the
// program; a standalone function would be safer if callers can be changed.
if (typeof String.prototype.commonFirstChars !== 'function') {
  /**
   * Returns the leading characters this string has in common with `s`.
   *
   * @param {string} s - String to compare against.
   * @returns {string} The shared prefix; "" when the first characters differ,
   *     and the whole receiver when it is itself a prefix of `s`.
   */
  String.prototype.commonFirstChars = function(s) {
    // In non-strict code `this` is a String wrapper object; work on the primitive.
    var self = String(this);
    var end = 0;
    while (end < self.length && self[end] === s[end]) {
      end++;
    }
    // Always return a string, including when the loop consumes all of `self`.
    return self.slice(0, end);
  };
}
You can use it like this:
// Example: store the prefix shared by "john" and "joshua".
var commonFirstChars = "john".commonFirstChars("joshua");
// For this pair, swapping receiver and argument gives the same result:
// "john".commonFirstChars("joshua") === "joshua".commonFirstChars("john")
This will return:
jo
You cannot really do this with a regex. Why don't you just loop through both strings and compare the characters at each index? You can keep selecting characters until you hit an index where the two strings have different values.
I'd do this in a recursive function like this:
EDIT: Updated example to make it more readable.
// Word pairs to compare; each entry is [word1, word2].
var testWords = [
  ['ted', 'terminator'],
  ['joe', 'john'],
  ['foo', 'bar']
];

/**
 * Finds the leading characters shared by the two words of a pair.
 *
 * @param {string[]} wordPair - Two-element array [word1, word2].
 * @returns {Array} [word1, word2, commonPrefix]; commonPrefix is "" when the
 *     words do not start with the same character.
 */
function matchWordPair(wordPair) {
  return (function matchChars(word1, word2, matched) {
    // Stop when word1 is used up or the current heads differ. The length
    // check is required: for identical words both heads become undefined at
    // the same time, undefined !== undefined is false, and the recursion
    // would otherwise never terminate. (An exhausted word2 yields an
    // undefined head, which already differs from any character of word1.)
    if (word1.length === 0 || word1[0] !== word2[0]) {
      return [wordPair[0], wordPair[1], matched];
    }
    return matchChars(word1.slice(1), word2.slice(1), matched + word1[0]);
  }(wordPair[0], wordPair[1], ''));
}

var matches = testWords.map(matchWordPair);

// Print one "word1, word2, prefix" line per pair.
console.log(matches.map(function(match) { return match.join(', '); }).join('\n'));
Fiddle (updated): http://jsfiddle.net/VU5QT/2/