Question

I am trying to make a profanity filter with javascript. I was successful but when I encode the bad words I can't get it

to work. I have been working on this for two days straight.

I have tried to unescape the code in a variable and then use the variable when matching. I have tried unescaping in the

match code too. I have tried mixing in document.write and everything else I can think of.

My original functioning code:

var badwords = /fck|psssy|ssshole/i;

Baddata1 = FirstName.value;
Baddata2 = LastName.value;


if (Baddata1.match(badwords))
            {
                checker();
                FirstName.focus();
                return false;
            }
            if (Baddata2.match(badwords))
            {
                checker();
                LastName.focus();
                return false;
            }
/**
 * Show a blocking alert asking the user to remove the bad words.
 */
function checker() {
    var warning = "Please Remove Bad Words";
    window.alert(warning);
}
Was it helpful?

Solution

You can "reverse" (invert) each character of the string by subtracting its char code from 0xffff to encode it, then run the same function again to get the clear text back. Use "new RegExp" to construct the pattern from the decoded string:

// "bad|nasty|word" put through reverse() function.
// Written with \u escapes on purpose: the encoded characters are halfwidth
// katakana (U+FF83..U+FF9E), and when pasted raw they are easily rewritten by
// Unicode normalization or a file re-encoding into their fullwidth
// look-alikes, which decode to garbage instead of the word list.
var encstr = "\uff9d\uff9e\uff9b\uff83\uff91\uff9e\uff8c\uff8b\uff86\uff83\uff88\uff90\uff8d\uff9b";
var badwords = new RegExp(reverse(encstr), "i");
var Baddata1 = "bad";
var Baddata2 = "LastName";

/**
 * Obfuscate or de-obfuscate a string by replacing every UTF-16 code unit `c`
 * with `0xffff - c`. Applying the function twice gives back the input, so the
 * same function both encodes and decodes.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The converted text, or "" when `str` is empty or falsy.
 */
function reverse(str) {
    var sout = "", ix;
    if (!str) {
        return "";
    }
    for (ix = 0;  ix < str.length;  ++ix) {
        sout += String.fromCharCode(0xffff - str.charCodeAt(ix));
    }
    return sout;
}


if (Baddata1.match(badwords))
{
    checker();
    FirstName.focus();
    return false;
}
if (Baddata2.match(badwords))
{
    checker();
    LastName.focus();
    return false;
}
/**
 * Pop up an alert telling the user to remove the bad words.
 */
function checker() {
    var message = "Please Remove Bad Words";
    window.alert(message);
}

Working jsfiddle here.

If you don't like using high character codes, I can easily substitute various encoding functions which don't, though this one is the most compact.

Edit: To get the reversed string, either use a JS debugger to call reverse, or, add temporary code like this:

// Temporary code: prints the encoded form of the plain-text word list so it
// can be copied into the encstr literal.
console.log(reverse("bad|nasty|word"));

This works because reverse(reverse(string1)) === string1. reverse undoes itself.

You could also keep the list of words in a separate script, join it with the array's join method, and pass the result to reverse to generate the encoded string, for example:

// Plain-text words, kept only in this separate generator script.
var wordlist = ["bad", "nasty", "word"];
// Join the words into a single regex alternation: "bad|nasty|word".
var joined = wordlist.join("|");
// Print a ready-to-paste declaration holding the encoded form.
console.log('var encstr = "'.concat(reverse(joined), '"'));

Once you've copied the string from the debug console and pasted it, the separate script could easily check that it's correct:

// Encoded word list, written with \u escapes: the encoded characters are
// halfwidth katakana (U+FF83..U+FF9E), and a raw paste is easily rewritten by
// Unicode normalization into fullwidth look-alikes that no longer decode to
// the original words.
var encstr = "\uff9d\uff9e\uff9b\uff83\uff91\uff9e\uff8c\uff8b\uff86\uff83\uff88\uff90\uff8d\uff9b";
// Decoding the pasted literal must give back the joined plain-text list.
alert("encstr " + (reverse(encstr) === joined ? "matches" : "does NOT match") + " original");

Edit 2: If you don't want to use high char codes that fall into international ranges, just use an encoding like base64, or this simple set:

/**
 * Encode a string as a comma-separated list of its UTF-16 code units written
 * in lowercase hexadecimal, e.g. "bad" -> "62,61,64".
 *
 * @param {string} str - Text to encode.
 * @returns {string} The encoded list, or "" when `str` is empty or falsy.
 */
function encodeStr(str) {
    var hexCodes = [];
    var pos;

    if (!str) {
        return "";
    }

    for (pos = 0; pos < str.length; pos += 1) {
        hexCodes.push(str.charCodeAt(pos).toString(16));
    }

    return hexCodes.join(",");
}

/**
 * Decode a comma-separated list of hexadecimal char codes back into a string,
 * e.g. "62,61,64" -> "bad".
 *
 * @param {string} str - Comma-separated hex codes.
 * @returns {string} The decoded text, or "" when `str` is empty or falsy.
 */
function decodeStr(str) {
    if (!str) {
        return "";
    }

    return str
        .split(",")
        .map(function (hex) {
            return String.fromCharCode(parseInt(hex, 16));
        })
        .join("");
}

// Using encodeStr on "bad|nasty|word" makes this:
// (one hexadecimal char code per character, separated by commas)
var encstr = "62,61,64,7c,6e,61,73,74,79,7c,77,6f,72,64";
// Decode the list back into the pattern text and compile it as a
// case-insensitive regular expression.
var badwords = new RegExp(decodeStr(encstr), "i");
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top