Question

I would like to sort an array of strings (in javascript) such that groups of digits within the strings are compared as integers not strings. I am not worried about signed or floating point numbers.

for example, the result should be ["a1b3","a9b2","a10b2","a10b11"] not ["a1b3","a10b11","a10b2","a9b2"]

The easiest way to do this seems to be splitting each string on boundaries around groups of digits. Is there a pattern I can pass to String.split to split on character boundaries without removing any characters?

"abc11def22ghi".split(/?/) = ["abc","11","def","22","ghi"];

Or is there another way to compare strings that does not involve splitting them up, perhaps by padding all groups of digits with leading zeros so they are the same length?

"aa1bb" => "aa00000001bb", "aa10bb" => "aa00000010bb"

I am working with arbitrary strings, not strings that have a specific arrangement of digit groups.

Edit:

I like the /(\d+)/ one liner from Gaby to split the array. How backwards compatible is that?

The solutions that parse the strings once in a way that can be used to rebuild the originals are much more efficient that this compare function. None of the answers handle some strings starting with digits and others not, but that would be easy enough to remedy and was not explicit in the original question.

["a100","a20","a3","a3b","a3b100","a3b20","a3b3","!!","~~","9","10","9.5"].sort( function ( inA , inB ) {
    var                     result = 0;

    var                     a , b , pattern = /(\d+)/;
    var                     as = inA.split( pattern );
    var                     bs = inB.split( pattern );
    var                     index , count = as.length;

    if ( ( '' === as[0] ) === ( '' === bs[0] ) ) {
        if ( count > bs.length ) count = bs.length;

        for ( index = 0 ; index < count && 0 === result ; ++index ) {
            a = as[index]; b = bs[index];

            if ( index & 1 ) {
                result = a - b;
            } else {
                result = !( a < b ) ? ( a > b ) ? 1 : 0 : -1;
            }
        }

        if ( 0 === result ) result = as.length - bs.length;
    } else {
        result = !( inA < inB ) ? ( inA > inB ) ? 1 : 0 : -1;
    }

    return result;
} ).toString();

result: "!!,9,9.5,10,a3,a3b,a3b3,a3b20,a3b100,a20,a100,~~"

Was it helpful?

Solution

I think this does what you want

function sortArray(arr) {
    var tempArr = [], n;
    for (var i in arr) {
        tempArr[i] = arr[i].match(/([^0-9]+)|([0-9]+)/g);
        for (var j in tempArr[i]) {
            if( ! isNaN(n = parseInt(tempArr[i][j])) ){
                tempArr[i][j] = n;
            }
        }
    }
    tempArr.sort(function (x, y) {
        for (var i in x) {
            if (y.length < i || x[i] < y[i]) {
                return -1; // x is longer
            }
            if (x[i] > y[i]) {
                return 1;
            }
        }
        return 0;
    });
    for (var i in tempArr) {
        arr[i] = tempArr[i].join('');
    }
    return arr;
}
alert(
    sortArray(["a1b3", "a10b11", "a10b2", "a9b2"]).join(",")
);

OTHER TIPS

Assuming what you want to do is just do a numeric sort by the digits in each array entry (ignoring the non-digits), you can use this:

function sortByDigits(array) {
    var re = /\D/g;

    array.sort(function(a, b) {
        return(parseInt(a.replace(re, ""), 10) - parseInt(b.replace(re, ""), 10));
    });
    return(array);
}

It uses a custom sort function that removes the digits and converts to a number each time it's asked to do a comparison. You can see it work here: http://jsfiddle.net/jfriend00/t87m2/.

If this isn't what you want, then please clarify as your question is not very clear on how the sort should actually work.

Use this compare function for sorting ..

function compareLists(a,b){
    var alist = a.split(/(\d+)/), // split text on change from anything to digit and digit to anything
        blist = b.split(/(\d+)/); // split text on change from anything to digit and digit to anything

    alist.slice(-1) == '' ? alist.pop() : null; // remove the last element if empty
    blist.slice(-1) == '' ? blist.pop() : null; // remove the last element if empty

    for (var i = 0, len = alist.length; i < len;i++){
        if (alist[i] != blist[i]){ // find the first non-equal part
           if (alist[i].match(/\d/)) // if numeric
           {
              return +alist[i] - +blist[i]; // compare as number
           } else {
              return alist[i].localeCompare(blist[i]); // compare as string
           }
        }
    }

    return true;
}

Syntax

var data = ["a1b3","a10b11","b10b2","a9b2","a1b20","a1c4"];
data.sort( compareLists );
alert(data);

demo at http://jsfiddle.net/h9Rqr/7/

Another variant is to use an instance of Intl.Collator with numeric option:

var array = ["a100","a20","a3","a3b","a3b100","a3b20","a3b3","!!","~~","9","10","9.5"];
var collator = new Intl.Collator([], {numeric: true});
array.sort((a, b) => collator.compare(a, b));
console.log(array);

Here's a more complete solution that sorts according to both letters and numbers in the strings

function sort(list) {
    var i, l, mi, ml, x;
    // copy the original array
    list = list.slice(0);

    // split the strings, converting numeric (integer) parts to integers
    // and leaving letters as strings
    for( i = 0, l = list.length; i < l; i++ ) {
        list[i] = list[i].match(/(\d+|[a-z]+)/g);
        for( mi = 0, ml = list[i].length; mi < ml ; mi++ ) {
            x = parseInt(list[i][mi], 10);
            list[i][mi] = !!x || x === 0 ? x : list[i][mi];
        }
    }

    // sort deeply, without comparing integers as strings
    list = list.sort(function(a, b) {
        var i = 0, l = a.length, res = 0;
        while( res === 0 && i < l) {
            if( a[i] !== b[i] ) {
                res = a[i] < b[i] ? -1 : 1;
                break;
            }

            // If you want to ignore the letters, and only sort by numbers
            // use this instead:
            // 
            // if( typeof a[i] === "number" && a[i] !== b[i] ) {
            //     res = a[i] < b[i] ? -1 : 1;
            //     break;
            // }

            i++;
        }
        return res;
    });

    // glue it together again
    for( i = 0, l = list.length; i < l; i++ ) {
        list[i] = list[i].join("");
    }
    return list;
}

Sorting occurs from left to right unless you create a custom algorithm. Letters or digits are compared digits first then letters.

However, what you want to accomplish as per your own example (a1, a9, a10) WON'T EVER HAPPEN. That would require you knowing the data before hand and spliting the string in every possible way before applying the sorting.

One final alternative would be:

a) break each and every string from left to right whenever is a change from letter to digit and vice versa; & b) then start the sorting on those groups from RIGHT-TO-LEFT. That will be a very demanding algorithm. Can be done!

Finally, if you are the GENERATOR of the original "text", you should consider NORMALIZING the output where a1 a9 a10 could be outputed as a01 a09 a10. This way you could have full cotnrol of the final version of the algorithm.

Good luck!

I needed a way to take a mixed string and create a string that could be sorted elsewhere, so that numbers sorted numerically and letters alphabetically. Based on answers above I created the following, which pads out all numbers in a way I can understand, wherever they appear in the string.

function padAllNumbers(strIn) {
    // Used to create mixed strings that sort numerically as well as non-numerically
    var patternDigits = /(\d+)/g; // This recognises digit/non-digit boundaries
    var astrIn = strIn.split( patternDigits ); // we create an array of alternating digit/non-digit groups

    var result = "";

    for (var i=0;i<astrIn.length;  i++) {
        if (astrIn[i] != "") { // first and last elements can be "" and we don't want these padded out
            if (isNaN(astrIn[i])) {
                result += astrIn[i];
            } else {
                result += padOneNumberString("000000000",astrIn[i]);
            }
        }
    }
    return result;
}

function padOneNumberString(pad,strNum,left) {
    // Pad out a string at left (or right)
    if (typeof strNum === "undefined") return pad;
    if (typeof left === "undefined") left = true;
    var padLen =  pad.length - (""+ strNum).length;
    var padding = pad.substr(0,padLen);
    return left?  padding + strNum : strNum + padding;
}
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top