JavaScript split Bugs: Fixed! (original) (raw)

The String.prototype.split method is very handy, so it's a shame that if you use a regular expression as its delimiter, the results can be so wildly different cross-browser that odds are you've just introduced bugs into your code (unless you know precisely what kind of data you're working with and are able to avoid the issues). Here's one example of other people venting about the problems. Following are the inconsistencies cross-browser when using regexes with split:

The situation is so bad that I've simply avoided using regex-based splitting in the past.

That ends now. wink

The following script provides a fast, uniform cross-browser implementation of String.prototype.split, and attempts to precisely follow the relevant spec (ECMA-262 v3 §15.5.4.14, pp.103,104).

I've also created a fairly quick and dirty page where you can test the result of more than 50 usages of JavaScript's split method, and quickly compare your browser's results with the correct implementation. On the test page, the pink lines in the third column highlight incorrect results from the native split method. The rightmost column shows the results of the below script. It's all green in every browser I've tested (IE 5.5 – 7, Firefox 2.0.0.4, Opera 9.21, Safari 3.0.1 beta, and Swift 0.2).

Run the tests in your browser.

Here's the script:

/*!

/**

// Avoid running twice; that would break the nativeSplit reference split = split || function (undef) {

var nativeSplit = String.prototype.split,
    compliantExecNpcg = /()??/.exec("")[1] === undef, // NPCG: nonparticipating capturing group
    self;

self = function (str, separator, limit) {
    // If `separator` is not a regex, use `nativeSplit`
    if (Object.prototype.toString.call(separator) !== "[object RegExp]") {
        return nativeSplit.call(str, separator, limit);
    }
    var output = [],
        flags = (separator.ignoreCase ? "i" : "") +
                (separator.multiline  ? "m" : "") +
                (separator.extended   ? "x" : "") + // Proposed for ES6
                (separator.sticky     ? "y" : ""), // Firefox 3+
        lastLastIndex = 0,
        // Make `global` and avoid `lastIndex` issues by working with a copy
        separator = new RegExp(separator.source, flags + "g"),
        separator2, match, lastIndex, lastLength;
    str += ""; // Type-convert
    if (!compliantExecNpcg) {
        // Doesn't need flags gy, but they don't hurt
        separator2 = new RegExp("^" + separator.source + "$(?!\\s)", flags);
    }
    /* Values for `limit`, per the spec:
     * If undefined: 4294967295 // Math.pow(2, 32) - 1
     * If 0, Infinity, or NaN: 0
     * If positive number: limit = Math.floor(limit); if (limit > 4294967295) limit -= 4294967296;
     * If negative number: 4294967296 - Math.floor(Math.abs(limit))
     * If other: Type-convert, then use the above rules
     */
    limit = limit === undef ?
        -1 >>> 0 : // Math.pow(2, 32) - 1
        limit >>> 0; // ToUint32(limit)
    while (match = separator.exec(str)) {
        // `separator.lastIndex` is not reliable cross-browser
        lastIndex = match.index + match[0].length;
        if (lastIndex > lastLastIndex) {
            output.push(str.slice(lastLastIndex, match.index));
            // Fix browsers whose `exec` methods don't consistently return `undefined` for
            // nonparticipating capturing groups
            if (!compliantExecNpcg && match.length > 1) {
                match[0].replace(separator2, function () {
                    for (var i = 1; i < arguments.length - 2; i++) {
                        if (arguments[i] === undef) {
                            match[i] = undef;
                        }
                    }
                });
            }
            if (match.length > 1 && match.index < str.length) {
                Array.prototype.push.apply(output, match.slice(1));
            }
            lastLength = match[0].length;
            lastLastIndex = lastIndex;
            if (output.length >= limit) {
                break;
            }
        }
        if (separator.lastIndex === match.index) {
            separator.lastIndex++; // Avoid an infinite loop
        }
    }
    if (lastLastIndex === str.length) {
        if (lastLength || !separator.test("")) {
            output.push("");
        }
    } else {
        output.push(str.slice(lastLastIndex));
    }
    return output.length > limit ? output.slice(0, limit) : output;
};

// For convenience
String.prototype.split = function (separator, limit) {
    return self(this, separator, limit);
};

return self;

}();

Download it.

Please let me know if you find any problems. Thanks!

Update: This script has become part of my XRegExp library, which includes many other JavaScript regular expression cross-browser compatibility fixes.

Post navigation