// Generating a regular expression to match valid JavaScript identifiers (like https://mathiasbynens.be/demo/javascript-identifier-regex) in Node.js
// Note: run `npm install` first!
'use strict';
const fs = require('fs');
const regenerate = require('regenerate');
const template = require('lodash.template');
// Unicode version whose data packages (`unicode-<version>`) are loaded.
const version = '11.0.0';
/**
 * Loads the code-point list for one Unicode data set of the configured
 * version.
 *
 * @param {string} what - Path inside the data package, e.g.
 *   `General_Category/Uppercase_Letter`.
 * @returns {*} Whatever the package's `code-points.js` module exports.
 */
const get = (what) => {
  const modulePath = `unicode-${ version }/${ what }/code-points.js`;
  return require(modulePath);
};
// General categories needed to construct the ES5 regex. The names are listed
// in the same order as the bindings they are destructured into.
const [Lu, Ll, Lt, Lm, Lo, Nl, Mn, Mc, Nd, Pc] = [
  'Uppercase_Letter',
  'Lowercase_Letter',
  'Titlecase_Letter',
  'Modifier_Letter',
  'Other_Letter',
  'Letter_Number',
  'Nonspacing_Mark',
  'Spacing_Mark',
  'Decimal_Number',
  'Connector_Punctuation',
].map((category) => get(`General_Category/${ category }`));
// Binary properties needed to construct the ES6 regex.
const [ID_Start, ID_Continue, Other_ID_Start] = [
  'ID_Start',
  'ID_Continue',
  'Other_ID_Start',
].map((property) => get(`Binary_Property/${ property }`));
// Compiled template for the final regex literal: a negative lookahead that
// rejects input consisting solely of a reserved word, then one
// identifier-start symbol followed by any number of identifier-part symbols.
const compileRegex = template([
  '/^',
  '(?!(?:<%= reservedWords %>)$)',
  '(?:<%= identifierStart %>)',
  '(?:<%= identifierPart %>)*',
  '$/',
].join(''));
/**
 * Builds the source text of a regex literal matching valid ECMAScript 5.1
 * identifiers.
 * See https://mathiasbynens.be/notes/javascript-identifiers#valid-identifier-names
 *
 * @returns {string} The regex literal produced by `compileRegex`.
 */
const generateES5Regex = () => {
  // Strips astral symbols (U+010000 through U+10FFFF) from a set.
  const stripAstral = (set) => set.removeRange(0x010000, 0x10FFFF);

  const startSet = stripAstral(
    regenerate('$', '_').add(Lu, Ll, Lt, Lm, Lo, Nl)
  );
  // Identifier parts are everything allowed at the start plus ZWNJ, ZWJ,
  // marks, decimal digits, and connector punctuation. The newly added
  // categories need their astral symbols stripped as well.
  const partSet = stripAstral(
    startSet.clone().add('\u200C', '\u200D', Mn, Mc, Nd, Pc)
  );

  // https://mathiasbynens.be/notes/reserved-keywords#ecmascript-5
  // `NaN`, `Infinity`, and `undefined` are not strictly reserved words, even
  // though they kind of behave as if they were; they are not included here.
  const keywords = [
    'do', 'if', 'in',
    'for', 'let', 'new', 'try', 'var',
    'case', 'else', 'enum', 'eval', 'null', 'this', 'true', 'void', 'with',
    'break', 'catch', 'class', 'const', 'false', 'super', 'throw', 'while',
    'yield',
    'delete', 'export', 'import', 'public', 'return', 'static', 'switch',
    'typeof',
    'default', 'extends', 'finally', 'package', 'private',
    'continue', 'debugger', 'function',
    'arguments', 'interface', 'protected',
    'implements', 'instanceof',
  ];

  return compileRegex({
    reservedWords: keywords.join('|'),
    identifierStart: startSet.toString(),
    identifierPart: partSet.toString(),
  });
};
/**
 * Builds the source text of a regex literal matching valid ECMAScript 6
 * identifiers.
 *
 * References:
 * - http://ecma-international.org/ecma-262/6.0/#sec-identifier-names-static-semantics-early-errors
 * - http://unicode.org/reports/tr31/#Default_Identifier_Syntax
 * - https://bugs.ecmascript.org/show_bug.cgi?id=2717#c0
 *
 * @returns {string} The regex literal produced by `compileRegex`.
 */
const generateES6Regex = () => {
  // Note: `ID_Start` already includes `Other_ID_Start`. http://git.io/wRCAfQ
  const startSet = regenerate(ID_Start).add('$', '_');

  // Note: `ID_Continue` already includes `Other_ID_Continue`.
  // http://git.io/wRCAfQ
  const partSet = regenerate(ID_Continue)
    .add(Other_ID_Start)
    .add('$', '_', '\u200C', '\u200D');

  // https://mathiasbynens.be/notes/reserved-keywords#ecmascript-6
  // `NaN`, `Infinity`, and `undefined` are not strictly reserved words, even
  // though they kind of behave as if they were; they are not included here.
  const keywords = [
    'do', 'if', 'in',
    'for', 'let', 'new', 'try', 'var',
    'case', 'else', 'enum', 'eval', 'null', 'this', 'true', 'void', 'with',
    'await', 'break', 'catch', 'class', 'const', 'false', 'super', 'throw',
    'while', 'yield',
    'delete', 'export', 'import', 'public', 'return', 'static', 'switch',
    'typeof',
    'default', 'extends', 'finally', 'package', 'private',
    'continue', 'debugger', 'function',
    'arguments', 'interface', 'protected',
    'implements', 'instanceof',
  ];

  return compileRegex({
    reservedWords: keywords.join('|'),
    identifierStart: startSet.toString(),
    identifierPart: partSet.toString(),
  });
};
// Emit one file per ECMAScript edition: a header comment, a blank line, the
// generated regex literal, and a trailing newline. Each regex is generated
// immediately before its own file is written.
const outputs = [
  {
    file: 'ecmascript-5.1.js',
    header: '// ECMAScript 5.1:',
    generate: generateES5Regex,
  },
  {
    file: 'ecmascript-6.js',
    header: '// ECMAScript 6:',
    generate: generateES6Regex,
  },
];
for (const { file, header, generate } of outputs) {
  fs.writeFileSync(file, `${ header }\n\n${ generate() }\n`);
}