var util = require('./util');
|
|
var types = require('./types');
|
|
var sets = require('./sets');
|
|
var positions = require('./positions');
|
|
|
|
|
|
module.exports = function(regexpStr) {
|
|
var i = 0, l, c,
|
|
start = { type: types.ROOT, stack: []},
|
|
|
|
// Keep track of last clause/group and stack.
|
|
lastGroup = start,
|
|
last = start.stack,
|
|
groupStack = [];
|
|
|
|
|
|
var repeatErr = function(i) {
|
|
util.error(regexpStr, 'Nothing to repeat at column ' + (i - 1));
|
|
};
|
|
|
|
// Decode a few escaped characters.
|
|
var str = util.strToChars(regexpStr);
|
|
l = str.length;
|
|
|
|
// Iterate through each character in string.
|
|
while (i < l) {
|
|
c = str[i++];
|
|
|
|
switch (c) {
|
|
// Handle escaped characters, inclues a few sets.
|
|
case '\\':
|
|
c = str[i++];
|
|
|
|
switch (c) {
|
|
case 'b':
|
|
last.push(positions.wordBoundary());
|
|
break;
|
|
|
|
case 'B':
|
|
last.push(positions.nonWordBoundary());
|
|
break;
|
|
|
|
case 'w':
|
|
last.push(sets.words());
|
|
break;
|
|
|
|
case 'W':
|
|
last.push(sets.notWords());
|
|
break;
|
|
|
|
case 'd':
|
|
last.push(sets.ints());
|
|
break;
|
|
|
|
case 'D':
|
|
last.push(sets.notInts());
|
|
break;
|
|
|
|
case 's':
|
|
last.push(sets.whitespace());
|
|
break;
|
|
|
|
case 'S':
|
|
last.push(sets.notWhitespace());
|
|
break;
|
|
|
|
default:
|
|
// Check if c is integer.
|
|
// In which case it's a reference.
|
|
if (/\d/.test(c)) {
|
|
last.push({ type: types.REFERENCE, value: parseInt(c, 10) });
|
|
|
|
// Escaped character.
|
|
} else {
|
|
last.push({ type: types.CHAR, value: c.charCodeAt(0) });
|
|
}
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
// Positionals.
|
|
case '^':
|
|
last.push(positions.begin());
|
|
break;
|
|
|
|
case '$':
|
|
last.push(positions.end());
|
|
break;
|
|
|
|
|
|
// Handle custom sets.
|
|
case '[':
|
|
// Check if this class is 'anti' i.e. [^abc].
|
|
var not;
|
|
if (str[i] === '^') {
|
|
not = true;
|
|
i++;
|
|
} else {
|
|
not = false;
|
|
}
|
|
|
|
// Get all the characters in class.
|
|
var classTokens = util.tokenizeClass(str.slice(i), regexpStr);
|
|
|
|
// Increase index by length of class.
|
|
i += classTokens[1];
|
|
last.push({
|
|
type: types.SET,
|
|
set: classTokens[0],
|
|
not: not,
|
|
});
|
|
|
|
break;
|
|
|
|
|
|
// Class of any character except \n.
|
|
case '.':
|
|
last.push(sets.anyChar());
|
|
break;
|
|
|
|
|
|
// Push group onto stack.
|
|
case '(':
|
|
// Create group.
|
|
var group = {
|
|
type: types.GROUP,
|
|
stack: [],
|
|
remember: true,
|
|
};
|
|
|
|
c = str[i];
|
|
|
|
// If if this is a special kind of group.
|
|
if (c === '?') {
|
|
c = str[i + 1];
|
|
i += 2;
|
|
|
|
// Match if followed by.
|
|
if (c === '=') {
|
|
group.followedBy = true;
|
|
|
|
// Match if not followed by.
|
|
} else if (c === '!') {
|
|
group.notFollowedBy = true;
|
|
|
|
} else if (c !== ':') {
|
|
util.error(regexpStr,
|
|
'Invalid group, character \'' + c +
|
|
'\' after \'?\' at column ' + (i - 1));
|
|
}
|
|
|
|
group.remember = false;
|
|
}
|
|
|
|
// Insert subgroup into current group stack.
|
|
last.push(group);
|
|
|
|
// Remember the current group for when the group closes.
|
|
groupStack.push(lastGroup);
|
|
|
|
// Make this new group the current group.
|
|
lastGroup = group;
|
|
last = group.stack;
|
|
break;
|
|
|
|
|
|
// Pop group out of stack.
|
|
case ')':
|
|
if (groupStack.length === 0) {
|
|
util.error(regexpStr, 'Unmatched ) at column ' + (i - 1));
|
|
}
|
|
lastGroup = groupStack.pop();
|
|
|
|
// Check if this group has a PIPE.
|
|
// To get back the correct last stack.
|
|
last = lastGroup.options ?
|
|
lastGroup.options[lastGroup.options.length - 1] : lastGroup.stack;
|
|
break;
|
|
|
|
|
|
// Use pipe character to give more choices.
|
|
case '|':
|
|
// Create array where options are if this is the first PIPE
|
|
// in this clause.
|
|
if (!lastGroup.options) {
|
|
lastGroup.options = [lastGroup.stack];
|
|
delete lastGroup.stack;
|
|
}
|
|
|
|
// Create a new stack and add to options for rest of clause.
|
|
var stack = [];
|
|
lastGroup.options.push(stack);
|
|
last = stack;
|
|
break;
|
|
|
|
|
|
// Repetition.
|
|
// For every repetition, remove last element from last stack
|
|
// then insert back a RANGE object.
|
|
// This design is chosen because there could be more than
|
|
// one repetition symbols in a regex i.e. `a?+{2,3}`.
|
|
case '{':
|
|
var rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max;
|
|
if (rs !== null) {
|
|
if (last.length === 0) {
|
|
repeatErr(i);
|
|
}
|
|
min = parseInt(rs[1], 10);
|
|
max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min;
|
|
i += rs[0].length;
|
|
|
|
last.push({
|
|
type: types.REPETITION,
|
|
min: min,
|
|
max: max,
|
|
value: last.pop(),
|
|
});
|
|
} else {
|
|
last.push({
|
|
type: types.CHAR,
|
|
value: 123,
|
|
});
|
|
}
|
|
break;
|
|
|
|
case '?':
|
|
if (last.length === 0) {
|
|
repeatErr(i);
|
|
}
|
|
last.push({
|
|
type: types.REPETITION,
|
|
min: 0,
|
|
max: 1,
|
|
value: last.pop(),
|
|
});
|
|
break;
|
|
|
|
case '+':
|
|
if (last.length === 0) {
|
|
repeatErr(i);
|
|
}
|
|
last.push({
|
|
type: types.REPETITION,
|
|
min: 1,
|
|
max: Infinity,
|
|
value: last.pop(),
|
|
});
|
|
break;
|
|
|
|
case '*':
|
|
if (last.length === 0) {
|
|
repeatErr(i);
|
|
}
|
|
last.push({
|
|
type: types.REPETITION,
|
|
min: 0,
|
|
max: Infinity,
|
|
value: last.pop(),
|
|
});
|
|
break;
|
|
|
|
|
|
// Default is a character that is not `\[](){}?+*^$`.
|
|
default:
|
|
last.push({
|
|
type: types.CHAR,
|
|
value: c.charCodeAt(0),
|
|
});
|
|
}
|
|
|
|
}
|
|
|
|
// Check if any groups have not been closed.
|
|
if (groupStack.length !== 0) {
|
|
util.error(regexpStr, 'Unterminated group');
|
|
}
|
|
|
|
return start;
|
|
};
|
|
|
|
// Expose the `types` module (the source of every token's `type` value above)
// as a property of the exported tokenizer function.
module.exports.types = types;
|