tentakelfabrik
/
tiny-consent

'use strict';
const generate = require('regjsgen').generate;const parse = require('regjsparser').parse;const regenerate = require('regenerate');const unicodeMatchProperty = require('unicode-match-property-ecmascript');const unicodeMatchPropertyValue = require('unicode-match-property-value-ecmascript');const iuMappings = require('./data/iu-mappings.js');const ESCAPE_SETS = require('./data/character-class-escape-sets.js');
// Regenerate set holding every Unicode code point. It is the universe that
// negated character classes are subtracted from when the `u` flag is set.
const UNICODE_SET = regenerate().addRange(0x0, 0x10FFFF);

// Without the `u` flag, the range stops at 0xFFFF.
// https://mths.be/es6#sec-pattern-semantics
const BMP_SET = regenerate().addRange(0x0, 0xFFFF);

// Regenerate set holding every code point that `/./u` is supposed to match:
// all of Unicode minus the `LineTerminator`s.
// https://mths.be/es6#sec-atom
// https://mths.be/es6#sec-line-terminators
const DOT_SET_UNICODE = UNICODE_SET.clone().remove(
	0x000A, // Line Feed <LF>
	0x000D, // Carriage Return <CR>
	0x2028, // Line Separator <LS>
	0x2029  // Paragraph Separator <PS>
);
/**
 * Looks up the precomputed set for a character class escape.
 *
 * @param {string} character - Key passed to the selected table's `get`.
 * @param {*} unicode - Truthy to use one of the Unicode tables.
 * @param {*} ignoreCase - Truthy (together with `unicode`) to use the
 *   Unicode + ignore-case table.
 * @returns {*} Whatever the selected `ESCAPE_SETS` table holds for
 *   `character`.
 */
const getCharacterClassEscapeSet = (character, unicode, ignoreCase) => {
	// `ignoreCase` only matters in Unicode mode; otherwise the regular table
	// is always used.
	const table = unicode ?
		(ignoreCase ? ESCAPE_SETS.UNICODE_IGNORE_CASE : ESCAPE_SETS.UNICODE) :
		ESCAPE_SETS.REGULAR;
	return table.get(character);
};
/**
 * Picks the set that `.` stands for in Unicode mode.
 *
 * @param {*} dotAll - Truthy when `.` should also match line terminators.
 * @returns {*} `UNICODE_SET` when `dotAll` is truthy, else `DOT_SET_UNICODE`.
 */
const getUnicodeDotSet = (dotAll) => {
	if (dotAll) {
		return UNICODE_SET;
	}
	return DOT_SET_UNICODE;
};
/**
 * Loads the data module for a Unicode property (and optional value) from the
 * `regenerate-unicode-properties` package.
 *
 * @param {string} property - Property name; used as the directory when a
 *   value is given, otherwise as a file name under `Binary_Property`.
 * @param {string} [value] - Property value; when falsy the property is
 *   looked up as a binary property.
 * @returns {*} Whatever the required module exports.
 * @throws {Error} If requiring the module fails for any reason.
 */
const getUnicodePropertyValueSet = (property, value) => {
	let path = `Binary_Property/${ property }`;
	if (value) {
		path = `${ property }/${ value }`;
	}
	try {
		return require(`regenerate-unicode-properties/${ path }.js`);
	} catch (exception) {
		const message =
			`Failed to recognize value \`${ value }\` for property ` +
			`\`${ property }\`.`;
		throw new Error(message);
	}
};
/**
 * Resolves a property escape that has no `=` in it: first as a
 * `General_Category` value, then as a binary property.
 *
 * @param {string} value - The lone name found inside the escape.
 * @returns {*} The result of `getUnicodePropertyValueSet` for whichever
 *   interpretation succeeded.
 * @throws Whatever `unicodeMatchProperty` or `getUnicodePropertyValueSet`
 *   throws when the binary-property interpretation fails as well.
 */
const handleLoneUnicodePropertyNameOrValue = (value) => {
	const generalCategory = 'General_Category';
	try {
		// Note: `unicodeMatchPropertyValue` throws on invalid values.
		return getUnicodePropertyValueSet(
			generalCategory,
			unicodeMatchPropertyValue(generalCategory, value)
		);
	} catch (exception) {
		// Deliberately ignored: it is not a `General_Category` value, so fall
		// back to treating it as a binary property below.
	}
	// Note: `unicodeMatchProperty` throws on invalid properties.
	return getUnicodePropertyValueSet(unicodeMatchProperty(value));
};
/**
 * Builds the set for a Unicode property escape body.
 *
 * @param {string} value - Either a lone name or a `Property=Value` pair.
 * @param {*} isNegative - Truthy to return the complement within
 *   `UNICODE_SET`.
 * @returns {*} A fresh set: a clone of the matched set, or a clone of
 *   `UNICODE_SET` with the matched set removed.
 */
const getUnicodePropertyEscapeSet = (value, isNegative) => {
	const parts = value.split('=');
	let matched;
	if (parts.length === 1) {
		matched = handleLoneUnicodePropertyNameOrValue(parts[0]);
	} else {
		// The pattern consists of two parts, i.e. `Property=Value`.
		const canonicalProperty = unicodeMatchProperty(parts[0]);
		const canonicalValue = unicodeMatchPropertyValue(canonicalProperty, parts[1]);
		matched = getUnicodePropertyValueSet(canonicalProperty, canonicalValue);
	}
	// Always hand back a clone so callers never mutate the shared data.
	return isNegative ?
		UNICODE_SET.clone().remove(matched) :
		matched.clone();
};
// Walks the code points from `min` up to `max` and adds the case-folded
// counterpart of each one (when `caseFold` yields one) to this set. The body
// runs at least once, so `min` is always examined. Returns the set itself so
// that calls can be chained.
regenerate.prototype.iuAddRange = function(min, max) {
	let codePoint = min;
	do {
		const folded = caseFold(codePoint);
		if (folded) {
			this.add(folded);
		}
		++codePoint;
	} while (codePoint <= max);
	return this;
};
/**
 * Parses `pattern` and copies the resulting node's properties onto `item`,
 * so that `item` is rewritten in place.
 *
 * @param {Object} item - Node to overwrite; mutated via `Object.assign`.
 * @param {string} pattern - Replacement pattern source to parse.
 * @returns {undefined}
 */
const update = (item, pattern) => {
	const flags = config.useUnicodeFlag ? 'u' : '';
	const parsed = parse(pattern, flags);
	// Character classes, groups and single values are used as-is; anything
	// else is wrapped in a non-capturing group first.
	const needsNoWrapping =
		parsed.type === 'characterClass' ||
		parsed.type === 'group' ||
		parsed.type === 'value';
	Object.assign(item, needsNoWrapping ? parsed : wrap(parsed, pattern));
};
/**
 * Wraps a node in a non-capturing group node.
 *
 * @param {Object} tree - Node that becomes the sole element of `body`.
 * @param {string} pattern - Source text of `tree`; used to build `raw`.
 * @returns {Object} A new group node with `behavior` set to `'ignore'`.
 */
const wrap = (tree, pattern) => ({
	type: 'group',
	behavior: 'ignore',
	body: [tree],
	raw: `(?:${ pattern })`
});
/**
 * Looks up the case-folded counterpart of a code point in `iuMappings`.
 *
 * @param {number} codePoint - Key to look up.
 * @returns {*} The mapped value when it is truthy, otherwise `false`.
 */
const caseFold = (codePoint) => {
	const folded = iuMappings.get(codePoint);
	if (folded) {
		return folded;
	}
	return false;
};
/**
 * Rewrites a character class node in place: collects everything its body
 * matches into one set, complements it for negated classes, and replaces the
 * node with the parsed string form of that set.
 *
 * @param {Object} characterClassItem - Node with a `body` array and a
 *   `negative` flag; mutated through `update`.
 * @param {Object} regenerateOptions - Passed to the set's `toString`.
 * @returns {Object} The same `characterClassItem`.
 * @throws {Error} If a body entry has an unexpected `type`.
 */
const processCharacterClass = (characterClassItem, regenerateOptions) => {
	// Case-folded code points are only added when both the `i` and `u` flags
	// are set and the output is not itself using the `u` flag.
	const shouldCaseFold =
		config.ignoreCase && config.unicode && !config.useUnicodeFlag;
	let members = regenerate();
	for (const term of characterClassItem.body) {
		switch (term.type) {
			case 'value': {
				members.add(term.codePoint);
				if (shouldCaseFold) {
					const folded = caseFold(term.codePoint);
					if (folded) {
						members.add(folded);
					}
				}
				break;
			}
			case 'characterClassRange': {
				const low = term.min.codePoint;
				const high = term.max.codePoint;
				members.addRange(low, high);
				if (shouldCaseFold) {
					members.iuAddRange(low, high);
				}
				break;
			}
			case 'characterClassEscape': {
				const escapeSet = getCharacterClassEscapeSet(
					term.value,
					config.unicode,
					config.ignoreCase
				);
				members.add(escapeSet);
				break;
			}
			case 'unicodePropertyEscape': {
				members.add(getUnicodePropertyEscapeSet(term.value, term.negative));
				break;
			}
			// The `default` clause is only here as a safeguard; it should never be
			// reached. Code coverage tools should ignore it.
			/* istanbul ignore next */
			default:
				throw new Error(`Unknown term type: ${ term.type }`);
		}
	}
	if (characterClassItem.negative) {
		// Complement within the full Unicode range or the BMP, depending on
		// whether the pattern uses the `u` flag.
		const universe = config.unicode ? UNICODE_SET : BMP_SET;
		members = universe.clone().remove(members);
	}
	update(characterClassItem, members.toString(regenerateOptions));
	return characterClassItem;
};
/**
 * Turns a named back-reference node into a numbered one, in place.
 *
 * @param {Object} item - Reference node; its `name` is removed and its
 *   `matchIndex` is set.
 * @param {number} index - Group index to store as `matchIndex`.
 * @returns {undefined}
 */
const updateNamedReference = (item, index) => {
	item.matchIndex = index;
	delete item.name;
};
/**
 * Fails if any named reference was left without a matching group.
 *
 * @param {Object} groups - Bookkeeping object; only its
 *   `unmatchedReferences` keys are inspected.
 * @returns {undefined}
 * @throws {Error} Listing the names still present in
 *   `groups.unmatchedReferences`.
 */
const assertNoUnmatchedReferences = (groups) => {
	const leftoverNames = Object.keys(groups.unmatchedReferences);
	if (leftoverNames.length === 0) {
		return;
	}
	throw new Error(`Unknown group names: ${leftoverNames}`);
};
/**
 * Rewrites one parsed pattern node, recursing into the bodies of groups,
 * alternatives, disjunctions and quantifiers.
 *
 * Nodes are rewritten in place through `update`; named groups and named
 * references are converted to numbered ones using the bookkeeping in
 * `groups`.
 *
 * @param {Object} item - Node to process; dispatched on `item.type`.
 * @param {Object} regenerateOptions - Passed to every set's `toString`.
 * @param {Object} groups - Mutable bookkeeping: `lastIndex` (count of
 *   `'normal'` groups seen so far), `names` (name -> index),
 *   `unmatchedReferences` (name -> references seen before their group) and
 *   an optional `onNamedGroup(name, index)` callback.
 * @returns {Object} The processed node.
 * @throws {Error} On a duplicate group name or an unknown node type.
 */
const processTerm = (item, regenerateOptions, groups) => {
	switch (item.type) {
		case 'dot':
			if (config.unicode) {
				update(
					item,
					getUnicodeDotSet(config.dotAll).toString(regenerateOptions)
				);
			} else if (config.dotAll) {
				// TODO: consider changing this at the regenerate level.
				update(item, '[\\s\\S]');
			}
			break;
		case 'characterClass':
			item = processCharacterClass(item, regenerateOptions);
			break;
		case 'unicodePropertyEscape':
			update(
				item,
				getUnicodePropertyEscapeSet(item.value, item.negative)
					.toString(regenerateOptions)
			);
			break;
		case 'characterClassEscape':
			update(
				item,
				getCharacterClassEscapeSet(
					item.value,
					config.unicode,
					config.ignoreCase
				).toString(regenerateOptions)
			);
			break;
		case 'group':
			// Only `'normal'` groups advance the running group index.
			if (item.behavior === 'normal') {
				groups.lastIndex++;
			}
			if (item.name) {
				const name = item.name.value;
				if (groups.names[name]) {
					throw new Error(
						`Multiple groups with the same name (${ name }) are not allowed.`
					);
				}
				const index = groups.lastIndex;
				// Dropping the name turns the group into a plain numbered one.
				delete item.name;
				groups.names[name] = index;
				if (groups.onNamedGroup) {
					groups.onNamedGroup.call(null, name, index);
				}
				// Resolve references that appeared before this group.
				if (groups.unmatchedReferences[name]) {
					groups.unmatchedReferences[name].forEach(reference => {
						updateNamedReference(reference, index);
					});
					delete groups.unmatchedReferences[name];
				}
			}
			/* falls through */
		case 'alternative':
		case 'disjunction':
		case 'quantifier':
			item.body = item.body.map(term => {
				return processTerm(term, regenerateOptions, groups);
			});
			break;
		case 'value': {
			const codePoint = item.codePoint;
			const set = regenerate(codePoint);
			if (config.ignoreCase && config.unicode && !config.useUnicodeFlag) {
				const folded = caseFold(codePoint);
				if (folded) {
					set.add(folded);
				}
			}
			update(item, set.toString(regenerateOptions));
			break;
		}
		case 'reference':
			if (item.name) {
				const name = item.name.value;
				const index = groups.names[name];
				if (index) {
					updateNamedReference(item, index);
					break;
				}
				if (!groups.unmatchedReferences[name]) {
					groups.unmatchedReferences[name] = [];
				}
				// Keep track of references used before the corresponding group.
				groups.unmatchedReferences[name].push(item);
			}
			break;
		case 'anchor':
		case 'empty':
			// Nothing to do here.
			break;
		// The `default` clause is only here as a safeguard; it should never be
		// reached. Code coverage tools should ignore it.
		/* istanbul ignore next */
		default:
			throw new Error(`Unknown term type: ${ item.type }`);
	}
	return item;
};
// Module-level settings shared by the helpers in this file. They are
// overwritten at the start of every `rewritePattern` call.
const config = {
	'ignoreCase': false,
	'unicode': false,
	'dotAll': false,
	'useUnicodeFlag': false
};

/**
 * Rewrites a regular expression pattern and returns the generated source.
 *
 * @param {string} pattern - Pattern source to parse.
 * @param {string} [flags] - Flag string; `i`, `u` and `s` are inspected.
 * @param {Object} [options] - Optional switches read here:
 *   `unicodePropertyEscape`, `namedGroup`, `lookbehind`, `dotAllFlag`,
 *   `useUnicodeFlag` and the `onNamedGroup(name, index)` callback.
 * @returns {string} The result of `generate` on the processed tree.
 * @throws {Error} Whatever parsing or processing throws, including the
 *   check for references to unknown group names.
 */
const rewritePattern = (pattern, flags, options) => {
	// Both helpers keep the original short-circuit results (e.g. `undefined`
	// when `flags` or `options` is missing) rather than coercing to booleans.
	const option = (name) => options && options[name];
	const hasFlag = (flag) => flags && flags.includes(flag);

	const regjsparserFeatures = {
		'unicodePropertyEscape': option('unicodePropertyEscape'),
		'namedGroups': option('namedGroup'),
		'lookbehind': option('lookbehind')
	};

	config.ignoreCase = hasFlag('i');
	config.unicode = hasFlag('u');
	// The `s` flag is only honoured when the caller opted in via `dotAllFlag`.
	config.dotAll = option('dotAllFlag') && hasFlag('s');
	config.useUnicodeFlag = option('useUnicodeFlag');

	const regenerateOptions = {
		'hasUnicodeFlag': config.useUnicodeFlag,
		'bmpOnly': !config.unicode
	};
	const groups = {
		'onNamedGroup': option('onNamedGroup'),
		'lastIndex': 0,
		'names': Object.create(null), // { [name]: index }
		'unmatchedReferences': Object.create(null) // { [name]: Array<reference> }
	};

	const tree = parse(pattern, flags, regjsparserFeatures);
	// Note: `processTerm` mutates `tree` and `groups`.
	processTerm(tree, regenerateOptions, groups);
	assertNoUnmatchedReferences(groups);
	return generate(tree);
};
module.exports = rewritePattern;