You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1757 lines
42 KiB

4 years ago
  1. 'use strict';
  2. Object.defineProperty(exports, '__esModule', { value: true });
  // Output names
  const JAVASCRIPT_OUTPUT_NAME = 'javascript';
  const CSS_OUTPUT_NAME = 'css';
  const TEMPLATE_OUTPUT_NAME = 'template';

  // Tag names
  const JAVASCRIPT_TAG = 'script';
  const STYLE_TAG = 'style';
  const TEXTAREA_TAG = 'textarea';

  // Boolean attributes (property names used as flags on parsed nodes)
  const IS_RAW = 'isRaw';
  const IS_SELF_CLOSING = 'isSelfClosing';
  const IS_VOID = 'isVoid';
  const IS_BOOLEAN = 'isBoolean';
  const IS_CUSTOM = 'isCustom';
  const IS_SPREAD = 'isSpread';

  /**
   * Frozen lookup object exposing every constant above under its own name.
   * @readonly
   */
  var c = /*#__PURE__*/Object.freeze({
    JAVASCRIPT_OUTPUT_NAME,
    CSS_OUTPUT_NAME,
    TEMPLATE_OUTPUT_NAME,
    JAVASCRIPT_TAG,
    STYLE_TAG,
    TEXTAREA_TAG,
    IS_RAW,
    IS_SELF_CLOSING,
    IS_VOID,
    IS_BOOLEAN,
    IS_CUSTOM,
    IS_SPREAD
  });
  /**
   * Node type identifiers. Not all the types are handled in this module.
   * The numeric values coincide with the DOM `Node.nodeType` constants.
   *
   * @enum {number}
   * @readonly
   */
  const TAG = 1;
  const ATTR = 2;
  const TEXT = 3;
  const CDATA = 4;
  const COMMENT = 8;
  const DOCUMENT = 9;
  const DOCTYPE = 10;
  const DOCUMENT_FRAGMENT = 11;

  // Frozen lookup object exposing every node type under its own name.
  var types = /*#__PURE__*/Object.freeze({
    TAG,
    ATTR,
    TEXT,
    CDATA,
    COMMENT,
    DOCUMENT,
    DOCTYPE,
    DOCUMENT_FRAGMENT
  });
  // Parser error messages.
  // `%1` is a placeholder that callers substitute with `.replace('%1', value)`.
  const rootTagNotFound = 'Root tag not found.';
  const unclosedTemplateLiteral = 'Unclosed ES6 template literal.';
  const unexpectedEndOfFile = 'Unexpected end of file.';
  const unclosedComment = 'Unclosed comment.';
  const unclosedNamedBlock = 'Unclosed "%1" block.';
  const duplicatedNamedTag = 'Duplicate tag "<%1>".';
  const unexpectedCharInExpression = 'Unexpected character %1.';
  const unclosedExpression = 'Unclosed expression.';
  /**
   * Matches the start of a valid tag name; meant to be tested against the
   * first 2 chars following the `'<'`.
   * @const
   * @private
   */
  const TAG_2C = /^(?:\/[a-zA-Z]|[a-zA-Z][^\s>/]?)/;

  /**
   * Matches a valid tag name AFTER it was validated with `TAG_2C`.
   * $1: tag name including any leading `'/'`,
   * $2: the closing `>` when it directly follows the name (no attributes).
   * Global regex: callers drive it through `lastIndex`.
   * @const
   * @private
   */
  const TAG_NAME = /(\/?[^\s>/]+)\s*(>)?/g;

  /**
   * Matches an attribute name and the first char of its value (both optional).
   * $1: attribute name, $2: first char of the value (a quote when quoted).
   * Global regex: callers drive it through `lastIndex`.
   * @const
   * @private
   */
  const ATTR_START = /(\S[^>/=\s]*)(?:\s*=\s*([^>/])?)?/g;

  /**
   * Matches the spread operator (`...`), used to detect spread attributes.
   * @type {RegExp}
   */
  const SPREAD_OPERATOR = /\.\.\./;

  /**
   * Matches the closing tag of a `script`, `style` or `textarea` block.
   * Used by parseText to find the end of the block.
   * @const
   * @private
   */
  const RE_SCRYLE = {
    script: /<\/script\s*>/gi,
    style: /<\/style\s*>/gi,
    textarea: /<\/textarea\s*>/gi
  };

  // Do not touch the text content inside these tags
  const RAW_TAGS = /^\/?(?:pre|textarea)$/;
  /**
   * Append an item to a collection. When the collection is `undefined`
   * a new array is created to hold the item.
   * @param {Array} [collection=[]] - target collection (mutated in place)
   * @param {*} item - item to append
   * @returns {Array} the same collection instance, now containing the item
   */
  function addToCollection(collection = [], item) {
    // `push` mutates in place, so a caller-supplied array is updated as well
    collection.push(item);
    return collection;
  }
  /**
   * Run `RegExp#exec` starting from a specific position.
   * The regex `lastIndex` is overwritten, which only has an effect on
   * global or sticky regexes.
   * @param {RegExp} re - regex to run
   * @param {number} pos - position assigned to `re.lastIndex` before matching
   * @param {string} string - regex target
   * @returns {Array|null} regex result, `null` when nothing matched
   */
  function execFromPos(re, pos, string) {
    re.lastIndex = pos;
    const result = re.exec(string);
    return result;
  }
  /**
   * Escape the regex special characters of a string so it can be embedded
   * in a `RegExp` source.
   *
   * @param {string} str - Raw string
   * @returns {string} Escaped string.
   */
  var escapeStr = (str) => {
    // zero-width lookahead: a backslash is inserted in front of each special char
    const beforeSpecialChar = /(?=[-[\](){^*+?.$|\\])/g;
    return str.replace(beforeSpecialChar, '\\');
  };
  /**
   * Build an error message prefixed by the `[line,col]` location of `pos`.
   * The line is 1-based and the column is 0-based.
   * @param {string} data - buffer the position refers to
   * @param {string} message - error description
   * @param {number} [pos] - error position; any falsy value (including 0)
   *   is replaced by the end of the buffer
   * @returns {string} message in the form `[line,col]: message`
   */
  function formatError(data, message, pos) {
    const offset = pos || data.length;
    // count unix/mac/win eols found before the offset
    const eols = data.slice(0, offset).match(/\r\n?|\n/g);
    const line = (eols ? eols.length : 0) + 1;
    // walk back to the previous eol (or the buffer start) counting the chars
    let col = 0;
    for (let i = offset - 1; i >= 0 && !/[\r\n]/.test(data[i]); i--) {
      col++;
    }
    return `[${line},${col}]: ${message}`;
  }
  const $_ES6_BQ = '`';
  /**
   * Searches the next backquote that signals the end of the ES6 Template Literal
   * or the "${" sequence that starts a JS expression, skipping any escaped
   * character.
   *
   * Two cases are handled explicitly:
   * - the character following a backslash is always skipped, so an escaped
   *   backquote or an escaped `$` never ends the search;
   * - a `$` that is not followed by `{` is plain text and the character after
   *   it is still examined (it may be the closing backquote).
   *
   * @param {string} code - Whole code
   * @param {number} pos - The start position of the template
   * @param {string[]} stack - To save nested ES6 TL count; receives the
   *   backquote marker and `'}'` when a `${` sequence is found
   * @returns {number} Position following the closing backquote or the `${`
   * @throws {string} Formatted "unclosed template literal" message when the
   *   end of the literal is not found
   */
  function skipES6TL(code, pos, stack) {
    // we are in the char following the backquote (`),
    // find the next unescaped backquote or the sequence "${"
    const re = /[`$\\]/g;
    let match;
    re.lastIndex = pos;
    while ((match = re.exec(code)) !== null) {
      const c = match[0];
      pos = re.lastIndex;
      if (c === '`') {
        return pos;
      }
      if (c === '\\') {
        // escaped char: resume the search after it
        re.lastIndex = pos + 1;
        continue;
      }
      // here `c` is '$': it only starts an expression when followed by '{'
      if (code[pos] === '{') {
        stack.push($_ES6_BQ, '}');
        return pos + 1;
      }
    }
    // NOTE: a string is thrown here (unlike `panic`, which throws an Error)
    throw formatError(code, unclosedTemplateLiteral, pos);
  }
  /**
   * Throw a parsing error whose message is prefixed by its location.
   * The location is computed by `formatError` from `data` and `pos`:
   * line (base 1) and col (base 0).
   * @param {string} data - string containing the error
   * @param {string} msg - Error message
   * @param {number} pos - Position of the error
   * @returns {undefined} never returns normally
   * @throws {Error} always
   */
  function panic(data, msg, pos) {
    throw new Error(formatError(data, msg, pos));
  }
  // forked from https://github.com/aMarCruz/skip-regex

  // characters that can safely precede a regex (including `=>`, `**`, and `...`)
  const beforeReChars = '[{(,;:?=|&!^~>%*/';
  // same set, extended with the sign operators
  const beforeReSign = `${beforeReChars}+-`;

  // keywords that can precede a regex (`in` is handled as a special case)
  const beforeReWords = [
    'case',
    'default',
    'do',
    'else',
    'in',
    'instanceof',
    'prefix',
    'return',
    'typeof',
    'void',
    'yield'
  ];

  // Last char of every element of beforeReWords, joined in one string;
  // used as a cheap pre-check before extracting the whole keyword.
  const wordsEndChar = beforeReWords.map(word => word.slice(-1)).join('');

  // Matches a literal regex from the start of the buffer.
  // The buffer to search must not include line-endings.
  const RE_LIT_REGEX = /^\/(?=[^*>/])[^[/\\]*(?:(?:\\.|\[(?:\\.|[^\]\\]*)*\])[^[\\/]*)*?\/[gimuy]*/;

  // Valid characters for JavaScript variable names and literal numbers.
  const RE_JS_VCHAR = /[$\w]/;

  // Matches everything up to the end of the current line (`.` stops at EOLs)
  const RE_DOT_CHAR = /.*/g;
  /**
   * Searches the position of the previous non-blank character inside `code`,
   * starting with `pos - 1`.
   *
   * @param {string} code - Buffer to search
   * @param {number} pos - Starting position
   * @returns {number} Position of the first non-blank character to the left,
   *   or -1 when only blanks precede `pos`.
   * @private
   */
  function _prev(code, pos) {
    let i = pos - 1;
    while (i >= 0 && /\s/.test(code[i])) {
      i--;
    }
    return i;
  }
  /**
   * Check if the character in the `start` position within `code` can be a regex
   * and returns the position following this regex or `start+1` if this is not
   * one.
   *
   * The decision is based on the literal itself (`RE_LIT_REGEX`) and on the
   * token that precedes the slash: an operator, a sign, or one of the
   * keywords listed in `beforeReWords`.
   *
   * NOTE: Ensure `start` points to a slash (this is not checked).
   *
   * @function skipRegex
   * @param {string} code - Buffer to test in
   * @param {number} start - Position the first slash inside `code`
   * @returns {number} Position of the char following the regex.
   *
   */
  /* istanbul ignore next */
  function skipRegex(code, start) {
    const slashPos = start;
    const notRegex = start + 1;

    // `exec()` will extract from the slash to the end of the line
    // and the chained `match()` will match the possible regex.
    RE_DOT_CHAR.lastIndex = slashPos;
    const restOfLine = (RE_DOT_CHAR.exec(code) || ' ')[0];
    const literal = restOfLine.match(RE_LIT_REGEX);
    if (!literal) {
      return notRegex;
    }

    const afterRegex = slashPos + literal[0].length;
    let pos = _prev(code, slashPos);
    let prevChar = code[pos];

    // start of buffer or safe prefix?
    if (pos < 0 || beforeReChars.includes(prevChar)) {
      return afterRegex;
    }

    // from here, `pos` is >= 0 and `prevChar` is code[pos]
    if (prevChar === '.') {
      // can be `...` or something silly like 5./2
      return code[pos - 1] === '.' ? afterRegex : notRegex;
    }

    if (prevChar === '+' || prevChar === '-') {
      // tricky case: a single operator means this is a regex
      if (code[--pos] !== prevChar) {
        return afterRegex;
      }
      // ...we have `++` or `--`: a regex when there is no previous token
      pos = _prev(code, pos);
      if (pos < 0) {
        return afterRegex;
      }
      // ...or when the previous token is an operator
      prevChar = code[pos];
      if (beforeReSign.includes(prevChar)) {
        return afterRegex;
      }
    }

    if (wordsEndChar.includes(prevChar)) { // looks like a keyword?
      const wordEnd = pos + 1;
      // get the complete (previous) keyword
      while (--pos >= 0 && RE_JS_VCHAR.test(code[pos]));
      // is it in the allowed keywords list?
      if (beforeReWords.includes(code.slice(pos + 1, wordEnd))) {
        return afterRegex;
      }
    }

    return notRegex;
  }
  /*
   * Mini-parser for expressions.
   * The main purpose of this module is to find the end of an expression
   * and return its text without the enclosing brackets.
   * Does not work with comments, but supports ES6 template strings.
   */
  /**
   * @exports exprExtr
   */

  /**
   * Source of a regex matching single quoted JS strings, taking care about
   * escaped chars and EOLs (escaped EOLs are Ok).
   *
   * @const
   * @private
   */
  const S_SQ_STR = /'[^'\n\r\\]*(?:\\(?:\r\n?|[\S\s])[^'\n\r\\]*)*'/.source;

  /**
   * Source of a regex matching single or double quoted JS strings.
   * The double quoted branch is the single quoted one with its quotes swapped.
   *
   * @const
   * @private
   */
  const S_STRING = [S_SQ_STR, S_SQ_STR.replace(/'/g, '"')].join('|');

  /**
   * Regex cache, keyed by the closing brackets string.
   *
   * @type {Object.<string, RegExp>}
   * @const
   * @private
   */
  const reBr = {};
  /**
   * Makes a regex that matches quoted strings, brackets, backquotes,
   * slashes and the closing brackets of an expression.
   * The result is cached in `reBr` by closing brackets.
   *
   * @param {string} b - Closing brackets
   * @returns {RegExp} - global regex for the given closing brackets
   */
  function _regex(b) {
    const cached = reBr[b];
    if (cached) {
      return cached;
    }

    const escaped = escapeStr(b);
    let prefix;
    if (b.length > 1) {
      // multi-char closing brackets need their own alternative
      prefix = `${escaped}|[`;
    } else if (/[{}[\]()]/.test(b)) {
      // already part of the character class appended below
      prefix = '[';
    } else {
      prefix = `[${escaped}`;
    }

    const re = new RegExp(`${S_STRING}|${prefix}\`/\\{}[\\]()]`, 'g');
    reBr[b] = re;
    return re;
  }
  /**
   * Update the scopes stack removing or adding closures to it
   * @param {Array} stack - array stacking the expected closing chars
   * @param {string} char - current char to add or remove from the stack
   * @param {number} idx - position of `char` inside `code`
   * @param {string} code - expression code
   * @returns {Object} result
   * @returns {string} result.char - either the char received or, when a `}`
   * closes a `${...}` sequence, the backquote marker popped from the stack
   * @returns {number} result.index - either a new index to skip part of the source code,
   * or 0 to keep parsing from the old position
   * @throws {Error} (through `panic`) if a closing char does not match the
   * one expected by the stack
   */
  function updateStack(stack, char, idx, code) {
    let index = 0;

    switch (char) {
    case '[':
    case '(':
    case '{':
      stack.push(char === '[' ? ']' : char === '(' ? ')' : '}');
      break;
    case ')':
    case ']':
    case '}':
      if (char !== stack.pop()) {
        // report the position of the offending char; `index` is still 0 here
        // and would make the error point to the end of the buffer
        panic(code, unexpectedCharInExpression.replace('%1', char), idx);
      }

      // a `}` closing a `${` gives the control back to the template literal
      if (char === '}' && stack[stack.length - 1] === $_ES6_BQ) {
        char = stack.pop();
      }

      index = idx + 1;
      break;
    case '/':
      index = skipRegex(code, idx);
    }

    return { char, index };
  }
  /**
   * Parses the code string searching the end of the expression.
   * It skips braces, quoted strings, regexes, and ES6 template literals.
   *
   * @function exprExtr
   * @param {string} code - Buffer to parse
   * @param {number} start - Position of the opening brace
   * @param {[string,string]} bp - Brackets pair
   * @returns {Object|undefined} `{ text, start, end }` where `text` is the
   * expression without its brackets and `end` is the position following the
   * closing brace; `undefined` if the closing brace was not found and no
   * nested closure was left open.
   * @throws {Error} (through `panic`) if the buffer ends inside a nested closure
   */
  function exprExtr(code, start, bp) {
    const [openingBraces, closingBraces] = bp;
    const offset = start + openingBraces.length; // skips the opening brace
    const stack = []; // expected closing braces ('`' for ES6 TL)
    const re = _regex(closingBraces);
    let end;

    re.lastIndex = offset; // beginning of the expression

    for (let match = re.exec(code); match !== null; match = re.exec(code)) {
      const idx = match.index;
      const str = match[0];

      end = re.lastIndex;

      // the closing braces outside any nested closure end the expression
      if (!stack.length && str === closingBraces) {
        return {
          text: code.slice(offset, idx),
          start,
          end
        };
      }

      const update = updateStack(stack, str[0], idx, code);

      // a non-zero index means that part of the source must be skipped
      end = update.index || end;
      // template literals are skipped up to their end or their next `${`
      re.lastIndex = update.char === $_ES6_BQ ? skipES6TL(code, end, stack) : end;
    }

    if (stack.length > 0) {
      panic(code, unclosedExpression, end);
    }
  }
  /**
   * Outputs the last parsed node. Can be used with a builder too.
   * `store.last` is always reset; the node is handed to `store.builder.push`
   * only if there was one and `store.root` is set.
   *
   * @param {ParserStore} store - Parsing store
   * @returns {undefined} void function
   * @private
   */
  function flush(store) {
    const { last } = store;

    store.last = null;
    if (!last || !store.root) {
      return;
    }
    store.builder.push(last);
  }
  /**
   * Get the chunk of code between two positions
   * @param {string} source - source code
   * @param {number} start - Start position of the chunk we want to extract
   * @param {number} end - Ending position of the chunk (not included)
   * @returns {string} chunk of code extracted from the source code received
   * @private
   */
  function getChunk(source, start, end) {
    const chunk = source.slice(start, end);
    return chunk;
  }
  /**
   * Stores text in the last text node, or creates a new one if needed.
   * The parser position (`state.pos`) is moved to `end`.
   *
   * @param {ParserState} state - Current parser state
   * @param {number} start - Start position of the text
   * @param {number} end - Ending position of the text (not included)
   * @param {Object} extra - extra properties to add to the text node
   * @param {RawExpr[]} extra.expressions - Found expressions
   * @param {string} extra.unescape - Brackets to unescape
   * @returns {number} - the `TEXT` node type
   * @private
   */
  function pushText(state, start, end, extra = {}) {
    const text = getChunk(state.data, start, end);
    const { expressions, unescape } = extra;
    const previous = state.last;
    let node;

    state.pos = end;

    if (previous && previous.type === TEXT) {
      // consecutive text chunks are merged into the same node
      previous.text += text;
      previous.end = end;
      node = previous;
    } else {
      flush(state);
      node = { type: TEXT, text, start, end };
      state.last = node;
    }

    if (expressions && expressions.length) {
      node.expressions = (node.expressions || []).concat(expressions);
    }

    if (unescape) {
      node.unescape = unescape;
    }

    return TEXT;
  }
  /**
   * Find the end of the attribute value or text node
   * Extract expressions.
   * Detect if value have escaped brackets.
   *
   * When `node` is given it receives the `unescape` and `expressions`
   * properties (only if they are not empty); otherwise the parsed chunk is
   * pushed as text through `pushText`.
   *
   * @param {ParserState} state - Parser state
   * @param {HasExpr} node - Node if attr, falsy value if text
   * @param {string} endingChars - Regex source that ends the value or text
   * @param {number} start - Starting position
   * @returns {number} Ending position
   * @private
   */
  function expr(state, node, endingChars, start) {
    const re = b0re(state, endingChars);

    re.lastIndex = start; // reset re position

    const parsed = parseExpressions(state, re);
    const { unescape, expressions, end } = parsed;

    if (!node) {
      pushText(state, start, end, { expressions, unescape });
      return end;
    }

    if (unescape) {
      node.unescape = unescape;
    }
    if (expressions.length) {
      node.expressions = expressions;
    }

    return end;
  }
  /**
   * Parse a text chunk finding all the expressions in it
   * @param {ParserState} state - Parser state
   * @param {RegExp} re - regex capturing the ending chars in $1 and matching
   * the opening bracket otherwise; its `lastIndex` sets the starting position
   * @returns {Object} result containing the expressions found, the string to
   * unescape and the end position
   * @throws {Error} (through `panic`) if the ending chars are never found
   */
  function parseExpressions(state, re) {
    const { data, options } = state;
    const { brackets } = options;
    const expressions = [];
    let unescape;
    let pos;
    let match = re.exec(data);

    // Anything captured in $1 (closing quote or character) ends the loop...
    while (match && !match[1]) {
      // ...else, we have an opening bracket and maybe an expression.
      pos = match.index;

      const isEscaped = data[pos - 1] === '\\';

      if (isEscaped) {
        unescape = match[0]; // it is an escaped opening brace
      } else {
        const found = exprExtr(data, pos, brackets);

        if (found) {
          expressions.push(found);
          // keep searching after the expression
          re.lastIndex = found.end;
        }
      }

      match = re.exec(data);
    }

    // Even for text, the parser needs to match a closing char
    if (!match) {
      panic(data, unexpectedEndOfFile, pos);
    }

    return {
      unescape,
      expressions,
      end: match.index
    };
  }
  /**
   * Creates a regex for the given string and the left bracket.
   * The string is captured in $1.
   * The regex is stored in `state.regexCache` using `str` as key.
   *
   * @param {ParserState} state - Parser state
   * @param {string} str - Regex source of the string to search
   * @returns {RegExp} Resulting (global) regex.
   * @private
   */
  function b0re(state, str) {
    const { brackets } = state.options;
    const cached = state.regexCache[str];

    if (cached) {
      return cached;
    }

    // only the opening bracket is escaped, `str` is used as regex source
    const b0 = escapeStr(brackets[0]);

    state.regexCache[str] = new RegExp(`(${str})|${b0}`, 'g');

    return state.regexCache[str];
  }
  // similar to _.uniq: keeps only the first occurrence of each item
  const uniq = l => l.filter(function isFirstOccurrence(item, position, all) {
    return all.indexOf(item) === position;
  });
  /**
   * SVG void elements that cannot be auto-closed and shouldn't contain child nodes.
   * @const {Array}
   */
  const VOID_SVG_TAGS_LIST = [
    'circle', 'ellipse', 'line',
    'path', 'polygon', 'polyline',
    'rect', 'stop', 'use'
  ];
  /**
   * List of html elements where the value attribute is allowed
   * @type {Array}
   */
  const HTML_ELEMENTS_HAVING_VALUE_ATTRIBUTE_LIST = [
    'button', 'data', 'input', 'select',
    'li', 'meter', 'option', 'output',
    'progress', 'textarea', 'param'
  ];
  /**
   * List of all the available svg tags: the names below plus the void svg
   * tags, without duplicates and sorted.
   * @const {Array}
   * @see {@link https://github.com/wooorm/svg-tag-names}
   */
  const SVG_TAGS_LIST = uniq([
    'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem',
    'animate', 'animateColor', 'animateMotion', 'animateTransform', 'animation',
    'audio',
    'canvas', 'clipPath', 'color-profile', 'cursor',
    'defs', 'desc', 'discard',
    'feBlend', 'feColorMatrix', 'feComponentTransfer', 'feComposite',
    'feConvolveMatrix', 'feDiffuseLighting', 'feDisplacementMap',
    'feDistantLight', 'feDropShadow', 'feFlood',
    'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR',
    'feGaussianBlur', 'feImage', 'feMerge', 'feMergeNode', 'feMorphology',
    'feOffset', 'fePointLight', 'feSpecularLighting', 'feSpotLight',
    'feTile', 'feTurbulence',
    'filter',
    'font', 'font-face', 'font-face-format', 'font-face-name',
    'font-face-src', 'font-face-uri',
    'foreignObject',
    'g', 'glyph', 'glyphRef',
    'handler', 'hatch', 'hatchpath', 'hkern',
    'iframe', 'image',
    'linearGradient', 'listener',
    'marker', 'mask',
    'mesh', 'meshgradient', 'meshpatch', 'meshrow',
    'metadata', 'missing-glyph', 'mpath',
    'pattern', 'prefetch',
    'radialGradient',
    'script', 'set', 'solidColor', 'solidcolor', 'style', 'svg', 'switch', 'symbol',
    'tbreak', 'text', 'textArea', 'textPath', 'title', 'tref', 'tspan',
    'unknown',
    'video', 'view', 'vkern',
    ...VOID_SVG_TAGS_LIST
  ]).sort();
  /**
   * HTML void elements that cannot be auto-closed and shouldn't contain child nodes.
   * @type {Array}
   * @see {@link http://www.w3.org/TR/html-markup/syntax.html#syntax-elements}
   * @see {@link http://www.w3.org/TR/html5/syntax.html#void-elements}
   */
  const VOID_HTML_TAGS_LIST = [
    'area', 'base', 'br', 'col',
    'embed', 'hr', 'img', 'input',
    'keygen', 'link', 'menuitem', 'meta',
    'param', 'source', 'track', 'wbr'
  ];
  /**
   * List of all the html tags: the names below plus the void html tags and
   * the tags accepting a value attribute, without duplicates and sorted.
   * @const {Array}
   * @see {@link https://github.com/sindresorhus/html-tags}
   */
  const HTML_TAGS_LIST = uniq([
    'a', 'abbr', 'address', 'article', 'aside', 'audio',
    'b', 'bdi', 'bdo', 'blockquote', 'body',
    'canvas', 'caption', 'cite', 'code', 'colgroup',
    'datalist', 'dd', 'del', 'details', 'dfn', 'dialog', 'div', 'dl', 'dt',
    'em',
    'fieldset', 'figcaption', 'figure', 'footer', 'form',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'head', 'header', 'hgroup', 'html',
    'i', 'iframe', 'ins',
    'kbd',
    'label', 'legend',
    'main', 'map', 'mark', 'math', 'menu',
    'nav', 'noscript',
    'object', 'ol', 'optgroup',
    'p', 'picture', 'pre',
    'q',
    'rb', 'rp', 'rt', 'rtc', 'ruby',
    's', 'samp', 'script', 'section', 'select', 'slot', 'small', 'span',
    'strong', 'style', 'sub', 'summary', 'sup', 'svg',
    'table', 'tbody', 'td', 'template', 'tfoot', 'th', 'thead', 'time',
    'title', 'tr',
    'u', 'ul',
    'var', 'video',
    ...VOID_HTML_TAGS_LIST,
    ...HTML_ELEMENTS_HAVING_VALUE_ATTRIBUTE_LIST
  ]).sort();
  /**
   * List of all boolean HTML attributes
   * @const {Array}
   * @see {@link https://www.w3.org/TR/html5/infrastructure.html#sec-boolean-attributes}
   */
  const BOOLEAN_ATTRIBUTES_LIST = [
    'disabled', 'visible', 'checked', 'readonly', 'required',
    'allowfullscreen', 'autofocus', 'autoplay',
    'compact', 'controls',
    'default',
    'formnovalidate',
    'hidden',
    'ismap', 'itemscope',
    'loop',
    'multiple', 'muted',
    'noresize', 'noshade', 'novalidate', 'nowrap',
    'open',
    'reversed',
    'seamless', 'selected', 'sortable',
    'truespeed', 'typemustmatch'
  ];
  /**
   * Join a list of items with the pipe symbol (useful to build regex alternations)
   * @private
   * @param {Array} list - list of strings
   * @returns {string} the list received joined with pipes
   */
  function joinWithPipe(list) {
    const separator = '|';
    return list.join(separator);
  }
  /**
   * Convert lists of strings to a regex matching any of the strings, ignoring
   * the cases. The whole input must match, an optional leading `/` is allowed.
   * The strings are used as regex source without being escaped.
   * @private
   * @param {...Array} lists - arrays of strings
   * @returns {RegExp} regex that will match all the strings in the arrays received ignoring the cases
   */
  function listsToRegex(...lists) {
    const alternatives = joinWithPipe(lists.map(joinWithPipe));
    const source = `^/?(?:${alternatives})$`;
    return new RegExp(source, 'i');
  }
  /**
   * Regex matching all the html tags ignoring the cases
   * @const {RegExp}
   */
  const HTML_TAGS_RE = listsToRegex(HTML_TAGS_LIST);

  /**
   * Regex matching all the svg tags ignoring the cases
   * @const {RegExp}
   */
  const SVG_TAGS_RE = listsToRegex(SVG_TAGS_LIST);

  /**
   * Regex matching all the void html tags ignoring the cases
   * @const {RegExp}
   */
  const VOID_HTML_TAGS_RE = listsToRegex(VOID_HTML_TAGS_LIST);

  /**
   * Regex matching all the void svg tags ignoring the cases
   * @const {RegExp}
   */
  const VOID_SVG_TAGS_RE = listsToRegex(VOID_SVG_TAGS_LIST);

  /**
   * Regex matching all the html tags where the value attribute is allowed
   * @const {RegExp}
   */
  const HTML_ELEMENTS_HAVING_VALUE_ATTRIBUTE_RE = listsToRegex(HTML_ELEMENTS_HAVING_VALUE_ATTRIBUTE_LIST);

  /**
   * Regex matching all the boolean attributes ignoring the cases
   * @const {RegExp}
   */
  const BOOLEAN_ATTRIBUTES_RE = listsToRegex(BOOLEAN_ATTRIBUTES_LIST);
  /**
   * True if the tag is a void html or a void svg tag (the cases are ignored)
   * @param {string} tag - test tag
   * @returns {boolean} true if void
   * @example
   * isVoid('meta') // true
   * isVoid('circle') // true
   * isVoid('IMG') // true
   * isVoid('div') // false
   * isVoid('mask') // false
   */
  function isVoid(tag) {
    return VOID_HTML_TAGS_RE.test(tag) || VOID_SVG_TAGS_RE.test(tag);
  }
  /**
   * True if it's not SVG nor a HTML known tag
   * @param {string} tag - test tag
   * @returns {boolean} true if custom element
   * @example
   * isCustom('my-component') // true
   * isCustom('div') // false
   */
  function isCustom(tag) {
    const isKnownHtmlTag = HTML_TAGS_RE.test(tag);
    return !isKnownHtmlTag && !SVG_TAGS_RE.test(tag);
  }
  /**
   * True if it's a boolean attribute (the cases are ignored)
   * @param {string} attribute - test attribute
   * @returns {boolean} true if the attribute is a boolean type
   * @example
   * isBoolAttribute('selected') // true
   * isBoolAttribute('class') // false
   */
  function isBoolAttribute(attribute) {
    const isBoolean = BOOLEAN_ATTRIBUTES_RE.test(attribute);
    return isBoolean;
  }
  /**
   * Memoization function. The results are cached by the FIRST argument only;
   * the other arguments are forwarded to `fn` but ignored by the cache.
   *
   * Object and function keys are kept in a `WeakMap`, so the cache does not
   * prevent them from being garbage collected. Any other key (primitives,
   * `null`, `undefined`) cannot be stored in a `WeakMap` and is kept in a
   * regular `Map`, which holds its entries for the lifetime of the memoized
   * function.
   *
   * @param {Function} fn - function to memoize
   * @returns {Function} memoized version of `fn`
   */
  function memoize(fn) {
    const objectCache = new WeakMap();
    const primitiveCache = new Map();

    // WeakMap#set throws a TypeError for keys that are not objects
    const cacheFor = key => {
      const isObjectLike = key !== null &&
        (typeof key === 'object' || typeof key === 'function');

      return isObjectLike ? objectCache : primitiveCache;
    };

    return (...args) => {
      const key = args[0];
      const cache = cacheFor(key);

      // `has` is used so that cached `undefined` results are honored too
      if (cache.has(key)) return cache.get(key);

      const ret = fn(...args);

      cache.set(key, ret);

      return ret;
    };
  }
  /**
   * Get the (memoized) global regex capturing in $1 a whole expression,
   * brackets included: the opening bracket, the shortest run of chars not
   * containing the closing bracket, and the closing bracket.
   *
   * The brackets are escaped before being used as regex source, so brackets
   * made of regex special chars (`$`, `(`, `[`...) are matched literally, and
   * the closing bracket is matched as a whole sequence, so multi-char brackets
   * are supported too.
   *
   * @param {[string,string]} brackets - Brackets pair (also used as cache key)
   * @returns {RegExp} global regex, shared by all the callers using the same
   * brackets array: set its `lastIndex` before using it
   */
  const expressionsContentRe = memoize(brackets => {
    const opening = escapeStr(brackets[0]);
    const closing = escapeStr(brackets[1]);

    // `(?:(?!closing)[^])*?` is the multi-char safe equivalent of `[^closing]*?`
    return new RegExp(`(${opening}(?:(?!${closing})[^])*?${closing})`, 'g');
  });
  /**
   * True if the attribute name contains the spread operator (`...`)
   * @param {string} name - attribute name
   * @returns {boolean} true for spread attributes
   */
  const isSpreadAttribute = name => {
    return SPREAD_OPERATOR.test(name);
  };
  /**
   * True if the attribute name starts with the opening bracket, i.e. the
   * attribute is an expression like `<a {href}>`.
   * The whole opening bracket is compared, so multi-char brackets (like `{{`)
   * are detected as well.
   * @param {string} name - attribute name
   * @param {[string,string]} brackets - Brackets pair
   * @returns {boolean} true if the name is an expression
   */
  const isAttributeExpression = (name, brackets) => {
    const opening = brackets[0];

    // an empty or missing bracket can not start an expression
    if (typeof opening !== 'string' || opening.length === 0) {
      return false;
    }

    return name.startsWith(opening);
  };
  /**
   * Find the end of an attribute starting from `attr.start`; the attribute
   * ends at the first `>`, `/` or blank char found outside the expressions.
   * The expressions found are stored into `attr` by `expr`.
   * @param {ParserStore} state - Parser state
   * @param {Object} attr - attribute object
   * @returns {number} Ending position
   */
  const getAttributeEnd = (state, attr) => {
    const endingChars = '[>/\\s]';
    return expr(state, attr, endingChars, attr.start);
  };
  /**
   * The more complex parsing is for attributes as it can contain quoted or
   * unquoted values or expressions.
   *
   * Looks at the first non-space char after the current position:
   * `>` closes the tag, `/` marks it as (maybe) self-closing, anything else
   * starts an attribute.
   *
   * @param {ParserStore} state - Parser state
   * @returns {number} New parser mode.
   * @private
   */
  function attr(state) {
    const { data, last: tag, pos, root } = state; // `last` is the current tag in the output
    const _CH = /\S/g; // matches the first non-space char
    const ch = execFromPos(_CH, pos, data);

    if (!ch) {
      // reaching the end of the buffer with NodeTypes.ATTR will generate error
      state.pos = data.length;
      return ATTR;
    }

    const firstChar = ch[0];

    if (firstChar === '>') {
      // closing char found. If this is a self-closing tag with the name of the
      // Root tag, we need decrement the counter as we are changing mode.
      const closingPos = _CH.lastIndex;

      tag.end = closingPos;
      state.pos = closingPos;

      if (tag[IS_SELF_CLOSING]) {
        state.scryle = null; // allow selfClosing script/style tags
        if (root && root.name === tag.name) {
          state.count--; // "pop" root tag
        }
      }

      return TEXT;
    }

    if (firstChar === '/') {
      // maybe a self-closing tag: the validation is delegated to the next loop
      state.pos = _CH.lastIndex;
      tag[IS_SELF_CLOSING] = true;
      return ATTR;
    }

    delete tag[IS_SELF_CLOSING]; // ensure unmark as selfclosing tag
    setAttribute(state, ch.index, tag);

    return ATTR;
  }
  /**
   * Parses an attribute and its expressions.
   * The resulting attribute is added to `tag.attributes`, and both the end of
   * the tag and the parser position are moved to the end of the attribute.
   *
   * @param {ParserStore} state - Parser state
   * @param {number} pos - Starting position of the attribute (first
   * non-whitespace char)
   * @param {Object} tag - Current parent tag
   * @returns {undefined} void function
   * @throws {Error} (through `panic`) if the attribute name is an expression
   * without closing bracket
   * @private
   */
  function setAttribute(state, pos, tag) {
    const { data } = state;
    const { brackets } = state.options;
    const expressionContent = expressionsContentRe(brackets);
    const re = ATTR_START; // (\S[^>/=\s]*)(?:\s*=\s*([^>/])?)? g
    // both regexes are global and shared: start them from the attribute position
    const start = re.lastIndex = expressionContent.lastIndex = pos;
    const attrMatches = re.exec(data);
    const isExpressionName = isAttributeExpression(attrMatches[1], brackets);
    let match = attrMatches;

    if (isExpressionName) {
      const expressionMatches = expressionContent.exec(data);

      // no match means that the expression is never closed: fail with a
      // parser error instead of dereferencing `null`
      if (!expressionMatches) {
        panic(data, unclosedExpression, pos);
      }

      // same shape as the ATTR_START matches: the name is the whole expression
      match = [null, expressionMatches[1], null];
    }

    const end = re.lastIndex;
    const attribute = parseAttribute(state, match, start, end, isExpressionName);

    // Pushes the attribute and shifts the `end` position of the tag (`last`).
    state.pos = tag.end = attribute.end;
    tag.attributes = addToCollection(tag.attributes, attribute);
  }
  1012. function parseNomalAttribute(state, attr, quote) {
  1013. const { data } = state;
  1014. let { end } = attr;
  1015. if (isBoolAttribute(attr.name)) {
  1016. attr[IS_BOOLEAN] = true;
  1017. }
  1018. // parse the whole value (if any) and get any expressions on it
  1019. if (quote) {
  1020. // Usually, the value's first char (`quote`) is a quote and the lastIndex
  1021. // (`end`) is the start of the value.
  1022. let valueStart = end;
  1023. // If it not, this is an unquoted value and we need adjust the start.
  1024. if (quote !== '"' && quote !== '\'') {
  1025. quote = ''; // first char of value is not a quote
  1026. valueStart--; // adjust the starting position
  1027. }
  1028. end = expr(state, attr, quote || '[>/\\s]', valueStart);
  1029. // adjust the bounds of the value and save its content
  1030. return Object.assign(attr, {
  1031. value: getChunk(data, valueStart, end),
  1032. valueStart,
  1033. end: quote ? ++end : end
  1034. })
  1035. }
  1036. return attr
  1037. }
  1038. /**
  1039. * Parse expression names <a {href}>
  1040. * @param {ParserStore} state - Parser state
  1041. * @param {Object} attr - attribute object parsed
  1042. * @returns {Object} normalized attribute object
  1043. */
  1044. function parseSpreadAttribute(state, attr) {
  1045. const end = getAttributeEnd(state, attr);
  1046. return {
  1047. [IS_SPREAD]: true,
  1048. start: attr.start,
  1049. expressions: attr.expressions.map(expr => Object.assign(expr, {
  1050. text: expr.text.replace(SPREAD_OPERATOR, '').trim()
  1051. })),
  1052. end: end
  1053. }
  1054. }
  1055. /**
  1056. * Parse expression names <a {href}>
  1057. * @param {ParserStore} state - Parser state
  1058. * @param {Object} attr - attribute object parsed
  1059. * @returns {Object} normalized attribute object
  1060. */
  1061. function parseExpressionNameAttribute(state, attr) {
  1062. const end = getAttributeEnd(state, attr);
  1063. return {
  1064. start: attr.start,
  1065. name: attr.expressions[0].text.trim(),
  1066. expressions: attr.expressions,
  1067. end: end
  1068. }
  1069. }
  1070. /**
  1071. * Parse the attribute values normalising the quotes
  1072. * @param {ParserStore} state - Parser state
  1073. * @param {Array} match - results of the attributes regex
  1074. * @param {number} start - attribute start position
  1075. * @param {number} end - attribute end position
  1076. * @param {boolean} isExpressionName - true if the attribute name is an expression
  1077. * @returns {Object} attribute object
  1078. */
  1079. function parseAttribute(state, match, start, end, isExpressionName) {
  1080. const attr = {
  1081. name: match[1],
  1082. value: '',
  1083. start,
  1084. end
  1085. };
  1086. const quote = match[2]; // first letter of value or nothing
  1087. switch (true) {
  1088. case isSpreadAttribute(attr.name):
  1089. return parseSpreadAttribute(state, attr)
  1090. case isExpressionName === true:
  1091. return parseExpressionNameAttribute(state, attr)
  1092. default:
  1093. return parseNomalAttribute(state, attr, quote)
  1094. }
  1095. }
  1096. /**
  1097. * Function to curry any javascript method
  1098. * @param {Function} fn - the target function we want to curry
  1099. * @param {...[args]} acc - initial arguments
  1100. * @returns {Function|*} it will return a function until the target function
  1101. * will receive all of its arguments
  1102. */
  1103. function curry(fn, ...acc) {
  1104. return (...args) => {
  1105. args = [...acc, ...args];
  1106. return args.length < fn.length ?
  1107. curry(fn, ...args) :
  1108. fn(...args)
  1109. }
  1110. }
  1111. /**
  1112. * Parses comments in long or short form
  1113. * (any DOCTYPE & CDATA blocks are parsed as comments).
  1114. *
  1115. * @param {ParserState} state - Parser state
  1116. * @param {string} data - Buffer to parse
  1117. * @param {number} start - Position of the '<!' sequence
  1118. * @returns {number} node type id
  1119. * @private
  1120. */
  1121. function comment(state, data, start) {
  1122. const pos = start + 2; // skip '<!'
  1123. const str = data.substr(pos, 2) === '--' ? '-->' : '>';
  1124. const end = data.indexOf(str, pos);
  1125. if (end < 0) {
  1126. panic(data, unclosedComment, start);
  1127. }
  1128. pushComment(state, start, end + str.length);
  1129. return TEXT
  1130. }
  1131. /**
  1132. * Parse a comment.
  1133. *
  1134. * @param {ParserState} state - Current parser state
  1135. * @param {number} start - Start position of the tag
  1136. * @param {number} end - Ending position (last char of the tag)
  1137. * @returns {undefined} void function
  1138. * @private
  1139. */
  1140. function pushComment(state, start, end) {
  1141. flush(state);
  1142. state.pos = end;
  1143. if (state.options.comments === true) {
  1144. state.last = { type: COMMENT, start, end };
  1145. }
  1146. }
  1147. /**
  1148. * Pushes a new *tag* and set `last` to this, so any attributes
  1149. * will be included on this and shifts the `end`.
  1150. *
  1151. * @param {ParserState} state - Current parser state
  1152. * @param {string} name - Name of the node including any slash
  1153. * @param {number} start - Start position of the tag
  1154. * @param {number} end - Ending position (last char of the tag + 1)
  1155. * @returns {undefined} - void function
  1156. * @private
  1157. */
  1158. function pushTag(state, name, start, end) {
  1159. const root = state.root;
  1160. const last = { type: TAG, name, start, end };
  1161. if (isCustom(name)) {
  1162. last[IS_CUSTOM] = true;
  1163. }
  1164. if (isVoid(name)) {
  1165. last[IS_VOID] = true;
  1166. }
  1167. state.pos = end;
  1168. if (root) {
  1169. if (name === root.name) {
  1170. state.count++;
  1171. } else if (name === root.close) {
  1172. state.count--;
  1173. }
  1174. flush(state);
  1175. } else {
  1176. // start with root (keep ref to output)
  1177. state.root = { name: last.name, close: `/${name}` };
  1178. state.count = 1;
  1179. }
  1180. state.last = last;
  1181. }
  1182. /**
  1183. * Parse the tag following a '<' character, or delegate to other parser
  1184. * if an invalid tag name is found.
  1185. *
  1186. * @param {ParserState} state - Parser state
  1187. * @returns {number} New parser mode
  1188. * @private
  1189. */
  1190. function tag(state) {
  1191. const { pos, data } = state; // pos of the char following '<'
  1192. const start = pos - 1; // pos of '<'
  1193. const str = data.substr(pos, 2); // first two chars following '<'
  1194. switch (true) {
  1195. case str[0] === '!':
  1196. return comment(state, data, start)
  1197. case TAG_2C.test(str):
  1198. return parseTag(state, start)
  1199. default:
  1200. return pushText(state, start, pos) // pushes the '<' as text
  1201. }
  1202. }
  1203. function parseTag(state, start) {
  1204. const { data, pos } = state;
  1205. const re = TAG_NAME; // (\/?[^\s>/]+)\s*(>)? g
  1206. const match = execFromPos(re, pos, data);
  1207. const end = re.lastIndex;
  1208. const name = match[1].toLowerCase(); // $1: tag name including any '/'
  1209. // script/style block is parsed as another tag to extract attributes
  1210. if (name in RE_SCRYLE) {
  1211. state.scryle = name; // used by parseText
  1212. }
  1213. pushTag(state, name, start, end);
  1214. // only '>' can ends the tag here, the '/' is handled in parseAttribute
  1215. if (!match[2]) {
  1216. return ATTR
  1217. }
  1218. return TEXT
  1219. }
  1220. /**
  1221. * Parses regular text and script/style blocks ...scryle for short :-)
  1222. * (the content of script and style is text as well)
  1223. *
  1224. * @param {ParserState} state - Parser state
  1225. * @returns {number} New parser mode.
  1226. * @private
  1227. */
  1228. function text(state) {
  1229. const { pos, data, scryle } = state;
  1230. switch (true) {
  1231. case typeof scryle === 'string': {
  1232. const name = scryle;
  1233. const re = RE_SCRYLE[name];
  1234. const match = execFromPos(re, pos, data);
  1235. if (!match) {
  1236. panic(data, unclosedNamedBlock.replace('%1', name), pos - 1);
  1237. }
  1238. const start = match.index;
  1239. const end = re.lastIndex;
  1240. state.scryle = null; // reset the script/style flag now
  1241. // write the tag content, if any
  1242. if (start > pos) {
  1243. parseSpecialTagsContent(state, name, match);
  1244. }
  1245. // now the closing tag, either </script> or </style>
  1246. pushTag(state, `/${name}`, start, end);
  1247. break
  1248. }
  1249. case data[pos] === '<':
  1250. state.pos++;
  1251. return TAG
  1252. default:
  1253. expr(state, null, '<', pos);
  1254. }
  1255. return TEXT
  1256. }
  1257. /**
  1258. * Parse the text content depending on the name
  1259. * @param {ParserState} state - Parser state
  1260. * @param {string} name - one of the tags matched by the RE_SCRYLE regex
  1261. * @param {Array} match - result of the regex matching the content of the parsed tag
  1262. * @returns {undefined} void function
  1263. */
  1264. function parseSpecialTagsContent(state, name, match) {
  1265. const { pos } = state;
  1266. const start = match.index;
  1267. if (name === TEXTAREA_TAG) {
  1268. expr(state, null, match[0], pos);
  1269. } else {
  1270. pushText(state, pos, start);
  1271. }
  1272. }
  1273. /*---------------------------------------------------------------------
  1274. * Tree builder for the riot tag parser.
  1275. *
  1276. * The output has a root property and separate arrays for `html`, `css`,
  1277. * and `js` tags.
  1278. *
  1279. * The root tag is included as first element in the `html` array.
 * Script tags marked with "defer" are included in `html` instead of `js`.
  1281. *
  1282. * - Mark SVG tags
  1283. * - Mark raw tags
  1284. * - Mark void tags
  1285. * - Split prefixes from expressions
  1286. * - Unescape escaped brackets and escape EOLs and backslashes
  1287. * - Compact whitespace (option `compact`) for non-raw tags
  1288. * - Create an array `parts` for text nodes and attributes
  1289. *
  1290. * Throws on unclosed tags or closing tags without start tag.
 * Self-closing and void tags have no nodes[] property.
  1292. */
  1293. /**
  1294. * Escape the carriage return and the line feed from a string
  1295. * @param {string} string - input string
  1296. * @returns {string} output string escaped
  1297. */
  1298. function escapeReturn(string) {
  1299. return string
  1300. .replace(/\r/g, '\\r')
  1301. .replace(/\n/g, '\\n')
  1302. }
  1303. /**
  1304. * Escape double slashes in a string
  1305. * @param {string} string - input string
  1306. * @returns {string} output string escaped
  1307. */
  1308. function escapeSlashes(string) {
  1309. return string.replace(/\\/g, '\\\\')
  1310. }
  1311. /**
  1312. * Replace the multiple spaces with only one
  1313. * @param {string} string - input string
  1314. * @returns {string} string without trailing spaces
  1315. */
  1316. function cleanSpaces(string) {
  1317. return string.replace(/\s+/g, ' ')
  1318. }
  1319. const TREE_BUILDER_STRUCT = Object.seal({
  1320. get() {
  1321. const store = this.store;
  1322. // The real root tag is in store.root.nodes[0]
  1323. return {
  1324. [TEMPLATE_OUTPUT_NAME]: store.root.nodes[0],
  1325. [CSS_OUTPUT_NAME]: store[STYLE_TAG],
  1326. [JAVASCRIPT_OUTPUT_NAME]: store[JAVASCRIPT_TAG]
  1327. }
  1328. },
  1329. /**
  1330. * Process the current tag or text.
  1331. * @param {Object} node - Raw pseudo-node from the parser
  1332. * @returns {undefined} void function
  1333. */
  1334. push(node) {
  1335. const store = this.store;
  1336. switch (node.type) {
  1337. case TEXT:
  1338. this.pushText(store, node);
  1339. break
  1340. case TAG: {
  1341. const name = node.name;
  1342. const closingTagChar = '/';
  1343. const [firstChar] = name;
  1344. if (firstChar === closingTagChar && !node.isVoid) {
  1345. this.closeTag(store, node, name);
  1346. } else if (firstChar !== closingTagChar) {
  1347. this.openTag(store, node);
  1348. }
  1349. break
  1350. }
  1351. }
  1352. },
  1353. closeTag(store, node) {
  1354. const last = store.scryle || store.last;
  1355. last.end = node.end;
  1356. if (store.scryle) {
  1357. store.scryle = null;
  1358. } else {
  1359. store.last = store.stack.pop();
  1360. }
  1361. },
  1362. openTag(store, node) {
  1363. const name = node.name;
  1364. const attrs = node.attributes;
  1365. if ([JAVASCRIPT_TAG, STYLE_TAG].includes(name)) {
  1366. // Only accept one of each
  1367. if (store[name]) {
  1368. panic(this.store.data, duplicatedNamedTag.replace('%1', name), node.start);
  1369. }
  1370. store[name] = node;
  1371. store.scryle = store[name];
  1372. } else {
  1373. // store.last holds the last tag pushed in the stack and this are
  1374. // non-void, non-empty tags, so we are sure the `lastTag` here
  1375. // have a `nodes` property.
  1376. const lastTag = store.last;
  1377. const newNode = node;
  1378. lastTag.nodes.push(newNode);
  1379. if (lastTag[IS_RAW] || RAW_TAGS.test(name)) {
  1380. node[IS_RAW] = true;
  1381. }
  1382. if (!node[IS_SELF_CLOSING] && !node[IS_VOID]) {
  1383. store.stack.push(lastTag);
  1384. newNode.nodes = [];
  1385. store.last = newNode;
  1386. }
  1387. }
  1388. if (attrs) {
  1389. this.attrs(attrs);
  1390. }
  1391. },
  1392. attrs(attributes) {
  1393. attributes.forEach(attr => {
  1394. if (attr.value) {
  1395. this.split(attr, attr.value, attr.valueStart, true);
  1396. }
  1397. });
  1398. },
  1399. pushText(store, node) {
  1400. const text = node.text;
  1401. const empty = !/\S/.test(text);
  1402. const scryle = store.scryle;
  1403. if (!scryle) {
  1404. // store.last always have a nodes property
  1405. const parent = store.last;
  1406. const pack = this.compact && !parent[IS_RAW];
  1407. if (pack && empty) {
  1408. return
  1409. }
  1410. this.split(node, text, node.start, pack);
  1411. parent.nodes.push(node);
  1412. } else if (!empty) {
  1413. scryle.text = node;
  1414. }
  1415. },
  1416. split(node, source, start, pack) {
  1417. const expressions = node.expressions;
  1418. const parts = [];
  1419. if (expressions) {
  1420. let pos = 0;
  1421. expressions.forEach(expr => {
  1422. const text = source.slice(pos, expr.start - start);
  1423. const code = expr.text;
  1424. parts.push(this.sanitise(node, text, pack), escapeReturn(escapeSlashes(code).trim()));
  1425. pos = expr.end - start;
  1426. });
  1427. if (pos < node.end) {
  1428. parts.push(this.sanitise(node, source.slice(pos), pack));
  1429. }
  1430. } else {
  1431. parts[0] = this.sanitise(node, source, pack);
  1432. }
  1433. node.parts = parts.filter(p => p); // remove the empty strings
  1434. },
  1435. // unescape escaped brackets and split prefixes of expressions
  1436. sanitise(node, text, pack) {
  1437. let rep = node.unescape;
  1438. if (rep) {
  1439. let idx = 0;
  1440. rep = `\\${rep}`;
  1441. while ((idx = text.indexOf(rep, idx)) !== -1) {
  1442. text = text.substr(0, idx) + text.substr(idx + 1);
  1443. idx++;
  1444. }
  1445. }
  1446. text = escapeSlashes(text);
  1447. return pack ? cleanSpaces(text) : escapeReturn(text)
  1448. }
  1449. });
  1450. function createTreeBuilder(data, options) {
  1451. const root = {
  1452. type: TAG,
  1453. name: '',
  1454. start: 0,
  1455. end: 0,
  1456. nodes: []
  1457. };
  1458. return Object.assign(Object.create(TREE_BUILDER_STRUCT), {
  1459. compact: options.compact !== false,
  1460. store: {
  1461. last: root,
  1462. stack: [],
  1463. scryle: null,
  1464. root,
  1465. style: null,
  1466. script: null,
  1467. data
  1468. }
  1469. })
  1470. }
  1471. /**
  1472. * Factory for the Parser class, exposing only the `parse` method.
  1473. * The export adds the Parser class as property.
  1474. *
  1475. * @param {Object} options - User Options
  1476. * @param {Function} customBuilder - Tree builder factory
  1477. * @returns {Function} Public Parser implementation.
  1478. */
  1479. function parser(options, customBuilder) {
  1480. const state = curry(createParserState)(options, customBuilder || createTreeBuilder);
  1481. return {
  1482. parse: (data) => parse(state(data))
  1483. }
  1484. }
  1485. /**
  1486. * Create a new state object
  1487. * @param {Object} userOptions - parser options
  1488. * @param {Function} builder - Tree builder factory
  1489. * @param {string} data - data to parse
  1490. * @returns {ParserState} it represents the current parser state
  1491. */
  1492. function createParserState(userOptions, builder, data) {
  1493. const options = Object.assign({
  1494. brackets: ['{', '}']
  1495. }, userOptions);
  1496. return {
  1497. options,
  1498. regexCache: {},
  1499. pos: 0,
  1500. count: -1,
  1501. root: null,
  1502. last: null,
  1503. scryle: null,
  1504. builder: builder(data, options),
  1505. data
  1506. }
  1507. }
  1508. /**
  1509. * It creates a raw output of pseudo-nodes with one of three different types,
  1510. * all of them having a start/end position:
  1511. *
  1512. * - TAG -- Opening or closing tags
  1513. * - TEXT -- Raw text
  1514. * - COMMENT -- Comments
  1515. *
  1516. * @param {ParserState} state - Current parser state
  1517. * @returns {ParserResult} Result, contains data and output properties.
  1518. */
  1519. function parse(state) {
  1520. const { data } = state;
  1521. walk(state);
  1522. flush(state);
  1523. if (state.count) {
  1524. panic(data, state.count > 0 ? unexpectedEndOfFile : rootTagNotFound, state.pos);
  1525. }
  1526. return {
  1527. data,
  1528. output: state.builder.get()
  1529. }
  1530. }
  1531. /**
  1532. * Parser walking recursive function
  1533. * @param {ParserState} state - Current parser state
  1534. * @param {string} type - current parsing context
  1535. * @returns {undefined} void function
  1536. */
  1537. function walk(state, type) {
  1538. const { data } = state;
  1539. // extend the state adding the tree builder instance and the initial data
  1540. const length = data.length;
  1541. // The "count" property is set to 1 when the first tag is found.
  1542. // This becomes the root and precedent text or comments are discarded.
  1543. // So, at the end of the parsing count must be zero.
  1544. if (state.pos < length && state.count) {
  1545. walk(state, eat(state, type));
  1546. }
  1547. }
  1548. /**
  1549. * Function to help iterating on the current parser state
  1550. * @param {ParserState} state - Current parser state
  1551. * @param {string} type - current parsing context
  1552. * @returns {string} parsing context
  1553. */
  1554. function eat(state, type) {
  1555. switch (type) {
  1556. case TAG:
  1557. return tag(state)
  1558. case ATTR:
  1559. return attr(state)
  1560. default:
  1561. return text(state)
  1562. }
  1563. }
  1564. /**
  1565. * Expose the internal constants
  1566. */
  1567. const constants = c;
  1568. /**
  1569. * The nodeTypes definition
  1570. */
  1571. const nodeTypes = types;
  1572. exports.constants = constants;
  1573. exports.default = parser;
  1574. exports.nodeTypes = nodeTypes;