You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

139 lines
3.8 KiB

4 years ago
  'use strict';

  // Pick the URL constructor: the `URL` global when the runtime provides one,
  // otherwise the implementation exported by the built-in `url` module.
  // TODO: Use the `URL` global when targeting Node.js 10
  const URLParser = typeof URL !== 'undefined' ? URL : require('url').URL;
  /**
   * Tell whether `name` is matched by at least one entry of `filters`.
   *
   * @param {string} name - Value to check (a query parameter key or a path component).
   * @param {Array<RegExp|string>} filters - `RegExp` entries are tested against
   *   `name`; every other entry must be strictly equal to `name` to match.
   * @returns {boolean} `true` as soon as one filter matches, otherwise `false`.
   */
  const testParameter = (name, filters) => {
    const matchesName = filter => {
      if (filter instanceof RegExp) {
        return filter.test(name);
      }

      return filter === name;
    };

    return filters.some(matchesName);
  };
  7. module.exports = (urlString, opts) => {
  8. opts = Object.assign({
  9. defaultProtocol: 'http:',
  10. normalizeProtocol: true,
  11. forceHttp: false,
  12. forceHttps: false,
  13. stripHash: true,
  14. stripWWW: true,
  15. removeQueryParameters: [/^utm_\w+/i],
  16. removeTrailingSlash: true,
  17. removeDirectoryIndex: false,
  18. sortQueryParameters: true
  19. }, opts);
  20. // Backwards compatibility
  21. if (Reflect.has(opts, 'normalizeHttps')) {
  22. opts.forceHttp = opts.normalizeHttps;
  23. }
  24. if (Reflect.has(opts, 'normalizeHttp')) {
  25. opts.forceHttps = opts.normalizeHttp;
  26. }
  27. if (Reflect.has(opts, 'stripFragment')) {
  28. opts.stripHash = opts.stripFragment;
  29. }
  30. urlString = urlString.trim();
  31. const hasRelativeProtocol = urlString.startsWith('//');
  32. const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString);
  33. // Prepend protocol
  34. if (!isRelativeUrl) {
  35. urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, opts.defaultProtocol);
  36. }
  37. const urlObj = new URLParser(urlString);
  38. if (opts.forceHttp && opts.forceHttps) {
  39. throw new Error('The `forceHttp` and `forceHttps` options cannot be used together');
  40. }
  41. if (opts.forceHttp && urlObj.protocol === 'https:') {
  42. urlObj.protocol = 'http:';
  43. }
  44. if (opts.forceHttps && urlObj.protocol === 'http:') {
  45. urlObj.protocol = 'https:';
  46. }
  47. // Remove hash
  48. if (opts.stripHash) {
  49. urlObj.hash = '';
  50. }
  51. // Remove duplicate slashes if not preceded by a protocol
  52. if (urlObj.pathname) {
  53. // TODO: Use the following instead when targeting Node.js 10
  54. // `urlObj.pathname = urlObj.pathname.replace(/(?<!https?:)\/{2,}/g, '/');`
  55. urlObj.pathname = urlObj.pathname.replace(/((?![https?:]).)\/{2,}/g, (_, p1) => {
  56. if (/^(?!\/)/g.test(p1)) {
  57. return `${p1}/`;
  58. }
  59. return '/';
  60. });
  61. }
  62. // Decode URI octets
  63. if (urlObj.pathname) {
  64. urlObj.pathname = decodeURI(urlObj.pathname);
  65. }
  66. // Remove directory index
  67. if (opts.removeDirectoryIndex === true) {
  68. opts.removeDirectoryIndex = [/^index\.[a-z]+$/];
  69. }
  70. if (Array.isArray(opts.removeDirectoryIndex) && opts.removeDirectoryIndex.length > 0) {
  71. let pathComponents = urlObj.pathname.split('/');
  72. const lastComponent = pathComponents[pathComponents.length - 1];
  73. if (testParameter(lastComponent, opts.removeDirectoryIndex)) {
  74. pathComponents = pathComponents.slice(0, pathComponents.length - 1);
  75. urlObj.pathname = pathComponents.slice(1).join('/') + '/';
  76. }
  77. }
  78. if (urlObj.hostname) {
  79. // Remove trailing dot
  80. urlObj.hostname = urlObj.hostname.replace(/\.$/, '');
  81. // Remove `www.`
  82. // eslint-disable-next-line no-useless-escape
  83. if (opts.stripWWW && /^www\.([a-z\-\d]{2,63})\.([a-z\.]{2,5})$/.test(urlObj.hostname)) {
  84. // Each label should be max 63 at length (min: 2).
  85. // The extension should be max 5 at length (min: 2).
  86. // Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
  87. urlObj.hostname = urlObj.hostname.replace(/^www\./, '');
  88. }
  89. }
  90. // Remove query unwanted parameters
  91. if (Array.isArray(opts.removeQueryParameters)) {
  92. for (const key of [...urlObj.searchParams.keys()]) {
  93. if (testParameter(key, opts.removeQueryParameters)) {
  94. urlObj.searchParams.delete(key);
  95. }
  96. }
  97. }
  98. // Sort query parameters
  99. if (opts.sortQueryParameters) {
  100. urlObj.searchParams.sort();
  101. }
  102. // Take advantage of many of the Node `url` normalizations
  103. urlString = urlObj.toString();
  104. // Remove ending `/`
  105. if (opts.removeTrailingSlash || urlObj.pathname === '/') {
  106. urlString = urlString.replace(/\/$/, '');
  107. }
  108. // Restore relative protocol, if applicable
  109. if (hasRelativeProtocol && !opts.normalizeProtocol) {
  110. urlString = urlString.replace(/^http:\/\//, '//');
  111. }
  112. return urlString;
  113. };