You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

450 lines
12 KiB

4 years ago
  'use strict';

  var required = require('requires-port')
    , qs = require('querystringify')
    //
    // Matches a scheme that is directly followed by "//". The hyphen is placed
    // last in the character class so that it is a literal "-"; written as
    // `+-.` it forms a range that also admits ",", which is not a valid scheme
    // character.
    //
    , slashes = /^[A-Za-z][A-Za-z0-9+.-]*:\/\//
    //
    // Capture groups: 1. optional scheme including the trailing ":",
    // 2. optional "//", 3. everything that is left.
    //
    , protocolre = /^([a-z][a-z0-9.+-]*:)?(\/\/)?([\S\s]*)/i
    //
    // Source of the character class used to build the `left` expression.
    //
    , whitespace = '[\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\xA0\\u1680\\u180E\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200A\\u202F\\u205F\\u3000\\u2028\\u2029\\uFEFF]'
    //
    // Matches one or more of the characters above at the start of a string.
    //
    , left = new RegExp('^'+ whitespace +'+');
  /**
   * Strip leading white space from a value after turning it into a string.
   * Falsy input is treated as an empty string.
   *
   * @param {String} str String to trim.
   * @returns {String} The string without its leading white space.
   * @public
   */
  function trimLeft(str) {
    var text = str || '';

    return text.toString().replace(left, '');
  }
  /**
   * These are the parse rules for the URL parser. An entry is either a function
   * that receives the remaining address and returns the cleaned up version, or
   * an array that informs the parser about:
   *
   * 0. The char it needs to parse. If it's a string it is located using
   *    `indexOf`, a RegExp is applied using `exec` and `NaN` means: set the
   *    remaining address as the current value.
   * 1. The property we should set when parsing this value.
   * 2. Indication if it's backwards or forward parsing. When set as a number
   *    it's the amount of extra chars that should be split off.
   * 3. Inherit from location if non existing in the parser.
   * 4. `toLowerCase` the resulting value.
   */
  var rules = [
    ['#', 'hash'],                        // Extract from the back.
    ['?', 'query'],                       // Extract from the back.
    function sanitize(address) {          // Sanitize what is left of the address
      //
      // A string pattern only replaces the first occurrence, which would leave
      // every backslash after the first one untouched. Use a global RegExp so
      // all of them are normalised to forward slashes.
      //
      return address.replace(/\\/g, '/');
    },
    ['/', 'pathname'],                    // Extract from the back.
    ['@', 'auth', 1],                     // Extract from the front.
    [NaN, 'host', undefined, 1, 1],       // Set left over value.
    [/:(\d+)$/, 'port', undefined, 1],    // RegExp the back.
    [NaN, 'hostname', undefined, 1, 1]    // Set left over.
  ];
  /**
   * Names of the properties that must not be copied or inherited from a
   * location. This is only needed for all non blob URL's as a blob URL does not
   * include a hash, only the origin.
   *
   * @type {Object}
   * @private
   */
  var ignore = {
    hash: 1,
    query: 1
  };
  /**
   * Build the "default" location that relative URL's are resolved against.
   *
   * The location object differs when the code is loaded through a normal page,
   * a Worker or a worker that was created from a blob. In the blob case the
   * location contains the URL of the blob instead of the page that loaded the
   * code; the usable address is encoded in its `pathname`, so that is parsed
   * instead.
   *
   * @param {Object|String} loc Optional default location object.
   * @returns {Object} lolcation object.
   * @public
   */
  function lolcation(loc) {
    var root = {};

    if (typeof window !== 'undefined') root = window;
    else if (typeof global !== 'undefined') root = global;
    else if (typeof self !== 'undefined') root = self;

    var fallback = root.location || {}
      , source = loc || fallback
      , kind = typeof source
      , result = {}
      , name;

    //
    // Blob location: parse the URL that is stored, escaped, in the pathname.
    //
    if ('blob:' === source.protocol) {
      return new Url(unescape(source.pathname), {});
    }

    //
    // String location: parse it and drop the properties we never inherit.
    //
    if ('string' === kind) {
      result = new Url(source, {});
      for (name in ignore) delete result[name];

      return result;
    }

    //
    // Object location: copy everything except the ignored properties and
    // derive `slashes` from the `href` when it was not supplied.
    //
    if ('object' === kind) {
      for (name in source) {
        if (!(name in ignore)) result[name] = source[name];
      }

      if (result.slashes === undefined) {
        result.slashes = slashes.test(source.href);
      }
    }

    return result;
  }
  /**
   * @typedef ProtocolExtract
   * @type Object
   * @property {String} protocol Protocol matched in the URL, in lowercase.
   * @property {Boolean} slashes `true` if protocol is followed by "//", else `false`.
   * @property {String} rest Rest of the URL that is not part of the protocol.
   */

  /**
   * Split a URL in to its protocol, a flag that tells if the protocol was
   * followed by a double slash ("//") and the remainder of the URL. Leading
   * white space is removed before matching.
   *
   * @param {String} address URL we want to extract from.
   * @return {ProtocolExtract} Extracted information.
   * @private
   */
  function extractProtocol(address) {
    var parts = protocolre.exec(trimLeft(address))
      , scheme = parts[1];

    return {
      protocol: scheme ? scheme.toLowerCase() : '',
      slashes: Boolean(parts[2]),
      rest: parts[3]
    };
  }
  /**
   * Resolve a relative URL pathname against a base URL pathname. The last
   * segment of the base is dropped, the relative segments are appended and the
   * "." and ".." segments are collapsed.
   *
   * @param {String} relative Pathname of the relative URL.
   * @param {String} base Pathname of the base URL.
   * @return {String} Resolved pathname.
   * @private
   */
  function resolve(relative, base) {
    if (relative === '') return base;

    var segments = (base || '/').split('/');
    segments.pop();
    segments = segments.concat(relative.split('/'));

    var tail = segments[segments.length - 1]
      , kept = []
      , pending = 0
      , consumedRoot = false
      , idx
      , segment;

    //
    // Walk from the back so every ".." can cancel out the first regular
    // segment that precedes it.
    //
    for (idx = segments.length - 1; idx >= 0; idx--) {
      segment = segments[idx];

      if (segment === '.') continue;

      if (segment === '..') {
        pending++;
        continue;
      }

      if (pending) {
        //
        // The very first segment got cancelled, remember that so the leading
        // empty segment can be restored afterwards.
        //
        if (idx === 0) consumedRoot = true;
        pending--;
        continue;
      }

      kept.push(segment);
    }

    kept.reverse();

    if (consumedRoot) kept.unshift('');
    if (tail === '.' || tail === '..') kept.push('');

    return kept.join('/');
  }
  /**
   * The actual URL instance. Instead of returning an object we've opted-in to
   * create an actual constructor as it's much more memory efficient and
   * faster and it pleases my OCD.
   *
   * It is worth noting that we should not use `URL` as class name to prevent
   * clashes with the global URL instance that got introduced in browsers.
   *
   * Can be called with or without `new`.
   *
   * @constructor
   * @param {String} address URL we want to parse.
   * @param {Object|String} [location] Location defaults for relative paths.
   * @param {Boolean|Function} [parser] Parser for the query string.
   * @private
   */
  function Url(address, location, parser) {
    address = trimLeft(address);

    if (!(this instanceof Url)) {
      return new Url(address, location, parser);
    }

    var relative, extracted, parse, instruction, index, key
      , instructions = rules.slice()
      , type = typeof location
      , url = this
      , i = 0;

    //
    // The following if statements allows this module two have compatibility with
    // 2 different API:
    //
    // 1. Node.js's `url.parse` api which accepts a URL, boolean as arguments
    //    where the boolean indicates that the query string should also be parsed.
    //
    // 2. The `URL` interface of the browser which accepts a URL, object as
    //    arguments. The supplied object will be used as default values / fall-back
    //    for relative paths.
    //
    if ('object' !== type && 'string' !== type) {
      parser = location;
      location = null;
    }

    if (parser && 'function' !== typeof parser) parser = qs.parse;

    location = lolcation(location);

    //
    // Extract protocol information before running the instructions.
    //
    extracted = extractProtocol(address || '');
    relative = !extracted.protocol && !extracted.slashes;
    url.slashes = extracted.slashes || relative && location.slashes;
    url.protocol = extracted.protocol || location.protocol || '';
    address = extracted.rest;

    //
    // When the authority component is absent the URL starts with a path
    // component.
    //
    if (!extracted.slashes) instructions[3] = [/(.*)/, 'pathname'];

    for (; i < instructions.length; i++) {
      instruction = instructions[i];

      //
      // Function instructions transform what is left of the address.
      //
      if (typeof instruction === 'function') {
        address = instruction(address);
        continue;
      }

      parse = instruction[0];
      key = instruction[1];

      if (parse !== parse) {
        //
        // Only `NaN` is not equal to itself: use the left over address as value.
        //
        url[key] = address;
      } else if ('string' === typeof parse) {
        if (~(index = address.indexOf(parse))) {
          if ('number' === typeof instruction[2]) {
            url[key] = address.slice(0, index);
            address = address.slice(index + instruction[2]);
          } else {
            url[key] = address.slice(index);
            address = address.slice(0, index);
          }
        }
      } else if ((index = parse.exec(address))) {
        url[key] = index[1];
        address = address.slice(0, index.index);
      }

      url[key] = url[key] || (
        relative && instruction[3] ? location[key] || '' : ''
      );

      //
      // Hostname, host and protocol should be lowercased so they can be used to
      // create a proper `origin`.
      //
      if (instruction[4]) url[key] = url[key].toLowerCase();
    }

    //
    // Also parse the supplied query string in to an object. If we're supplied
    // with a custom parser as function use that instead of the default build-in
    // parser.
    //
    if (parser) url.query = parser(url.query);

    //
    // If the URL is relative, resolve the pathname against the base URL.
    //
    if (
         relative
      && location.slashes
      && url.pathname.charAt(0) !== '/'
      && (url.pathname !== '' || location.pathname !== '')
    ) {
      url.pathname = resolve(url.pathname, location.pathname);
    }

    //
    // We should not add port numbers if they are already the default port number
    // for a given protocol. As the host also contains the port number we're going
    // override it with the hostname which contains no port number.
    //
    if (!required(url.port, url.protocol)) {
      url.host = url.hostname;
      url.port = '';
    }

    //
    // Parse down the `auth` for the username and password. Only the first colon
    // separates the two, any other colon is part of the password and has to be
    // preserved so the `href` does not lose information.
    //
    url.username = url.password = '';
    if (url.auth) {
      index = url.auth.indexOf(':');

      if (~index) {
        url.username = url.auth.slice(0, index);
        url.password = url.auth.slice(index + 1);
      } else {
        url.username = url.auth;
      }
    }

    url.origin = url.protocol && url.host && url.protocol !== 'file:'
      ? url.protocol +'//'+ url.host
      : 'null';

    //
    // The href is just the compiled result.
    //
    url.href = url.toString();
  }
  /**
   * This is convenience method for changing properties in the URL instance to
   * insure that they all propagate correctly: dependent properties are updated
   * and `origin` and `href` are recomputed after every change.
   *
   * @param {String} part Property we need to adjust.
   * @param {Mixed} value The newly assigned value.
   * @param {Boolean|Function} fn When setting the query, it will be the function
   *                              used to parse the query.
   *                              When setting the protocol, double slash will be
   *                              removed from the final url if it is true.
   * @returns {URL} URL instance for chaining.
   * @public
   */
  function set(part, value, fn) {
    var url = this;

    switch (part) {
      case 'query':
        if ('string' === typeof value && value.length) {
          value = (fn || qs.parse)(value);
        }

        url[part] = value;
        break;

      case 'port':
        url[part] = value;

        //
        // Default ports are not stored, nor added to the host.
        //
        if (!required(value, url.protocol)) {
          url.host = url.hostname;
          url[part] = '';
        } else if (value) {
          url.host = url.hostname +':'+ value;
        }

        break;

      case 'hostname':
        url[part] = value;

        if (url.port) value += ':'+ url.port;
        url.host = value;
        break;

      case 'host':
        url[part] = value;

        if (/:\d+$/.test(value)) {
          value = value.split(':');
          url.port = value.pop();
          url.hostname = value.join(':');
        } else {
          url.hostname = value;
          url.port = '';
        }

        break;

      case 'protocol':
        url.protocol = value.toLowerCase();
        url.slashes = !fn;
        break;

      case 'pathname':
      case 'hash':
        if (value) {
          var prefix = part === 'pathname' ? '/' : '#';
          url[part] = value.charAt(0) !== prefix ? prefix + value : value;
        } else {
          url[part] = value;
        }

        break;

      case 'auth':
        url[part] = value;

        //
        // `toString` builds the URL from `username` and `password`, so they
        // have to follow the new `auth` or the `href` would not change. Only
        // the first colon separates the user name from the password.
        //
        if ('string' === typeof value && value.length) {
          var colon = value.indexOf(':');

          if (~colon) {
            url.username = value.slice(0, colon);
            url.password = value.slice(colon + 1);
          } else {
            url.username = value;
            url.password = '';
          }
        } else {
          url.username = url.password = '';
        }

        break;

      case 'username':
      case 'password':
        url[part] = value;

        //
        // Keep `auth` in sync with the individual credentials.
        //
        var user = url.username || '';
        url.auth = url.password ? user +':'+ url.password : user;
        break;

      default:
        url[part] = value;
    }

    //
    // Re-apply the lowercasing that the parse rules demand for their property.
    //
    for (var i = 0; i < rules.length; i++) {
      var ins = rules[i];

      if (ins[4]) url[ins[1]] = url[ins[1]].toLowerCase();
    }

    url.origin = url.protocol && url.host && url.protocol !== 'file:'
      ? url.protocol +'//'+ url.host
      : 'null';

    url.href = url.toString();

    return url;
  }
  /**
   * Compile the properties of the instance back in to a URL string.
   *
   * @param {Function} stringify Optional query stringify function, used when
   *                             the query is stored as an object.
   * @returns {String} Compiled version of the URL.
   * @public
   */
  function toString(stringify) {
    var serialize = 'function' === typeof stringify ? stringify : qs.stringify
      , url = this
      , scheme = url.protocol
      , output
      , query;

    //
    // The protocol always ends with a colon in the compiled result.
    //
    if (scheme && scheme.charAt(scheme.length - 1) !== ':') scheme += ':';

    output = '' + scheme;
    if (url.slashes) output += '//';

    //
    // Credentials are only added when there is a user name.
    //
    if (url.username) {
      output += url.password
        ? url.username +':'+ url.password +'@'
        : url.username +'@';
    }

    output += url.host + url.pathname;

    //
    // The query is either an object that needs to be serialised or a string
    // that might already be prefixed with a question mark.
    //
    query = 'object' === typeof url.query ? serialize(url.query) : url.query;
    if (query) output += '?' === query.charAt(0) ? query : '?'+ query;

    if (url.hash) output += url.hash;

    return output;
  }
  //
  // Every instance shares the `set` and `toString` methods through the
  // prototype.
  //
  Url.prototype = {
    set: set,
    toString: toString
  };

  //
  // Expose the URL parser and some additional properties that might be useful
  // for others or testing.
  //
  Url.extractProtocol = extractProtocol;
  Url.location = lolcation;
  Url.trimLeft = trimLeft;
  Url.qs = qs;

  module.exports = Url;