Revision 69e53743
Added by koszko almost 2 years ago
common/patterns.js | ||
---|---|---|
7 | 7 |
* Redistribution terms are gathered in the `copyright' file. |
8 | 8 |
*/ |
9 | 9 |
|
10 |
const MAX_URL_PATH_LEN = 12; |
|
11 |
const MAX_URL_PATH_CHARS = 255; |
|
12 |
const MAX_DOMAIN_LEN = 7; |
|
13 |
const MAX_DOMAIN_CHARS = 100; |
|
10 |
const MAX = { |
|
11 |
URL_PATH_LEN: 12, |
|
12 |
URL_PATH_CHARS: 255, |
|
13 |
DOMAIN_LEN: 7, |
|
14 |
DOMAIN_CHARS: 100 |
|
15 |
}; |
|
14 | 16 |
|
15 | 17 |
const proto_regex = /^(\w+):\/\/(.*)$/; |
16 | 18 |
|
... | ... | |
25 | 27 |
|
26 | 28 |
const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`); |
27 | 29 |
|
28 |
function deconstruct_url(url) |
|
30 |
function deconstruct_url(url, use_limits=true)
|
|
29 | 31 |
{ |
32 |
const max = MAX; |
|
33 |
if (!use_limits) { |
|
34 |
for (key in MAX) |
|
35 |
max[key] = Infinity; |
|
36 |
} |
|
37 |
|
|
30 | 38 |
const proto_match = proto_regex.exec(url); |
31 | 39 |
if (proto_match === null) |
32 |
return undefined;
|
|
40 |
throw `bad url '${url}'`;
|
|
33 | 41 |
|
34 | 42 |
const deco = {proto: proto_match[1]}; |
35 | 43 |
|
... | ... | |
37 | 45 |
deco.path = file_regex.exec(proto_match[2])[1]; |
38 | 46 |
} else if (deco.proto === "ftp") { |
39 | 47 |
[deco.domain, deco.path] = ftp_regex.exec(proto_match[2]).slice(2, 4); |
40 |
} else { |
|
48 |
} else if (deco.proto === "http" || deco.proto === "https") {
|
|
41 | 49 |
const http_match = http_regex.exec(proto_match[2]); |
42 | 50 |
if (!http_match) |
43 | 51 |
return undefined; |
44 | 52 |
[deco.domain, deco.path, deco.query] = http_match.slice(1, 4); |
53 |
} else { |
|
54 |
throw `unsupported protocol in url '${url}'`; |
|
45 | 55 |
} |
46 | 56 |
|
47 |
const leading_dash = deco.path[0] === "/"; |
|
48 | 57 |
deco.trailing_dash = deco.path[deco.path.length - 1] === "/"; |
49 | 58 |
|
50 | 59 |
if (deco.domain) { |
51 |
if (deco.domain.length > MAX_DOMAIN_CHARS) {
|
|
60 |
if (deco.domain.length > max.DOMAIN_CHARS) {
|
|
52 | 61 |
const idx = deco.domain.indexOf(".", deco.domain.length - |
53 |
MAX_DOMAIN_CHARS);
|
|
62 |
max.DOMAIN_CHARS);
|
|
54 | 63 |
if (idx === -1) |
55 | 64 |
deco.domain = []; |
56 | 65 |
else |
... | ... | |
59 | 68 |
deco.domain_truncated = true; |
60 | 69 |
} |
61 | 70 |
|
62 |
if (deco.path.length > MAX_URL_PATH_CHARS) {
|
|
71 |
if (deco.path.length > max.URL_PATH_CHARS) {
|
|
63 | 72 |
deco.path = deco.path.substring(0, deco.path.lastIndexOf("/")); |
64 | 73 |
deco.path_truncated = true; |
65 | 74 |
} |
... | ... | |
67 | 76 |
|
68 | 77 |
if (typeof deco.domain === "string") { |
69 | 78 |
deco.domain = deco.domain.split("."); |
70 |
if (deco.domain.splice(0, deco.domain.length - MAX_DOMAIN_LEN).length
|
|
79 |
if (deco.domain.splice(0, deco.domain.length - max.DOMAIN_LEN).length
|
|
71 | 80 |
> 0) |
72 | 81 |
deco.domain_truncated = true; |
73 | 82 |
} |
74 | 83 |
|
75 | 84 |
deco.path = deco.path.split("/").filter(s => s !== ""); |
76 |
if (deco.domain && deco.path.splice(MAX_URL_PATH_LEN).length > 0)
|
|
85 |
if (deco.domain && deco.path.splice(max.URL_PATH_LEN).length > 0)
|
|
77 | 86 |
deco.path_truncated = true; |
78 |
if (leading_dash || deco.path.length === 0) |
|
79 |
deco.path.unshift(""); |
|
80 | 87 |
|
81 | 88 |
return deco; |
82 | 89 |
} |
... | ... | |
98 | 105 |
|
99 | 106 |
function* each_path_pattern(deco) |
100 | 107 |
{ |
101 |
for (let slice = deco.path.length; slice > 0; slice--) { |
|
102 |
const path_part = deco.path.slice(0, slice).join("/");
|
|
108 |
for (let slice = deco.path.length; slice >= 0; slice--) {
|
|
109 |
const path_part = ["", ...deco.path.slice(0, slice)].join("/");
|
|
103 | 110 |
const path_wildcards = []; |
104 | 111 |
if (slice === deco.path.length && !deco.path_truncated) { |
105 |
if (deco.trailing_dash) |
|
112 |
if (deco.trailing_dash && path_part !== "")
|
|
106 | 113 |
yield path_part + "/"; |
107 |
yield path_part; |
|
114 |
if (part_part !== "" || deco.proto !== "file") |
|
115 |
yield path_part; |
|
108 | 116 |
} |
109 | 117 |
if (slice === deco.path.length - 1 && !deco.path_truncated && |
110 | 118 |
deco.path[slice] !== "*") |
... | ... | |
137 | 145 |
/* |
138 | 146 |
* EXPORTS_START |
139 | 147 |
* EXPORT each_url_pattern |
148 |
* EXPORT deconstruct_url |
|
140 | 149 |
* EXPORTS_END |
141 | 150 |
*/ |
Also available in: Unified diff
implement more efficient querying of URL patterns