Revision 93dd7360
Added by koszko almost 2 years ago
common/patterns.js | ||
---|---|---|
7 | 7 |
* Redistribution terms are gathered in the `copyright' file. |
8 | 8 |
*/ |
9 | 9 |
|
10 |
const MAX_URL_PATH_LEN = 12; |
|
11 |
const MAX_URL_PATH_CHARS = 255; |
|
12 |
const MAX_DOMAIN_LEN = 7; |
|
13 |
const MAX_DOMAIN_CHARS = 100; |
|
10 |
const MAX = { |
|
11 |
URL_PATH_LEN: 12, |
|
12 |
URL_PATH_CHARS: 255, |
|
13 |
DOMAIN_LEN: 7, |
|
14 |
DOMAIN_CHARS: 100 |
|
15 |
}; |
|
14 | 16 |
|
15 | 17 |
const proto_regex = /^(\w+):\/\/(.*)$/; |
16 | 18 |
|
17 | 19 |
const user_re = "[^/?#@]+@" |
18 |
const domain_re = "[^/?#]+";
|
|
20 |
const domain_re = "[.a-zA-Z0-9-]+";
|
|
19 | 21 |
const path_re = "[^?#]*"; |
20 | 22 |
const query_re = "\\??[^#]*"; |
21 | 23 |
|
... | ... | |
25 | 27 |
|
26 | 28 |
const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`); |
27 | 29 |
|
28 |
function deconstruct_url(url) |
|
30 |
function deconstruct_url(url, use_limits=true)
|
|
29 | 31 |
{ |
32 |
const max = MAX; |
|
33 |
if (!use_limits) { |
|
34 |
for (key in MAX) |
|
35 |
max[key] = Infinity; |
|
36 |
} |
|
37 |
|
|
30 | 38 |
const proto_match = proto_regex.exec(url); |
31 | 39 |
if (proto_match === null) |
32 |
return undefined;
|
|
40 |
throw `bad url '${url}'`;
|
|
33 | 41 |
|
34 | 42 |
const deco = {proto: proto_match[1]}; |
35 | 43 |
|
... | ... | |
37 | 45 |
deco.path = file_regex.exec(proto_match[2])[1]; |
38 | 46 |
} else if (deco.proto === "ftp") { |
39 | 47 |
[deco.domain, deco.path] = ftp_regex.exec(proto_match[2]).slice(2, 4); |
40 |
} else { |
|
48 |
} else if (deco.proto === "http" || deco.proto === "https") {
|
|
41 | 49 |
const http_match = http_regex.exec(proto_match[2]); |
42 | 50 |
if (!http_match) |
43 | 51 |
return undefined; |
44 | 52 |
[deco.domain, deco.path, deco.query] = http_match.slice(1, 4); |
53 |
deco.domain = deco.domain.toLowerCase(); |
|
54 |
} else { |
|
55 |
throw `unsupported protocol in url '${url}'`; |
|
45 | 56 |
} |
46 | 57 |
|
47 |
const leading_dash = deco.path[0] === "/"; |
|
48 | 58 |
deco.trailing_dash = deco.path[deco.path.length - 1] === "/"; |
49 | 59 |
|
50 | 60 |
if (deco.domain) { |
51 |
if (deco.domain.length > MAX_DOMAIN_CHARS) {
|
|
61 |
if (deco.domain.length > max.DOMAIN_CHARS) {
|
|
52 | 62 |
const idx = deco.domain.indexOf(".", deco.domain.length - |
53 |
MAX_DOMAIN_CHARS);
|
|
63 |
max.DOMAIN_CHARS);
|
|
54 | 64 |
if (idx === -1) |
55 | 65 |
deco.domain = []; |
56 | 66 |
else |
... | ... | |
59 | 69 |
deco.domain_truncated = true; |
60 | 70 |
} |
61 | 71 |
|
62 |
if (deco.path.length > MAX_URL_PATH_CHARS) {
|
|
72 |
if (deco.path.length > max.URL_PATH_CHARS) {
|
|
63 | 73 |
deco.path = deco.path.substring(0, deco.path.lastIndexOf("/")); |
64 | 74 |
deco.path_truncated = true; |
65 | 75 |
} |
... | ... | |
67 | 77 |
|
68 | 78 |
if (typeof deco.domain === "string") { |
69 | 79 |
deco.domain = deco.domain.split("."); |
70 |
if (deco.domain.splice(0, deco.domain.length - MAX_DOMAIN_LEN).length
|
|
80 |
if (deco.domain.splice(0, deco.domain.length - max.DOMAIN_LEN).length
|
|
71 | 81 |
> 0) |
72 | 82 |
deco.domain_truncated = true; |
73 | 83 |
} |
74 | 84 |
|
75 | 85 |
deco.path = deco.path.split("/").filter(s => s !== ""); |
76 |
if (deco.domain && deco.path.splice(MAX_URL_PATH_LEN).length > 0)
|
|
86 |
if (deco.domain && deco.path.splice(max.URL_PATH_LEN).length > 0)
|
|
77 | 87 |
deco.path_truncated = true; |
78 |
if (leading_dash || deco.path.length === 0) |
|
79 |
deco.path.unshift(""); |
|
80 | 88 |
|
81 | 89 |
return deco; |
82 | 90 |
} |
... | ... | |
98 | 106 |
|
99 | 107 |
function* each_path_pattern(deco) |
100 | 108 |
{ |
101 |
for (let slice = deco.path.length; slice > 0; slice--) { |
|
102 |
const path_part = deco.path.slice(0, slice).join("/");
|
|
109 |
for (let slice = deco.path.length; slice >= 0; slice--) {
|
|
110 |
const path_part = ["", ...deco.path.slice(0, slice)].join("/");
|
|
103 | 111 |
const path_wildcards = []; |
104 | 112 |
if (slice === deco.path.length && !deco.path_truncated) { |
105 | 113 |
if (deco.trailing_dash) |
106 | 114 |
yield path_part + "/"; |
107 |
yield path_part; |
|
115 |
if (slice > 0 || deco.proto !== "file") |
|
116 |
yield path_part; |
|
108 | 117 |
} |
109 | 118 |
if (slice === deco.path.length - 1 && !deco.path_truncated && |
110 | 119 |
deco.path[slice] !== "*") |
... | ... | |
137 | 146 |
/* |
138 | 147 |
* EXPORTS_START |
139 | 148 |
* EXPORT each_url_pattern |
149 |
* EXPORT deconstruct_url |
|
140 | 150 |
* EXPORTS_END |
141 | 151 |
*/ |
Also available in: Unified diff
improve unit testing approach
Unit tests were moved to their own subdirectory.
Fixtures common to many unit tests were moved to test/unit/conftest.py.
A facility to execute scripts in the page's global scope was added.
A workaround was employed to present information about errors in injected scripts.
Sample unit tests for regexes in common/patterns.js were added.