Revision 93dd7360
Added by koszko almost 2 years ago
| common/patterns.js | ||
|---|---|---|
| 7 | 7 |
* Redistribution terms are gathered in the `copyright' file. |
| 8 | 8 |
*/ |
| 9 | 9 |
|
| 10 |
const MAX_URL_PATH_LEN = 12; |
|
| 11 |
const MAX_URL_PATH_CHARS = 255; |
|
| 12 |
const MAX_DOMAIN_LEN = 7; |
|
| 13 |
const MAX_DOMAIN_CHARS = 100; |
|
| 10 |
const MAX = {
|
|
| 11 |
URL_PATH_LEN: 12, |
|
| 12 |
URL_PATH_CHARS: 255, |
|
| 13 |
DOMAIN_LEN: 7, |
|
| 14 |
DOMAIN_CHARS: 100 |
|
| 15 |
}; |
|
| 14 | 16 |
|
| 15 | 17 |
const proto_regex = /^(\w+):\/\/(.*)$/; |
| 16 | 18 |
|
| 17 | 19 |
const user_re = "[^/?#@]+@" |
| 18 |
const domain_re = "[^/?#]+";
|
|
| 20 |
const domain_re = "[.a-zA-Z0-9-]+";
|
|
| 19 | 21 |
const path_re = "[^?#]*"; |
| 20 | 22 |
const query_re = "\\??[^#]*"; |
| 21 | 23 |
|
| ... | ... | |
| 25 | 27 |
|
| 26 | 28 |
const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`);
|
| 27 | 29 |
|
| 28 |
function deconstruct_url(url) |
|
| 30 |
function deconstruct_url(url, use_limits=true)
|
|
| 29 | 31 |
{
|
| 32 |
const max = MAX; |
|
| 33 |
if (!use_limits) {
|
|
| 34 |
for (key in MAX) |
|
| 35 |
max[key] = Infinity; |
|
| 36 |
} |
|
| 37 |
|
|
| 30 | 38 |
const proto_match = proto_regex.exec(url); |
| 31 | 39 |
if (proto_match === null) |
| 32 |
return undefined;
|
|
| 40 |
throw `bad url '${url}'`;
|
|
| 33 | 41 |
|
| 34 | 42 |
const deco = {proto: proto_match[1]};
|
| 35 | 43 |
|
| ... | ... | |
| 37 | 45 |
deco.path = file_regex.exec(proto_match[2])[1]; |
| 38 | 46 |
} else if (deco.proto === "ftp") {
|
| 39 | 47 |
[deco.domain, deco.path] = ftp_regex.exec(proto_match[2]).slice(2, 4); |
| 40 |
} else {
|
|
| 48 |
} else if (deco.proto === "http" || deco.proto === "https") {
|
|
| 41 | 49 |
const http_match = http_regex.exec(proto_match[2]); |
| 42 | 50 |
if (!http_match) |
| 43 | 51 |
return undefined; |
| 44 | 52 |
[deco.domain, deco.path, deco.query] = http_match.slice(1, 4); |
| 53 |
deco.domain = deco.domain.toLowerCase(); |
|
| 54 |
} else {
|
|
| 55 |
throw `unsupported protocol in url '${url}'`;
|
|
| 45 | 56 |
} |
| 46 | 57 |
|
| 47 |
const leading_dash = deco.path[0] === "/"; |
|
| 48 | 58 |
deco.trailing_dash = deco.path[deco.path.length - 1] === "/"; |
| 49 | 59 |
|
| 50 | 60 |
if (deco.domain) {
|
| 51 |
if (deco.domain.length > MAX_DOMAIN_CHARS) {
|
|
| 61 |
if (deco.domain.length > max.DOMAIN_CHARS) {
|
|
| 52 | 62 |
const idx = deco.domain.indexOf(".", deco.domain.length -
|
| 53 |
MAX_DOMAIN_CHARS);
|
|
| 63 |
max.DOMAIN_CHARS);
|
|
| 54 | 64 |
if (idx === -1) |
| 55 | 65 |
deco.domain = []; |
| 56 | 66 |
else |
| ... | ... | |
| 59 | 69 |
deco.domain_truncated = true; |
| 60 | 70 |
} |
| 61 | 71 |
|
| 62 |
if (deco.path.length > MAX_URL_PATH_CHARS) {
|
|
| 72 |
if (deco.path.length > max.URL_PATH_CHARS) {
|
|
| 63 | 73 |
deco.path = deco.path.substring(0, deco.path.lastIndexOf("/"));
|
| 64 | 74 |
deco.path_truncated = true; |
| 65 | 75 |
} |
| ... | ... | |
| 67 | 77 |
|
| 68 | 78 |
if (typeof deco.domain === "string") {
|
| 69 | 79 |
deco.domain = deco.domain.split(".");
|
| 70 |
if (deco.domain.splice(0, deco.domain.length - MAX_DOMAIN_LEN).length
|
|
| 80 |
if (deco.domain.splice(0, deco.domain.length - max.DOMAIN_LEN).length
|
|
| 71 | 81 |
> 0) |
| 72 | 82 |
deco.domain_truncated = true; |
| 73 | 83 |
} |
| 74 | 84 |
|
| 75 | 85 |
deco.path = deco.path.split("/").filter(s => s !== "");
|
| 76 |
if (deco.domain && deco.path.splice(MAX_URL_PATH_LEN).length > 0)
|
|
| 86 |
if (deco.domain && deco.path.splice(max.URL_PATH_LEN).length > 0)
|
|
| 77 | 87 |
deco.path_truncated = true; |
| 78 |
if (leading_dash || deco.path.length === 0) |
|
| 79 |
deco.path.unshift("");
|
|
| 80 | 88 |
|
| 81 | 89 |
return deco; |
| 82 | 90 |
} |
| ... | ... | |
| 98 | 106 |
|
| 99 | 107 |
function* each_path_pattern(deco) |
| 100 | 108 |
{
|
| 101 |
for (let slice = deco.path.length; slice > 0; slice--) {
|
|
| 102 |
const path_part = deco.path.slice(0, slice).join("/");
|
|
| 109 |
for (let slice = deco.path.length; slice >= 0; slice--) {
|
|
| 110 |
const path_part = ["", ...deco.path.slice(0, slice)].join("/");
|
|
| 103 | 111 |
const path_wildcards = []; |
| 104 | 112 |
if (slice === deco.path.length && !deco.path_truncated) {
|
| 105 | 113 |
if (deco.trailing_dash) |
| 106 | 114 |
yield path_part + "/"; |
| 107 |
yield path_part; |
|
| 115 |
if (slice > 0 || deco.proto !== "file") |
|
| 116 |
yield path_part; |
|
| 108 | 117 |
} |
| 109 | 118 |
if (slice === deco.path.length - 1 && !deco.path_truncated && |
| 110 | 119 |
deco.path[slice] !== "*") |
| ... | ... | |
| 137 | 146 |
/* |
| 138 | 147 |
* EXPORTS_START |
| 139 | 148 |
* EXPORT each_url_pattern |
| 149 |
* EXPORT deconstruct_url |
|
| 140 | 150 |
* EXPORTS_END |
| 141 | 151 |
*/ |
Also available in: Unified diff
improve unit testing approach
Unit tests were moved to their own subdirectory.
Fixtures common to many unit tests were moved to test/unit/conftest.py.
A facility to execute scripts in the page's global scope was added.
A workaround was employed to present information about errors in injected scripts.
Sample unit tests for regexes in common/patterns.js were added.