Revision 53837634
Added by koszko about 2 years ago
| common/patterns.js | ||
|---|---|---|
| 5 | 5 |
* Redistribution terms are gathered in the `copyright' file. |
| 6 | 6 |
*/ |
| 7 | 7 |
|
| 8 |
const proto_re = "[a-zA-Z]*:\/\/"; |
|
| 8 |
const proto_regex = /^(\w+):\/\/(.*)$/; |
|
| 9 |
|
|
| 9 | 10 |
const domain_re = "[^/?#]+"; |
| 10 |
const segments_re = "/[^?#]*"; |
|
| 11 |
const query_re = "\\?[^#]*"; |
|
| 12 |
|
|
| 13 |
const url_regex = new RegExp(`\ |
|
| 14 |
^\ |
|
| 15 |
(${proto_re})\
|
|
| 16 |
(${domain_re})\
|
|
| 17 |
(${segments_re})?\
|
|
| 18 |
(${query_re})?\
|
|
| 19 |
#?.*\$\ |
|
| 20 |
`); |
|
| 11 |
const path_re = "[^?#]*"; |
|
| 12 |
const query_re = "\\??[^#]*"; |
|
| 13 |
|
|
| 14 |
const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`);
|
|
| 15 |
|
|
| 16 |
const file_regex = new RegExp(`^(${path_re}).*`);
|
|
| 21 | 17 |
|
| 22 | 18 |
function deconstruct_url(url) |
| 23 | 19 |
{
|
| 24 |
const regex_match = url_regex.exec(url);
|
|
| 25 |
if (regex_match === null)
|
|
| 20 |
const proto_match = proto_regex.exec(url);
|
|
| 21 |
if (proto_match === null)
|
|
| 26 | 22 |
return undefined; |
| 27 | 23 |
|
| 28 |
let [_, proto, domain, path, query] = regex_match;
|
|
| 24 |
const deco = {proto: proto_match[1]};
|
|
| 29 | 25 |
|
| 30 |
domain = domain.split(".");
|
|
| 31 |
let path_trailing_dash = |
|
| 32 |
path && path[path.length - 1] === "/"; |
|
| 33 |
path = (path || "").split("/").filter(s => s !== "");
|
|
| 34 |
path.unshift("");
|
|
| 26 |
if (deco.proto === "file") {
|
|
| 27 |
deco.path = file_regex.exec(proto_match[2])[1]; |
|
| 28 |
} else {
|
|
| 29 |
const http_match = http_regex.exec(proto_match[2]); |
|
| 30 |
if (!http_match) |
|
| 31 |
return undefined; |
|
| 32 |
[deco.domain, deco.path, deco.query] = http_match.slice(1, 4); |
|
| 33 |
deco.domain = deco.domain.split(".");
|
|
| 34 |
} |
|
| 35 | 35 |
|
| 36 |
return {proto, domain, path, query, path_trailing_dash};
|
|
| 36 |
const leading_dash = deco.path[0] === "/"; |
|
| 37 |
deco.trailing_dash = deco.path[deco.path.length - 1] === "/"; |
|
| 38 |
deco.path = deco.path.split("/").filter(s => s !== "");
|
|
| 39 |
if (leading_dash || deco.path.length === 0) |
|
| 40 |
deco.path.unshift("");
|
|
| 41 |
|
|
| 42 |
return deco; |
|
| 37 | 43 |
} |
| 38 | 44 |
|
| 39 | 45 |
/* Be sane: both arguments should be arrays of length >= 2 */ |
| ... | ... | |
| 104 | 110 |
return false |
| 105 | 111 |
} |
| 106 | 112 |
|
| 107 |
if (pattern_deco.proto !== url_deco.proto) |
|
| 108 |
return false; |
|
| 109 |
|
|
| 110 |
return domain_matches(url_deco.domain, pattern_deco.domain) && |
|
| 111 |
path_matches(url_deco.path, url_deco.path_trailing_dash, |
|
| 112 |
pattern_deco.path, pattern_deco.path_trailing_dash); |
|
| 113 |
return pattern_deco.proto === url_deco.proto && |
|
| 114 |
!(pattern_deco.proto === "file" && pattern_deco.trailing_dash) && |
|
| 115 |
!!url_deco.domain === !!pattern_deco.domain && |
|
| 116 |
(!url_deco.domain || |
|
| 117 |
domain_matches(url_deco.domain, pattern_deco.domain)) && |
|
| 118 |
path_matches(url_deco.path, url_deco.trailing_dash, |
|
| 119 |
pattern_deco.path, pattern_deco.trailing_dash); |
|
| 113 | 120 |
} |
| 114 | 121 |
|
| 115 |
/* |
|
| 116 |
* Call callback for every possible pattern that matches url. Return when there |
|
| 117 |
* are no more patterns or callback returns false. |
|
| 118 |
*/ |
|
| 119 |
function for_each_possible_pattern(url, callback) |
|
| 122 |
function* each_domain_pattern(domain_segments) |
|
| 120 | 123 |
{
|
| 121 |
const deco = deconstruct_url(url); |
|
| 122 |
|
|
| 123 |
if (deco === undefined) {
|
|
| 124 |
console.log("bad url format", url);
|
|
| 125 |
return; |
|
| 124 |
for (let slice = 0; slice < domain_segments.length; slice++) {
|
|
| 125 |
const domain_part = domain_segments.slice(slice).join(".");
|
|
| 126 |
const domain_wildcards = []; |
|
| 127 |
if (slice === 0) |
|
| 128 |
yield domain_part; |
|
| 129 |
if (slice === 1) |
|
| 130 |
yield "*." + domain_part; |
|
| 131 |
if (slice > 1) |
|
| 132 |
yield "**." + domain_part; |
|
| 133 |
yield "***." + domain_part; |
|
| 126 | 134 |
} |
| 135 |
} |
|
| 127 | 136 |
|
| 128 |
for (let d_slice = 0; d_slice < deco.domain.length; d_slice++) {
|
|
| 129 |
const domain_part = deco.domain.slice(d_slice).join(".");
|
|
| 130 |
const domain_wildcards = []; |
|
| 131 |
if (d_slice === 0) |
|
| 132 |
domain_wildcards.push("");
|
|
| 133 |
if (d_slice === 1) |
|
| 134 |
domain_wildcards.push("*.");
|
|
| 135 |
if (d_slice > 0) |
|
| 136 |
domain_wildcards.push("**.");
|
|
| 137 |
domain_wildcards.push("***.");
|
|
| 138 |
|
|
| 139 |
for (const domain_wildcard of domain_wildcards) {
|
|
| 140 |
const domain_pattern = domain_wildcard + domain_part; |
|
| 141 |
|
|
| 142 |
for (let s_slice = deco.path.length; s_slice > 0; s_slice--) {
|
|
| 143 |
const path_part = deco.path.slice(0, s_slice).join("/");
|
|
| 144 |
const path_wildcards = []; |
|
| 145 |
if (s_slice === deco.path.length) {
|
|
| 146 |
if (deco.path_trailing_dash) |
|
| 147 |
path_wildcards.push("/");
|
|
| 148 |
path_wildcards.push("");
|
|
| 149 |
} |
|
| 150 |
if (s_slice === deco.path.length - 1 && |
|
| 151 |
deco.path[s_slice] !== "*") |
|
| 152 |
path_wildcards.push("/*");
|
|
| 153 |
if (s_slice < deco.path.length && |
|
| 154 |
(deco.path[s_slice] !== "**" || |
|
| 155 |
s_slice < deco.path.length - 1)) |
|
| 156 |
path_wildcards.push("/**");
|
|
| 157 |
if (deco.path[s_slice] !== "***" || s_slice < deco.path.length) |
|
| 158 |
path_wildcards.push("/***");
|
|
| 159 |
|
|
| 160 |
for (const path_wildcard of path_wildcards) {
|
|
| 161 |
const path_pattern = path_part + path_wildcard; |
|
| 162 |
|
|
| 163 |
const pattern = deco.proto + domain_pattern + path_pattern; |
|
| 164 |
|
|
| 165 |
if (callback(pattern) === false) |
|
| 166 |
return; |
|
| 167 |
} |
|
| 168 |
} |
|
| 137 |
function* each_path_pattern(path_segments, trailing_dash) |
|
| 138 |
{
|
|
| 139 |
for (let slice = path_segments.length; slice > 0; slice--) {
|
|
| 140 |
const path_part = path_segments.slice(0, slice).join("/");
|
|
| 141 |
const path_wildcards = []; |
|
| 142 |
if (slice === path_segments.length) {
|
|
| 143 |
if (trailing_dash) |
|
| 144 |
yield path_part + "/"; |
|
| 145 |
yield path_part; |
|
| 169 | 146 |
} |
| 147 |
if (slice === path_segments.length - 1 && path_segments[slice] !== "*") |
|
| 148 |
yield path_part + "/*"; |
|
| 149 |
if (slice < path_segments.length - 1) |
|
| 150 |
yield path_part + "/**"; |
|
| 151 |
if (slice < path_segments.length - 1 || |
|
| 152 |
path_segments[path_segments.length - 1] !== "***") |
|
| 153 |
yield path_part + "/***"; |
|
| 170 | 154 |
} |
| 171 | 155 |
} |
| 172 | 156 |
|
| 173 |
function possible_patterns(url) |
|
| 157 |
/* Generate every possible pattern that matches url. */ |
|
| 158 |
function* each_url_pattern(url) |
|
| 174 | 159 |
{
|
| 175 |
const patterns = []; |
|
| 176 |
for_each_possible_pattern(url, patterns.push); |
|
| 160 |
const deco = deconstruct_url(url); |
|
| 177 | 161 |
|
| 178 |
return patterns; |
|
| 162 |
if (deco === undefined) {
|
|
| 163 |
console.log("bad url format", url);
|
|
| 164 |
return false; |
|
| 165 |
} |
|
| 166 |
|
|
| 167 |
const all_domains = deco.domain ? each_domain_pattern(deco.domain) : [""]; |
|
| 168 |
for (const domain of all_domains) {
|
|
| 169 |
for (const path of each_path_pattern(deco.path, deco.trailing_dash)) |
|
| 170 |
yield `${deco.proto}://${domain}${path}`;
|
|
| 171 |
} |
|
| 179 | 172 |
} |
| 180 | 173 |
|
| 181 | 174 |
/* |
| 182 | 175 |
* EXPORTS_START |
| 183 | 176 |
* EXPORT url_matches |
| 184 |
* EXPORT for_each_possible_pattern |
|
| 185 |
* EXPORT possible_patterns |
|
| 177 |
* EXPORT each_url_pattern |
|
| 186 | 178 |
* EXPORTS_END |
| 187 | 179 |
*/ |
Also available in: Unified diff
enable whitelisting of the `file://' protocol

This commit also changes the semantics of the triple asterisk wildcard in the URL path.