1
|
/**
|
2
|
* Hydrilla/Lernette operations on page url patterns
|
3
|
*
|
4
|
* Copyright (C) 2021 Wojtek Kosior
|
5
|
* Redistribution terms are gathered in the `copyright' file.
|
6
|
*/
|
7
|
|
8
|
/* Regex fragments (as strings) for the individual components of a URL. */
const proto_re = "[a-zA-Z]*:\/\/";
const domain_re = "[^/?#]+";
const segments_re = "/[^?#]*";
const query_re = "\\?[^#]*";

/*
 * Whole-URL regex. Capture groups, in order: protocol (including "://"),
 * domain, optional path (including leading "/") and optional query (including
 * leading "?"). Whatever follows (e.g. a "#fragment") is accepted, not captured.
 */
const url_regex = new RegExp([
    "^",
    `(${proto_re})`,
    `(${domain_re})`,
    `(${segments_re})?`,
    `(${query_re})?`,
    "#?.*$"
].join(""));
|
21
|
|
22
|
/*
 * Split url into its components using url_regex.
 *
 * Returns undefined when url does not match url_regex. Otherwise returns an
 * object with:
 *   proto              - protocol string including "://"
 *   domain             - array of the dot-separated domain labels
 *   path               - array of non-empty path segments, always preceded by
 *                        a single "" element
 *   query              - query string including "?", or undefined
 *   path_trailing_dash - whether the path ends with "/" (undefined when the
 *                        url has no path at all)
 */
function deconstruct_url(url)
{
    const match = url_regex.exec(url);
    if (match === null)
        return undefined;

    const [, proto, raw_domain, raw_path, query] = match;

    const domain = raw_domain.split(".");
    const path_trailing_dash = raw_path && raw_path.endsWith("/");

    /* Empty segments (from "//" or a trailing "/") are dropped on purpose. */
    const segments = (raw_path || "").split("/").filter(s => s !== "");
    const path = ["", ...segments];

    return {proto, domain, path, query, path_trailing_dash};
}
|
38
|
|
39
|
/*
 * Check whether the domain of a URL matches the domain of a pattern. Both
 * arguments are arrays of domain labels (callers are expected to pass arrays
 * of length >= 2).
 *
 * Labels are compared right to left. Only the leftmost pattern label is
 * treated as a wildcard:
 *   "*"   - the URL must have exactly as many labels as the pattern
 *   "**"  - the URL must have more labels than the pattern
 *   "***" - the URL may have any number of labels in its place, including none
 */
function domain_matches(url_domain, pattern_domain)
{
    const url_len = url_domain.length;
    const pattern_len = pattern_domain.length;
    const extra_labels = url_len - pattern_len;

    for (let u = url_len - 1, p = pattern_len - 1; u >= 0; u--, p--) {
        const url_label = url_domain[u];
        const pattern_label = pattern_domain[p];

        if (p !== 0) {
            /* Ordinary position: labels have to be identical. */
            if (pattern_label !== url_label)
                return false;
            continue;
        }

        /* Leftmost pattern label decides the result. */
        switch (pattern_label) {
        case "*":
            return extra_labels === 0;
        case "**":
            return extra_labels > 0;
        case "***":
            return true;
        default:
            return extra_labels === 0 && pattern_label === url_label;
        }
    }

    /*
     * The URL ran out of labels first. That is only acceptable when the single
     * remaining pattern label is "***" standing for zero labels.
     */
    return pattern_len === url_len + 1 && pattern_domain[0] === "***";
}
|
65
|
|
66
|
/*
 * Check whether the path of a URL matches the path of a pattern. Both paths
 * are arrays of segments; the *_trailing_dash arguments tell whether the
 * respective path ended with "/".
 *
 * A pattern with a trailing dash only matches URLs that also have one (this is
 * enforced for literal last segments and for a literal "**" segment).
 *
 * Only the last pattern segment is treated as a wildcard:
 *   "*"   - stands for exactly one URL segment
 *   "**"  - stands for two or more URL segments
 *   "***" - stands for zero or more URL segments
 */
function path_matches(url_path, url_trailing_dash,
                      pattern_path, pattern_trailing_dash)
{
    const dashes_ok = !(pattern_trailing_dash && !url_trailing_dash);

    if (pattern_path.length === 0)
        return url_path.length === 0 && dashes_ok;

    const length_difference = url_path.length - pattern_path.length;

    for (let i = 0; i < url_path.length; i++) {
        if (pattern_path.length === i + 1) {
            /* Last pattern segment: it alone decides the result. */
            if (pattern_path[i] === "*")
                return length_difference === 0;
            if (pattern_path[i] === "**") {
                /* Also accept a URL segment that literally reads "**". */
                return length_difference > 0 ||
                    (url_path[i] === "**" && dashes_ok);
            }
            if (pattern_path[i] === "***")
                return length_difference >= 0;
            return length_difference === 0 &&
                pattern_path[i] === url_path[i] && dashes_ok;
        }

        if (pattern_path[i] !== url_path[i])
            return false;
    }

    /*
     * Every URL segment equalled its pattern counterpart but the pattern is
     * longer. This still is a match when the only surplus pattern segment is a
     * final "***" standing for zero segments.
     */
    return length_difference === -1 &&
        pattern_path[pattern_path.length - 1] === "***";
}
|
96
|
|
97
|
/*
 * Tell whether url is matched by pattern (both given as strings).
 *
 * Both strings are split with deconstruct_url(). If either cannot be split, a
 * message is logged and false is returned. Otherwise the protocols have to be
 * identical and both the domain and the path have to match. The query part is
 * not compared.
 */
function url_matches(url, pattern)
{
    const parsed_url = deconstruct_url(url);
    const parsed_pattern = deconstruct_url(pattern);

    const both_parsed =
          parsed_url !== undefined && parsed_pattern !== undefined;
    if (!both_parsed) {
        console.log(`bad comparison: ${url} and ${pattern}`);
        return false;
    }

    if (parsed_url.proto !== parsed_pattern.proto)
        return false;

    const {domain: url_domain, path: url_path} = parsed_url;
    const {domain: pattern_domain, path: pattern_path} = parsed_pattern;

    return domain_matches(url_domain, pattern_domain) &&
        path_matches(url_path, parsed_url.path_trailing_dash,
                     pattern_path, parsed_pattern.path_trailing_dash);
}
|
114
|
|
115
|
/*
 * Call callback for every possible pattern that matches url. Return when there
 * are no more patterns or callback returns false.
 *
 * Patterns are produced from the most specific domain to the least specific
 * one and, for each domain pattern, from the longest path to the shortest one.
 * If url cannot be deconstructed, a message is logged and callback is never
 * called.
 *
 * Wildcards are only emitted where they can actually match: "*" replaces
 * exactly one label/segment, "**" replaces two or more and "***" replaces any
 * number of them (including none).
 */
function for_each_possible_pattern(url, callback)
{
    const deco = deconstruct_url(url);

    if (deco === undefined) {
        console.log("bad url format", url);
        return;
    }

    const {proto, domain, path, path_trailing_dash} = deco;

    /* Path patterns don't depend on the domain, so build them just once. */
    const path_patterns = [];
    for (let s_slice = path.length; s_slice > 0; s_slice--) {
        const path_part = path.slice(0, s_slice).join("/");
        /* Number of trailing URL segments the wildcard has to stand for. */
        const dropped = path.length - s_slice;

        if (dropped === 0) {
            if (path_trailing_dash)
                path_patterns.push(path_part + "/");
            path_patterns.push(path_part);
        }
        /*
         * When the single dropped segment literally reads "*" (or "***"
         * below), the resulting pattern equals the full path emitted earlier;
         * skip it to avoid a duplicate.
         */
        if (dropped === 1 && path[s_slice] !== "*")
            path_patterns.push(path_part + "/*");
        if (dropped > 1)
            path_patterns.push(path_part + "/**");
        if (dropped !== 1 || path[s_slice] !== "***")
            path_patterns.push(path_part + "/***");
    }

    for (let d_slice = 0; d_slice < domain.length; d_slice++) {
        const domain_part = domain.slice(d_slice).join(".");
        /* d_slice is the number of leading labels the wildcard stands for. */
        const domain_wildcards = [];
        if (d_slice === 0)
            domain_wildcards.push("");
        if (d_slice === 1)
            domain_wildcards.push("*.");
        if (d_slice > 1)
            domain_wildcards.push("**.");
        domain_wildcards.push("***.");

        for (const domain_wildcard of domain_wildcards) {
            const domain_pattern = domain_wildcard + domain_part;

            for (const path_pattern of path_patterns) {
                const pattern = proto + domain_pattern + path_pattern;

                if (callback(pattern) === false)
                    return;
            }
        }
    }
}
|
172
|
|
173
|
/*
 * Return an array with every pattern for_each_possible_pattern() produces for
 * url, in the order they are produced. The array is empty when no pattern is
 * produced.
 */
function possible_patterns(url)
{
    const patterns = [];

    /*
     * Array.prototype.push needs `this` to be the array, so it cannot be
     * handed over as a bare function reference; wrap it instead. The wrapper
     * returns nothing, hence iteration is never cut short.
     */
    for_each_possible_pattern(url, pattern => {
        patterns.push(pattern);
    });

    return patterns;
}
|
180
|
|
181
|
/*
|
182
|
* EXPORTS_START
|
183
|
* EXPORT url_matches
|
184
|
* EXPORT for_each_possible_pattern
|
185
|
* EXPORT possible_patterns
|
186
|
* EXPORTS_END
|
187
|
*/
|