/**
 * Hachette operations on page url patterns
 *
 * Copyright (C) 2021 Wojtek Kosior
 * Redistribution terms are gathered in the `copyright' file.
 */

/* Splits "<proto>://<rest>" into the protocol name and everything after it. */
const proto_regex = /^(\w+):\/\/(.*)$/;

/* Regex source fragments for the individual url components. */
const user_re = "[^/?#@]+@";
const domain_re = "[^/?#]+";
const path_re = "[^?#]*";
const query_re = "\\??[^#]*";

/* Groups: 1 - domain, 2 - path, 3 - query. */
const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`);

/* Groups: 1 - path. */
const file_regex = new RegExp(`^(${path_re}).*`);

/* Groups: 1 - optional "user@", 2 - domain, 3 - path. */
const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`);

/*
 * Break a url (or a url pattern) into its components.
 *
 * Returns undefined when `url' has no "<proto>://" prefix or when the part
 * after the prefix does not fit the regex used for that protocol. Otherwise
 * returns an object with:
 *   proto          - protocol name, without "://"
 *   domain         - array of "."-separated labels (absent for "file")
 *   path           - array of non-empty "/"-separated segments; an empty
 *                    string is prepended when the path started with "/" or
 *                    contained no segments at all
 *   trailing_dash  - true when the path ended with "/"
 *   query          - query string (only for protocols other than "file"
 *                    and "ftp")
 */
function deconstruct_url(url)
{
    const proto_match = proto_regex.exec(url);
    if (proto_match === null)
        return undefined;

    const deco = {proto: proto_match[1]};
    const rest = proto_match[2];

    if (deco.proto === "file") {
        const file_match = file_regex.exec(rest);
        if (!file_match)
            return undefined;
        deco.path = file_match[1];
    } else if (deco.proto === "ftp") {
        /*
         * The domain part is mandatory in ftp_regex, so inputs like "ftp://"
         * or "ftp:///dir" do not match. Report them as undeconstructible
         * instead of dereferencing null.
         */
        const ftp_match = ftp_regex.exec(rest);
        if (!ftp_match)
            return undefined;
        /* Group 1 (the user) is deliberately dropped. */
        [deco.domain, deco.path] = ftp_match.slice(2, 4);
    } else {
        const http_match = http_regex.exec(rest);
        if (!http_match)
            return undefined;
        [deco.domain, deco.path, deco.query] = http_match.slice(1, 4);
    }

    if (deco.domain)
        deco.domain = deco.domain.split(".");

    /* Both flags have to be computed before the path string is split. */
    const leading_dash = deco.path[0] === "/";
    deco.trailing_dash = deco.path[deco.path.length - 1] === "/";
    deco.path = deco.path.split("/").filter(s => s !== "");
    if (leading_dash || deco.path.length === 0)
        deco.path.unshift("");

    return deco;
}

/*
 * Tell whether a url's domain fits a pattern's domain. Both arguments are
 * arrays of domain labels (as produced by splitting on ".").
 * Be sane: both arguments should be arrays of length >= 2
 *
 * Labels are compared right-to-left. Only the leftmost pattern label may act
 * as a wildcard:
 *   "*"    - url has exactly as many labels as the pattern
 *   "**"   - url has more labels than the pattern
 *   "***"  - url has at least as many labels as the pattern, or exactly one
 *            label fewer (i.e. "***" may stand for no label at all)
 */
function domain_matches(url_domain, pattern_domain)
{
    const length_difference = url_domain.length - pattern_domain.length;

    /* `i' counts labels from the right end, starting at 1. */
    for (let i = 1; i <= url_domain.length; i++) {
        const url_part = url_domain[url_domain.length - i];
        const pattern_part = pattern_domain[pattern_domain.length - i];

        /* Reached the leftmost pattern label - the verdict is decided here. */
        if (pattern_domain.length === i) {
            if (pattern_part === "*")
                return length_difference === 0;
            if (pattern_part === "**")
                return length_difference > 0;
            if (pattern_part === "***")
                return true;
            return length_difference === 0 && pattern_part === url_part;
        }

        if (pattern_part !== url_part)
            return false;
    }

    /*
     * All url labels were consumed without reaching the leftmost pattern
     * label. That is only fine if the single extra label is "***".
     */
    return pattern_domain.length === url_domain.length + 1 &&
        pattern_domain[0] === "***";
}

/*
 * Tell whether a url's path fits a pattern's path. Both paths are arrays of
 * segments; the `*_trailing_dash' flags say whether the respective path
 * ended with "/".
 *
 * Segments are compared left-to-right. Only the last pattern segment may act
 * as a wildcard:
 *   "*"    - url path has exactly as many segments as the pattern
 *   "**"   - url path has more segments than the pattern (or the url's
 *            segment at that position is literally "**")
 *   "***"  - url path has at least as many segments as the pattern, or
 *            exactly one segment fewer ("***" standing for no segment)
 *
 * A pattern with a trailing "/" only accepts urls that also have one; this
 * is enforced for literal matches, not for "*" and "***".
 */
function path_matches(url_path, url_trailing_dash,
                      pattern_path, pattern_trailing_dash)
{
    const dashes_ok = !(pattern_trailing_dash && !url_trailing_dash);

    if (pattern_path.length === 0)
        return url_path.length === 0 && dashes_ok;

    const length_difference = url_path.length - pattern_path.length;

    for (let i = 0; i < url_path.length; i++) {
        /* Reached the last pattern segment - the verdict is decided here. */
        if (pattern_path.length === i + 1) {
            if (pattern_path[i] === "*")
                return length_difference === 0;
            if (pattern_path[i] === "**") {
                return length_difference > 0 ||
                    (url_path[i] === "**" && dashes_ok);
            }
            if (pattern_path[i] === "***")
                return length_difference >= 0;
            return length_difference === 0 &&
                pattern_path[i] === url_path[i] && dashes_ok;
        }

        if (pattern_path[i] !== url_path[i])
            return false;
    }

    /*
     * Every url segment matched but the pattern has segments left. Accept
     * only a single extra "***", mirroring how domain_matches() lets "***"
     * stand for nothing. Without this, patterns such as "/a/***" (which
     * each_path_pattern() produces for "/a") would never match "/a".
     */
    return pattern_path.length === url_path.length + 1 &&
        pattern_path[pattern_path.length - 1] === "***";
}

/*
 * Tell whether `url' is matched by `pattern'. Both are deconstructed with
 * deconstruct_url(); if either cannot be deconstructed, the problem is logged
 * and false is returned.
 *
 * A match requires the same protocol, domain presence on both sides or on
 * neither, matching domains (when present) and matching paths. A "file"
 * pattern that ends with "/" never matches.
 */
function url_matches(url, pattern)
{
    const url_deco = deconstruct_url(url);
    const pattern_deco = deconstruct_url(pattern);

    if (url_deco === undefined || pattern_deco === undefined) {
        console.log(`bad comparison: ${url} and ${pattern}`);
        return false;
    }

    return pattern_deco.proto === url_deco.proto &&
        !(pattern_deco.proto === "file" && pattern_deco.trailing_dash) &&
        !!url_deco.domain === !!pattern_deco.domain &&
        (!url_deco.domain ||
         domain_matches(url_deco.domain, pattern_deco.domain)) &&
        path_matches(url_deco.path, url_deco.trailing_dash,
                     pattern_deco.path, pattern_deco.trailing_dash);
}

/*
 * Yield domain patterns built from `domain_segments' (an array of domain
 * labels). For each suffix of the label list, going from the full domain to
 * the last label alone, this yields:
 *   - the plain domain (full domain only),
 *   - "*.<suffix>" when exactly one leading label was dropped,
 *   - "**.<suffix>" when more than one leading label was dropped,
 *   - "***.<suffix>" always.
 */
function* each_domain_pattern(domain_segments)
{
    for (let slice = 0; slice < domain_segments.length; slice++) {
        const domain_part = domain_segments.slice(slice).join(".");

        if (slice === 0)
            yield domain_part;
        else if (slice === 1)
            yield "*." + domain_part;
        else
            yield "**." + domain_part;

        yield "***." + domain_part;
    }
}

/*
 * Yield path patterns built from `path_segments' (an array of path segments)
 * and `trailing_dash' (whether the path ended with "/"). Prefixes are
 * visited from the full path down to the first segment alone. For each
 * prefix this yields:
 *   - the path itself, preceded by its "/"-terminated form when
 *     `trailing_dash' is set (full path only),
 *   - "<prefix>/*" when exactly one segment was dropped and that segment is
 *     not itself "*",
 *   - "<prefix>/**" when more than one segment was dropped,
 *   - "<prefix>/***", except when no more than one segment was dropped and
 *     the path's last segment is itself "***".
 */
function* each_path_pattern(path_segments, trailing_dash)
{
    const segment_count = path_segments.length;

    for (let slice = segment_count; slice > 0; slice--) {
        const path_part = path_segments.slice(0, slice).join("/");

        if (slice === segment_count) {
            if (trailing_dash)
                yield path_part + "/";
            yield path_part;
        }
        if (slice === segment_count - 1 && path_segments[slice] !== "*")
            yield path_part + "/*";
        if (slice < segment_count - 1)
            yield path_part + "/**";
        if (slice < segment_count - 1 ||
            path_segments[segment_count - 1] !== "***")
            yield path_part + "/***";
    }
}

/*
 * Generate every possible pattern that matches url.
 *
 * The url is deconstructed with deconstruct_url(); if that fails, the problem
 * is logged and the generator finishes without yielding anything. Otherwise
 * each domain pattern from each_domain_pattern() (or a single empty domain
 * when the url has none) is combined with each path pattern from
 * each_path_pattern().
 */
function* each_url_pattern(url)
{
    const deco = deconstruct_url(url);

    if (deco === undefined) {
        console.log("bad url format", url);
        return false;
    }

    const all_domains = deco.domain ? each_domain_pattern(deco.domain) : [""];
    for (const domain of all_domains) {
        for (const path of each_path_pattern(deco.path, deco.trailing_dash))
            yield `${deco.proto}://${domain}${path}`;
    }
}

/*
 * EXPORTS_START
 * EXPORT url_matches
 * EXPORT each_url_pattern
 * EXPORTS_END
 */