1
|
/**
|
2
|
* This file is part of Haketilo.
|
3
|
*
|
4
|
* Function: Data structure to query items by URL patterns.
|
5
|
*
|
6
|
* Copyright (C) 2021 Wojtek Kosior
|
7
|
*
|
8
|
* This program is free software: you can redistribute it and/or modify
|
9
|
* it under the terms of the GNU General Public License as published by
|
10
|
* the Free Software Foundation, either version 3 of the License, or
|
11
|
* (at your option) any later version.
|
12
|
*
|
13
|
* This program is distributed in the hope that it will be useful,
|
14
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
* GNU General Public License for more details.
|
17
|
*
|
18
|
* As additional permission under GNU GPL version 3 section 7, you
|
19
|
* may distribute forms of that code without the copy of the GNU
|
20
|
* GPL normally required by section 4, provided you include this
|
21
|
* license notice and, in case of non-source distribution, a URL
|
22
|
* through which recipients can access the Corresponding Source.
|
23
|
* If you modify file(s) with this exception, you may extend this
|
24
|
* exception to your version of the file(s), but you are not
|
25
|
* obligated to do so. If you do not wish to do so, delete this
|
26
|
* exception statement from your version.
|
27
|
*
|
28
|
* As a special exception to the GPL, any HTML file which merely
|
29
|
* makes function calls to this code, and for that purpose
|
30
|
* includes it by reference shall be deemed a separate work for
|
31
|
* copyright law purposes. If you modify this code, you may extend
|
32
|
* this exception to your version of the code, but you are not
|
33
|
* obligated to do so. If you do not wish to do so, delete this
|
34
|
* exception statement from your version.
|
35
|
*
|
36
|
* You should have received a copy of the GNU General Public License
|
37
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
38
|
*
|
39
|
* I, Wojtek Kosior, thereby promise not to sue for violation of this file's
|
40
|
* license. Although I request that you do not make use this code in a
|
41
|
* proprietary program, I am not going to enforce this in court.
|
42
|
*/
|
43
|
|
44
|
/*
|
45
|
* IMPORTS_START
|
46
|
* IMPORT deconstruct_url
|
47
|
* IMPORTS_END
|
48
|
*/
|
49
|
|
50
|
/* "Pattern Tree" is how we refer to the data structure used for querying
|
51
|
* Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
|
52
|
* is to make it possible for given URL to quickly retrieve all known patterns
|
53
|
* that match it.
|
54
|
*/
|
55
|
function empty_node() {
|
56
|
return {
|
57
|
wildcard_matches: [null, null, null],
|
58
|
literal_match: null,
|
59
|
children: {}
|
60
|
};
|
61
|
}
|
62
|
|
63
|
function is_empty_node(tree_node) {
|
64
|
const children = tree_node.children;
|
65
|
for (const key in children) {
|
66
|
if (children.hasOwnProperty(key))
|
67
|
return false;
|
68
|
}
|
69
|
|
70
|
if (Array.reduce(tree_node.wildcard_matches, (a, b) => b && a !== null, 1))
|
71
|
return false;
|
72
|
|
73
|
return tree_node.literal_match === null;
|
74
|
}
|
75
|
|
76
|
const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0;
|
77
|
|
78
|
/*
|
79
|
* Yields all matches of this segments sequence against the tree that starts at
|
80
|
* this node. Results are produces in order from greatest to lowest pattern
|
81
|
* specificity.
|
82
|
*/
|
83
|
function* search_sequence(tree_node, segments)
|
84
|
{
|
85
|
const nodes = [tree_node];
|
86
|
|
87
|
for (const segment of segments) {
|
88
|
const next_node = nodes[nodes.length - 1].children[segment];
|
89
|
if (next_node === undefined)
|
90
|
break;
|
91
|
|
92
|
nodes.push(next_node);
|
93
|
}
|
94
|
|
95
|
const nsegments = segments.length;
|
96
|
|
97
|
const conds = [
|
98
|
/* literal pattern match */
|
99
|
() => nodes.length == nsegments,
|
100
|
/* wildcard pattern matches */
|
101
|
() => nodes.length + 1 == nsegments && segments[nsegments - 1] != "*",
|
102
|
() => nodes.length + 1 < nsegments,
|
103
|
() => nodes.length + 1 != nsegments || segments[nsegments - 1] != "***"
|
104
|
];
|
105
|
|
106
|
while (nodes.length) {
|
107
|
const node = nodes.pop();
|
108
|
const items = [node.literal_match, ...node.wildcard_matches];
|
109
|
|
110
|
for (let i = 0; i < 4; i++) {
|
111
|
if (items[i] !== null && conds[i]())
|
112
|
yield items[i];
|
113
|
}
|
114
|
}
|
115
|
}
|
116
|
|
117
|
/*
|
118
|
* Make item queryable through (this branch of) the Pattern Tree or remove its
|
119
|
* path from there.
|
120
|
*
|
121
|
* item_modifier should be a function that accepts 1 argument, the item stored
|
122
|
* in the tree (or `null` if there wasn't any item there), and returns an item
|
123
|
* that should be used in place of the first one. It is also legal for it to
|
124
|
* return the same item modifying it first. If it returns `null`, it means the
|
125
|
* item should be deleted from the Tree.
|
126
|
*
|
127
|
* If there was not yet any item associated with the tree path designated by
|
128
|
* segments and value returned by item_modifier is not `null`, make the value
|
129
|
* queryable by this path.
|
130
|
*/
|
131
|
function modify_sequence(tree_node, segments, item_modifier)
|
132
|
{
|
133
|
const nodes = [tree_node];
|
134
|
let removed = true;
|
135
|
|
136
|
for (var current_segment of segments) {
|
137
|
wildcards = tree_node.wildcard_matches;
|
138
|
|
139
|
const child = tree_node.children[current_segment] || empty_node();
|
140
|
tree_node.children[current_segment] = child;
|
141
|
tree_node = child;
|
142
|
nodes.push(tree_node);
|
143
|
}
|
144
|
|
145
|
tree_node.literal_match = item_modifier(tree_node.literal_match);
|
146
|
if (tree_node.literal_match !== null)
|
147
|
removed = false;
|
148
|
|
149
|
let i = segments.length;
|
150
|
|
151
|
if (is_wildcard(current_segment)) {
|
152
|
const asterisks = current_segment.length - 1;
|
153
|
const wildcards = nodes[i - 1].wildcard_matches;
|
154
|
wildcards[asterisks] = item_modifier(wildcards[asterisks]);
|
155
|
if (wildcards[asterisks] !== null)
|
156
|
removed = false;
|
157
|
}
|
158
|
|
159
|
while (!removed)
|
160
|
return;
|
161
|
|
162
|
while (i > 0) {
|
163
|
tree_node = nodes[i--];
|
164
|
if (is_empty_node(tree_node))
|
165
|
delete nodes[i].children[segments[i]];
|
166
|
}
|
167
|
}
|
168
|
|
169
|
/* Helper function for modify_tree(). */
|
170
|
function modify_path(tree_node, deco, item_modifier)
|
171
|
{
|
172
|
tree_node = tree_node || empty_node();
|
173
|
modify_sequence(tree_node, deco.path, item_modifier);
|
174
|
return is_empty_node(tree_node) ? null : tree_node;
|
175
|
}
|
176
|
|
177
|
/* Helper function for modify_tree(). */
|
178
|
function modify_domain(tree_node, deco, item_modifier)
|
179
|
{
|
180
|
const path_modifier = branch => modify_path(branch, deco, item_modifier);
|
181
|
tree_node = tree_node || empty_node();
|
182
|
/* We need an array of domain labels ordered most-significant-first. */
|
183
|
modify_sequence(tree_node, [...deco.domain].reverse(), path_modifier);
|
184
|
return is_empty_node(tree_node) ? null : tree_node;
|
185
|
}
|
186
|
|
187
|
/* Helper function for pattern_tree_register() and pattern_tree_deregister(). */
|
188
|
function modify_tree(patterns_by_proto, pattern, item_modifier)
|
189
|
{
|
190
|
/*
|
191
|
* We pass 'false' to disable length limits on URL parts. Length limits are
|
192
|
* mostly useful in case of iteration over all patterns matching given URL.
|
193
|
* Here we don't do that.
|
194
|
*/
|
195
|
const deco = deconstruct_url(pattern, false);
|
196
|
|
197
|
let tree_for_proto = patterns_by_proto[deco.proto];
|
198
|
|
199
|
tree_for_proto = deco.domain === undefined ?
|
200
|
modify_path(tree_for_proto, deco, item_modifier) :
|
201
|
modify_domain(tree_for_proto, deco, item_modifier);
|
202
|
|
203
|
patterns_by_proto[deco.proto] = tree_for_proto;
|
204
|
if (tree_for_proto === null)
|
205
|
delete patterns_by_proto[deco.proto];
|
206
|
}
|
207
|
|
208
|
/*
|
209
|
* Make item queryable through the Pattern Tree that starts with the protocols
|
210
|
* dictionary object passed in the first argument.
|
211
|
*/
|
212
|
function pattern_tree_register(patterns_by_proto, pattern, item_name, item)
|
213
|
{
|
214
|
const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
|
215
|
item_name = key_prefix + item_name;
|
216
|
const add_item = obj => Object.assign(obj || {}, {[item_name]: item});
|
217
|
modify_tree(patterns_by_proto, pattern, add_item);
|
218
|
}
|
219
|
|
220
|
/* Helper function for pattern_tree_deregister(). */
|
221
|
function _remove_item(obj, item_name)
|
222
|
{
|
223
|
obj = obj || {};
|
224
|
delete obj[item_name];
|
225
|
for (const key in obj)
|
226
|
return obj;
|
227
|
return null;
|
228
|
}
|
229
|
|
230
|
/*
|
231
|
* Remove registered item from the Pattern Tree that starts with the protocols
|
232
|
* dictionary object passed in the first argument. The remaining 2 arguments
|
233
|
* should be pattern and name that have been earlier passed to
|
234
|
* pattern_tree_register().
|
235
|
*/
|
236
|
function pattern_tree_deregister(patterns_by_proto, pattern, item_name)
|
237
|
{
|
238
|
const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
|
239
|
item_name = key_prefix + item_name;
|
240
|
const remove_item = obj => _remove_item(obj, item_name);
|
241
|
modify_tree(patterns_by_proto, pattern, remove_item);
|
242
|
}
|
243
|
|
244
|
/*
|
245
|
* Yield registered items that match url. Each yielded value is an object with
|
246
|
* key being matched item names and values being the items. One such object
|
247
|
* shall contain all items matched with given pattern specificity. Objects are
|
248
|
* yielded in order from greatest to lowest pattern specificity.
|
249
|
*/
|
250
|
function* pattern_tree_search(patterns_by_proto, url)
|
251
|
{
|
252
|
const deco = deconstruct_url(url, false);
|
253
|
|
254
|
const tree_for_proto = patterns_by_proto[deco.proto] || empty_node();
|
255
|
let by_path = [tree_for_proto];
|
256
|
|
257
|
/* We need an array of domain labels ordered most-significant-first. */
|
258
|
if (deco.domain !== undefined)
|
259
|
by_path = search_sequence(tree_for_proto, [...deco.domain].reverse());
|
260
|
|
261
|
for (const path_tree of by_path) {
|
262
|
for (const match_obj of search_sequence(path_tree, deco.path)) {
|
263
|
let result_obj_slash = null;
|
264
|
let result_obj_no_slash = null;
|
265
|
|
266
|
for (const [key, item] of Object.entries(match_obj)) {
|
267
|
if (deco.trailing_slash && key[0] === '/') {
|
268
|
result_obj_slash = result_obj_slash || {};
|
269
|
result_obj_slash[key.substring(1)] = item;
|
270
|
} else if (key[0] !== '/') {
|
271
|
result_obj_no_slash = result_obj_no_slash || {};
|
272
|
result_obj_no_slash[key.substring(1)] = item;
|
273
|
}
|
274
|
}
|
275
|
|
276
|
if (deco.trailing_slash && result_obj_slash)
|
277
|
yield result_obj_slash;
|
278
|
|
279
|
if (result_obj_no_slash)
|
280
|
yield result_obj_no_slash;
|
281
|
}
|
282
|
}
|
283
|
}
|
284
|
|
285
|
const pattern_tree = {
|
286
|
make: () => ({}),
|
287
|
register: pattern_tree_register,
|
288
|
deregister: pattern_tree_deregister,
|
289
|
search: pattern_tree_search
|
290
|
}
|
291
|
|
292
|
/*
|
293
|
* EXPORTS_START
|
294
|
* EXPORT pattern_tree
|
295
|
* EXPORTS_END
|
296
|
*/
|