1
|
/**
|
2
|
* This file is part of Haketilo.
|
3
|
*
|
4
|
* Function: Data structure to query items by URL patterns.
|
5
|
*
|
6
|
* Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org>
|
7
|
*
|
8
|
* This program is free software: you can redistribute it and/or modify
|
9
|
* it under the terms of the GNU General Public License as published by
|
10
|
* the Free Software Foundation, either version 3 of the License, or
|
11
|
* (at your option) any later version.
|
12
|
*
|
13
|
* This program is distributed in the hope that it will be useful,
|
14
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
* GNU General Public License for more details.
|
17
|
*
|
18
|
* As additional permission under GNU GPL version 3 section 7, you
|
19
|
* may distribute forms of that code without the copy of the GNU
|
20
|
* GPL normally required by section 4, provided you include this
|
21
|
* license notice and, in case of non-source distribution, a URL
|
22
|
* through which recipients can access the Corresponding Source.
|
23
|
* If you modify file(s) with this exception, you may extend this
|
24
|
* exception to your version of the file(s), but you are not
|
25
|
* obligated to do so. If you do not wish to do so, delete this
|
26
|
* exception statement from your version.
|
27
|
*
|
28
|
* As a special exception to the GPL, any HTML file which merely
|
29
|
* makes function calls to this code, and for that purpose
|
30
|
* includes it by reference shall be deemed a separate work for
|
31
|
* copyright law purposes. If you modify this code, you may extend
|
32
|
* this exception to your version of the code, but you are not
|
33
|
* obligated to do so. If you do not wish to do so, delete this
|
34
|
* exception statement from your version.
|
35
|
*
|
36
|
* You should have received a copy of the GNU General Public License
|
37
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
38
|
*
|
39
|
* I, Wojtek Kosior, thereby promise not to sue for violation of this file's
|
40
|
* license. Although I request that you do not make use of this code in a
|
41
|
* proprietary program, I am not going to enforce this in court.
|
42
|
*/
|
43
|
|
44
|
#FROM common/patterns.js IMPORT deconstruct_url
|
45
|
|
46
|
/* "Pattern Tree" is how we refer to the data structure used for querying
|
47
|
* Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
|
48
|
* is to make it possible for given URL to quickly retrieve all known patterns
|
49
|
* that match it.
|
50
|
*/
|
51
|
const pattern_tree_make = () => ({})
|
52
|
#EXPORT pattern_tree_make AS make
|
53
|
|
54
|
const empty_node = () => ({});
|
55
|
|
56
|
function is_empty_node(tree_node) {
|
57
|
for (const key in tree_node)
|
58
|
return false;
|
59
|
|
60
|
return true;
|
61
|
}
|
62
|
|
63
|
const wildcard_matches = node => [node["*"], node["**"], node["***"]];
|
64
|
|
65
|
const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0;
|
66
|
|
67
|
/*
|
68
|
* Remove reference to given child fron node and leave the node in consistent
|
69
|
* state afterwards, i.e. remove the "c" property if no childs are left.
|
70
|
*/
|
71
|
function delete_child(node, child_name) {
|
72
|
if (node.c) {
|
73
|
delete node.c[child_name];
|
74
|
|
75
|
for (const key in node.c)
|
76
|
return;
|
77
|
|
78
|
delete node.c;
|
79
|
}
|
80
|
}
|
81
|
|
82
|
/*
|
83
|
* Yields all matches of this segments sequence against the tree that starts at
|
84
|
* this node. Results are produces in order from greatest to lowest pattern
|
85
|
* specificity.
|
86
|
*/
|
87
|
function* search_sequence(tree_node, segments) {
|
88
|
const nodes = [tree_node];
|
89
|
|
90
|
for (const current_segment of segments) {
|
91
|
const children = nodes[nodes.length - 1].c || {};
|
92
|
if (!Object.hasOwnProperty.call(children, current_segment))
|
93
|
break;
|
94
|
|
95
|
nodes.push(children[current_segment]);
|
96
|
}
|
97
|
|
98
|
const nsegments = segments.length;
|
99
|
|
100
|
const conds = [
|
101
|
/* literal pattern match */
|
102
|
() => nodes.length == nsegments,
|
103
|
/* wildcard pattern matches */
|
104
|
() => nodes.length + 1 == nsegments && segments[nsegments - 1] != "*",
|
105
|
() => nodes.length + 1 < nsegments,
|
106
|
() => nodes.length + 1 != nsegments || segments[nsegments - 1] != "***"
|
107
|
];
|
108
|
|
109
|
while (nodes.length) {
|
110
|
const node = nodes.pop();
|
111
|
const literal_match = node.l;
|
112
|
const items = [literal_match, ...wildcard_matches(node)];
|
113
|
|
114
|
for (let i = 0; i < items.length; i++) {
|
115
|
if (items[i] !== undefined && conds[i]())
|
116
|
yield items[i];
|
117
|
}
|
118
|
}
|
119
|
}
|
120
|
|
121
|
/*
|
122
|
* Make item queryable through (this branch of) the Pattern Tree or remove its
|
123
|
* path from there.
|
124
|
*
|
125
|
* item_modifier should be a function that accepts 1 argument, the item stored
|
126
|
* in the tree (or `null` if there wasn't any item there), and returns an item
|
127
|
* that should be used in place of the first one. It is also legal for it to
|
128
|
* return the same item modifying it first. If it returns `null`, it means the
|
129
|
* item should be deleted from the Tree.
|
130
|
*
|
131
|
* If there was not yet any item associated with the tree path designated by
|
132
|
* segments and value returned by item_modifier is not `null`, make the value
|
133
|
* queryable by this path.
|
134
|
*/
|
135
|
function modify_sequence(tree_node, segments, item_modifier) {
|
136
|
const nodes = [tree_node];
|
137
|
let removed = true;
|
138
|
|
139
|
for (var current_segment of segments) {
|
140
|
const children = tree_node.c || {};
|
141
|
tree_node.c = children;
|
142
|
|
143
|
const child = Object.hasOwnProperty.call(children, current_segment) ?
|
144
|
children[current_segment] : empty_node();
|
145
|
children[current_segment] = child;
|
146
|
|
147
|
tree_node = child;
|
148
|
nodes.push(tree_node);
|
149
|
}
|
150
|
|
151
|
tree_node.l = item_modifier(tree_node.l || null);
|
152
|
if (tree_node.l === null)
|
153
|
delete tree_node.l;
|
154
|
else
|
155
|
removed = false;
|
156
|
|
157
|
let i = segments.length;
|
158
|
|
159
|
if (is_wildcard(current_segment)) {
|
160
|
nodes[i - 1][current_segment] =
|
161
|
item_modifier(nodes[i - 1][current_segment] || null);
|
162
|
if (nodes[i - 1][current_segment] === null)
|
163
|
delete nodes[i - 1][current_segment];
|
164
|
else
|
165
|
removed = false;
|
166
|
}
|
167
|
|
168
|
if (!removed)
|
169
|
return;
|
170
|
|
171
|
while (i > 0) {
|
172
|
tree_node = nodes[i--];
|
173
|
if (is_empty_node(tree_node))
|
174
|
delete_child(nodes[i], segments[i]);
|
175
|
else
|
176
|
break;
|
177
|
}
|
178
|
}
|
179
|
|
180
|
/* Helper function for modify_tree(). */
|
181
|
function modify_path(tree_node, deco, item_modifier) {
|
182
|
tree_node = tree_node || empty_node();
|
183
|
modify_sequence(tree_node, deco.path, item_modifier);
|
184
|
return is_empty_node(tree_node) ? null : tree_node;
|
185
|
}
|
186
|
|
187
|
/* Helper function for modify_tree(). */
|
188
|
function modify_domain(tree_node, deco, item_modifier) {
|
189
|
const path_modifier = branch => modify_path(branch, deco, item_modifier);
|
190
|
tree_node = tree_node || empty_node();
|
191
|
/* We need an array of domain labels ordered most-significant-first. */
|
192
|
modify_sequence(tree_node, [...deco.domain].reverse(), path_modifier);
|
193
|
return is_empty_node(tree_node) ? null : tree_node;
|
194
|
}
|
195
|
|
196
|
/* Helper function for pattern_tree_register() and pattern_tree_deregister(). */
|
197
|
function modify_tree(patterns_by_proto, pattern, item_modifier) {
|
198
|
/*
|
199
|
* We pass 'false' to disable length limits on URL parts. Length limits are
|
200
|
* mostly useful in case of iteration over all patterns matching given URL.
|
201
|
* Here we don't do that.
|
202
|
*/
|
203
|
const deco = deconstruct_url(pattern, false);
|
204
|
|
205
|
let tree_for_proto = patterns_by_proto[deco.proto];
|
206
|
|
207
|
tree_for_proto = deco.domain === undefined ?
|
208
|
modify_path(tree_for_proto, deco, item_modifier) :
|
209
|
modify_domain(tree_for_proto, deco, item_modifier);
|
210
|
|
211
|
patterns_by_proto[deco.proto] = tree_for_proto;
|
212
|
if (tree_for_proto === null)
|
213
|
delete patterns_by_proto[deco.proto];
|
214
|
}
|
215
|
|
216
|
/*
|
217
|
* Make item queryable through the Pattern Tree that starts with the protocols
|
218
|
* dictionary object passed in the first argument.
|
219
|
*/
|
220
|
function pattern_tree_register(patterns_by_proto, pattern, item_name, item) {
|
221
|
const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
|
222
|
item_name = key_prefix + item_name;
|
223
|
const add_item = obj => Object.assign(obj || {}, {[item_name]: item});
|
224
|
modify_tree(patterns_by_proto, pattern, add_item);
|
225
|
}
|
226
|
#EXPORT pattern_tree_register AS register
|
227
|
|
228
|
/* Helper function for pattern_tree_deregister(). */
|
229
|
function _remove_item(obj, item_name) {
|
230
|
obj = obj || {};
|
231
|
delete obj[item_name];
|
232
|
for (const key in obj)
|
233
|
return obj;
|
234
|
return null;
|
235
|
}
|
236
|
|
237
|
/*
|
238
|
* Remove registered item from the Pattern Tree that starts with the protocols
|
239
|
* dictionary object passed in the first argument. The remaining 2 arguments
|
240
|
* should be pattern and name that have been earlier passed to
|
241
|
* pattern_tree_register().
|
242
|
*/
|
243
|
function pattern_tree_deregister(patterns_by_proto, pattern, item_name) {
|
244
|
const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
|
245
|
item_name = key_prefix + item_name;
|
246
|
const remove_item = obj => _remove_item(obj, item_name);
|
247
|
modify_tree(patterns_by_proto, pattern, remove_item);
|
248
|
}
|
249
|
#EXPORT pattern_tree_deregister AS deregister
|
250
|
|
251
|
/*
|
252
|
* Yield registered items that match url. Each yielded value is an object with
|
253
|
* keys being matched item names and values being the items. One such object
|
254
|
* shall contain all items matched with given pattern specificity. Objects are
|
255
|
* yielded in order from greatest to lowest pattern specificity.
|
256
|
*/
|
257
|
function* pattern_tree_search(patterns_by_proto, url) {
|
258
|
const deco = deconstruct_url(url, false);
|
259
|
|
260
|
const tree_for_proto = patterns_by_proto[deco.proto] || empty_node();
|
261
|
let by_path = [tree_for_proto];
|
262
|
|
263
|
/* We need an array of domain labels ordered most-significant-first. */
|
264
|
if (deco.domain !== undefined)
|
265
|
by_path = search_sequence(tree_for_proto, [...deco.domain].reverse());
|
266
|
|
267
|
for (const path_tree of by_path) {
|
268
|
for (const match_obj of search_sequence(path_tree, deco.path)) {
|
269
|
let result_obj_slash = null;
|
270
|
let result_obj_no_slash = null;
|
271
|
|
272
|
for (const [key, item] of Object.entries(match_obj)) {
|
273
|
if (deco.trailing_slash && key[0] === '/') {
|
274
|
result_obj_slash = result_obj_slash || {};
|
275
|
result_obj_slash[key.substring(1)] = item;
|
276
|
} else if (key[0] !== '/') {
|
277
|
result_obj_no_slash = result_obj_no_slash || {};
|
278
|
result_obj_no_slash[key.substring(1)] = item;
|
279
|
}
|
280
|
}
|
281
|
|
282
|
if (deco.trailing_slash && result_obj_slash)
|
283
|
yield result_obj_slash;
|
284
|
|
285
|
if (result_obj_no_slash)
|
286
|
yield result_obj_no_slash;
|
287
|
}
|
288
|
}
|
289
|
}
|
290
|
#EXPORT pattern_tree_search AS search
|