Project

General

Profile

Download (9.5 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / common / patterns_query_tree.js @ e1282a63

1
/**
2
 * This file is part of Haketilo.
3
 *
4
 * Function: Data structure to query items by URL patterns.
5
 *
6
 * Copyright (C) 2021 Wojtek Kosior
7
 *
8
 * This program is free software: you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation, either version 3 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * As additional permission under GNU GPL version 3 section 7, you
19
 * may distribute forms of that code without the copy of the GNU
20
 * GPL normally required by section 4, provided you include this
21
 * license notice and, in case of non-source distribution, a URL
22
 * through which recipients can access the Corresponding Source.
23
 * If you modify file(s) with this exception, you may extend this
24
 * exception to your version of the file(s), but you are not
25
 * obligated to do so. If you do not wish to do so, delete this
26
 * exception statement from your version.
27
 *
28
 * As a special exception to the GPL, any HTML file which merely
29
 * makes function calls to this code, and for that purpose
30
 * includes it by reference shall be deemed a separate work for
31
 * copyright law purposes. If you modify this code, you may extend
32
 * this exception to your version of the code, but you are not
33
 * obligated to do so. If you do not wish to do so, delete this
34
 * exception statement from your version.
35
 *
36
 * You should have received a copy of the GNU General Public License
37
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
38
 *
39
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
40
 * license. Although I request that you do not make use this code in a
41
 * proprietary program, I am not going to enforce this in court.
42
 */
43

    
44
/*
45
 * IMPORTS_START
46
 * IMPORT deconstruct_url
47
 * IMPORTS_END
48
 */
49

    
50
/* "Pattern Tree" is how we refer to the data structure used for querying
51
 * Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
52
 * is to make it possible for given URL to quickly retrieve all known patterns
53
 * that match it.
54
 */
55
function empty_node() {
56
    return {
57
	wildcard_matches: [null, null, null],
58
	literal_match:    null,
59
	children:         {}
60
    };
61
}
62

    
63
function is_empty_node(tree_node) {
64
    const children = tree_node.children;
65
    for (const key in children) {
66
	if (children.hasOwnProperty(key))
67
	    return false;
68
    }
69

    
70
    if (Array.reduce(tree_node.wildcard_matches, (a, b) => b && a !== null, 1))
71
	return false;
72

    
73
    return tree_node.literal_match === null;
74
}
75

    
76
const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0;
77

    
78
/*
79
 * Yields all matches of this segments sequence against the tree that starts at
80
 * this node. Results are produces in order from greatest to lowest pattern
81
 * specificity.
82
 */
83
function* search_sequence(tree_node, segments)
84
{
85
    const nodes = [tree_node];
86

    
87
    for (const segment of segments) {
88
	const next_node = nodes[nodes.length - 1].children[segment];
89
	if (next_node === undefined)
90
	    break;
91

    
92
	nodes.push(next_node);
93
    }
94

    
95
    const nsegments = segments.length;
96

    
97
    const conds = [
98
	/* literal pattern match */
99
	() => nodes.length     == nsegments,
100
	/* wildcard pattern matches */
101
	() => nodes.length + 1 == nsegments && segments[nsegments - 1] != "*",
102
	() => nodes.length + 1 <  nsegments,
103
	() => nodes.length + 1 != nsegments || segments[nsegments - 1] != "***"
104
    ];
105

    
106
    while (nodes.length) {
107
	const node = nodes.pop();
108
	const items = [node.literal_match, ...node.wildcard_matches];
109

    
110
	for (let i = 0; i < 4; i++) {
111
	    if (items[i] !== null && conds[i]())
112
		yield items[i];
113
	}
114
    }
115
}
116

    
117
/*
118
 * Make item queryable through (this branch of) the Pattern Tree or remove its
119
 * path from there.
120
 *
121
 * item_modifier should be a function that accepts 1 argument, the item stored
122
 * in the tree (or `null` if there wasn't any item there), and returns an item
123
 * that should be used in place of the first one. It is also legal for it to
124
 * return the same item modifying it first. If it returns `null`, it means the
125
 * item should be deleted from the Tree.
126
 *
127
 * If there was not yet any item associated with the tree path designated by
128
 * segments and value returned by item_modifier is not `null`, make the value
129
 * queryable by this path.
130
 */
131
function modify_sequence(tree_node, segments, item_modifier)
132
{
133
    const nodes = [tree_node];
134
    let removed = true;
135

    
136
    for (var current_segment of segments) {
137
	wildcards = tree_node.wildcard_matches;
138

    
139
	const child = tree_node.children[current_segment] || empty_node();
140
	tree_node.children[current_segment] = child;
141
	tree_node = child;
142
	nodes.push(tree_node);
143
    }
144

    
145
    tree_node.literal_match = item_modifier(tree_node.literal_match);
146
    if (tree_node.literal_match !== null)
147
	removed = false;
148

    
149
    let i = segments.length;
150

    
151
    if (is_wildcard(current_segment)) {
152
	const asterisks = current_segment.length - 1;
153
	const wildcards = nodes[i - 1].wildcard_matches;
154
	wildcards[asterisks] = item_modifier(wildcards[asterisks]);
155
	if (wildcards[asterisks] !== null)
156
	    removed = false;
157
    }
158

    
159
    while (!removed)
160
	return;
161

    
162
    while (i > 0) {
163
	tree_node = nodes[i--];
164
	if (is_empty_node(tree_node))
165
	    delete nodes[i].children[segments[i]];
166
    }
167
}
168

    
169
/* Helper function for modify_tree(). */
170
function modify_path(tree_node, deco, item_modifier)
171
{
172
    tree_node = tree_node || empty_node();
173
    modify_sequence(tree_node, deco.path, item_modifier);
174
    return is_empty_node(tree_node) ? null : tree_node;
175
}
176

    
177
/* Helper function for modify_tree(). */
178
function modify_domain(tree_node, deco, item_modifier)
179
{
180
    const path_modifier = branch => modify_path(branch, deco, item_modifier);
181
    tree_node = tree_node || empty_node();
182
    /* We need an array of domain labels ordered most-significant-first. */
183
    modify_sequence(tree_node, [...deco.domain].reverse(), path_modifier);
184
    return is_empty_node(tree_node) ? null : tree_node;
185
}
186

    
187
/* Helper function for pattern_tree_register() and pattern_tree_deregister(). */
188
function modify_tree(patterns_by_proto, pattern, item_modifier)
189
{
190
    /*
191
     * We pass 'false' to disable length limits on URL parts. Length limits are
192
     * mostly useful in case of iteration over all patterns matching given URL.
193
     * Here we don't do that.
194
     */
195
    const deco = deconstruct_url(pattern, false);
196

    
197
    let tree_for_proto = patterns_by_proto[deco.proto];
198

    
199
    tree_for_proto = deco.domain === undefined ?
200
	modify_path(tree_for_proto, deco, item_modifier) :
201
	modify_domain(tree_for_proto, deco, item_modifier);
202

    
203
    patterns_by_proto[deco.proto] = tree_for_proto;
204
    if (tree_for_proto === null)
205
	delete patterns_by_proto[deco.proto];
206
}
207

    
208
/*
209
 * Make item queryable through the Pattern Tree that starts with the protocols
210
 * dictionary object passed in the first argument.
211
 */
212
function pattern_tree_register(patterns_by_proto, pattern, item_name, item)
213
{
214
    const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
215
    item_name = key_prefix + item_name;
216
    const add_item = obj => Object.assign(obj || {}, {[item_name]: item});
217
    modify_tree(patterns_by_proto, pattern, add_item);
218
}
219

    
220
/* Helper function for pattern_tree_deregister(). */
221
function _remove_item(obj, item_name)
222
{
223
    obj = obj || {};
224
    delete obj[item_name];
225
    for (const key in obj)
226
	return obj;
227
    return null;
228
}
229

    
230
/*
231
 * Remove registered item from the Pattern Tree that starts with the protocols
232
 * dictionary object passed in the first argument. The remaining 2 arguments
233
 * should be pattern and name that have been earlier passed to
234
 * pattern_tree_register().
235
 */
236
function pattern_tree_deregister(patterns_by_proto, pattern, item_name)
237
{
238
    const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
239
    item_name = key_prefix + item_name;
240
    const remove_item = obj => _remove_item(obj, item_name);
241
    modify_tree(patterns_by_proto, pattern, remove_item);
242
}
243

    
244
/*
245
 * Yield registered items that match url. Each yielded value is an object with
246
 * key being matched item names and values being the items. One such object
247
 * shall contain all items matched with given pattern specificity. Objects are
248
 * yielded in order from greatest to lowest pattern specificity.
249
 */
250
function* pattern_tree_search(patterns_by_proto, url)
251
{
252
    const deco = deconstruct_url(url, false);
253

    
254
    const tree_for_proto = patterns_by_proto[deco.proto] || empty_node();
255
    let by_path = [tree_for_proto];
256

    
257
    /* We need an array of domain labels ordered most-significant-first. */
258
    if (deco.domain !== undefined)
259
	by_path = search_sequence(tree_for_proto, [...deco.domain].reverse());
260

    
261
    for (const path_tree of by_path) {
262
	for (const match_obj of search_sequence(path_tree, deco.path)) {
263
	    let result_obj_slash    = null;
264
	    let result_obj_no_slash = null;
265

    
266
	    for (const [key, item] of Object.entries(match_obj)) {
267
		if (deco.trailing_slash && key[0] === '/') {
268
		    result_obj_slash = result_obj_slash || {};
269
		    result_obj_slash[key.substring(1)] = item;
270
		} else if (key[0] !== '/') {
271
		    result_obj_no_slash = result_obj_no_slash || {};
272
		    result_obj_no_slash[key.substring(1)] = item;
273
		}
274
	    }
275

    
276
	    if (deco.trailing_slash && result_obj_slash)
277
		yield result_obj_slash;
278

    
279
	    if (result_obj_no_slash)
280
		yield result_obj_no_slash;
281
	}
282
    }
283
}
284

    
285
const pattern_tree = {
286
    make:       () => ({}),
287
    register:   pattern_tree_register,
288
    deregister: pattern_tree_deregister,
289
    search:     pattern_tree_search
290
}
291

    
292
/*
293
 * EXPORTS_START
294
 * EXPORT pattern_tree
295
 * EXPORTS_END
296
 */
(9-9/16)