Project

General

Profile

Download (9.71 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / common / patterns_query_tree.js @ c8294257

1
/**
2
 * This file is part of Haketilo.
3
 *
4
 * Function: Data structure to query items by URL patterns.
5
 *
6
 * Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org>
7
 *
8
 * This program is free software: you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation, either version 3 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * As additional permission under GNU GPL version 3 section 7, you
19
 * may distribute forms of that code without the copy of the GNU
20
 * GPL normally required by section 4, provided you include this
21
 * license notice and, in case of non-source distribution, a URL
22
 * through which recipients can access the Corresponding Source.
23
 * If you modify file(s) with this exception, you may extend this
24
 * exception to your version of the file(s), but you are not
25
 * obligated to do so. If you do not wish to do so, delete this
26
 * exception statement from your version.
27
 *
28
 * As a special exception to the GPL, any HTML file which merely
29
 * makes function calls to this code, and for that purpose
30
 * includes it by reference shall be deemed a separate work for
31
 * copyright law purposes. If you modify this code, you may extend
32
 * this exception to your version of the code, but you are not
33
 * obligated to do so. If you do not wish to do so, delete this
34
 * exception statement from your version.
35
 *
36
 * You should have received a copy of the GNU General Public License
37
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
38
 *
39
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
40
 * license. Although I request that you do not make use of this code in a
41
 * proprietary program, I am not going to enforce this in court.
42
 */
43

    
44
#FROM common/patterns.js IMPORT deconstruct_url
45

    
46
/* "Pattern Tree" is how we refer to the data structure used for querying
47
 * Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
48
 * is to make it possible for given URL to quickly retrieve all known patterns
49
 * that match it.
50
 */
51
const pattern_tree_make = () => ({})
52
#EXPORT  pattern_tree_make  AS make
53

    
54
const empty_node = () => ({});
55

    
56
function is_empty_node(tree_node) {
57
    for (const key in tree_node)
58
	return false;
59

    
60
    return true;
61
}
62

    
63
const wildcard_matches = node => [node["*"], node["**"], node["***"]];
64

    
65
const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0;
66

    
67
/*
68
 * Remove reference to given child fron node and leave the node in consistent
69
 * state afterwards, i.e. remove the "c" property if no childs are left.
70
 */
71
function delete_child(node, child_name) {
72
    if (node.c) {
73
	delete node.c[child_name];
74

    
75
	for (const key in node.c)
76
	    return;
77

    
78
	delete node.c;
79
    }
80
}
81

    
82
/*
83
 * Yields all matches of this segments sequence against the tree that starts at
84
 * this node. Results are produces in order from greatest to lowest pattern
85
 * specificity.
86
 */
87
function* search_sequence(tree_node, segments) {
88
    const nodes = [tree_node];
89

    
90
    for (const current_segment of segments) {
91
	const children = nodes[nodes.length - 1].c || {};
92
	if (!Object.hasOwnProperty.call(children, current_segment))
93
	    break;
94

    
95
	nodes.push(children[current_segment]);
96
    }
97

    
98
    const nsegments = segments.length;
99

    
100
    const conds = [
101
	/* literal pattern match */
102
	() => nodes.length     == nsegments,
103
	/* wildcard pattern matches */
104
	() => nodes.length + 1 == nsegments && segments[nsegments - 1] != "*",
105
	() => nodes.length + 1 <  nsegments,
106
	() => nodes.length + 1 != nsegments || segments[nsegments - 1] != "***"
107
    ];
108

    
109
    while (nodes.length) {
110
	const node = nodes.pop();
111
	const literal_match = node.l;
112
	const items = [literal_match, ...wildcard_matches(node)];
113

    
114
	for (let i = 0; i < items.length; i++) {
115
	    if (items[i] !== undefined && conds[i]())
116
		yield items[i];
117
	}
118
    }
119
}
120

    
121
/*
122
 * Make item queryable through (this branch of) the Pattern Tree or remove its
123
 * path from there.
124
 *
125
 * item_modifier should be a function that accepts 1 argument, the item stored
126
 * in the tree (or `null` if there wasn't any item there), and returns an item
127
 * that should be used in place of the first one. It is also legal for it to
128
 * return the same item modifying it first. If it returns `null`, it means the
129
 * item should be deleted from the Tree.
130
 *
131
 * If there was not yet any item associated with the tree path designated by
132
 * segments and value returned by item_modifier is not `null`, make the value
133
 * queryable by this path.
134
 */
135
function modify_sequence(tree_node, segments, item_modifier) {
136
    const nodes = [tree_node];
137
    let removed = true;
138

    
139
    for (var current_segment of segments) {
140
	const children = tree_node.c || {};
141
	tree_node.c = children;
142

    
143
	const child = Object.hasOwnProperty.call(children, current_segment) ?
144
	      children[current_segment] : empty_node();
145
	children[current_segment] = child;
146

    
147
	tree_node = child;
148
	nodes.push(tree_node);
149
    }
150

    
151
    tree_node.l = item_modifier(tree_node.l || null);
152
    if (tree_node.l === null)
153
	delete tree_node.l;
154
    else
155
	removed = false;
156

    
157
    let i = segments.length;
158

    
159
    if (is_wildcard(current_segment)) {
160
	nodes[i - 1][current_segment] =
161
	    item_modifier(nodes[i - 1][current_segment] || null);
162
	if (nodes[i - 1][current_segment] === null)
163
	    delete nodes[i - 1][current_segment];
164
	else
165
	    removed = false;
166
    }
167

    
168
    if (!removed)
169
	return;
170

    
171
    while (i > 0) {
172
	tree_node = nodes[i--];
173
	if (is_empty_node(tree_node))
174
	    delete_child(nodes[i], segments[i]);
175
	else
176
	    break;
177
    }
178
}
179

    
180
/* Helper function for modify_tree(). */
181
function modify_path(tree_node, deco, item_modifier) {
182
    tree_node = tree_node || empty_node();
183
    modify_sequence(tree_node, deco.path, item_modifier);
184
    return is_empty_node(tree_node) ? null : tree_node;
185
}
186

    
187
/* Helper function for modify_tree(). */
188
function modify_domain(tree_node, deco, item_modifier) {
189
    const path_modifier = branch => modify_path(branch, deco, item_modifier);
190
    tree_node = tree_node || empty_node();
191
    /* We need an array of domain labels ordered most-significant-first. */
192
    modify_sequence(tree_node, [...deco.domain].reverse(), path_modifier);
193
    return is_empty_node(tree_node) ? null : tree_node;
194
}
195

    
196
/* Helper function for pattern_tree_register() and pattern_tree_deregister(). */
197
function modify_tree(patterns_by_proto, pattern, item_modifier) {
198
    /*
199
     * We pass 'false' to disable length limits on URL parts. Length limits are
200
     * mostly useful in case of iteration over all patterns matching given URL.
201
     * Here we don't do that.
202
     */
203
    const deco = deconstruct_url(pattern, false);
204

    
205
    let tree_for_proto = patterns_by_proto[deco.proto];
206

    
207
    tree_for_proto = deco.domain === undefined ?
208
	modify_path(tree_for_proto, deco, item_modifier) :
209
	modify_domain(tree_for_proto, deco, item_modifier);
210

    
211
    patterns_by_proto[deco.proto] = tree_for_proto;
212
    if (tree_for_proto === null)
213
	delete patterns_by_proto[deco.proto];
214
}
215

    
216
/*
217
 * Make item queryable through the Pattern Tree that starts with the protocols
218
 * dictionary object passed in the first argument.
219
 */
220
function pattern_tree_register(patterns_by_proto, pattern, item_name, item) {
221
    const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
222
    item_name = key_prefix + item_name;
223
    const add_item = obj => Object.assign(obj || {}, {[item_name]: item});
224
    modify_tree(patterns_by_proto, pattern, add_item);
225
}
226
#EXPORT  pattern_tree_register  AS register
227

    
228
/* Helper function for pattern_tree_deregister(). */
229
function _remove_item(obj, item_name) {
230
    obj = obj || {};
231
    delete obj[item_name];
232
    for (const key in obj)
233
	return obj;
234
    return null;
235
}
236

    
237
/*
238
 * Remove registered item from the Pattern Tree that starts with the protocols
239
 * dictionary object passed in the first argument. The remaining 2 arguments
240
 * should be pattern and name that have been earlier passed to
241
 * pattern_tree_register().
242
 */
243
function pattern_tree_deregister(patterns_by_proto, pattern, item_name) {
244
    const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
245
    item_name = key_prefix + item_name;
246
    const remove_item = obj => _remove_item(obj, item_name);
247
    modify_tree(patterns_by_proto, pattern, remove_item);
248
}
249
#EXPORT  pattern_tree_deregister  AS deregister
250

    
251
/*
252
 * Yield registered items that match url. Each yielded value is an object with
253
 * keys being matched item names and values being the items. One such object
254
 * shall contain all items matched with given pattern specificity. Objects are
255
 * yielded in order from greatest to lowest pattern specificity.
256
 */
257
function* pattern_tree_search(patterns_by_proto, url) {
258
    const deco = deconstruct_url(url, false);
259

    
260
    const tree_for_proto = patterns_by_proto[deco.proto] || empty_node();
261
    let by_path = [tree_for_proto];
262

    
263
    /* We need an array of domain labels ordered most-significant-first. */
264
    if (deco.domain !== undefined)
265
	by_path = search_sequence(tree_for_proto, [...deco.domain].reverse());
266

    
267
    for (const path_tree of by_path) {
268
	for (const match_obj of search_sequence(path_tree, deco.path)) {
269
	    let result_obj_slash    = null;
270
	    let result_obj_no_slash = null;
271

    
272
	    for (const [key, item] of Object.entries(match_obj)) {
273
		if (deco.trailing_slash && key[0] === '/') {
274
		    result_obj_slash = result_obj_slash || {};
275
		    result_obj_slash[key.substring(1)] = item;
276
		} else if (key[0] !== '/') {
277
		    result_obj_no_slash = result_obj_no_slash || {};
278
		    result_obj_no_slash[key.substring(1)] = item;
279
		}
280
	    }
281

    
282
	    if (deco.trailing_slash && result_obj_slash)
283
		yield result_obj_slash;
284

    
285
	    if (result_obj_no_slash)
286
		yield result_obj_no_slash;
287
	}
288
    }
289
}
290
#EXPORT  pattern_tree_search  AS search
(9-9/11)