Project

General

Profile

Download (8.93 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / common / patterns_query_tree.js @ 69e53743

1
/**
2
 * This file is part of Haketilo.
3
 *
4
 * Function: Data structure to query items by URL patterns.
5
 *
6
 * Copyright (C) 2021 Wojtek Kosior
7
 *
8
 * This program is free software: you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation, either version 3 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * As additional permission under GNU GPL version 3 section 7, you
19
 * may distribute forms of that code without the copy of the GNU
20
 * GPL normally required by section 4, provided you include this
21
 * license notice and, in case of non-source distribution, a URL
22
 * through which recipients can access the Corresponding Source.
23
 * If you modify file(s) with this exception, you may extend this
24
 * exception to your version of the file(s), but you are not
25
 * obligated to do so. If you do not wish to do so, delete this
26
 * exception statement from your version.
27
 *
28
 * As a special exception to the GPL, any HTML file which merely
29
 * makes function calls to this code, and for that purpose
30
 * includes it by reference shall be deemed a separate work for
31
 * copyright law purposes. If you modify this code, you may extend
32
 * this exception to your version of the code, but you are not
33
 * obligated to do so. If you do not wish to do so, delete this
34
 * exception statement from your version.
35
 *
36
 * You should have received a copy of the GNU General Public License
37
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
38
 *
39
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
40
 * license. Although I request that you do not make use this code in a
41
 * proprietary program, I am not going to enforce this in court.
42
 */
43

    
44
/*
45
 * IMPORTS_START
46
 * IMPORT deconstruct_url
47
 * IMPORTS_END
48
 */
49

    
50
/*
51
 * This is a javascript rewrite of Python implementation of the algorithm here:
52
 * https://git.koszko.org/pydrilla/tree/src/pydrilla/pydrilla.py?id=f4edcbe7f4739d6f82a2e1bb180960b003b30862#n205
53
 */
54

    
55
/* "Pattern Tree" is how we refer to the data structure used for querying
56
 * Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
57
 * is to make it possible for given URL to quickly retrieve all known patterns
58
 * that match it.
59
 */
60
function make_tree_node() {
61
    return {
62
	wildcard_matches: [null, null, null],
63
	literal_match:    null,
64
	children:         null
65
    };
66
}
67

    
68
const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0;
69

    
70
/*
71
 * Yields all matches of this segments sequence against the tree that starts at
72
 * this node. Results are produces in order from greatest to lowest pattern
73
 * specificity.
74
 */
75
function* search_sequence(tree_node, segments)
76
{
77
    const nodes = [tree_node];
78

    
79
    for (const segment of segments) {
80
	const next_node = nodes[nodes.length - 1].children[segment];
81
	if (next_node === undefined)
82
	    break;
83

    
84
	nodes.push(next_node);
85
    }
86

    
87
    const nsegments = segments.length;
88

    
89
    const conds = [
90
	/* literal pattern match */
91
	() => nodes.length     == nsegments,
92
	/* wildcard pattern matches */
93
	() => nodes.length + 1 == nsegments && segments[nsegments - 1] != "*",
94
	() => nodes.length + 1 <  nsegments,
95
	() => nodes.length + 1 != nsegments || segments[nsegments - 1] != "***"
96
    ];
97

    
98
    while (nodes.length) {
99
	const node = nodes.pop();
100
	const items = [node.literal_match, ...node.wildcard_matches];
101

    
102
	for (let i = 0; i < 4; i++) {
103
	    if (items[i] !== null && conds[i]())
104
		yield items[i];
105
	}
106
    }
107
}
108

    
109
/*
110
 * Make item queryable through (this branch of) the Pattern Tree. If there was
111
 * not yet any item associated with the tree path designated by segments, create
112
 * a new one using item_instantiator() function. Return all items matching this
113
 * path (both the ones that existed and the ones just created).
114
 */
115
function add_sequence(tree_node, segments, item_instantiator)
116
{
117
    let wildcards;
118

    
119
    for (var segment of segments) {
120
	wildcards = tree_node.wildcard_matches;
121

    
122
	const child = tree_node.children[segment] || make_tree_node();
123
	tree_node.children[segment] = child;
124
	tree_node = child;
125
    }
126

    
127
    if (tree_node.literal_match === null)
128
	tree_node.literal_match = item_instantiator();
129

    
130
    if (!is_wildcard(segment))
131
	return [tree_node.literal_match];
132

    
133
    if (wildcards[segment.length - 1] === null)
134
	wildcards[segment.length - 1] = item_instantiator();
135

    
136
    return [tree_node.literal_match, wildcards[segment.length - 1]];
137
}
138

    
139
/*
140
 * Can be used to remove item from (this branch of) the Pattern Tree.
141
 * item_modifier should be a function that accepts 1 argument, the item stored
142
 * in the tree, and returns an item that should be used in place of the first
143
 * one. It is also legal for it to return the same item modifying it first. If
144
 * it returns `undefined`, it means the item should be deleted from the Tree.
145
 *
146
 * item_modifier() will be called with all items in (this branch of) the Pattern
147
 * Tree that correspond to the path designated by segments. The Tree will be
148
 * updated accordingly.
149
 */
150
function modify_sequence(tree_node, segments, item_modifier)
151
{
152
    const nodes = [tree_node];
153

    
154
    for (const segment of segments) {
155
	const next_node = nodes[nodes.length - 1].children[segment];
156
	nodes.push(next_node);
157
    }
158

    
159
    const nsegments = segments.length;
160
    let i = segments.length - 1;
161

    
162
    if (nsegments > 0 && is_wildcard(segments[i])) {
163
	const node = nodes[nodes.length - 2];
164
	const asterisks = segments[i].length;
165
    }
166

    
167
    ............................
168

    
169
    for (let i = segments.length - 1; i >= 0; i--) {
170

    
171
	if (i
172
    }
173

    
174
    segments
175

    
176
    const conds = [
177
	/* literal pattern match */
178
	() => nodes.length     == nsegments,
179
	/* wildcard pattern matches */
180
	() => nodes.length + 1 == nsegments && segments[nsegments - 1] != "*",
181
	() => nodes.length + 1 <  nsegments,
182
	() => nodes.length + 1 != nsegments || segments[nsegments - 1] != "***"
183
    ];
184

    
185
    while (nodes.length) {
186
	const node = nodes.pop();
187
	const items = [node.literal_match, ...node.wildcard_matches];
188

    
189
	for (let i = 0; i < 4; i++) {
190
	    if (items[i] !== null && conds[i]())
191
		yield items[i];
192
	}
193
    }
194
    .................
195
}
196

    
197
/*
198
 * Make item queryable through the Pattern Tree that starts with the protocols
199
 * dictionary object passed in the first argument.
200
 */
201
function pattern_tree_register(patterns_by_proto, pattern, item_name, item)
202
{
203
    /*
204
     * We pass 'false' to disable length limits on URL parts. Length limits are
205
     * mostly useful in case of iteration over all patterns matching given URL.
206
     * Here we don't do that.
207
     */
208
    const deco = deconstruct_url(pattern, false);
209

    
210
    const tree = patterns_by_proto[deco.proto] || make_tree_node();
211
    patterns_by_proto[deco.proto] = tree;
212

    
213
    let path_trees;
214

    
215
    if (deco.proto === "file") {
216
	path_trees = [tree];
217
    } else {
218
	/* We need an aray of domain labels ordered most-significant-first. */
219
	const domain = [...deco.domain].reverse();
220
	path_trees = add_sequence(tree, domain, make_tree_node);
221
    }
222

    
223
    for (const path_tree of path_trees) {
224
        for (const match_obj of add_sequence(path_tree, deco.path, () => {}))
225
            match_obj[item_name] = item;
226
    }
227
}
228

    
229
/*
230
 * Remove registered item from the Pattern Tree that starts with the protocols
231
 * dictionary object passed in the first argument. The remaining 2 arguments
232
 * should be pattern and name that have been earlier passed to
233
 * pattern_tree_register().
234
 */
235
function pattern_tree_deregister(patterns_by_proto, pattern, item_name)
236
{
237
    const deco = deconstruct_url(pattern, false);
238

    
239
    const tree = patterns_by_proto[deco.proto] || make_tree_node();
240
    patterns_by_proto[deco.proto] = tree;
241

    
242
    let path_trees;
243

    
244
    if (deco.proto === "file") {
245
	path_trees = [tree];
246
    } else {
247
	/* We need an aray of domain labels ordered most-significant-first. */
248
	const domain = [...deco.domain].reverse();
249
	..........................
250
	path_trees = add_sequence(tree, domain, make_tree_node);
251
    }
252
}
253

    
254
/*
255
 * Yield registered items (as [item_name, item] arrays) that match url. For
256
 * cosistent behavior, please don't modify the pattern tree while iterating over
257
 * the results.
258
 */
259
function* pattern_tree_search(patterns_by_proto, url)
260
{
261
    const deco = deconstruct_url(url, false);
262

    
263
    tree = patterns_by_proto[deco.proto] || make_tree_node();
264

    
265
    let path_trees;
266

    
267
    if (deco.proto === "file") {
268
	path_trees = [tree];
269
    } else {
270
	/* We need an aray of domain labels ordered most-significant-first. */
271
	const domain = [...deco.domain].reverse();
272
	path_trees = search_sequence(tree, domain);
273
    }
274

    
275
    for (const path_tree of path_trees) {
276
	for (const match_obj of search_sequence(path_tree, deco.path)) {
277
	    for (const entry of Object.entries(match_obj))
278
		yield entry;
279
	}
280
    }
281
}
282

    
283
const patterns_tree = {
284
    make: () => {},
285
    register: patterns_tree_register,
286
    search: patterns_tree_search
287
}
288

    
289
/*
290
 * EXPORTS_START
291
 * EXPORT patterns_tree
292
 * EXPORTS_END
293
 */
(9-9/16)