Project

General

Profile

Download (8.35 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / common / patterns_query_tree.js @ 5c583de8

1
/**
2
 * This file is part of Haketilo.
3
 *
4
 * Function: Data structure to query items by URL patterns.
5
 *
6
 * Copyright (C) 2021 Wojtek Kosior
7
 *
8
 * This program is free software: you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation, either version 3 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * As additional permission under GNU GPL version 3 section 7, you
19
 * may distribute forms of that code without the copy of the GNU
20
 * GPL normally required by section 4, provided you include this
21
 * license notice and, in case of non-source distribution, a URL
22
 * through which recipients can access the Corresponding Source.
23
 * If you modify file(s) with this exception, you may extend this
24
 * exception to your version of the file(s), but you are not
25
 * obligated to do so. If you do not wish to do so, delete this
26
 * exception statement from your version.
27
 *
28
 * As a special exception to the GPL, any HTML file which merely
29
 * makes function calls to this code, and for that purpose
30
 * includes it by reference shall be deemed a separate work for
31
 * copyright law purposes. If you modify this code, you may extend
32
 * this exception to your version of the code, but you are not
33
 * obligated to do so. If you do not wish to do so, delete this
34
 * exception statement from your version.
35
 *
36
 * You should have received a copy of the GNU General Public License
37
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
38
 *
39
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
40
 * license. Although I request that you do not make use this code in a
41
 * proprietary program, I am not going to enforce this in court.
42
 */
43

    
44
/*
45
 * IMPORTS_START
46
 * IMPORT deconstruct_url
47
 * IMPORTS_END
48
 */
49

    
50
/*
51
 * This is a javascript rewrite of Python implementation of the algorithm here:
52
 * https://git.koszko.org/pydrilla/tree/src/pydrilla/pydrilla.py?id=f4edcbe7f4739d6f82a2e1bb180960b003b30862#n205
53
 */
54

    
55
/* "Pattern Tree" is how we refer to the data structure used for querying
56
 * Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
57
 * is to make it possible for given URL to quickly retrieve all known patterns
58
 * that match it.
59
 */
60
function make_tree_node() {
61
    return {
62
	wildcard_matches: [null, null, null],
63
	literal_match:    null,
64
	children:         {}
65
    };
66
}
67

    
68
function is_empty_node(tree_node) {
69
    const children = tree_node.children;
70
    for (const key in children) {
71
	if (children.hasOwnProperty(key))
72
	    return false;
73
    }
74

    
75
    if (Array.reduce(tree_node.wildcard_matches, (a, b) => b && a !== null, 1))
76
	return false;
77

    
78
    return tree_node.literal_match === null;
79
}
80

    
81
const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0;
82

    
83
/*
84
 * Yields all matches of this segments sequence against the tree that starts at
85
 * this node. Results are produces in order from greatest to lowest pattern
86
 * specificity.
87
 */
88
function* search_sequence(tree_node, segments)
89
{
90
    const nodes = [tree_node];
91

    
92
    for (const segment of segments) {
93
	const next_node = nodes[nodes.length - 1].children[segment];
94
	if (next_node === undefined)
95
	    break;
96

    
97
	nodes.push(next_node);
98
    }
99

    
100
    const nsegments = segments.length;
101

    
102
    const conds = [
103
	/* literal pattern match */
104
	() => nodes.length     == nsegments,
105
	/* wildcard pattern matches */
106
	() => nodes.length + 1 == nsegments && segments[nsegments - 1] != "*",
107
	() => nodes.length + 1 <  nsegments,
108
	() => nodes.length + 1 != nsegments || segments[nsegments - 1] != "***"
109
    ];
110

    
111
    while (nodes.length) {
112
	const node = nodes.pop();
113
	const items = [node.literal_match, ...node.wildcard_matches];
114

    
115
	for (let i = 0; i < 4; i++) {
116
	    if (items[i] !== null && conds[i]())
117
		yield items[i];
118
	}
119
    }
120
}
121

    
122
/*
123
 * Make item queryable through (this branch of) the Pattern Tree or remove its
124
 * path from there.
125
 *
126
 * item_modifier should be a function that accepts 1 argument, the item stored
127
 * in the tree (or `null` if there wasn't any item there), and returns an item
128
 * that should be used in place of the first one. It is also legal for it to
129
 * return the same item modifying it first. If it returns `null`, it means the
130
 * item should be deleted from the Tree.
131
 *
132
 * If there was not yet any item associated with the tree path designated by
133
 * segments and value returned by item_modifier is not `null`, make the value
134
 * queryable by this path.
135
 */
136
function modify_sequence(tree_node, segments, item_modifier)
137
{
138
    const nodes = [tree_node];
139
    let removed = true;
140

    
141
    for (var current_segment of segments) {
142
	wildcards = tree_node.wildcard_matches;
143

    
144
	const child = tree_node.children[current_segment] || make_tree_node();
145
	tree_node.children[current_segment] = child;
146
	tree_node = child;
147
	nodes.push(tree_node);
148
    }
149

    
150
    tree_node.literal_match = item_modifier(tree_node.literal_match);
151
    if (tree_node.literal_match !== null)
152
	removed = false;
153

    
154
    let i = segments.length;
155

    
156
    if (is_wildcard(current_segment)) {
157
	const asterisks = current_segment.length - 1;
158
	const wildcards = nodes[i - 1].wildcard_matches;
159
	wildcards[asterisks] = item_modifier(wildcards[asterisks]);
160
	if (wildcards[asterisks] !== null)
161
	    removed = false;
162
    }
163

    
164
    while (!removed)
165
	return;
166

    
167
    while (i > 0) {
168
	tree_node = nodes[i--];
169
	if (is_empty_node(tree_node))
170
	    delete nodes[i].children[segments[i]];
171
    }
172
}
173

    
174
/*
175
 * Make item queryable through the Pattern Tree that starts with the protocols
176
 * dictionary object passed in the first argument.
177
 */
178
function pattern_tree_register(patterns_by_proto, pattern, item_name, item)
179
{
180
    /*
181
     * We pass 'false' to disable length limits on URL parts. Length limits are
182
     * mostly useful in case of iteration over all patterns matching given URL.
183
     * Here we don't do that.
184
     */
185
    const deco = deconstruct_url(pattern, false);
186

    
187
    const tree = patterns_by_proto[deco.proto] || make_tree_node();
188
    patterns_by_proto[deco.proto] = tree;
189

    
190
    let path_trees;
191

    
192
    if (deco.proto === "file") {
193
	path_trees = [tree];
194
    } else {
195
	/* We need an aray of domain labels ordered most-significant-first. */
196
	const domain = [...deco.domain].reverse();
197
	path_trees = add_sequence(tree, domain, make_tree_node);
198
    }
199

    
200
    for (const path_tree of path_trees) {
201
        for (const match_obj of add_sequence(path_tree, deco.path, () => {}))
202
            match_obj[item_name] = item;
203
    }
204
}
205

    
206
// /*
207
//  * Remove registered item from the Pattern Tree that starts with the protocols
208
//  * dictionary object passed in the first argument. The remaining 2 arguments
209
//  * should be pattern and name that have been earlier passed to
210
//  * pattern_tree_register().
211
//  */
212
// function pattern_tree_deregister(patterns_by_proto, pattern, item_name)
213
// {
214
//     const deco = deconstruct_url(pattern, false);
215

    
216
//     const tree = patterns_by_proto[deco.proto] || make_tree_node();
217
//     patterns_by_proto[deco.proto] = tree;
218

    
219
//     let path_trees;
220

    
221
//     if (deco.proto === "file") {
222
// 	path_trees = [tree];
223
//     } else {
224
// 	/* We need an aray of domain labels ordered most-significant-first. */
225
// 	const domain = [...deco.domain].reverse();
226
// 	..........................
227
// 	path_trees = add_sequence(tree, domain, make_tree_node);
228
//     }
229
// }
230

    
231
/*
232
 * Yield registered items (as [item_name, item] arrays) that match url. For
233
 * cosistent behavior, please don't modify the pattern tree while iterating over
234
 * the results.
235
 */
236
function* pattern_tree_search(patterns_by_proto, url)
237
{
238
    const deco = deconstruct_url(url, false);
239

    
240
    tree = patterns_by_proto[deco.proto] || make_tree_node();
241

    
242
    let path_trees;
243

    
244
    if (deco.proto === "file") {
245
	path_trees = [tree];
246
    } else {
247
	/* We need an aray of domain labels ordered most-significant-first. */
248
	const domain = [...deco.domain].reverse();
249
	path_trees = search_sequence(tree, domain);
250
    }
251

    
252
    for (const path_tree of path_trees) {
253
	for (const match_obj of search_sequence(path_tree, deco.path)) {
254
	    for (const entry of Object.entries(match_obj))
255
		yield entry;
256
	}
257
    }
258
}
259

    
260
const pattern_tree = {
261
    make: () => {},
262
    register: pattern_tree_register,
263
    // deregister: pattern_tree_deregister,
264
    search: pattern_tree_search
265
}
266

    
267
/*
268
 * EXPORTS_START
269
 * EXPORT pattern_tree
270
 * EXPORTS_END
271
 */
(9-9/16)