Project

General

Profile

« Previous | Next » 

Revision e1282a63

Added by koszko almost 2 years ago

finish implementing more efficient querying of URL patterns

The algorithm is implemented and tested. However, it is yet to be hooked into the actual extension.

View differences:

common/patterns_query_tree.js
47 47
 * IMPORTS_END
48 48
 */
49 49

  
50
/*
51
 * This is a javascript rewrite of Python implementation of the algorithm here:
52
 * https://git.koszko.org/pydrilla/tree/src/pydrilla/pydrilla.py?id=f4edcbe7f4739d6f82a2e1bb180960b003b30862#n205
53
 */
54

  
55 50
/* "Pattern Tree" is how we refer to the data structure used for querying
56 51
 * Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal
57 52
 * is to make it possible for given URL to quickly retrieve all known patterns
58 53
 * that match it.
59 54
 */
60
function make_tree_node() {
55
function empty_node() {
61 56
    return {
62 57
	wildcard_matches: [null, null, null],
63 58
	literal_match:    null,
......
141 136
    for (var current_segment of segments) {
142 137
	wildcards = tree_node.wildcard_matches;
143 138

  
144
	const child = tree_node.children[current_segment] || make_tree_node();
139
	const child = tree_node.children[current_segment] || empty_node();
145 140
	tree_node.children[current_segment] = child;
146 141
	tree_node = child;
147 142
	nodes.push(tree_node);
......
171 166
    }
172 167
}
173 168

  
174
/*
175
 * Make item queryable through the Pattern Tree that starts with the protocols
176
 * dictionary object passed in the first argument.
177
 */
178
function pattern_tree_register(patterns_by_proto, pattern, item_name, item)
169
/* Helper function for modify_tree(). */
170
function modify_path(tree_node, deco, item_modifier)
171
{
172
    tree_node = tree_node || empty_node();
173
    modify_sequence(tree_node, deco.path, item_modifier);
174
    return is_empty_node(tree_node) ? null : tree_node;
175
}
176

  
177
/* Helper function for modify_tree(). */
178
function modify_domain(tree_node, deco, item_modifier)
179
{
180
    const path_modifier = branch => modify_path(branch, deco, item_modifier);
181
    tree_node = tree_node || empty_node();
182
    /* We need an array of domain labels ordered most-significant-first. */
183
    modify_sequence(tree_node, [...deco.domain].reverse(), path_modifier);
184
    return is_empty_node(tree_node) ? null : tree_node;
185
}
186

  
187
/* Helper function for pattern_tree_register() and pattern_tree_deregister(). */
188
function modify_tree(patterns_by_proto, pattern, item_modifier)
179 189
{
180 190
    /*
181 191
     * We pass 'false' to disable length limits on URL parts. Length limits are
......
184 194
     */
185 195
    const deco = deconstruct_url(pattern, false);
186 196

  
187
    const tree = patterns_by_proto[deco.proto] || make_tree_node();
188
    patterns_by_proto[deco.proto] = tree;
197
    let tree_for_proto = patterns_by_proto[deco.proto];
189 198

  
190
    let path_trees;
199
    tree_for_proto = deco.domain === undefined ?
200
	modify_path(tree_for_proto, deco, item_modifier) :
201
	modify_domain(tree_for_proto, deco, item_modifier);
191 202

  
192
    if (deco.proto === "file") {
193
	path_trees = [tree];
194
    } else {
195
	/* We need an aray of domain labels ordered most-significant-first. */
196
	const domain = [...deco.domain].reverse();
197
	path_trees = add_sequence(tree, domain, make_tree_node);
198
    }
203
    patterns_by_proto[deco.proto] = tree_for_proto;
204
    if (tree_for_proto === null)
205
	delete patterns_by_proto[deco.proto];
206
}
199 207

  
200
    for (const path_tree of path_trees) {
201
        for (const match_obj of add_sequence(path_tree, deco.path, () => {}))
202
            match_obj[item_name] = item;
203
    }
208
/*
209
 * Make item queryable through the Pattern Tree that starts with the protocols
210
 * dictionary object passed in the first argument.
211
 */
212
function pattern_tree_register(patterns_by_proto, pattern, item_name, item)
213
{
214
    const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
215
    item_name = key_prefix + item_name;
216
    const add_item = obj => Object.assign(obj || {}, {[item_name]: item});
217
    modify_tree(patterns_by_proto, pattern, add_item);
218
}
219

  
220
/* Helper function for pattern_tree_deregister(). */
221
function _remove_item(obj, item_name)
222
{
223
    obj = obj || {};
224
    delete obj[item_name];
225
    for (const key in obj)
226
	return obj;
227
    return null;
204 228
}
205 229

  
206
// /*
207
//  * Remove registered item from the Pattern Tree that starts with the protocols
208
//  * dictionary object passed in the first argument. The remaining 2 arguments
209
//  * should be pattern and name that have been earlier passed to
210
//  * pattern_tree_register().
211
//  */
212
// function pattern_tree_deregister(patterns_by_proto, pattern, item_name)
213
// {
214
//     const deco = deconstruct_url(pattern, false);
215

  
216
//     const tree = patterns_by_proto[deco.proto] || make_tree_node();
217
//     patterns_by_proto[deco.proto] = tree;
218

  
219
//     let path_trees;
220

  
221
//     if (deco.proto === "file") {
222
// 	path_trees = [tree];
223
//     } else {
224
// 	/* We need an aray of domain labels ordered most-significant-first. */
225
// 	const domain = [...deco.domain].reverse();
226
// 	..........................
227
// 	path_trees = add_sequence(tree, domain, make_tree_node);
228
//     }
229
// }
230
/*
231
 * Remove registered item from the Pattern Tree that starts with the protocols
232
 * dictionary object passed in the first argument. The remaining 2 arguments
233
 * should be pattern and name that have been earlier passed to
234
 * pattern_tree_register().
235
 */
236
function pattern_tree_deregister(patterns_by_proto, pattern, item_name)
237
{
238
    const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
239
    item_name = key_prefix + item_name;
240
    const remove_item = obj => _remove_item(obj, item_name);
241
    modify_tree(patterns_by_proto, pattern, remove_item);
242
}
230 243

  
231 244
/*
232
 * Yield registered items (as [item_name, item] arrays) that match url. For
233
 * cosistent behavior, please don't modify the pattern tree while iterating over
234
 * the results.
245
 * Yield registered items that match url. Each yielded value is an object with
246
 * key being matched item names and values being the items. One such object
247
 * shall contain all items matched with given pattern specificity. Objects are
248
 * yielded in order from greatest to lowest pattern specificity.
235 249
 */
236 250
function* pattern_tree_search(patterns_by_proto, url)
237 251
{
238 252
    const deco = deconstruct_url(url, false);
239 253

  
240
    tree = patterns_by_proto[deco.proto] || make_tree_node();
254
    const tree_for_proto = patterns_by_proto[deco.proto] || empty_node();
255
    let by_path = [tree_for_proto];
241 256

  
242
    let path_trees;
243

  
244
    if (deco.proto === "file") {
245
	path_trees = [tree];
246
    } else {
247
	/* We need an aray of domain labels ordered most-significant-first. */
248
	const domain = [...deco.domain].reverse();
249
	path_trees = search_sequence(tree, domain);
250
    }
257
    /* We need an array of domain labels ordered most-significant-first. */
258
    if (deco.domain !== undefined)
259
	by_path = search_sequence(tree_for_proto, [...deco.domain].reverse());
251 260

  
252
    for (const path_tree of path_trees) {
261
    for (const path_tree of by_path) {
253 262
	for (const match_obj of search_sequence(path_tree, deco.path)) {
254
	    for (const entry of Object.entries(match_obj))
255
		yield entry;
263
	    let result_obj_slash    = null;
264
	    let result_obj_no_slash = null;
265

  
266
	    for (const [key, item] of Object.entries(match_obj)) {
267
		if (deco.trailing_slash && key[0] === '/') {
268
		    result_obj_slash = result_obj_slash || {};
269
		    result_obj_slash[key.substring(1)] = item;
270
		} else if (key[0] !== '/') {
271
		    result_obj_no_slash = result_obj_no_slash || {};
272
		    result_obj_no_slash[key.substring(1)] = item;
273
		}
274
	    }
275

  
276
	    if (deco.trailing_slash && result_obj_slash)
277
		yield result_obj_slash;
278

  
279
	    if (result_obj_no_slash)
280
		yield result_obj_no_slash;
256 281
	}
257 282
    }
258 283
}
259 284

  
260 285
const pattern_tree = {
261
    make: () => {},
262
    register: pattern_tree_register,
263
    // deregister: pattern_tree_deregister,
264
    search: pattern_tree_search
286
    make:       () => ({}),
287
    register:   pattern_tree_register,
288
    deregister: pattern_tree_deregister,
289
    search:     pattern_tree_search
265 290
}
266 291

  
267 292
/*

Also available in: Unified diff