Revision e1282a63
Added by koszko almost 2 years ago
common/patterns_query_tree.js | ||
---|---|---|
47 | 47 |
* IMPORTS_END |
48 | 48 |
*/ |
49 | 49 |
|
50 |
/* |
|
51 |
* This is a javascript rewrite of Python implementation of the algorithm here: |
|
52 |
* https://git.koszko.org/pydrilla/tree/src/pydrilla/pydrilla.py?id=f4edcbe7f4739d6f82a2e1bb180960b003b30862#n205 |
|
53 |
*/ |
|
54 |
|
|
55 | 50 |
/* "Pattern Tree" is how we refer to the data structure used for querying |
56 | 51 |
* Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal |
57 | 52 |
* is to make it possible for given URL to quickly retrieve all known patterns |
58 | 53 |
* that match it. |
59 | 54 |
*/ |
60 |
function make_tree_node() {
|
|
55 |
function empty_node() {
|
|
61 | 56 |
return { |
62 | 57 |
wildcard_matches: [null, null, null], |
63 | 58 |
literal_match: null, |
... | ... | |
141 | 136 |
for (var current_segment of segments) { |
142 | 137 |
wildcards = tree_node.wildcard_matches; |
143 | 138 |
|
144 |
const child = tree_node.children[current_segment] || make_tree_node();
|
|
139 |
const child = tree_node.children[current_segment] || empty_node();
|
|
145 | 140 |
tree_node.children[current_segment] = child; |
146 | 141 |
tree_node = child; |
147 | 142 |
nodes.push(tree_node); |
... | ... | |
171 | 166 |
} |
172 | 167 |
} |
173 | 168 |
|
174 |
/* |
|
175 |
* Make item queryable through the Pattern Tree that starts with the protocols |
|
176 |
* dictionary object passed in the first argument. |
|
177 |
*/ |
|
178 |
function pattern_tree_register(patterns_by_proto, pattern, item_name, item) |
|
169 |
/* Helper function for modify_tree(). */ |
|
170 |
function modify_path(tree_node, deco, item_modifier) |
|
171 |
{ |
|
172 |
tree_node = tree_node || empty_node(); |
|
173 |
modify_sequence(tree_node, deco.path, item_modifier); |
|
174 |
return is_empty_node(tree_node) ? null : tree_node; |
|
175 |
} |
|
176 |
|
|
177 |
/* Helper function for modify_tree(). */ |
|
178 |
function modify_domain(tree_node, deco, item_modifier) |
|
179 |
{ |
|
180 |
const path_modifier = branch => modify_path(branch, deco, item_modifier); |
|
181 |
tree_node = tree_node || empty_node(); |
|
182 |
/* We need an array of domain labels ordered most-significant-first. */ |
|
183 |
modify_sequence(tree_node, [...deco.domain].reverse(), path_modifier); |
|
184 |
return is_empty_node(tree_node) ? null : tree_node; |
|
185 |
} |
|
186 |
|
|
187 |
/* Helper function for pattern_tree_register() and pattern_tree_deregister(). */ |
|
188 |
function modify_tree(patterns_by_proto, pattern, item_modifier) |
|
179 | 189 |
{ |
180 | 190 |
/* |
181 | 191 |
* We pass 'false' to disable length limits on URL parts. Length limits are |
... | ... | |
184 | 194 |
*/ |
185 | 195 |
const deco = deconstruct_url(pattern, false); |
186 | 196 |
|
187 |
const tree = patterns_by_proto[deco.proto] || make_tree_node(); |
|
188 |
patterns_by_proto[deco.proto] = tree; |
|
197 |
let tree_for_proto = patterns_by_proto[deco.proto]; |
|
189 | 198 |
|
190 |
let path_trees; |
|
199 |
tree_for_proto = deco.domain === undefined ? |
|
200 |
modify_path(tree_for_proto, deco, item_modifier) : |
|
201 |
modify_domain(tree_for_proto, deco, item_modifier); |
|
191 | 202 |
|
192 |
if (deco.proto === "file") { |
|
193 |
path_trees = [tree]; |
|
194 |
} else { |
|
195 |
/* We need an aray of domain labels ordered most-significant-first. */ |
|
196 |
const domain = [...deco.domain].reverse(); |
|
197 |
path_trees = add_sequence(tree, domain, make_tree_node); |
|
198 |
} |
|
203 |
patterns_by_proto[deco.proto] = tree_for_proto; |
|
204 |
if (tree_for_proto === null) |
|
205 |
delete patterns_by_proto[deco.proto]; |
|
206 |
} |
|
199 | 207 |
|
200 |
for (const path_tree of path_trees) { |
|
201 |
for (const match_obj of add_sequence(path_tree, deco.path, () => {})) |
|
202 |
match_obj[item_name] = item; |
|
203 |
} |
|
208 |
/* |
|
209 |
* Make item queryable through the Pattern Tree that starts with the protocols |
|
210 |
* dictionary object passed in the first argument. |
|
211 |
*/ |
|
212 |
function pattern_tree_register(patterns_by_proto, pattern, item_name, item) |
|
213 |
{ |
|
214 |
const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_'; |
|
215 |
item_name = key_prefix + item_name; |
|
216 |
const add_item = obj => Object.assign(obj || {}, {[item_name]: item}); |
|
217 |
modify_tree(patterns_by_proto, pattern, add_item); |
|
218 |
} |
|
219 |
|
|
220 |
/* Helper function for pattern_tree_deregister(). */ |
|
221 |
function _remove_item(obj, item_name) |
|
222 |
{ |
|
223 |
obj = obj || {}; |
|
224 |
delete obj[item_name]; |
|
225 |
for (const key in obj) |
|
226 |
return obj; |
|
227 |
return null; |
|
204 | 228 |
} |
205 | 229 |
|
206 |
// /* |
|
207 |
// * Remove registered item from the Pattern Tree that starts with the protocols |
|
208 |
// * dictionary object passed in the first argument. The remaining 2 arguments |
|
209 |
// * should be pattern and name that have been earlier passed to |
|
210 |
// * pattern_tree_register(). |
|
211 |
// */ |
|
212 |
// function pattern_tree_deregister(patterns_by_proto, pattern, item_name) |
|
213 |
// { |
|
214 |
// const deco = deconstruct_url(pattern, false); |
|
215 |
|
|
216 |
// const tree = patterns_by_proto[deco.proto] || make_tree_node(); |
|
217 |
// patterns_by_proto[deco.proto] = tree; |
|
218 |
|
|
219 |
// let path_trees; |
|
220 |
|
|
221 |
// if (deco.proto === "file") { |
|
222 |
// path_trees = [tree]; |
|
223 |
// } else { |
|
224 |
// /* We need an aray of domain labels ordered most-significant-first. */ |
|
225 |
// const domain = [...deco.domain].reverse(); |
|
226 |
// .......................... |
|
227 |
// path_trees = add_sequence(tree, domain, make_tree_node); |
|
228 |
// } |
|
229 |
// } |
|
230 |
/* |
|
231 |
* Remove registered item from the Pattern Tree that starts with the protocols |
|
232 |
* dictionary object passed in the first argument. The remaining 2 arguments |
|
233 |
* should be pattern and name that have been earlier passed to |
|
234 |
* pattern_tree_register(). |
|
235 |
*/ |
|
236 |
function pattern_tree_deregister(patterns_by_proto, pattern, item_name) |
|
237 |
{ |
|
238 |
const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_'; |
|
239 |
item_name = key_prefix + item_name; |
|
240 |
const remove_item = obj => _remove_item(obj, item_name); |
|
241 |
modify_tree(patterns_by_proto, pattern, remove_item); |
|
242 |
} |
|
230 | 243 |
|
231 | 244 |
/* |
232 |
* Yield registered items (as [item_name, item] arrays) that match url. For |
|
233 |
* cosistent behavior, please don't modify the pattern tree while iterating over |
|
234 |
* the results. |
|
245 |
* Yield registered items that match url. Each yielded value is an object with |
|
246 |
* key being matched item names and values being the items. One such object |
|
247 |
* shall contain all items matched with given pattern specificity. Objects are |
|
248 |
* yielded in order from greatest to lowest pattern specificity. |
|
235 | 249 |
*/ |
236 | 250 |
function* pattern_tree_search(patterns_by_proto, url) |
237 | 251 |
{ |
238 | 252 |
const deco = deconstruct_url(url, false); |
239 | 253 |
|
240 |
tree = patterns_by_proto[deco.proto] || make_tree_node(); |
|
254 |
const tree_for_proto = patterns_by_proto[deco.proto] || empty_node(); |
|
255 |
let by_path = [tree_for_proto]; |
|
241 | 256 |
|
242 |
let path_trees; |
|
243 |
|
|
244 |
if (deco.proto === "file") { |
|
245 |
path_trees = [tree]; |
|
246 |
} else { |
|
247 |
/* We need an aray of domain labels ordered most-significant-first. */ |
|
248 |
const domain = [...deco.domain].reverse(); |
|
249 |
path_trees = search_sequence(tree, domain); |
|
250 |
} |
|
257 |
/* We need an array of domain labels ordered most-significant-first. */ |
|
258 |
if (deco.domain !== undefined) |
|
259 |
by_path = search_sequence(tree_for_proto, [...deco.domain].reverse()); |
|
251 | 260 |
|
252 |
for (const path_tree of path_trees) {
|
|
261 |
for (const path_tree of by_path) {
|
|
253 | 262 |
for (const match_obj of search_sequence(path_tree, deco.path)) { |
254 |
for (const entry of Object.entries(match_obj)) |
|
255 |
yield entry; |
|
263 |
let result_obj_slash = null; |
|
264 |
let result_obj_no_slash = null; |
|
265 |
|
|
266 |
for (const [key, item] of Object.entries(match_obj)) { |
|
267 |
if (deco.trailing_slash && key[0] === '/') { |
|
268 |
result_obj_slash = result_obj_slash || {}; |
|
269 |
result_obj_slash[key.substring(1)] = item; |
|
270 |
} else if (key[0] !== '/') { |
|
271 |
result_obj_no_slash = result_obj_no_slash || {}; |
|
272 |
result_obj_no_slash[key.substring(1)] = item; |
|
273 |
} |
|
274 |
} |
|
275 |
|
|
276 |
if (deco.trailing_slash && result_obj_slash) |
|
277 |
yield result_obj_slash; |
|
278 |
|
|
279 |
if (result_obj_no_slash) |
|
280 |
yield result_obj_no_slash; |
|
256 | 281 |
} |
257 | 282 |
} |
258 | 283 |
} |
259 | 284 |
|
260 | 285 |
const pattern_tree = { |
261 |
make: () => {},
|
|
262 |
register: pattern_tree_register, |
|
263 |
// deregister: pattern_tree_deregister,
|
|
264 |
search: pattern_tree_search |
|
286 |
make: () => ({}),
|
|
287 |
register: pattern_tree_register,
|
|
288 |
deregister: pattern_tree_deregister, |
|
289 |
search: pattern_tree_search
|
|
265 | 290 |
} |
266 | 291 |
|
267 | 292 |
/* |
Also available in: Unified diff
finish implementing more efficient querying of URL patterns
The algorithm is implemented and tested. However, it is yet to be hooked into the actual extension.