Revision 17adc623
Added by koszko over 1 year ago
common/patterns_query_tree.js | ||
---|---|---|
3 | 3 |
* |
4 | 4 |
* Function: Data structure to query items by URL patterns. |
5 | 5 |
* |
6 |
* Copyright (C) 2021 Wojtek Kosior
|
|
6 |
* Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org>
|
|
7 | 7 |
* |
8 | 8 |
* This program is free software: you can redistribute it and/or modify |
9 | 9 |
* it under the terms of the GNU General Public License as published by |
... | ... | |
41 | 41 |
* proprietary program, I am not going to enforce this in court. |
42 | 42 |
*/ |
43 | 43 |
|
44 |
// TODO! Modify the code to use `Object.create(null)` instead of `{}`. |
|
45 |
|
|
46 | 44 |
#FROM common/patterns.js IMPORT deconstruct_url |
47 | 45 |
|
48 | 46 |
/* "Pattern Tree" is how we refer to the data structure used for querying |
... | ... | |
53 | 51 |
const pattern_tree_make = () => ({}) |
54 | 52 |
#EXPORT pattern_tree_make AS make |
55 | 53 |
|
56 |
function empty_node() { |
|
57 |
return { |
|
58 |
wildcard_matches: [null, null, null], |
|
59 |
literal_match: null, |
|
60 |
children: {} |
|
61 |
}; |
|
62 |
} |
|
54 |
const empty_node = () => ({}); |
|
63 | 55 |
|
64 | 56 |
function is_empty_node(tree_node) { |
65 |
const children = tree_node.children; |
|
66 |
for (const key in children) { |
|
67 |
if (children.hasOwnProperty(key)) |
|
68 |
return false; |
|
69 |
} |
|
70 |
|
|
71 |
if (tree_node.wildcard_matches.reduce((a, b) => b && a !== null, 1)) |
|
57 |
for (const key in tree_node) |
|
72 | 58 |
return false; |
73 | 59 |
|
74 |
return tree_node.literal_match === null;
|
|
60 |
return true;
|
|
75 | 61 |
} |
76 | 62 |
|
63 |
const wildcard_matches = node => [node["*"], node["**"], node["***"]]; |
|
64 |
|
|
77 | 65 |
const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0; |
78 | 66 |
|
67 |
/* |
|
68 |
* Remove reference to given child fron node and leave the node in consistent |
|
69 |
* state afterwards, i.e. remove the "c" property if no childs are left. |
|
70 |
*/ |
|
71 |
function delete_child(node, child_name) { |
|
72 |
if (node.c) { |
|
73 |
delete node.c[child_name]; |
|
74 |
|
|
75 |
for (const key in node.c) |
|
76 |
return; |
|
77 |
|
|
78 |
delete node.c; |
|
79 |
} |
|
80 |
} |
|
81 |
|
|
79 | 82 |
/* |
80 | 83 |
* Yields all matches of this segments sequence against the tree that starts at |
81 | 84 |
* this node. Results are produces in order from greatest to lowest pattern |
82 | 85 |
* specificity. |
83 | 86 |
*/ |
84 |
function* search_sequence(tree_node, segments) |
|
85 |
{ |
|
87 |
function* search_sequence(tree_node, segments) { |
|
86 | 88 |
const nodes = [tree_node]; |
87 | 89 |
|
88 |
for (const segment of segments) { |
|
89 |
const next_node = nodes[nodes.length - 1].children[segment];
|
|
90 |
if (next_node === undefined)
|
|
90 |
for (const current_segment of segments) {
|
|
91 |
const children = nodes[nodes.length - 1].c || {};
|
|
92 |
if (!Object.hasOwnProperty.call(children, current_segment))
|
|
91 | 93 |
break; |
92 | 94 |
|
93 |
nodes.push(next_node);
|
|
95 |
nodes.push(children[current_segment]);
|
|
94 | 96 |
} |
95 | 97 |
|
96 | 98 |
const nsegments = segments.length; |
... | ... | |
106 | 108 |
|
107 | 109 |
while (nodes.length) { |
108 | 110 |
const node = nodes.pop(); |
109 |
const items = [node.literal_match, ...node.wildcard_matches]; |
|
111 |
const literal_match = node.l; |
|
112 |
const items = [literal_match, ...wildcard_matches(node)]; |
|
110 | 113 |
|
111 |
for (let i = 0; i < 4; i++) {
|
|
112 |
if (items[i] !== null && conds[i]())
|
|
114 |
for (let i = 0; i < items.length; i++) {
|
|
115 |
if (items[i] !== undefined && conds[i]())
|
|
113 | 116 |
yield items[i]; |
114 | 117 |
} |
115 | 118 |
} |
... | ... | |
129 | 132 |
* segments and value returned by item_modifier is not `null`, make the value |
130 | 133 |
* queryable by this path. |
131 | 134 |
*/ |
132 |
function modify_sequence(tree_node, segments, item_modifier) |
|
133 |
{ |
|
135 |
function modify_sequence(tree_node, segments, item_modifier) { |
|
134 | 136 |
const nodes = [tree_node]; |
135 | 137 |
let removed = true; |
136 | 138 |
|
137 | 139 |
for (var current_segment of segments) { |
138 |
const child = tree_node.children[current_segment] || empty_node(); |
|
139 |
tree_node.children[current_segment] = child; |
|
140 |
const children = tree_node.c || {}; |
|
141 |
tree_node.c = children; |
|
142 |
|
|
143 |
const child = Object.hasOwnProperty.call(children, current_segment) ? |
|
144 |
children[current_segment] : empty_node(); |
|
145 |
children[current_segment] = child; |
|
146 |
|
|
140 | 147 |
tree_node = child; |
141 | 148 |
nodes.push(tree_node); |
142 | 149 |
} |
143 | 150 |
|
144 |
tree_node.literal_match = item_modifier(tree_node.literal_match); |
|
145 |
if (tree_node.literal_match !== null) |
|
151 |
tree_node.l = item_modifier(tree_node.l || null); |
|
152 |
if (tree_node.l === null) |
|
153 |
delete tree_node.l; |
|
154 |
else |
|
146 | 155 |
removed = false; |
147 | 156 |
|
148 | 157 |
let i = segments.length; |
149 | 158 |
|
150 | 159 |
if (is_wildcard(current_segment)) { |
151 |
const asterisks = current_segment.length - 1; |
|
152 |
const wildcards = nodes[i - 1].wildcard_matches; |
|
153 |
wildcards[asterisks] = item_modifier(wildcards[asterisks]); |
|
154 |
if (wildcards[asterisks] !== null) |
|
160 |
nodes[i - 1][current_segment] = |
|
161 |
item_modifier(nodes[i - 1][current_segment] || null); |
|
162 |
if (nodes[i - 1][current_segment] === null) |
|
163 |
delete nodes[i - 1][current_segment]; |
|
164 |
else |
|
155 | 165 |
removed = false; |
156 | 166 |
} |
157 | 167 |
|
158 |
while (!removed)
|
|
168 |
if (!removed)
|
|
159 | 169 |
return; |
160 | 170 |
|
161 | 171 |
while (i > 0) { |
162 | 172 |
tree_node = nodes[i--]; |
163 | 173 |
if (is_empty_node(tree_node)) |
164 |
delete nodes[i].children[segments[i]]; |
|
174 |
delete_child(nodes[i], segments[i]); |
|
175 |
else |
|
176 |
break; |
|
165 | 177 |
} |
166 | 178 |
} |
167 | 179 |
|
168 | 180 |
/* Helper function for modify_tree(). */ |
169 |
function modify_path(tree_node, deco, item_modifier) |
|
170 |
{ |
|
181 |
function modify_path(tree_node, deco, item_modifier) { |
|
171 | 182 |
tree_node = tree_node || empty_node(); |
172 | 183 |
modify_sequence(tree_node, deco.path, item_modifier); |
173 | 184 |
return is_empty_node(tree_node) ? null : tree_node; |
174 | 185 |
} |
175 | 186 |
|
176 | 187 |
/* Helper function for modify_tree(). */ |
177 |
function modify_domain(tree_node, deco, item_modifier) |
|
178 |
{ |
|
188 |
function modify_domain(tree_node, deco, item_modifier) { |
|
179 | 189 |
const path_modifier = branch => modify_path(branch, deco, item_modifier); |
180 | 190 |
tree_node = tree_node || empty_node(); |
181 | 191 |
/* We need an array of domain labels ordered most-significant-first. */ |
... | ... | |
184 | 194 |
} |
185 | 195 |
|
186 | 196 |
/* Helper function for pattern_tree_register() and pattern_tree_deregister(). */ |
187 |
function modify_tree(patterns_by_proto, pattern, item_modifier) |
|
188 |
{ |
|
197 |
function modify_tree(patterns_by_proto, pattern, item_modifier) { |
|
189 | 198 |
/* |
190 | 199 |
* We pass 'false' to disable length limits on URL parts. Length limits are |
191 | 200 |
* mostly useful in case of iteration over all patterns matching given URL. |
... | ... | |
208 | 217 |
* Make item queryable through the Pattern Tree that starts with the protocols |
209 | 218 |
* dictionary object passed in the first argument. |
210 | 219 |
*/ |
211 |
function pattern_tree_register(patterns_by_proto, pattern, item_name, item) |
|
212 |
{ |
|
220 |
function pattern_tree_register(patterns_by_proto, pattern, item_name, item) { |
|
213 | 221 |
const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_'; |
214 | 222 |
item_name = key_prefix + item_name; |
215 | 223 |
const add_item = obj => Object.assign(obj || {}, {[item_name]: item}); |
... | ... | |
218 | 226 |
#EXPORT pattern_tree_register AS register |
219 | 227 |
|
220 | 228 |
/* Helper function for pattern_tree_deregister(). */ |
221 |
function _remove_item(obj, item_name) |
|
222 |
{ |
|
229 |
function _remove_item(obj, item_name) { |
|
223 | 230 |
obj = obj || {}; |
224 | 231 |
delete obj[item_name]; |
225 | 232 |
for (const key in obj) |
... | ... | |
233 | 240 |
* should be pattern and name that have been earlier passed to |
234 | 241 |
* pattern_tree_register(). |
235 | 242 |
*/ |
236 |
function pattern_tree_deregister(patterns_by_proto, pattern, item_name) |
|
237 |
{ |
|
243 |
function pattern_tree_deregister(patterns_by_proto, pattern, item_name) { |
|
238 | 244 |
const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_'; |
239 | 245 |
item_name = key_prefix + item_name; |
240 | 246 |
const remove_item = obj => _remove_item(obj, item_name); |
... | ... | |
244 | 250 |
|
245 | 251 |
/* |
246 | 252 |
* Yield registered items that match url. Each yielded value is an object with |
247 |
* key being matched item names and values being the items. One such object |
|
253 |
* keys being matched item names and values being the items. One such object
|
|
248 | 254 |
* shall contain all items matched with given pattern specificity. Objects are |
249 | 255 |
* yielded in order from greatest to lowest pattern specificity. |
250 | 256 |
*/ |
251 |
function* pattern_tree_search(patterns_by_proto, url) |
|
252 |
{ |
|
257 |
function* pattern_tree_search(patterns_by_proto, url) { |
|
253 | 258 |
const deco = deconstruct_url(url, false); |
254 | 259 |
|
255 | 260 |
const tree_for_proto = patterns_by_proto[deco.proto] || empty_node(); |
Also available in: Unified diff
optimize Pattern Query Tree for size of its JSON.stringify()'ed representation