| 3 |
3 |
*
|
| 4 |
4 |
* Function: Data structure to query items by URL patterns.
|
| 5 |
5 |
*
|
| 6 |
|
* Copyright (C) 2021 Wojtek Kosior
|
|
6 |
* Copyright (C) 2021, 2022 Wojtek Kosior <koszko@koszko.org>
|
| 7 |
7 |
*
|
| 8 |
8 |
* This program is free software: you can redistribute it and/or modify
|
| 9 |
9 |
* it under the terms of the GNU General Public License as published by
|
| ... | ... | |
| 41 |
41 |
* proprietary program, I am not going to enforce this in court.
|
| 42 |
42 |
*/
|
| 43 |
43 |
|
| 44 |
|
// TODO! Modify the code to use `Object.create(null)` instead of `{}`.
|
| 45 |
|
|
| 46 |
44 |
#FROM common/patterns.js IMPORT deconstruct_url
|
| 47 |
45 |
|
| 48 |
46 |
/* "Pattern Tree" is how we refer to the data structure used for querying
|
| ... | ... | |
| 53 |
51 |
const pattern_tree_make = () => ({})
|
| 54 |
52 |
#EXPORT pattern_tree_make AS make
|
| 55 |
53 |
|
| 56 |
|
function empty_node() {
|
| 57 |
|
return {
|
| 58 |
|
wildcard_matches: [null, null, null],
|
| 59 |
|
literal_match: null,
|
| 60 |
|
children: {}
|
| 61 |
|
};
|
| 62 |
|
}
|
|
54 |
const empty_node = () => ({});
|
| 63 |
55 |
|
| 64 |
56 |
function is_empty_node(tree_node) {
|
| 65 |
|
const children = tree_node.children;
|
| 66 |
|
for (const key in children) {
|
| 67 |
|
if (children.hasOwnProperty(key))
|
| 68 |
|
return false;
|
| 69 |
|
}
|
| 70 |
|
|
| 71 |
|
if (tree_node.wildcard_matches.reduce((a, b) => b && a !== null, 1))
|
|
57 |
for (const key in tree_node)
|
| 72 |
58 |
return false;
|
| 73 |
59 |
|
| 74 |
|
return tree_node.literal_match === null;
|
|
60 |
return true;
|
| 75 |
61 |
}
|
| 76 |
62 |
|
|
63 |
const wildcard_matches = node => [node["*"], node["**"], node["***"]];
|
|
64 |
|
| 77 |
65 |
const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0;
|
| 78 |
66 |
|
|
67 |
/*
|
|
68 |
* Remove reference to given child fron node and leave the node in consistent
|
|
69 |
* state afterwards, i.e. remove the "c" property if no childs are left.
|
|
70 |
*/
|
|
71 |
function delete_child(node, child_name) {
|
|
72 |
if (node.c) {
|
|
73 |
delete node.c[child_name];
|
|
74 |
|
|
75 |
for (const key in node.c)
|
|
76 |
return;
|
|
77 |
|
|
78 |
delete node.c;
|
|
79 |
}
|
|
80 |
}
|
|
81 |
|
| 79 |
82 |
/*
|
| 80 |
83 |
* Yields all matches of this segments sequence against the tree that starts at
|
| 81 |
84 |
* this node. Results are produces in order from greatest to lowest pattern
|
| 82 |
85 |
* specificity.
|
| 83 |
86 |
*/
|
| 84 |
|
function* search_sequence(tree_node, segments)
|
| 85 |
|
{
|
|
87 |
function* search_sequence(tree_node, segments) {
|
| 86 |
88 |
const nodes = [tree_node];
|
| 87 |
89 |
|
| 88 |
|
for (const segment of segments) {
|
| 89 |
|
const next_node = nodes[nodes.length - 1].children[segment];
|
| 90 |
|
if (next_node === undefined)
|
|
90 |
for (const current_segment of segments) {
|
|
91 |
const children = nodes[nodes.length - 1].c || {};
|
|
92 |
if (!Object.hasOwnProperty.call(children, current_segment))
|
| 91 |
93 |
break;
|
| 92 |
94 |
|
| 93 |
|
nodes.push(next_node);
|
|
95 |
nodes.push(children[current_segment]);
|
| 94 |
96 |
}
|
| 95 |
97 |
|
| 96 |
98 |
const nsegments = segments.length;
|
| ... | ... | |
| 106 |
108 |
|
| 107 |
109 |
while (nodes.length) {
|
| 108 |
110 |
const node = nodes.pop();
|
| 109 |
|
const items = [node.literal_match, ...node.wildcard_matches];
|
|
111 |
const literal_match = node.l;
|
|
112 |
const items = [literal_match, ...wildcard_matches(node)];
|
| 110 |
113 |
|
| 111 |
|
for (let i = 0; i < 4; i++) {
|
| 112 |
|
if (items[i] !== null && conds[i]())
|
|
114 |
for (let i = 0; i < items.length; i++) {
|
|
115 |
if (items[i] !== undefined && conds[i]())
|
| 113 |
116 |
yield items[i];
|
| 114 |
117 |
}
|
| 115 |
118 |
}
|
| ... | ... | |
| 129 |
132 |
* segments and value returned by item_modifier is not `null`, make the value
|
| 130 |
133 |
* queryable by this path.
|
| 131 |
134 |
*/
|
| 132 |
|
function modify_sequence(tree_node, segments, item_modifier)
|
| 133 |
|
{
|
|
135 |
function modify_sequence(tree_node, segments, item_modifier) {
|
| 134 |
136 |
const nodes = [tree_node];
|
| 135 |
137 |
let removed = true;
|
| 136 |
138 |
|
| 137 |
139 |
for (var current_segment of segments) {
|
| 138 |
|
const child = tree_node.children[current_segment] || empty_node();
|
| 139 |
|
tree_node.children[current_segment] = child;
|
|
140 |
const children = tree_node.c || {};
|
|
141 |
tree_node.c = children;
|
|
142 |
|
|
143 |
const child = Object.hasOwnProperty.call(children, current_segment) ?
|
|
144 |
children[current_segment] : empty_node();
|
|
145 |
children[current_segment] = child;
|
|
146 |
|
| 140 |
147 |
tree_node = child;
|
| 141 |
148 |
nodes.push(tree_node);
|
| 142 |
149 |
}
|
| 143 |
150 |
|
| 144 |
|
tree_node.literal_match = item_modifier(tree_node.literal_match);
|
| 145 |
|
if (tree_node.literal_match !== null)
|
|
151 |
tree_node.l = item_modifier(tree_node.l || null);
|
|
152 |
if (tree_node.l === null)
|
|
153 |
delete tree_node.l;
|
|
154 |
else
|
| 146 |
155 |
removed = false;
|
| 147 |
156 |
|
| 148 |
157 |
let i = segments.length;
|
| 149 |
158 |
|
| 150 |
159 |
if (is_wildcard(current_segment)) {
|
| 151 |
|
const asterisks = current_segment.length - 1;
|
| 152 |
|
const wildcards = nodes[i - 1].wildcard_matches;
|
| 153 |
|
wildcards[asterisks] = item_modifier(wildcards[asterisks]);
|
| 154 |
|
if (wildcards[asterisks] !== null)
|
|
160 |
nodes[i - 1][current_segment] =
|
|
161 |
item_modifier(nodes[i - 1][current_segment] || null);
|
|
162 |
if (nodes[i - 1][current_segment] === null)
|
|
163 |
delete nodes[i - 1][current_segment];
|
|
164 |
else
|
| 155 |
165 |
removed = false;
|
| 156 |
166 |
}
|
| 157 |
167 |
|
| 158 |
|
while (!removed)
|
|
168 |
if (!removed)
|
| 159 |
169 |
return;
|
| 160 |
170 |
|
| 161 |
171 |
while (i > 0) {
|
| 162 |
172 |
tree_node = nodes[i--];
|
| 163 |
173 |
if (is_empty_node(tree_node))
|
| 164 |
|
delete nodes[i].children[segments[i]];
|
|
174 |
delete_child(nodes[i], segments[i]);
|
|
175 |
else
|
|
176 |
break;
|
| 165 |
177 |
}
|
| 166 |
178 |
}
|
| 167 |
179 |
|
| 168 |
180 |
/* Helper function for modify_tree(). */
|
| 169 |
|
function modify_path(tree_node, deco, item_modifier)
|
| 170 |
|
{
|
|
181 |
function modify_path(tree_node, deco, item_modifier) {
|
| 171 |
182 |
tree_node = tree_node || empty_node();
|
| 172 |
183 |
modify_sequence(tree_node, deco.path, item_modifier);
|
| 173 |
184 |
return is_empty_node(tree_node) ? null : tree_node;
|
| 174 |
185 |
}
|
| 175 |
186 |
|
| 176 |
187 |
/* Helper function for modify_tree(). */
|
| 177 |
|
function modify_domain(tree_node, deco, item_modifier)
|
| 178 |
|
{
|
|
188 |
function modify_domain(tree_node, deco, item_modifier) {
|
| 179 |
189 |
const path_modifier = branch => modify_path(branch, deco, item_modifier);
|
| 180 |
190 |
tree_node = tree_node || empty_node();
|
| 181 |
191 |
/* We need an array of domain labels ordered most-significant-first. */
|
| ... | ... | |
| 184 |
194 |
}
|
| 185 |
195 |
|
| 186 |
196 |
/* Helper function for pattern_tree_register() and pattern_tree_deregister(). */
|
| 187 |
|
function modify_tree(patterns_by_proto, pattern, item_modifier)
|
| 188 |
|
{
|
|
197 |
function modify_tree(patterns_by_proto, pattern, item_modifier) {
|
| 189 |
198 |
/*
|
| 190 |
199 |
* We pass 'false' to disable length limits on URL parts. Length limits are
|
| 191 |
200 |
* mostly useful in case of iteration over all patterns matching given URL.
|
| ... | ... | |
| 208 |
217 |
* Make item queryable through the Pattern Tree that starts with the protocols
|
| 209 |
218 |
* dictionary object passed in the first argument.
|
| 210 |
219 |
*/
|
| 211 |
|
function pattern_tree_register(patterns_by_proto, pattern, item_name, item)
|
| 212 |
|
{
|
|
220 |
function pattern_tree_register(patterns_by_proto, pattern, item_name, item) {
|
| 213 |
221 |
const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
|
| 214 |
222 |
item_name = key_prefix + item_name;
|
| 215 |
223 |
const add_item = obj => Object.assign(obj || {}, {[item_name]: item});
|
| ... | ... | |
| 218 |
226 |
#EXPORT pattern_tree_register AS register
|
| 219 |
227 |
|
| 220 |
228 |
/* Helper function for pattern_tree_deregister(). */
|
| 221 |
|
function _remove_item(obj, item_name)
|
| 222 |
|
{
|
|
229 |
function _remove_item(obj, item_name) {
|
| 223 |
230 |
obj = obj || {};
|
| 224 |
231 |
delete obj[item_name];
|
| 225 |
232 |
for (const key in obj)
|
| ... | ... | |
| 233 |
240 |
* should be pattern and name that have been earlier passed to
|
| 234 |
241 |
* pattern_tree_register().
|
| 235 |
242 |
*/
|
| 236 |
|
function pattern_tree_deregister(patterns_by_proto, pattern, item_name)
|
| 237 |
|
{
|
|
243 |
function pattern_tree_deregister(patterns_by_proto, pattern, item_name) {
|
| 238 |
244 |
const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_';
|
| 239 |
245 |
item_name = key_prefix + item_name;
|
| 240 |
246 |
const remove_item = obj => _remove_item(obj, item_name);
|
| ... | ... | |
| 244 |
250 |
|
| 245 |
251 |
/*
|
| 246 |
252 |
* Yield registered items that match url. Each yielded value is an object with
|
| 247 |
|
* key being matched item names and values being the items. One such object
|
|
253 |
* keys being matched item names and values being the items. One such object
|
| 248 |
254 |
* shall contain all items matched with given pattern specificity. Objects are
|
| 249 |
255 |
* yielded in order from greatest to lowest pattern specificity.
|
| 250 |
256 |
*/
|
| 251 |
|
function* pattern_tree_search(patterns_by_proto, url)
|
| 252 |
|
{
|
|
257 |
function* pattern_tree_search(patterns_by_proto, url) {
|
| 253 |
258 |
const deco = deconstruct_url(url, false);
|
| 254 |
259 |
|
| 255 |
260 |
const tree_for_proto = patterns_by_proto[deco.proto] || empty_node();
|
optimize Pattern Query Tree for size of its JSON.stringify()'ed representation