Revision 5c583de8
Added by koszko almost 2 years ago
common/patterns.js | ||
---|---|---|
17 | 17 |
const proto_regex = /^(\w+):\/\/(.*)$/; |
18 | 18 |
|
19 | 19 |
const user_re = "[^/?#@]+@" |
20 |
const domain_re = "[.a-zA-Z0-9-]+"; |
|
20 |
const domain_re = "[.*a-zA-Z0-9-]+";
|
|
21 | 21 |
const path_re = "[^?#]*"; |
22 | 22 |
const query_re = "\\??[^#]*"; |
23 | 23 |
|
24 | 24 |
const http_regex = new RegExp(`^(${domain_re})(${path_re})(${query_re}).*`); |
25 | 25 |
|
26 |
const file_regex = new RegExp(`^(${path_re}).*`); |
|
26 |
const file_regex = new RegExp(`^(/${path_re}).*`);
|
|
27 | 27 |
|
28 | 28 |
const ftp_regex = new RegExp(`^(${user_re})?(${domain_re})(${path_re}).*`); |
29 | 29 |
|
30 |
function match_or_throw(regex, string, error_msg) |
|
31 |
{ |
|
32 |
const match = regex.exec(string); |
|
33 |
if (match === null) |
|
34 |
throw error_msg; |
|
35 |
|
|
36 |
return match; |
|
37 |
} |
|
38 |
|
|
30 | 39 |
function deconstruct_url(url, use_limits=true) |
31 | 40 |
{ |
32 | 41 |
const max = MAX; |
... | ... | |
35 | 44 |
max[key] = Infinity; |
36 | 45 |
} |
37 | 46 |
|
38 |
const proto_match = proto_regex.exec(url); |
|
39 |
if (proto_match === null) |
|
40 |
throw `bad url '${url}'`; |
|
47 |
const matcher = (re, str) => match_or_throw(re, str, `bad url '${url}'`) |
|
41 | 48 |
|
49 |
const proto_match = matcher(proto_regex, url); |
|
42 | 50 |
const deco = {proto: proto_match[1]}; |
43 | 51 |
|
44 | 52 |
if (deco.proto === "file") { |
45 |
deco.path = file_regex.exec(proto_match[2])[1];
|
|
53 |
deco.path = matcher(file_regex, proto_match[2])[1];
|
|
46 | 54 |
} else if (deco.proto === "ftp") { |
47 |
[deco.domain, deco.path] = ftp_regex.exec(proto_match[2]).slice(2, 4); |
|
55 |
[deco.domain, deco.path] = |
|
56 |
matcher(ftp_regex, proto_match[2]).slice(2, 4); |
|
48 | 57 |
} else if (deco.proto === "http" || deco.proto === "https") { |
49 |
const http_match = http_regex.exec(proto_match[2]); |
|
50 |
if (!http_match) |
|
51 |
return undefined; |
|
52 |
[deco.domain, deco.path, deco.query] = http_match.slice(1, 4); |
|
58 |
[deco.domain, deco.path, deco.query] = |
|
59 |
matcher(http_regex, proto_match[2]).slice(1, 4); |
|
53 | 60 |
deco.domain = deco.domain.toLowerCase(); |
54 | 61 |
} else { |
55 | 62 |
throw `unsupported protocol in url '${url}'`; |
common/patterns_query_tree.js | ||
---|---|---|
1 |
/** |
|
2 |
* This file is part of Haketilo. |
|
3 |
* |
|
4 |
* Function: Data structure to query items by URL patterns. |
|
5 |
* |
|
6 |
* Copyright (C) 2021 Wojtek Kosior |
|
7 |
* |
|
8 |
* This program is free software: you can redistribute it and/or modify |
|
9 |
* it under the terms of the GNU General Public License as published by |
|
10 |
* the Free Software Foundation, either version 3 of the License, or |
|
11 |
* (at your option) any later version. |
|
12 |
* |
|
13 |
* This program is distributed in the hope that it will be useful, |
|
14 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
15 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
16 |
* GNU General Public License for more details. |
|
17 |
* |
|
18 |
* As additional permission under GNU GPL version 3 section 7, you |
|
19 |
* may distribute forms of that code without the copy of the GNU |
|
20 |
* GPL normally required by section 4, provided you include this |
|
21 |
* license notice and, in case of non-source distribution, a URL |
|
22 |
* through which recipients can access the Corresponding Source. |
|
23 |
* If you modify file(s) with this exception, you may extend this |
|
24 |
* exception to your version of the file(s), but you are not |
|
25 |
* obligated to do so. If you do not wish to do so, delete this |
|
26 |
* exception statement from your version. |
|
27 |
* |
|
28 |
* As a special exception to the GPL, any HTML file which merely |
|
29 |
* makes function calls to this code, and for that purpose |
|
30 |
* includes it by reference shall be deemed a separate work for |
|
31 |
* copyright law purposes. If you modify this code, you may extend |
|
32 |
* this exception to your version of the code, but you are not |
|
33 |
* obligated to do so. If you do not wish to do so, delete this |
|
34 |
* exception statement from your version. |
|
35 |
* |
|
36 |
* You should have received a copy of the GNU General Public License |
|
37 |
* along with this program. If not, see <https://www.gnu.org/licenses/>. |
|
38 |
* |
|
39 |
* I, Wojtek Kosior, thereby promise not to sue for violation of this file's |
|
40 |
* license. Although I request that you do not make use this code in a |
|
41 |
* proprietary program, I am not going to enforce this in court. |
|
42 |
*/ |
|
43 |
|
|
44 |
/* |
|
45 |
* IMPORTS_START |
|
46 |
* IMPORT deconstruct_url |
|
47 |
* IMPORTS_END |
|
48 |
*/ |
|
49 |
|
|
50 |
/* |
|
51 |
* This is a javascript rewrite of Python implementation of the algorithm here: |
|
52 |
* https://git.koszko.org/pydrilla/tree/src/pydrilla/pydrilla.py?id=f4edcbe7f4739d6f82a2e1bb180960b003b30862#n205 |
|
53 |
*/ |
|
54 |
|
|
55 |
/* "Pattern Tree" is how we refer to the data structure used for querying |
|
56 |
* Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal |
|
57 |
* is to make it possible for given URL to quickly retrieve all known patterns |
|
58 |
* that match it. |
|
59 |
*/ |
|
60 |
function make_tree_node() { |
|
61 |
return { |
|
62 |
wildcard_matches: [null, null, null], |
|
63 |
literal_match: null, |
|
64 |
children: {} |
|
65 |
}; |
|
66 |
} |
|
67 |
|
|
68 |
function is_empty_node(tree_node) { |
|
69 |
const children = tree_node.children; |
|
70 |
for (const key in children) { |
|
71 |
if (children.hasOwnProperty(key)) |
|
72 |
return false; |
|
73 |
} |
|
74 |
|
|
75 |
if (Array.reduce(tree_node.wildcard_matches, (a, b) => b && a !== null, 1)) |
|
76 |
return false; |
|
77 |
|
|
78 |
return tree_node.literal_match === null; |
|
79 |
} |
|
80 |
|
|
81 |
const is_wildcard = segment => ["*", "**", "***"].lastIndexOf(segment) >= 0; |
|
82 |
|
|
83 |
/* |
|
84 |
* Yields all matches of this segments sequence against the tree that starts at |
|
85 |
* this node. Results are produces in order from greatest to lowest pattern |
|
86 |
* specificity. |
|
87 |
*/ |
|
88 |
function* search_sequence(tree_node, segments) |
|
89 |
{ |
|
90 |
const nodes = [tree_node]; |
|
91 |
|
|
92 |
for (const segment of segments) { |
|
93 |
const next_node = nodes[nodes.length - 1].children[segment]; |
|
94 |
if (next_node === undefined) |
|
95 |
break; |
|
96 |
|
|
97 |
nodes.push(next_node); |
|
98 |
} |
|
99 |
|
|
100 |
const nsegments = segments.length; |
|
101 |
|
|
102 |
const conds = [ |
|
103 |
/* literal pattern match */ |
|
104 |
() => nodes.length == nsegments, |
|
105 |
/* wildcard pattern matches */ |
|
106 |
() => nodes.length + 1 == nsegments && segments[nsegments - 1] != "*", |
|
107 |
() => nodes.length + 1 < nsegments, |
|
108 |
() => nodes.length + 1 != nsegments || segments[nsegments - 1] != "***" |
|
109 |
]; |
|
110 |
|
|
111 |
while (nodes.length) { |
|
112 |
const node = nodes.pop(); |
|
113 |
const items = [node.literal_match, ...node.wildcard_matches]; |
|
114 |
|
|
115 |
for (let i = 0; i < 4; i++) { |
|
116 |
if (items[i] !== null && conds[i]()) |
|
117 |
yield items[i]; |
|
118 |
} |
|
119 |
} |
|
120 |
} |
|
121 |
|
|
122 |
/* |
|
123 |
* Make item queryable through (this branch of) the Pattern Tree or remove its |
|
124 |
* path from there. |
|
125 |
* |
|
126 |
* item_modifier should be a function that accepts 1 argument, the item stored |
|
127 |
* in the tree (or `null` if there wasn't any item there), and returns an item |
|
128 |
* that should be used in place of the first one. It is also legal for it to |
|
129 |
* return the same item modifying it first. If it returns `null`, it means the |
|
130 |
* item should be deleted from the Tree. |
|
131 |
* |
|
132 |
* If there was not yet any item associated with the tree path designated by |
|
133 |
* segments and value returned by item_modifier is not `null`, make the value |
|
134 |
* queryable by this path. |
|
135 |
*/ |
|
136 |
function modify_sequence(tree_node, segments, item_modifier) |
|
137 |
{ |
|
138 |
const nodes = [tree_node]; |
|
139 |
let removed = true; |
|
140 |
|
|
141 |
for (var current_segment of segments) { |
|
142 |
wildcards = tree_node.wildcard_matches; |
|
143 |
|
|
144 |
const child = tree_node.children[current_segment] || make_tree_node(); |
|
145 |
tree_node.children[current_segment] = child; |
|
146 |
tree_node = child; |
|
147 |
nodes.push(tree_node); |
|
148 |
} |
|
149 |
|
|
150 |
tree_node.literal_match = item_modifier(tree_node.literal_match); |
|
151 |
if (tree_node.literal_match !== null) |
|
152 |
removed = false; |
|
153 |
|
|
154 |
let i = segments.length; |
|
155 |
|
|
156 |
if (is_wildcard(current_segment)) { |
|
157 |
const asterisks = current_segment.length - 1; |
|
158 |
const wildcards = nodes[i - 1].wildcard_matches; |
|
159 |
wildcards[asterisks] = item_modifier(wildcards[asterisks]); |
|
160 |
if (wildcards[asterisks] !== null) |
|
161 |
removed = false; |
|
162 |
} |
|
163 |
|
|
164 |
while (!removed) |
|
165 |
return; |
|
166 |
|
|
167 |
while (i > 0) { |
|
168 |
tree_node = nodes[i--]; |
|
169 |
if (is_empty_node(tree_node)) |
|
170 |
delete nodes[i].children[segments[i]]; |
|
171 |
} |
|
172 |
} |
|
173 |
|
|
174 |
/* |
|
175 |
* Make item queryable through the Pattern Tree that starts with the protocols |
|
176 |
* dictionary object passed in the first argument. |
|
177 |
*/ |
|
178 |
function pattern_tree_register(patterns_by_proto, pattern, item_name, item) |
|
179 |
{ |
|
180 |
/* |
|
181 |
* We pass 'false' to disable length limits on URL parts. Length limits are |
|
182 |
* mostly useful in case of iteration over all patterns matching given URL. |
|
183 |
* Here we don't do that. |
|
184 |
*/ |
|
185 |
const deco = deconstruct_url(pattern, false); |
|
186 |
|
|
187 |
const tree = patterns_by_proto[deco.proto] || make_tree_node(); |
|
188 |
patterns_by_proto[deco.proto] = tree; |
|
189 |
|
|
190 |
let path_trees; |
|
191 |
|
|
192 |
if (deco.proto === "file") { |
|
193 |
path_trees = [tree]; |
|
194 |
} else { |
|
195 |
/* We need an aray of domain labels ordered most-significant-first. */ |
|
196 |
const domain = [...deco.domain].reverse(); |
|
197 |
path_trees = add_sequence(tree, domain, make_tree_node); |
|
198 |
} |
|
199 |
|
|
200 |
for (const path_tree of path_trees) { |
|
201 |
for (const match_obj of add_sequence(path_tree, deco.path, () => {})) |
|
202 |
match_obj[item_name] = item; |
|
203 |
} |
|
204 |
} |
|
205 |
|
|
206 |
// /* |
|
207 |
// * Remove registered item from the Pattern Tree that starts with the protocols |
|
208 |
// * dictionary object passed in the first argument. The remaining 2 arguments |
|
209 |
// * should be pattern and name that have been earlier passed to |
|
210 |
// * pattern_tree_register(). |
|
211 |
// */ |
|
212 |
// function pattern_tree_deregister(patterns_by_proto, pattern, item_name) |
|
213 |
// { |
|
214 |
// const deco = deconstruct_url(pattern, false); |
|
215 |
|
|
216 |
// const tree = patterns_by_proto[deco.proto] || make_tree_node(); |
|
217 |
// patterns_by_proto[deco.proto] = tree; |
|
218 |
|
|
219 |
// let path_trees; |
|
220 |
|
|
221 |
// if (deco.proto === "file") { |
|
222 |
// path_trees = [tree]; |
|
223 |
// } else { |
|
224 |
// /* We need an aray of domain labels ordered most-significant-first. */ |
|
225 |
// const domain = [...deco.domain].reverse(); |
|
226 |
// .......................... |
|
227 |
// path_trees = add_sequence(tree, domain, make_tree_node); |
|
228 |
// } |
|
229 |
// } |
|
230 |
|
|
231 |
/* |
|
232 |
* Yield registered items (as [item_name, item] arrays) that match url. For |
|
233 |
* cosistent behavior, please don't modify the pattern tree while iterating over |
|
234 |
* the results. |
|
235 |
*/ |
|
236 |
function* pattern_tree_search(patterns_by_proto, url) |
|
237 |
{ |
|
238 |
const deco = deconstruct_url(url, false); |
|
239 |
|
|
240 |
tree = patterns_by_proto[deco.proto] || make_tree_node(); |
|
241 |
|
|
242 |
let path_trees; |
|
243 |
|
|
244 |
if (deco.proto === "file") { |
|
245 |
path_trees = [tree]; |
|
246 |
} else { |
|
247 |
/* We need an aray of domain labels ordered most-significant-first. */ |
|
248 |
const domain = [...deco.domain].reverse(); |
|
249 |
path_trees = search_sequence(tree, domain); |
|
250 |
} |
|
251 |
|
|
252 |
for (const path_tree of path_trees) { |
|
253 |
for (const match_obj of search_sequence(path_tree, deco.path)) { |
|
254 |
for (const entry of Object.entries(match_obj)) |
|
255 |
yield entry; |
|
256 |
} |
|
257 |
} |
|
258 |
} |
|
259 |
|
|
260 |
const pattern_tree = { |
|
261 |
make: () => {}, |
|
262 |
register: pattern_tree_register, |
|
263 |
// deregister: pattern_tree_deregister, |
|
264 |
search: pattern_tree_search |
|
265 |
} |
|
266 |
|
|
267 |
/* |
|
268 |
* EXPORTS_START |
|
269 |
* EXPORT pattern_tree |
|
270 |
* EXPORTS_END |
|
271 |
*/ |
test/unit/conftest.py | ||
---|---|---|
76 | 76 |
return window.haketilo_selenium_return_value; |
77 | 77 |
''' |
78 | 78 |
|
79 |
def _execute_in_page_context(driver, script, *args):
|
|
79 |
def _execute_in_page_context(driver, script, args): |
|
80 | 80 |
script = script + '\n;\nwindow.haketilo_selenium_exception = false;' |
81 | 81 |
try: |
82 | 82 |
return driver.execute_script(script_injecting_script, script, args) |
test/unit/test_patterns.py | ||
---|---|---|
89 | 89 |
match = execute_in_page('returnval(ftp_regex.exec(arguments[0]));', |
90 | 90 |
'@bad.url/') |
91 | 91 |
assert match is None |
92 |
|
|
93 |
def test_deconstruct_url(execute_in_page, patterns_code): |
|
94 |
""" |
|
95 |
patterns.js contains deconstruct_url() function that handles URL parsing. |
|
96 |
Verify it works properly. |
|
97 |
""" |
|
98 |
execute_in_page(patterns_code, page='https://gotmyowndoma.in') |
|
99 |
|
|
100 |
deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', |
|
101 |
'https://eXaMpLe.com/a/b?ver=1.2.3#heading2') |
|
102 |
assert deco |
|
103 |
assert deco['trailing_dash'] == False |
|
104 |
assert deco['proto'] == 'https' |
|
105 |
assert deco['domain'] == ['example', 'com'] |
|
106 |
assert deco['path'] == ['a', 'b'] |
|
107 |
|
|
108 |
deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', |
|
109 |
'http://**.example.com/') |
|
110 |
assert deco |
|
111 |
assert deco['trailing_dash'] == True |
|
112 |
assert deco['proto'] == 'http' |
|
113 |
assert deco['domain'] == ['**', 'example', 'com'] |
|
114 |
assert deco['path'] == [] |
|
115 |
|
|
116 |
deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', |
|
117 |
'ftp://user@ftp.example.com/all///passwords.txt/') |
|
118 |
assert deco |
|
119 |
assert deco['trailing_dash'] == True |
|
120 |
assert deco['proto'] == 'ftp' |
|
121 |
assert deco['domain'] == ['ftp', 'example', 'com'] |
|
122 |
assert deco['path'] == ['all', 'passwords.txt'] |
|
123 |
|
|
124 |
deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', |
|
125 |
'ftp://mirror.edu.pl.eu.org') |
|
126 |
assert deco |
|
127 |
assert deco['trailing_dash'] == False |
|
128 |
assert deco['proto'] == 'ftp' |
|
129 |
assert deco['domain'] == ['mirror', 'edu', 'pl', 'eu', 'org'] |
|
130 |
assert deco['path'] == [] |
|
131 |
|
|
132 |
deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', |
|
133 |
'file:///mnt/parabola_chroot///etc/passwd') |
|
134 |
assert deco |
|
135 |
assert deco['trailing_dash'] == False |
|
136 |
assert deco['proto'] == 'file' |
|
137 |
assert deco['path'] == ['mnt', 'parabola_chroot', 'etc', 'passwd'] |
|
138 |
|
|
139 |
for bad_url in [ |
|
140 |
'://bad-url.missing/protocol', |
|
141 |
'http:/example.com/a/b', |
|
142 |
'unknown://example.com/a/b', |
|
143 |
'idontfancypineapple', |
|
144 |
'ftp://@example.org/', |
|
145 |
'https:///some/path/', |
|
146 |
'file://non-absolute/path' |
|
147 |
]: |
|
148 |
with pytest.raises(Exception, match=r'Error in injected script'): |
|
149 |
deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', |
|
150 |
bad_url) |
|
151 |
|
|
152 |
# at some point we might also consider testing url deconstruction with |
|
153 |
# length limits... |
test/unit/test_patterns_query_tree.py | ||
---|---|---|
1 |
# SPDX-License-Identifier: CC0-1.0 |
|
2 |
|
|
3 |
""" |
|
4 |
Haketilo unit tests - URL patterns |
|
5 |
""" |
|
6 |
|
|
7 |
# This file is part of Haketilo |
|
8 |
# |
|
9 |
# Copyright (C) 2021, Wojtek Kosior |
|
10 |
# |
|
11 |
# This program is free software: you can redistribute it and/or modify |
|
12 |
# it under the terms of the CC0 1.0 Universal License as published by |
|
13 |
# the Creative Commons Corporation. |
|
14 |
# |
|
15 |
# This program is distributed in the hope that it will be useful, |
|
16 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
17 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
18 |
# CC0 1.0 Universal License for more details. |
|
19 |
|
|
20 |
import pytest |
|
21 |
|
|
22 |
from ..script_loader import load_script |
|
23 |
|
|
24 |
@pytest.fixture(scope="session") |
|
25 |
def patterns_tree_code(): |
|
26 |
yield load_script('common/patterns_query_tree.js', ['common']) |
|
27 |
|
|
28 |
def test_modify_branch(execute_in_page, patterns_tree_code): |
|
29 |
""" |
|
30 |
patterns_query_tree.js contains Patterns Tree data structure that allows |
|
31 |
arrays of string labels to be mapped to items. |
|
32 |
Verify operations modifying a single branch of such tree work properly. |
|
33 |
""" |
|
34 |
execute_in_page(patterns_tree_code, page='https://gotmyowndoma.in') |
|
35 |
execute_in_page( |
|
36 |
''' |
|
37 |
let items_added; |
|
38 |
let items_removed; |
|
39 |
|
|
40 |
function _item_adder(item, array) |
|
41 |
{ |
|
42 |
items_added++; |
|
43 |
return [...(array || []), item]; |
|
44 |
} |
|
45 |
|
|
46 |
function item_adder(item) |
|
47 |
{ |
|
48 |
items_added = 0; |
|
49 |
return array => _item_adder(item, array); |
|
50 |
} |
|
51 |
|
|
52 |
function _item_remover(array) |
|
53 |
{ |
|
54 |
if (array !== null) { |
|
55 |
items_removed++; |
|
56 |
array.pop(); |
|
57 |
} |
|
58 |
return (array && array.length > 0) ? array : null; |
|
59 |
} |
|
60 |
|
|
61 |
function item_remover() |
|
62 |
{ |
|
63 |
items_removed = 0; |
|
64 |
return _item_remover; |
|
65 |
}''') |
|
66 |
|
|
67 |
# Let's construct some tree branch while checking that each addition gives |
|
68 |
# the right result. |
|
69 |
branch = execute_in_page( |
|
70 |
'''{ |
|
71 |
const branch = make_tree_node(); |
|
72 |
modify_sequence(branch, ['com', 'example'], item_adder('some_item')); |
|
73 |
returnval(branch); |
|
74 |
}''') |
|
75 |
assert branch == { |
|
76 |
'literal_match': None, |
|
77 |
'wildcard_matches': [None, None, None], |
|
78 |
'children': { |
|
79 |
'com': { |
|
80 |
'literal_match': None, |
|
81 |
'wildcard_matches': [None, None, None], |
|
82 |
'children': { |
|
83 |
'example': { |
|
84 |
'literal_match': ['some_item'], |
|
85 |
'wildcard_matches': [None, None, None], |
|
86 |
'children': { |
|
87 |
} |
|
88 |
} |
|
89 |
} |
|
90 |
} |
|
91 |
} |
|
92 |
} |
|
93 |
|
|
94 |
branch, items_added = execute_in_page( |
|
95 |
'''{ |
|
96 |
const branch = arguments[0]; |
|
97 |
modify_sequence(branch, ['com', 'example'], item_adder('other_item')); |
|
98 |
returnval([branch, items_added]); |
|
99 |
}''', branch) |
|
100 |
assert items_added == 1 |
|
101 |
assert branch['children']['com']['children']['example']['literal_match'] \ |
|
102 |
== ['some_item', 'other_item'] |
|
103 |
|
|
104 |
for i in range(3): |
|
105 |
for expected_array in [['third_item'], ['third_item', '4th_item']]: |
|
106 |
wildcard = '*' * (i + 1) |
|
107 |
branch, items_added = execute_in_page( |
|
108 |
'''{ |
|
109 |
const branch = arguments[0]; |
|
110 |
modify_sequence(branch, ['com', 'sample', arguments[1]], |
|
111 |
item_adder(arguments[2])); |
|
112 |
returnval([branch, items_added]); |
|
113 |
}''', |
|
114 |
branch, wildcard, expected_array[-1]) |
|
115 |
assert items_added == 2 |
|
116 |
sample = branch['children']['com']['children']['sample'] |
|
117 |
assert sample['wildcard_matches'][i] == expected_array |
|
118 |
assert sample['children'][wildcard]['literal_match'] \ |
|
119 |
== expected_array |
|
120 |
|
|
121 |
branch, items_added = execute_in_page( |
|
122 |
'''{ |
|
123 |
const branch = arguments[0]; |
|
124 |
modify_sequence(branch, ['org', 'koszko', '***', '123'], |
|
125 |
item_adder('5th_item')); |
|
126 |
returnval([branch, items_added]); |
|
127 |
}''', |
|
128 |
branch) |
|
129 |
assert items_added == 1 |
|
130 |
assert branch['children']['org']['children']['koszko']['children']['***']\ |
|
131 |
['children']['123']['literal_match'] == ['5th_item'] |
|
132 |
|
|
133 |
# Let's verify that removing a nonexistent element doesn't modify the tree. |
|
134 |
branch2, items_removed = execute_in_page( |
|
135 |
'''{ |
|
136 |
const branch = arguments[0]; |
|
137 |
modify_sequence(branch, ['com', 'not', 'registered', '*'], |
|
138 |
item_remover()); |
|
139 |
returnval([branch, items_removed]); |
|
140 |
}''', |
|
141 |
branch) |
|
142 |
assert branch == branch2 |
|
143 |
assert items_removed == 0 |
|
144 |
|
|
145 |
# Let's remove all elements in the tree branch while checking that each |
|
146 |
# removal gives the right result. |
|
147 |
branch, items_removed = execute_in_page( |
|
148 |
'''{ |
|
149 |
const branch = arguments[0]; |
|
150 |
modify_sequence(branch, ['org', 'koszko', '***', '123'], |
|
151 |
item_remover()); |
|
152 |
returnval([branch, items_removed]); |
|
153 |
}''', |
|
154 |
branch) |
|
155 |
assert items_removed == 1 |
|
156 |
assert 'org' not in branch['children'] |
|
157 |
|
|
158 |
for i in range(3): |
|
159 |
for expected_array in [['third_item'], None]: |
|
160 |
wildcard = '*' * (i + 1) |
|
161 |
branch, items_removed = execute_in_page( |
|
162 |
'''{ |
|
163 |
const branch = arguments[0]; |
|
164 |
modify_sequence(branch, ['com', 'sample', arguments[1]], |
|
165 |
item_remover()); |
|
166 |
returnval([branch, items_removed]); |
|
167 |
}''', |
|
168 |
branch, wildcard) |
|
169 |
assert items_removed == 2 |
|
170 |
if i == 2 and expected_array == []: |
|
171 |
break |
|
172 |
sample = branch['children']['com']['children'].get('sample', {}) |
|
173 |
assert sample.get('wildcard_matches', [None, None, None])[i] \ |
|
174 |
== expected_array |
|
175 |
assert sample.get('children', {}).get(wildcard, {})\ |
|
176 |
.get('literal_match') == expected_array |
|
177 |
|
|
178 |
for i in range(2): |
|
179 |
branch, items_removed = execute_in_page( |
|
180 |
'''{ |
|
181 |
const branch = arguments[0]; |
|
182 |
modify_sequence(branch, ['com', 'example'], item_remover()); |
|
183 |
returnval([branch, items_removed]); |
|
184 |
}''', |
|
185 |
branch) |
|
186 |
assert items_removed == 1 |
|
187 |
if i == 0: |
|
188 |
assert branch['children']['com']['children']['example']\ |
|
189 |
['literal_match'] == ['some_item'] |
|
190 |
else: |
|
191 |
assert branch == { |
|
192 |
'literal_match': None, |
|
193 |
'wildcard_matches': [None, None, None], |
|
194 |
'children': { |
|
195 |
} |
|
196 |
} |
|
197 |
|
|
198 |
def test_search_branch(execute_in_page, patterns_tree_code): |
|
199 |
""" |
|
200 |
patterns_query_tree.js contains Patterns Tree data structure that allows |
|
201 |
arrays of string labels to be mapped to items. |
|
202 |
Verify searching a single branch of such tree work properly. |
|
203 |
""" |
|
204 |
execute_in_page(patterns_tree_code, page='https://gotmyowndoma.in') |
|
205 |
execute_in_page( |
|
206 |
''' |
|
207 |
const item_adder = item => (array => [...(array || []), item]); |
|
208 |
''') |
|
209 |
|
|
210 |
# Let's construct some tree branch to test on. |
|
211 |
execute_in_page( |
|
212 |
''' |
|
213 |
var branch = make_tree_node(); |
|
214 |
|
|
215 |
for (const [item, sequence] of [ |
|
216 |
['(root)', []], |
|
217 |
['***', ['***']], |
|
218 |
['**', ['**']], |
|
219 |
['*', ['*']], |
|
220 |
|
|
221 |
['a', ['a']], |
|
222 |
['A', ['a']], |
|
223 |
['b', ['b']], |
|
224 |
|
|
225 |
['a/***', ['a', '***']], |
|
226 |
['A/***', ['a', '***']], |
|
227 |
['a/**', ['a', '**']], |
|
228 |
['A/**', ['a', '**']], |
|
229 |
['a/*', ['a', '*']], |
|
230 |
['A/*', ['a', '*']], |
|
231 |
['a/sth', ['a', 'sth']], |
|
232 |
['A/sth', ['a', 'sth']], |
|
233 |
|
|
234 |
['b/***', ['b', '***']], |
|
235 |
['b/**', ['b', '**']], |
|
236 |
['b/*', ['b', '*']], |
|
237 |
['b/sth', ['b', 'sth']], |
|
238 |
]) |
|
239 |
modify_sequence(branch, sequence, item_adder(item)); |
|
240 |
''') |
|
241 |
|
|
242 |
# Let's make the actual searches on our testing branch. |
|
243 |
for sequence, expected in [ |
|
244 |
([], [{'(root)'}, {'***'}]), |
|
245 |
(['a'], [{'a', 'A'}, {'a/***', 'A/***'}, {'*'}, {'***'}]), |
|
246 |
(['b'], [{'b'}, {'b/***'}, {'*'}, {'***'}]), |
|
247 |
(['c'], [ {'*'}, {'***'}]), |
|
248 |
(['***'], [{'***'}, {'*'} ]), |
|
249 |
(['**'], [{'**'}, {'*'}, {'***'}]), |
|
250 |
(['**'], [{'**'}, {'*'}, {'***'}]), |
|
251 |
(['*'], [{'*'}, {'***'}]), |
|
252 |
|
|
253 |
(['a', 'sth'], [{'a/sth', 'A/sth'}, {'a/*', 'A/*'}, {'a/***', 'A/***'}, {'**'}, {'***'}]), |
|
254 |
(['b', 'sth'], [{'b/sth'}, {'b/*'}, {'b/***'}, {'**'}, {'***'}]), |
|
255 |
(['a', 'hts'], [ {'a/*', 'A/*'}, {'a/***', 'A/***'}, {'**'}, {'***'}]), |
|
256 |
(['b', 'hts'], [ {'b/*'}, {'b/***'}, {'**'}, {'***'}]), |
|
257 |
(['a', '***'], [{'a/***', 'A/***'}, {'a/*', 'A/*'}, {'**'}, {'***'}]), |
|
258 |
(['b', '***'], [{'b/***'}, {'b/*'}, {'**'}, {'***'}]), |
|
259 |
(['a', '**'], [{'a/**', 'A/**'}, {'a/*', 'A/*'}, {'a/***', 'A/***'}, {'**'}, {'***'}]), |
|
260 |
(['b', '**'], [{'b/**'}, {'b/*'}, {'b/***'}, {'**'}, {'***'}]), |
|
261 |
(['a', '*'], [{'a/*', 'A/*'}, {'a/***', 'A/***'}, {'**'}, {'***'}]), |
|
262 |
(['b', '*'], [{'b/*'}, {'b/***'}, {'**'}, {'***'}]), |
|
263 |
|
|
264 |
(['a', 'c', 'd'], [{'a/**', 'A/**'}, {'a/***', 'A/***'}, {'**'}, {'***'}]), |
|
265 |
(['b', 'c', 'd'], [{'b/**'}, {'b/***'}, {'**'}, {'***'}]) |
|
266 |
]: |
|
267 |
result = execute_in_page( |
|
268 |
''' |
|
269 |
returnval([...search_sequence(branch, arguments[0])]); |
|
270 |
''', |
|
271 |
sequence) |
|
272 |
|
|
273 |
try: |
|
274 |
assert len(result) == len(expected) |
|
275 |
|
|
276 |
for expected_set, result_array in zip(expected, result): |
|
277 |
assert len(expected_set) == len(result_array) |
|
278 |
assert expected_set == set(result_array) |
|
279 |
except Exception as e: |
|
280 |
import sys |
|
281 |
print('sequence:', sequence, '\nexpected:', expected, |
|
282 |
'\nresult:', result, file=sys.stderr) |
|
283 |
raise e from None |
Also available in: Unified diff
start implementing more efficient querying of URL patterns