Revision e1282a63
Added by koszko almost 2 years ago
common/patterns.js | ||
---|---|---|
96 | 96 |
throw `unsupported protocol in url '${url}'`; |
97 | 97 |
} |
98 | 98 |
|
99 |
deco.trailing_dash = deco.path[deco.path.length - 1] === "/";
|
|
99 |
deco.trailing_slash = deco.path[deco.path.length - 1] === "/";
|
|
100 | 100 |
|
101 | 101 |
if (deco.domain) { |
102 | 102 |
if (deco.domain.length > max.DOMAIN_CHARS) { |
... | ... | |
151 | 151 |
const path_part = ["", ...deco.path.slice(0, slice)].join("/"); |
152 | 152 |
const path_wildcards = []; |
153 | 153 |
if (slice === deco.path.length && !deco.path_truncated) { |
154 |
if (deco.trailing_dash)
|
|
154 |
if (deco.trailing_slash)
|
|
155 | 155 |
yield path_part + "/"; |
156 | 156 |
if (slice > 0 || deco.proto !== "file") |
157 | 157 |
yield path_part; |
common/patterns_query_tree.js | ||
---|---|---|
47 | 47 |
* IMPORTS_END |
48 | 48 |
*/ |
49 | 49 |
|
50 |
/* |
|
51 |
* This is a javascript rewrite of Python implementation of the algorithm here: |
|
52 |
* https://git.koszko.org/pydrilla/tree/src/pydrilla/pydrilla.py?id=f4edcbe7f4739d6f82a2e1bb180960b003b30862#n205 |
|
53 |
*/ |
|
54 |
|
|
55 | 50 |
/* "Pattern Tree" is how we refer to the data structure used for querying |
56 | 51 |
* Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal |
57 | 52 |
* is to make it possible for given URL to quickly retrieve all known patterns |
58 | 53 |
* that match it. |
59 | 54 |
*/ |
60 |
function make_tree_node() {
|
|
55 |
function empty_node() {
|
|
61 | 56 |
return { |
62 | 57 |
wildcard_matches: [null, null, null], |
63 | 58 |
literal_match: null, |
... | ... | |
141 | 136 |
for (var current_segment of segments) { |
142 | 137 |
wildcards = tree_node.wildcard_matches; |
143 | 138 |
|
144 |
const child = tree_node.children[current_segment] || make_tree_node();
|
|
139 |
const child = tree_node.children[current_segment] || empty_node();
|
|
145 | 140 |
tree_node.children[current_segment] = child; |
146 | 141 |
tree_node = child; |
147 | 142 |
nodes.push(tree_node); |
... | ... | |
171 | 166 |
} |
172 | 167 |
} |
173 | 168 |
|
174 |
/* |
|
175 |
* Make item queryable through the Pattern Tree that starts with the protocols |
|
176 |
* dictionary object passed in the first argument. |
|
177 |
*/ |
|
178 |
function pattern_tree_register(patterns_by_proto, pattern, item_name, item) |
|
169 |
/* Helper function for modify_tree(). */ |
|
170 |
function modify_path(tree_node, deco, item_modifier) |
|
171 |
{ |
|
172 |
tree_node = tree_node || empty_node(); |
|
173 |
modify_sequence(tree_node, deco.path, item_modifier); |
|
174 |
return is_empty_node(tree_node) ? null : tree_node; |
|
175 |
} |
|
176 |
|
|
177 |
/* Helper function for modify_tree(). */ |
|
178 |
function modify_domain(tree_node, deco, item_modifier) |
|
179 |
{ |
|
180 |
const path_modifier = branch => modify_path(branch, deco, item_modifier); |
|
181 |
tree_node = tree_node || empty_node(); |
|
182 |
/* We need an array of domain labels ordered most-significant-first. */ |
|
183 |
modify_sequence(tree_node, [...deco.domain].reverse(), path_modifier); |
|
184 |
return is_empty_node(tree_node) ? null : tree_node; |
|
185 |
} |
|
186 |
|
|
187 |
/* Helper function for pattern_tree_register() and pattern_tree_deregister(). */ |
|
188 |
function modify_tree(patterns_by_proto, pattern, item_modifier) |
|
179 | 189 |
{ |
180 | 190 |
/* |
181 | 191 |
* We pass 'false' to disable length limits on URL parts. Length limits are |
... | ... | |
184 | 194 |
*/ |
185 | 195 |
const deco = deconstruct_url(pattern, false); |
186 | 196 |
|
187 |
const tree = patterns_by_proto[deco.proto] || make_tree_node(); |
|
188 |
patterns_by_proto[deco.proto] = tree; |
|
197 |
let tree_for_proto = patterns_by_proto[deco.proto]; |
|
189 | 198 |
|
190 |
let path_trees; |
|
199 |
tree_for_proto = deco.domain === undefined ? |
|
200 |
modify_path(tree_for_proto, deco, item_modifier) : |
|
201 |
modify_domain(tree_for_proto, deco, item_modifier); |
|
191 | 202 |
|
192 |
if (deco.proto === "file") { |
|
193 |
path_trees = [tree]; |
|
194 |
} else { |
|
195 |
/* We need an aray of domain labels ordered most-significant-first. */ |
|
196 |
const domain = [...deco.domain].reverse(); |
|
197 |
path_trees = add_sequence(tree, domain, make_tree_node); |
|
198 |
} |
|
203 |
patterns_by_proto[deco.proto] = tree_for_proto; |
|
204 |
if (tree_for_proto === null) |
|
205 |
delete patterns_by_proto[deco.proto]; |
|
206 |
} |
|
199 | 207 |
|
200 |
for (const path_tree of path_trees) { |
|
201 |
for (const match_obj of add_sequence(path_tree, deco.path, () => {})) |
|
202 |
match_obj[item_name] = item; |
|
203 |
} |
|
208 |
/* |
|
209 |
* Make item queryable through the Pattern Tree that starts with the protocols |
|
210 |
* dictionary object passed in the first argument. |
|
211 |
*/ |
|
212 |
function pattern_tree_register(patterns_by_proto, pattern, item_name, item) |
|
213 |
{ |
|
214 |
const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_'; |
|
215 |
item_name = key_prefix + item_name; |
|
216 |
const add_item = obj => Object.assign(obj || {}, {[item_name]: item}); |
|
217 |
modify_tree(patterns_by_proto, pattern, add_item); |
|
218 |
} |
|
219 |
|
|
220 |
/* Helper function for pattern_tree_deregister(). */ |
|
221 |
function _remove_item(obj, item_name) |
|
222 |
{ |
|
223 |
obj = obj || {}; |
|
224 |
delete obj[item_name]; |
|
225 |
for (const key in obj) |
|
226 |
return obj; |
|
227 |
return null; |
|
204 | 228 |
} |
205 | 229 |
|
206 |
// /* |
|
207 |
// * Remove registered item from the Pattern Tree that starts with the protocols |
|
208 |
// * dictionary object passed in the first argument. The remaining 2 arguments |
|
209 |
// * should be pattern and name that have been earlier passed to |
|
210 |
// * pattern_tree_register(). |
|
211 |
// */ |
|
212 |
// function pattern_tree_deregister(patterns_by_proto, pattern, item_name) |
|
213 |
// { |
|
214 |
// const deco = deconstruct_url(pattern, false); |
|
215 |
|
|
216 |
// const tree = patterns_by_proto[deco.proto] || make_tree_node(); |
|
217 |
// patterns_by_proto[deco.proto] = tree; |
|
218 |
|
|
219 |
// let path_trees; |
|
220 |
|
|
221 |
// if (deco.proto === "file") { |
|
222 |
// path_trees = [tree]; |
|
223 |
// } else { |
|
224 |
// /* We need an aray of domain labels ordered most-significant-first. */ |
|
225 |
// const domain = [...deco.domain].reverse(); |
|
226 |
// .......................... |
|
227 |
// path_trees = add_sequence(tree, domain, make_tree_node); |
|
228 |
// } |
|
229 |
// } |
|
230 |
/* |
|
231 |
* Remove registered item from the Pattern Tree that starts with the protocols |
|
232 |
* dictionary object passed in the first argument. The remaining 2 arguments |
|
233 |
* should be pattern and name that have been earlier passed to |
|
234 |
* pattern_tree_register(). |
|
235 |
*/ |
|
236 |
function pattern_tree_deregister(patterns_by_proto, pattern, item_name) |
|
237 |
{ |
|
238 |
const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_'; |
|
239 |
item_name = key_prefix + item_name; |
|
240 |
const remove_item = obj => _remove_item(obj, item_name); |
|
241 |
modify_tree(patterns_by_proto, pattern, remove_item); |
|
242 |
} |
|
230 | 243 |
|
231 | 244 |
/* |
232 |
* Yield registered items (as [item_name, item] arrays) that match url. For |
|
233 |
* cosistent behavior, please don't modify the pattern tree while iterating over |
|
234 |
* the results. |
|
245 |
* Yield registered items that match url. Each yielded value is an object with |
|
246 |
* key being matched item names and values being the items. One such object |
|
247 |
* shall contain all items matched with given pattern specificity. Objects are |
|
248 |
* yielded in order from greatest to lowest pattern specificity. |
|
235 | 249 |
*/ |
236 | 250 |
function* pattern_tree_search(patterns_by_proto, url) |
237 | 251 |
{ |
238 | 252 |
const deco = deconstruct_url(url, false); |
239 | 253 |
|
240 |
tree = patterns_by_proto[deco.proto] || make_tree_node(); |
|
254 |
const tree_for_proto = patterns_by_proto[deco.proto] || empty_node(); |
|
255 |
let by_path = [tree_for_proto]; |
|
241 | 256 |
|
242 |
let path_trees; |
|
243 |
|
|
244 |
if (deco.proto === "file") { |
|
245 |
path_trees = [tree]; |
|
246 |
} else { |
|
247 |
/* We need an aray of domain labels ordered most-significant-first. */ |
|
248 |
const domain = [...deco.domain].reverse(); |
|
249 |
path_trees = search_sequence(tree, domain); |
|
250 |
} |
|
257 |
/* We need an array of domain labels ordered most-significant-first. */ |
|
258 |
if (deco.domain !== undefined) |
|
259 |
by_path = search_sequence(tree_for_proto, [...deco.domain].reverse()); |
|
251 | 260 |
|
252 |
for (const path_tree of path_trees) {
|
|
261 |
for (const path_tree of by_path) {
|
|
253 | 262 |
for (const match_obj of search_sequence(path_tree, deco.path)) { |
254 |
for (const entry of Object.entries(match_obj)) |
|
255 |
yield entry; |
|
263 |
let result_obj_slash = null; |
|
264 |
let result_obj_no_slash = null; |
|
265 |
|
|
266 |
for (const [key, item] of Object.entries(match_obj)) { |
|
267 |
if (deco.trailing_slash && key[0] === '/') { |
|
268 |
result_obj_slash = result_obj_slash || {}; |
|
269 |
result_obj_slash[key.substring(1)] = item; |
|
270 |
} else if (key[0] !== '/') { |
|
271 |
result_obj_no_slash = result_obj_no_slash || {}; |
|
272 |
result_obj_no_slash[key.substring(1)] = item; |
|
273 |
} |
|
274 |
} |
|
275 |
|
|
276 |
if (deco.trailing_slash && result_obj_slash) |
|
277 |
yield result_obj_slash; |
|
278 |
|
|
279 |
if (result_obj_no_slash) |
|
280 |
yield result_obj_no_slash; |
|
256 | 281 |
} |
257 | 282 |
} |
258 | 283 |
} |
259 | 284 |
|
260 | 285 |
const pattern_tree = { |
261 |
make: () => {},
|
|
262 |
register: pattern_tree_register, |
|
263 |
// deregister: pattern_tree_deregister,
|
|
264 |
search: pattern_tree_search |
|
286 |
make: () => ({}),
|
|
287 |
register: pattern_tree_register,
|
|
288 |
deregister: pattern_tree_deregister, |
|
289 |
search: pattern_tree_search
|
|
265 | 290 |
} |
266 | 291 |
|
267 | 292 |
/* |
test/profiles.py | ||
---|---|---|
31 | 31 |
|
32 | 32 |
from .misc_constants import * |
33 | 33 |
|
34 |
class HaketiloFirefox(webdriver.Firefox): |
|
35 |
""" |
|
36 |
This wrapper class around selenium.webdriver.Firefox adds a `loaded_scripts` |
|
37 |
instance property that gets resetted to an empty array every time the |
|
38 |
`get()` method is called. |
|
39 |
""" |
|
40 |
def __init__(self, *args, **kwargs): |
|
41 |
super().__init__(*args, **kwargs) |
|
42 |
self.reset_loaded_scripts() |
|
43 |
|
|
44 |
def reset_loaded_scripts(self): |
|
45 |
self.loaded_scripts = [] |
|
46 |
|
|
47 |
def get(self, *args, **kwargs): |
|
48 |
self.reset_loaded_scripts() |
|
49 |
super().get(*args, **kwargs) |
|
50 |
|
|
34 | 51 |
def set_profile_proxy(profile, proxy_host, proxy_port): |
52 |
""" |
|
53 |
Create a Firefox profile that uses the specified HTTP proxy for all |
|
54 |
protocols. |
|
55 |
""" |
|
35 | 56 |
# proxy type 1 designates "manual" |
36 | 57 |
profile.set_preference('network.proxy.type', 1) |
37 | 58 |
profile.set_preference('network.proxy.no_proxies_on', '') |
... | ... | |
49 | 70 |
def firefox_safe_mode(firefox_binary=default_firefox_binary, |
50 | 71 |
proxy_host=default_proxy_host, |
51 | 72 |
proxy_port=default_proxy_port): |
73 |
""" |
|
74 |
Initialize a Firefox instance controlled by selenium. The instance is |
|
75 |
started in safe mode. |
|
76 |
""" |
|
52 | 77 |
profile = webdriver.FirefoxProfile() |
53 | 78 |
set_profile_proxy(profile, proxy_host, proxy_port) |
54 | 79 |
set_profile_console_logging(profile) |
... | ... | |
56 | 81 |
options = Options() |
57 | 82 |
options.add_argument('--safe-mode') |
58 | 83 |
|
59 |
return webdriver.Firefox(options=options, firefox_profile=profile,
|
|
60 |
firefox_binary=firefox_binary)
|
|
84 |
return HaketiloFirefox(options=options, firefox_profile=profile,
|
|
85 |
firefox_binary=firefox_binary) |
|
61 | 86 |
|
62 | 87 |
def firefox_with_profile(firefox_binary=default_firefox_binary, |
63 | 88 |
profile_dir=default_clean_profile_dir, |
64 | 89 |
proxy_host=default_proxy_host, |
65 | 90 |
proxy_port=default_proxy_port): |
91 |
""" |
|
92 |
Initialize a Firefox instance controlled by selenium. The instance is |
|
93 |
started using an empty profile (either the default one or the one passed to |
|
94 |
`configure` script). The empty profile is meant to make Firefox start with |
|
95 |
globally-installed extensions disabled. |
|
96 |
""" |
|
66 | 97 |
profile = webdriver.FirefoxProfile(profile_dir) |
67 | 98 |
set_profile_proxy(profile, proxy_host, proxy_port) |
68 | 99 |
set_profile_console_logging(profile) |
69 | 100 |
|
70 |
return webdriver.Firefox(firefox_profile=profile, |
|
71 |
firefox_binary=firefox_binary) |
|
101 |
return HaketiloFirefox(firefox_profile=profile, |
|
102 |
firefox_binary=firefox_binary) |
test/unit/conftest.py | ||
---|---|---|
78 | 78 |
|
79 | 79 |
def _execute_in_page_context(driver, script, args): |
80 | 80 |
script = script + '\n;\nwindow.haketilo_selenium_exception = false;' |
81 |
driver.loaded_scripts.append(script) |
|
81 | 82 |
try: |
82 | 83 |
return driver.execute_script(script_injecting_script, script, args) |
83 | 84 |
except Exception as e: |
84 | 85 |
import sys |
85 |
lines = enumerate(script.split('\n'), 1) |
|
86 |
for err_info in [('Failing script\n',), *lines]: |
|
87 |
print(*err_info, file=sys.stderr) |
|
86 |
|
|
87 |
print("Scripts loaded since driver's last get() method call:", |
|
88 |
file=sys.stderr) |
|
89 |
|
|
90 |
for script in driver.loaded_scripts: |
|
91 |
lines = enumerate(script.split('\n'), 1) |
|
92 |
for err_info in [('===',), *lines]: |
|
93 |
print(*err_info, file=sys.stderr) |
|
88 | 94 |
|
89 | 95 |
raise e from None |
90 | 96 |
|
test/unit/test_patterns.py | ||
---|---|---|
100 | 100 |
deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', |
101 | 101 |
'https://eXaMpLe.com/a/b?ver=1.2.3#heading2') |
102 | 102 |
assert deco |
103 |
assert deco['trailing_dash'] == False
|
|
104 |
assert deco['proto'] == 'https' |
|
105 |
assert deco['domain'] == ['example', 'com'] |
|
106 |
assert deco['path'] == ['a', 'b'] |
|
103 |
assert deco['trailing_slash'] == False
|
|
104 |
assert deco['proto'] == 'https'
|
|
105 |
assert deco['domain'] == ['example', 'com']
|
|
106 |
assert deco['path'] == ['a', 'b']
|
|
107 | 107 |
|
108 | 108 |
deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', |
109 | 109 |
'http://**.example.com/') |
110 | 110 |
assert deco |
111 |
assert deco['trailing_dash'] == True
|
|
112 |
assert deco['proto'] == 'http' |
|
113 |
assert deco['domain'] == ['**', 'example', 'com'] |
|
114 |
assert deco['path'] == [] |
|
111 |
assert deco['trailing_slash'] == True
|
|
112 |
assert deco['proto'] == 'http'
|
|
113 |
assert deco['domain'] == ['**', 'example', 'com']
|
|
114 |
assert deco['path'] == []
|
|
115 | 115 |
|
116 | 116 |
deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', |
117 | 117 |
'ftp://user@ftp.example.com/all///passwords.txt/') |
118 | 118 |
assert deco |
119 |
assert deco['trailing_dash'] == True
|
|
120 |
assert deco['proto'] == 'ftp' |
|
121 |
assert deco['domain'] == ['ftp', 'example', 'com'] |
|
122 |
assert deco['path'] == ['all', 'passwords.txt'] |
|
119 |
assert deco['trailing_slash'] == True
|
|
120 |
assert deco['proto'] == 'ftp'
|
|
121 |
assert deco['domain'] == ['ftp', 'example', 'com']
|
|
122 |
assert deco['path'] == ['all', 'passwords.txt']
|
|
123 | 123 |
|
124 | 124 |
deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', |
125 | 125 |
'ftp://mirror.edu.pl.eu.org') |
126 | 126 |
assert deco |
127 |
assert deco['trailing_dash'] == False
|
|
128 |
assert deco['proto'] == 'ftp' |
|
129 |
assert deco['domain'] == ['mirror', 'edu', 'pl', 'eu', 'org'] |
|
130 |
assert deco['path'] == [] |
|
127 |
assert deco['trailing_slash'] == False
|
|
128 |
assert deco['proto'] == 'ftp'
|
|
129 |
assert deco['domain'] == ['mirror', 'edu', 'pl', 'eu', 'org']
|
|
130 |
assert deco['path'] == []
|
|
131 | 131 |
|
132 | 132 |
deco = execute_in_page('returnval(deconstruct_url(arguments[0]));', |
133 | 133 |
'file:///mnt/parabola_chroot///etc/passwd') |
134 | 134 |
assert deco |
135 |
assert deco['trailing_dash'] == False |
|
136 |
assert deco['proto'] == 'file' |
|
137 |
assert deco['path'] == ['mnt', 'parabola_chroot', 'etc', 'passwd'] |
|
135 |
assert deco['trailing_slash'] == False |
|
136 |
assert deco['proto'] == 'file' |
|
137 |
assert deco['path'] == ['mnt', 'parabola_chroot', 'etc', 'passwd'] |
|
138 |
assert 'domain' not in deco |
|
138 | 139 |
|
139 | 140 |
for bad_url in [ |
140 | 141 |
'://bad-url.missing/protocol', |
test/unit/test_patterns_query_tree.py | ||
---|---|---|
27 | 27 |
|
28 | 28 |
def test_modify_branch(execute_in_page, patterns_tree_code): |
29 | 29 |
""" |
30 |
patterns_query_tree.js contains Patterns Tree data structure that allows
|
|
30 |
patterns_query_tree.js contains Pattern Tree data structure that allows |
|
31 | 31 |
arrays of string labels to be mapped to items. |
32 | 32 |
Verify operations modifying a single branch of such tree work properly. |
33 | 33 |
""" |
... | ... | |
68 | 68 |
# the right result. |
69 | 69 |
branch = execute_in_page( |
70 | 70 |
'''{ |
71 |
const branch = make_tree_node();
|
|
71 |
const branch = empty_node();
|
|
72 | 72 |
modify_sequence(branch, ['com', 'example'], item_adder('some_item')); |
73 | 73 |
returnval(branch); |
74 | 74 |
}''') |
... | ... | |
197 | 197 |
|
198 | 198 |
def test_search_branch(execute_in_page, patterns_tree_code): |
199 | 199 |
""" |
200 |
patterns_query_tree.js contains Patterns Tree data structure that allows
|
|
200 |
patterns_query_tree.js contains Pattern Tree data structure that allows |
|
201 | 201 |
arrays of string labels to be mapped to items. |
202 | 202 |
Verify searching a single branch of such tree work properly. |
203 | 203 |
""" |
... | ... | |
210 | 210 |
# Let's construct some tree branch to test on. |
211 | 211 |
execute_in_page( |
212 | 212 |
''' |
213 |
var branch = make_tree_node();
|
|
213 |
var branch = empty_node();
|
|
214 | 214 |
|
215 | 215 |
for (const [item, sequence] of [ |
216 | 216 |
['(root)', []], |
... | ... | |
281 | 281 |
print('sequence:', sequence, '\nexpected:', expected, |
282 | 282 |
'\nresult:', result, file=sys.stderr) |
283 | 283 |
raise e from None |
284 |
|
|
285 |
def test_pattern_tree(execute_in_page, patterns_tree_code): |
|
286 |
""" |
|
287 |
patterns_query_tree.js contains Pattern Tree data structure that allows |
|
288 |
arrays of string labels to be mapped to items. |
|
289 |
Verify operations on entire such tree work properly. |
|
290 |
""" |
|
291 |
execute_in_page(patterns_tree_code, page='https://gotmyowndoma.in') |
|
292 |
|
|
293 |
# Perform tests with all possible patterns for a simple URL. |
|
294 |
url = 'https://example.com' |
|
295 |
patterns = [ |
|
296 |
'https://example.com', |
|
297 |
'https://example.com/***', |
|
298 |
'https://***.example.com', |
|
299 |
'https://***.example.com/***' |
|
300 |
] |
|
301 |
bad_patterns = [ |
|
302 |
'http://example.com', |
|
303 |
'https://a.example.com', |
|
304 |
'https://*.example.com', |
|
305 |
'https://**.example.com', |
|
306 |
'https://example.com/a', |
|
307 |
'https://example.com/*', |
|
308 |
'https://example.com/**', |
|
309 |
] |
|
310 |
|
|
311 |
expected = [{'key': p} for p in patterns] |
|
312 |
|
|
313 |
tree, result = execute_in_page( |
|
314 |
'''{ |
|
315 |
const tree = pattern_tree.make(); |
|
316 |
for (const pattern of arguments[0].concat(arguments[1])) { |
|
317 |
pattern_tree.register(tree, pattern, 'key', pattern); |
|
318 |
pattern_tree.register(tree, pattern + '/', 'key', pattern + '/'); |
|
319 |
} |
|
320 |
returnval([tree, [...pattern_tree.search(tree, arguments[2])]]); |
|
321 |
}''', |
|
322 |
patterns, bad_patterns, url) |
|
323 |
assert expected == result |
|
324 |
|
|
325 |
# Also verify that deregistering half of the good patterns works correctly. |
|
326 |
patterns_removed = [pattern for i, pattern in enumerate(patterns) if i % 2] |
|
327 |
patterns = [pattern for i, pattern in enumerate(patterns) if not (i % 2)] |
|
328 |
expected = [{'key': p} for p in patterns] |
|
329 |
tree, result = execute_in_page( |
|
330 |
'''{ |
|
331 |
const tree = arguments[0]; |
|
332 |
for (const pattern of arguments[1]) { |
|
333 |
pattern_tree.deregister(tree, pattern, 'key'); |
|
334 |
pattern_tree.deregister(tree, pattern + '/', 'key'); |
|
335 |
} |
|
336 |
returnval([tree, [...pattern_tree.search(tree, arguments[2])]]); |
|
337 |
}''', |
|
338 |
tree, patterns_removed, url) |
|
339 |
assert expected == result |
|
340 |
|
|
341 |
# Also verify that deregistering all the patterns works correctly. |
|
342 |
tree = execute_in_page( |
|
343 |
'''{ |
|
344 |
const tree = arguments[0]; |
|
345 |
for (const pattern of arguments[1].concat(arguments[2])) { |
|
346 |
pattern_tree.deregister(tree, pattern, 'key'); |
|
347 |
pattern_tree.deregister(tree, pattern + '/', 'key'); |
|
348 |
} |
|
349 |
returnval(tree); |
|
350 |
}''', |
|
351 |
tree, patterns, bad_patterns) |
|
352 |
assert tree == {} |
|
353 |
|
|
354 |
# Perform tests with all possible patterns for a complex URL. |
|
355 |
url = 'http://settings.query.example.com/google/tries/destroy/adblockers//' |
|
356 |
patterns = [ |
|
357 |
'http://settings.query.example.com/google/tries/destroy/adblockers', |
|
358 |
'http://settings.query.example.com/google/tries/destroy/adblockers/***', |
|
359 |
'http://settings.query.example.com/google/tries/destroy/*', |
|
360 |
'http://settings.query.example.com/google/tries/destroy/***', |
|
361 |
'http://settings.query.example.com/google/tries/**', |
|
362 |
'http://settings.query.example.com/google/tries/***', |
|
363 |
'http://settings.query.example.com/google/**', |
|
364 |
'http://settings.query.example.com/google/***', |
|
365 |
'http://settings.query.example.com/**', |
|
366 |
'http://settings.query.example.com/***', |
|
367 |
|
|
368 |
'http://***.settings.query.example.com/google/tries/destroy/adblockers', |
|
369 |
'http://***.settings.query.example.com/google/tries/destroy/adblockers/***', |
|
370 |
'http://***.settings.query.example.com/google/tries/destroy/*', |
|
371 |
'http://***.settings.query.example.com/google/tries/destroy/***', |
|
372 |
'http://***.settings.query.example.com/google/tries/**', |
|
373 |
'http://***.settings.query.example.com/google/tries/***', |
|
374 |
'http://***.settings.query.example.com/google/**', |
|
375 |
'http://***.settings.query.example.com/google/***', |
|
376 |
'http://***.settings.query.example.com/**', |
|
377 |
'http://***.settings.query.example.com/***', |
|
378 |
'http://*.query.example.com/google/tries/destroy/adblockers', |
|
379 |
'http://*.query.example.com/google/tries/destroy/adblockers/***', |
|
380 |
'http://*.query.example.com/google/tries/destroy/*', |
|
381 |
'http://*.query.example.com/google/tries/destroy/***', |
|
382 |
'http://*.query.example.com/google/tries/**', |
|
383 |
'http://*.query.example.com/google/tries/***', |
|
384 |
'http://*.query.example.com/google/**', |
|
385 |
'http://*.query.example.com/google/***', |
|
386 |
'http://*.query.example.com/**', |
|
387 |
'http://*.query.example.com/***', |
|
388 |
'http://***.query.example.com/google/tries/destroy/adblockers', |
|
389 |
'http://***.query.example.com/google/tries/destroy/adblockers/***', |
|
390 |
'http://***.query.example.com/google/tries/destroy/*', |
|
391 |
'http://***.query.example.com/google/tries/destroy/***', |
|
392 |
'http://***.query.example.com/google/tries/**', |
|
393 |
'http://***.query.example.com/google/tries/***', |
|
394 |
'http://***.query.example.com/google/**', |
|
395 |
'http://***.query.example.com/google/***', |
|
396 |
'http://***.query.example.com/**', |
|
397 |
'http://***.query.example.com/***', |
|
398 |
'http://**.example.com/google/tries/destroy/adblockers', |
|
399 |
'http://**.example.com/google/tries/destroy/adblockers/***', |
|
400 |
'http://**.example.com/google/tries/destroy/*', |
|
401 |
'http://**.example.com/google/tries/destroy/***', |
|
402 |
'http://**.example.com/google/tries/**', |
|
403 |
'http://**.example.com/google/tries/***', |
|
404 |
'http://**.example.com/google/**', |
|
405 |
'http://**.example.com/google/***', |
|
406 |
'http://**.example.com/**', |
|
407 |
'http://**.example.com/***', |
|
408 |
'http://***.example.com/google/tries/destroy/adblockers', |
|
409 |
'http://***.example.com/google/tries/destroy/adblockers/***', |
|
410 |
'http://***.example.com/google/tries/destroy/*', |
|
411 |
'http://***.example.com/google/tries/destroy/***', |
|
412 |
'http://***.example.com/google/tries/**', |
|
413 |
'http://***.example.com/google/tries/***', |
|
414 |
'http://***.example.com/google/**', |
|
415 |
'http://***.example.com/google/***', |
|
416 |
'http://***.example.com/**', |
|
417 |
'http://***.example.com/***' |
|
418 |
] |
|
419 |
bad_patterns = [ |
|
420 |
'https://settings.query.example.com/google/tries/destroy/adblockers', |
|
421 |
'http://settings.query.example.com/google/tries/destroy/adblockers/a', |
|
422 |
'http://settings.query.example.com/google/tries/destroy/adblockers/*', |
|
423 |
'http://settings.query.example.com/google/tries/destroy/adblockers/**', |
|
424 |
'http://settings.query.example.com/google/tries/destroy/a', |
|
425 |
'http://settings.query.example.com/google/tries/destroy/**', |
|
426 |
'http://settings.query.example.com/google/tries/*', |
|
427 |
'http://a.settings.query.example.com/google/tries/destroy/adblockers', |
|
428 |
'http://*.settings.query.example.com/google/tries/destroy/adblockers', |
|
429 |
'http://**.settings.query.example.com/google/tries/destroy/adblockers', |
|
430 |
'http://a.query.example.com/google/tries/destroy/adblockers', |
|
431 |
'http://**.query.example.com/google/tries/destroy/adblockers', |
|
432 |
'http://*.example.com/google/tries/destroy/adblockers' |
|
433 |
] |
|
434 |
|
|
435 |
expected = [{'key': p + s} for p in patterns for s in ['/', '']] |
|
436 |
|
|
437 |
tree, result = execute_in_page( |
|
438 |
'''{ |
|
439 |
const tree = pattern_tree.make(); |
|
440 |
for (const pattern of arguments[0].concat(arguments[1])) { |
|
441 |
pattern_tree.register(tree, pattern, 'key', pattern); |
|
442 |
pattern_tree.register(tree, pattern + '/', 'key', pattern + '/'); |
|
443 |
} |
|
444 |
returnval([tree, [...pattern_tree.search(tree, arguments[2])]]); |
|
445 |
}''', |
|
446 |
patterns, bad_patterns, url) |
|
447 |
assert expected == result |
|
448 |
|
|
449 |
# Also verify that deregistering all patterns with trailing slash works |
|
450 |
# correctly. |
|
451 |
expected = [{'key': p} for p in patterns] |
|
452 |
tree, result = execute_in_page( |
|
453 |
'''{ |
|
454 |
const tree = arguments[0]; |
|
455 |
for (const pattern of arguments[1]) |
|
456 |
pattern_tree.deregister(tree, pattern + '/', 'key'); |
|
457 |
returnval([tree, [...pattern_tree.search(tree, arguments[2])]]); |
|
458 |
}''', |
|
459 |
tree, patterns, url) |
|
460 |
assert expected == result |
|
461 |
|
|
462 |
# Also verify that deregistering all the patterns works correctly. |
|
463 |
tree = execute_in_page( |
|
464 |
'''{ |
|
465 |
const tree = arguments[0]; |
|
466 |
for (const pattern of arguments[1]) |
|
467 |
pattern_tree.deregister(tree, pattern, 'key'); |
|
468 |
for (const pattern of arguments[2]) { |
|
469 |
pattern_tree.deregister(tree, pattern, 'key'); |
|
470 |
pattern_tree.deregister(tree, pattern + '/', 'key'); |
|
471 |
} |
|
472 |
returnval(tree); |
|
473 |
}''', |
|
474 |
tree, patterns, bad_patterns) |
|
475 |
assert tree == {} |
Also available in: Unified diff
finish implementing more efficient querying of URL patterns
The algorithm is implemented and tested. However, it is yet to be hooked into the actual extension.