Revision e1282a63
Added by koszko almost 2 years ago
| common/patterns.js | ||
|---|---|---|
| 96 | 96 |
throw `unsupported protocol in url '${url}'`;
|
| 97 | 97 |
} |
| 98 | 98 |
|
| 99 |
deco.trailing_dash = deco.path[deco.path.length - 1] === "/";
|
|
| 99 |
deco.trailing_slash = deco.path[deco.path.length - 1] === "/";
|
|
| 100 | 100 |
|
| 101 | 101 |
if (deco.domain) {
|
| 102 | 102 |
if (deco.domain.length > max.DOMAIN_CHARS) {
|
| ... | ... | |
| 151 | 151 |
const path_part = ["", ...deco.path.slice(0, slice)].join("/");
|
| 152 | 152 |
const path_wildcards = []; |
| 153 | 153 |
if (slice === deco.path.length && !deco.path_truncated) {
|
| 154 |
if (deco.trailing_dash)
|
|
| 154 |
if (deco.trailing_slash)
|
|
| 155 | 155 |
yield path_part + "/"; |
| 156 | 156 |
if (slice > 0 || deco.proto !== "file") |
| 157 | 157 |
yield path_part; |
| common/patterns_query_tree.js | ||
|---|---|---|
| 47 | 47 |
* IMPORTS_END |
| 48 | 48 |
*/ |
| 49 | 49 |
|
| 50 |
/* |
|
| 51 |
* This is a javascript rewrite of Python implementation of the algorithm here: |
|
| 52 |
* https://git.koszko.org/pydrilla/tree/src/pydrilla/pydrilla.py?id=f4edcbe7f4739d6f82a2e1bb180960b003b30862#n205 |
|
| 53 |
*/ |
|
| 54 |
|
|
| 55 | 50 |
/* "Pattern Tree" is how we refer to the data structure used for querying |
| 56 | 51 |
* Haketilo patterns. Those look like 'https://*.example.com/ab/***'. The goal |
| 57 | 52 |
* is to make it possible for given URL to quickly retrieve all known patterns |
| 58 | 53 |
* that match it. |
| 59 | 54 |
*/ |
| 60 |
function make_tree_node() {
|
|
| 55 |
function empty_node() {
|
|
| 61 | 56 |
return {
|
| 62 | 57 |
wildcard_matches: [null, null, null], |
| 63 | 58 |
literal_match: null, |
| ... | ... | |
| 141 | 136 |
for (var current_segment of segments) {
|
| 142 | 137 |
wildcards = tree_node.wildcard_matches; |
| 143 | 138 |
|
| 144 |
const child = tree_node.children[current_segment] || make_tree_node();
|
|
| 139 |
const child = tree_node.children[current_segment] || empty_node();
|
|
| 145 | 140 |
tree_node.children[current_segment] = child; |
| 146 | 141 |
tree_node = child; |
| 147 | 142 |
nodes.push(tree_node); |
| ... | ... | |
| 171 | 166 |
} |
| 172 | 167 |
} |
| 173 | 168 |
|
| 174 |
/* |
|
| 175 |
* Make item queryable through the Pattern Tree that starts with the protocols |
|
| 176 |
* dictionary object passed in the first argument. |
|
| 177 |
*/ |
|
| 178 |
function pattern_tree_register(patterns_by_proto, pattern, item_name, item) |
|
| 169 |
/* Helper function for modify_tree(). */ |
|
| 170 |
function modify_path(tree_node, deco, item_modifier) |
|
| 171 |
{
|
|
| 172 |
tree_node = tree_node || empty_node(); |
|
| 173 |
modify_sequence(tree_node, deco.path, item_modifier); |
|
| 174 |
return is_empty_node(tree_node) ? null : tree_node; |
|
| 175 |
} |
|
| 176 |
|
|
| 177 |
/* Helper function for modify_tree(). */ |
|
| 178 |
function modify_domain(tree_node, deco, item_modifier) |
|
| 179 |
{
|
|
| 180 |
const path_modifier = branch => modify_path(branch, deco, item_modifier); |
|
| 181 |
tree_node = tree_node || empty_node(); |
|
| 182 |
/* We need an array of domain labels ordered most-significant-first. */ |
|
| 183 |
modify_sequence(tree_node, [...deco.domain].reverse(), path_modifier); |
|
| 184 |
return is_empty_node(tree_node) ? null : tree_node; |
|
| 185 |
} |
|
| 186 |
|
|
| 187 |
/* Helper function for pattern_tree_register() and pattern_tree_deregister(). */ |
|
| 188 |
function modify_tree(patterns_by_proto, pattern, item_modifier) |
|
| 179 | 189 |
{
|
| 180 | 190 |
/* |
| 181 | 191 |
* We pass 'false' to disable length limits on URL parts. Length limits are |
| ... | ... | |
| 184 | 194 |
*/ |
| 185 | 195 |
const deco = deconstruct_url(pattern, false); |
| 186 | 196 |
|
| 187 |
const tree = patterns_by_proto[deco.proto] || make_tree_node(); |
|
| 188 |
patterns_by_proto[deco.proto] = tree; |
|
| 197 |
let tree_for_proto = patterns_by_proto[deco.proto]; |
|
| 189 | 198 |
|
| 190 |
let path_trees; |
|
| 199 |
tree_for_proto = deco.domain === undefined ? |
|
| 200 |
modify_path(tree_for_proto, deco, item_modifier) : |
|
| 201 |
modify_domain(tree_for_proto, deco, item_modifier); |
|
| 191 | 202 |
|
| 192 |
if (deco.proto === "file") {
|
|
| 193 |
path_trees = [tree]; |
|
| 194 |
} else {
|
|
| 195 |
/* We need an aray of domain labels ordered most-significant-first. */ |
|
| 196 |
const domain = [...deco.domain].reverse(); |
|
| 197 |
path_trees = add_sequence(tree, domain, make_tree_node); |
|
| 198 |
} |
|
| 203 |
patterns_by_proto[deco.proto] = tree_for_proto; |
|
| 204 |
if (tree_for_proto === null) |
|
| 205 |
delete patterns_by_proto[deco.proto]; |
|
| 206 |
} |
|
| 199 | 207 |
|
| 200 |
for (const path_tree of path_trees) {
|
|
| 201 |
for (const match_obj of add_sequence(path_tree, deco.path, () => {}))
|
|
| 202 |
match_obj[item_name] = item; |
|
| 203 |
} |
|
| 208 |
/* |
|
| 209 |
* Make item queryable through the Pattern Tree that starts with the protocols |
|
| 210 |
* dictionary object passed in the first argument. |
|
| 211 |
*/ |
|
| 212 |
function pattern_tree_register(patterns_by_proto, pattern, item_name, item) |
|
| 213 |
{
|
|
| 214 |
const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_'; |
|
| 215 |
item_name = key_prefix + item_name; |
|
| 216 |
const add_item = obj => Object.assign(obj || {}, {[item_name]: item});
|
|
| 217 |
modify_tree(patterns_by_proto, pattern, add_item); |
|
| 218 |
} |
|
| 219 |
|
|
| 220 |
/* Helper function for pattern_tree_deregister(). */ |
|
| 221 |
function _remove_item(obj, item_name) |
|
| 222 |
{
|
|
| 223 |
obj = obj || {};
|
|
| 224 |
delete obj[item_name]; |
|
| 225 |
for (const key in obj) |
|
| 226 |
return obj; |
|
| 227 |
return null; |
|
| 204 | 228 |
} |
| 205 | 229 |
|
| 206 |
// /* |
|
| 207 |
// * Remove registered item from the Pattern Tree that starts with the protocols |
|
| 208 |
// * dictionary object passed in the first argument. The remaining 2 arguments |
|
| 209 |
// * should be pattern and name that have been earlier passed to |
|
| 210 |
// * pattern_tree_register(). |
|
| 211 |
// */ |
|
| 212 |
// function pattern_tree_deregister(patterns_by_proto, pattern, item_name) |
|
| 213 |
// {
|
|
| 214 |
// const deco = deconstruct_url(pattern, false); |
|
| 215 |
|
|
| 216 |
// const tree = patterns_by_proto[deco.proto] || make_tree_node(); |
|
| 217 |
// patterns_by_proto[deco.proto] = tree; |
|
| 218 |
|
|
| 219 |
// let path_trees; |
|
| 220 |
|
|
| 221 |
// if (deco.proto === "file") {
|
|
| 222 |
// path_trees = [tree]; |
|
| 223 |
// } else {
|
|
| 224 |
// /* We need an aray of domain labels ordered most-significant-first. */ |
|
| 225 |
// const domain = [...deco.domain].reverse(); |
|
| 226 |
// .......................... |
|
| 227 |
// path_trees = add_sequence(tree, domain, make_tree_node); |
|
| 228 |
// } |
|
| 229 |
// } |
|
| 230 |
/* |
|
| 231 |
* Remove registered item from the Pattern Tree that starts with the protocols |
|
| 232 |
* dictionary object passed in the first argument. The remaining 2 arguments |
|
| 233 |
* should be pattern and name that have been earlier passed to |
|
| 234 |
* pattern_tree_register(). |
|
| 235 |
*/ |
|
| 236 |
function pattern_tree_deregister(patterns_by_proto, pattern, item_name) |
|
| 237 |
{
|
|
| 238 |
const key_prefix = pattern[pattern.length - 1] === '/' ? '/' : '_'; |
|
| 239 |
item_name = key_prefix + item_name; |
|
| 240 |
const remove_item = obj => _remove_item(obj, item_name); |
|
| 241 |
modify_tree(patterns_by_proto, pattern, remove_item); |
|
| 242 |
} |
|
| 230 | 243 |
|
| 231 | 244 |
/* |
| 232 |
* Yield registered items (as [item_name, item] arrays) that match url. For |
|
| 233 |
* cosistent behavior, please don't modify the pattern tree while iterating over |
|
| 234 |
* the results. |
|
| 245 |
* Yield registered items that match url. Each yielded value is an object with |
|
| 246 |
* key being matched item names and values being the items. One such object |
|
| 247 |
* shall contain all items matched with given pattern specificity. Objects are |
|
| 248 |
* yielded in order from greatest to lowest pattern specificity. |
|
| 235 | 249 |
*/ |
| 236 | 250 |
function* pattern_tree_search(patterns_by_proto, url) |
| 237 | 251 |
{
|
| 238 | 252 |
const deco = deconstruct_url(url, false); |
| 239 | 253 |
|
| 240 |
tree = patterns_by_proto[deco.proto] || make_tree_node(); |
|
| 254 |
const tree_for_proto = patterns_by_proto[deco.proto] || empty_node(); |
|
| 255 |
let by_path = [tree_for_proto]; |
|
| 241 | 256 |
|
| 242 |
let path_trees; |
|
| 243 |
|
|
| 244 |
if (deco.proto === "file") {
|
|
| 245 |
path_trees = [tree]; |
|
| 246 |
} else {
|
|
| 247 |
/* We need an aray of domain labels ordered most-significant-first. */ |
|
| 248 |
const domain = [...deco.domain].reverse(); |
|
| 249 |
path_trees = search_sequence(tree, domain); |
|
| 250 |
} |
|
| 257 |
/* We need an array of domain labels ordered most-significant-first. */ |
|
| 258 |
if (deco.domain !== undefined) |
|
| 259 |
by_path = search_sequence(tree_for_proto, [...deco.domain].reverse()); |
|
| 251 | 260 |
|
| 252 |
for (const path_tree of path_trees) {
|
|
| 261 |
for (const path_tree of by_path) {
|
|
| 253 | 262 |
for (const match_obj of search_sequence(path_tree, deco.path)) {
|
| 254 |
for (const entry of Object.entries(match_obj)) |
|
| 255 |
yield entry; |
|
| 263 |
let result_obj_slash = null; |
|
| 264 |
let result_obj_no_slash = null; |
|
| 265 |
|
|
| 266 |
for (const [key, item] of Object.entries(match_obj)) {
|
|
| 267 |
if (deco.trailing_slash && key[0] === '/') {
|
|
| 268 |
result_obj_slash = result_obj_slash || {};
|
|
| 269 |
result_obj_slash[key.substring(1)] = item; |
|
| 270 |
} else if (key[0] !== '/') {
|
|
| 271 |
result_obj_no_slash = result_obj_no_slash || {};
|
|
| 272 |
result_obj_no_slash[key.substring(1)] = item; |
|
| 273 |
} |
|
| 274 |
} |
|
| 275 |
|
|
| 276 |
if (deco.trailing_slash && result_obj_slash) |
|
| 277 |
yield result_obj_slash; |
|
| 278 |
|
|
| 279 |
if (result_obj_no_slash) |
|
| 280 |
yield result_obj_no_slash; |
|
| 256 | 281 |
} |
| 257 | 282 |
} |
| 258 | 283 |
} |
| 259 | 284 |
|
| 260 | 285 |
const pattern_tree = {
|
| 261 |
make: () => {},
|
|
| 262 |
register: pattern_tree_register, |
|
| 263 |
// deregister: pattern_tree_deregister,
|
|
| 264 |
search: pattern_tree_search |
|
| 286 |
make: () => ({}),
|
|
| 287 |
register: pattern_tree_register,
|
|
| 288 |
deregister: pattern_tree_deregister, |
|
| 289 |
search: pattern_tree_search
|
|
| 265 | 290 |
} |
| 266 | 291 |
|
| 267 | 292 |
/* |
| test/profiles.py | ||
|---|---|---|
| 31 | 31 |
|
| 32 | 32 |
from .misc_constants import * |
| 33 | 33 |
|
| 34 |
class HaketiloFirefox(webdriver.Firefox): |
|
| 35 |
""" |
|
| 36 |
This wrapper class around selenium.webdriver.Firefox adds a `loaded_scripts` |
|
| 37 |
instance property that gets resetted to an empty array every time the |
|
| 38 |
`get()` method is called. |
|
| 39 |
""" |
|
| 40 |
def __init__(self, *args, **kwargs): |
|
| 41 |
super().__init__(*args, **kwargs) |
|
| 42 |
self.reset_loaded_scripts() |
|
| 43 |
|
|
| 44 |
def reset_loaded_scripts(self): |
|
| 45 |
self.loaded_scripts = [] |
|
| 46 |
|
|
| 47 |
def get(self, *args, **kwargs): |
|
| 48 |
self.reset_loaded_scripts() |
|
| 49 |
super().get(*args, **kwargs) |
|
| 50 |
|
|
| 34 | 51 |
def set_profile_proxy(profile, proxy_host, proxy_port): |
| 52 |
""" |
|
| 53 |
Create a Firefox profile that uses the specified HTTP proxy for all |
|
| 54 |
protocols. |
|
| 55 |
""" |
|
| 35 | 56 |
# proxy type 1 designates "manual" |
| 36 | 57 |
profile.set_preference('network.proxy.type', 1)
|
| 37 | 58 |
profile.set_preference('network.proxy.no_proxies_on', '')
|
| ... | ... | |
| 49 | 70 |
def firefox_safe_mode(firefox_binary=default_firefox_binary, |
| 50 | 71 |
proxy_host=default_proxy_host, |
| 51 | 72 |
proxy_port=default_proxy_port): |
| 73 |
""" |
|
| 74 |
Initialize a Firefox instance controlled by selenium. The instance is |
|
| 75 |
started in safe mode. |
|
| 76 |
""" |
|
| 52 | 77 |
profile = webdriver.FirefoxProfile() |
| 53 | 78 |
set_profile_proxy(profile, proxy_host, proxy_port) |
| 54 | 79 |
set_profile_console_logging(profile) |
| ... | ... | |
| 56 | 81 |
options = Options() |
| 57 | 82 |
options.add_argument('--safe-mode')
|
| 58 | 83 |
|
| 59 |
return webdriver.Firefox(options=options, firefox_profile=profile,
|
|
| 60 |
firefox_binary=firefox_binary)
|
|
| 84 |
return HaketiloFirefox(options=options, firefox_profile=profile,
|
|
| 85 |
firefox_binary=firefox_binary) |
|
| 61 | 86 |
|
| 62 | 87 |
def firefox_with_profile(firefox_binary=default_firefox_binary, |
| 63 | 88 |
profile_dir=default_clean_profile_dir, |
| 64 | 89 |
proxy_host=default_proxy_host, |
| 65 | 90 |
proxy_port=default_proxy_port): |
| 91 |
""" |
|
| 92 |
Initialize a Firefox instance controlled by selenium. The instance is |
|
| 93 |
started using an empty profile (either the default one or the one passed to |
|
| 94 |
`configure` script). The empty profile is meant to make Firefox start with |
|
| 95 |
globally-installed extensions disabled. |
|
| 96 |
""" |
|
| 66 | 97 |
profile = webdriver.FirefoxProfile(profile_dir) |
| 67 | 98 |
set_profile_proxy(profile, proxy_host, proxy_port) |
| 68 | 99 |
set_profile_console_logging(profile) |
| 69 | 100 |
|
| 70 |
return webdriver.Firefox(firefox_profile=profile, |
|
| 71 |
firefox_binary=firefox_binary) |
|
| 101 |
return HaketiloFirefox(firefox_profile=profile, |
|
| 102 |
firefox_binary=firefox_binary) |
|
| test/unit/conftest.py | ||
|---|---|---|
| 78 | 78 |
|
| 79 | 79 |
def _execute_in_page_context(driver, script, args): |
| 80 | 80 |
script = script + '\n;\nwindow.haketilo_selenium_exception = false;' |
| 81 |
driver.loaded_scripts.append(script) |
|
| 81 | 82 |
try: |
| 82 | 83 |
return driver.execute_script(script_injecting_script, script, args) |
| 83 | 84 |
except Exception as e: |
| 84 | 85 |
import sys |
| 85 |
lines = enumerate(script.split('\n'), 1)
|
|
| 86 |
for err_info in [('Failing script\n',), *lines]:
|
|
| 87 |
print(*err_info, file=sys.stderr) |
|
| 86 |
|
|
| 87 |
print("Scripts loaded since driver's last get() method call:",
|
|
| 88 |
file=sys.stderr) |
|
| 89 |
|
|
| 90 |
for script in driver.loaded_scripts: |
|
| 91 |
lines = enumerate(script.split('\n'), 1)
|
|
| 92 |
for err_info in [('===',), *lines]:
|
|
| 93 |
print(*err_info, file=sys.stderr) |
|
| 88 | 94 |
|
| 89 | 95 |
raise e from None |
| 90 | 96 |
|
| test/unit/test_patterns.py | ||
|---|---|---|
| 100 | 100 |
deco = execute_in_page('returnval(deconstruct_url(arguments[0]));',
|
| 101 | 101 |
'https://eXaMpLe.com/a/b?ver=1.2.3#heading2') |
| 102 | 102 |
assert deco |
| 103 |
assert deco['trailing_dash'] == False
|
|
| 104 |
assert deco['proto'] == 'https' |
|
| 105 |
assert deco['domain'] == ['example', 'com'] |
|
| 106 |
assert deco['path'] == ['a', 'b'] |
|
| 103 |
assert deco['trailing_slash'] == False
|
|
| 104 |
assert deco['proto'] == 'https'
|
|
| 105 |
assert deco['domain'] == ['example', 'com']
|
|
| 106 |
assert deco['path'] == ['a', 'b']
|
|
| 107 | 107 |
|
| 108 | 108 |
deco = execute_in_page('returnval(deconstruct_url(arguments[0]));',
|
| 109 | 109 |
'http://**.example.com/') |
| 110 | 110 |
assert deco |
| 111 |
assert deco['trailing_dash'] == True
|
|
| 112 |
assert deco['proto'] == 'http' |
|
| 113 |
assert deco['domain'] == ['**', 'example', 'com'] |
|
| 114 |
assert deco['path'] == [] |
|
| 111 |
assert deco['trailing_slash'] == True
|
|
| 112 |
assert deco['proto'] == 'http'
|
|
| 113 |
assert deco['domain'] == ['**', 'example', 'com']
|
|
| 114 |
assert deco['path'] == []
|
|
| 115 | 115 |
|
| 116 | 116 |
deco = execute_in_page('returnval(deconstruct_url(arguments[0]));',
|
| 117 | 117 |
'ftp://user@ftp.example.com/all///passwords.txt/') |
| 118 | 118 |
assert deco |
| 119 |
assert deco['trailing_dash'] == True
|
|
| 120 |
assert deco['proto'] == 'ftp' |
|
| 121 |
assert deco['domain'] == ['ftp', 'example', 'com'] |
|
| 122 |
assert deco['path'] == ['all', 'passwords.txt'] |
|
| 119 |
assert deco['trailing_slash'] == True
|
|
| 120 |
assert deco['proto'] == 'ftp'
|
|
| 121 |
assert deco['domain'] == ['ftp', 'example', 'com']
|
|
| 122 |
assert deco['path'] == ['all', 'passwords.txt']
|
|
| 123 | 123 |
|
| 124 | 124 |
deco = execute_in_page('returnval(deconstruct_url(arguments[0]));',
|
| 125 | 125 |
'ftp://mirror.edu.pl.eu.org') |
| 126 | 126 |
assert deco |
| 127 |
assert deco['trailing_dash'] == False
|
|
| 128 |
assert deco['proto'] == 'ftp' |
|
| 129 |
assert deco['domain'] == ['mirror', 'edu', 'pl', 'eu', 'org'] |
|
| 130 |
assert deco['path'] == [] |
|
| 127 |
assert deco['trailing_slash'] == False
|
|
| 128 |
assert deco['proto'] == 'ftp'
|
|
| 129 |
assert deco['domain'] == ['mirror', 'edu', 'pl', 'eu', 'org']
|
|
| 130 |
assert deco['path'] == []
|
|
| 131 | 131 |
|
| 132 | 132 |
deco = execute_in_page('returnval(deconstruct_url(arguments[0]));',
|
| 133 | 133 |
'file:///mnt/parabola_chroot///etc/passwd') |
| 134 | 134 |
assert deco |
| 135 |
assert deco['trailing_dash'] == False |
|
| 136 |
assert deco['proto'] == 'file' |
|
| 137 |
assert deco['path'] == ['mnt', 'parabola_chroot', 'etc', 'passwd'] |
|
| 135 |
assert deco['trailing_slash'] == False |
|
| 136 |
assert deco['proto'] == 'file' |
|
| 137 |
assert deco['path'] == ['mnt', 'parabola_chroot', 'etc', 'passwd'] |
|
| 138 |
assert 'domain' not in deco |
|
| 138 | 139 |
|
| 139 | 140 |
for bad_url in [ |
| 140 | 141 |
'://bad-url.missing/protocol', |
| test/unit/test_patterns_query_tree.py | ||
|---|---|---|
| 27 | 27 |
|
| 28 | 28 |
def test_modify_branch(execute_in_page, patterns_tree_code): |
| 29 | 29 |
""" |
| 30 |
patterns_query_tree.js contains Patterns Tree data structure that allows
|
|
| 30 |
patterns_query_tree.js contains Pattern Tree data structure that allows |
|
| 31 | 31 |
arrays of string labels to be mapped to items. |
| 32 | 32 |
Verify operations modifying a single branch of such tree work properly. |
| 33 | 33 |
""" |
| ... | ... | |
| 68 | 68 |
# the right result. |
| 69 | 69 |
branch = execute_in_page( |
| 70 | 70 |
'''{
|
| 71 |
const branch = make_tree_node();
|
|
| 71 |
const branch = empty_node();
|
|
| 72 | 72 |
modify_sequence(branch, ['com', 'example'], item_adder('some_item'));
|
| 73 | 73 |
returnval(branch); |
| 74 | 74 |
}''') |
| ... | ... | |
| 197 | 197 |
|
| 198 | 198 |
def test_search_branch(execute_in_page, patterns_tree_code): |
| 199 | 199 |
""" |
| 200 |
patterns_query_tree.js contains Patterns Tree data structure that allows
|
|
| 200 |
patterns_query_tree.js contains Pattern Tree data structure that allows |
|
| 201 | 201 |
arrays of string labels to be mapped to items. |
| 202 | 202 |
Verify searching a single branch of such tree work properly. |
| 203 | 203 |
""" |
| ... | ... | |
| 210 | 210 |
# Let's construct some tree branch to test on. |
| 211 | 211 |
execute_in_page( |
| 212 | 212 |
''' |
| 213 |
var branch = make_tree_node();
|
|
| 213 |
var branch = empty_node();
|
|
| 214 | 214 |
|
| 215 | 215 |
for (const [item, sequence] of [ |
| 216 | 216 |
['(root)', []], |
| ... | ... | |
| 281 | 281 |
print('sequence:', sequence, '\nexpected:', expected,
|
| 282 | 282 |
'\nresult:', result, file=sys.stderr) |
| 283 | 283 |
raise e from None |
| 284 |
|
|
| 285 |
def test_pattern_tree(execute_in_page, patterns_tree_code): |
|
| 286 |
""" |
|
| 287 |
patterns_query_tree.js contains Pattern Tree data structure that allows |
|
| 288 |
arrays of string labels to be mapped to items. |
|
| 289 |
Verify operations on entire such tree work properly. |
|
| 290 |
""" |
|
| 291 |
execute_in_page(patterns_tree_code, page='https://gotmyowndoma.in') |
|
| 292 |
|
|
| 293 |
# Perform tests with all possible patterns for a simple URL. |
|
| 294 |
url = 'https://example.com' |
|
| 295 |
patterns = [ |
|
| 296 |
'https://example.com', |
|
| 297 |
'https://example.com/***', |
|
| 298 |
'https://***.example.com', |
|
| 299 |
'https://***.example.com/***' |
|
| 300 |
] |
|
| 301 |
bad_patterns = [ |
|
| 302 |
'http://example.com', |
|
| 303 |
'https://a.example.com', |
|
| 304 |
'https://*.example.com', |
|
| 305 |
'https://**.example.com', |
|
| 306 |
'https://example.com/a', |
|
| 307 |
'https://example.com/*', |
|
| 308 |
'https://example.com/**', |
|
| 309 |
] |
|
| 310 |
|
|
| 311 |
expected = [{'key': p} for p in patterns]
|
|
| 312 |
|
|
| 313 |
tree, result = execute_in_page( |
|
| 314 |
'''{
|
|
| 315 |
const tree = pattern_tree.make(); |
|
| 316 |
for (const pattern of arguments[0].concat(arguments[1])) {
|
|
| 317 |
pattern_tree.register(tree, pattern, 'key', pattern); |
|
| 318 |
pattern_tree.register(tree, pattern + '/', 'key', pattern + '/'); |
|
| 319 |
} |
|
| 320 |
returnval([tree, [...pattern_tree.search(tree, arguments[2])]]); |
|
| 321 |
}''', |
|
| 322 |
patterns, bad_patterns, url) |
|
| 323 |
assert expected == result |
|
| 324 |
|
|
| 325 |
# Also verify that deregistering half of the good patterns works correctly. |
|
| 326 |
patterns_removed = [pattern for i, pattern in enumerate(patterns) if i % 2] |
|
| 327 |
patterns = [pattern for i, pattern in enumerate(patterns) if not (i % 2)] |
|
| 328 |
expected = [{'key': p} for p in patterns]
|
|
| 329 |
tree, result = execute_in_page( |
|
| 330 |
'''{
|
|
| 331 |
const tree = arguments[0]; |
|
| 332 |
for (const pattern of arguments[1]) {
|
|
| 333 |
pattern_tree.deregister(tree, pattern, 'key'); |
|
| 334 |
pattern_tree.deregister(tree, pattern + '/', 'key'); |
|
| 335 |
} |
|
| 336 |
returnval([tree, [...pattern_tree.search(tree, arguments[2])]]); |
|
| 337 |
}''', |
|
| 338 |
tree, patterns_removed, url) |
|
| 339 |
assert expected == result |
|
| 340 |
|
|
| 341 |
# Also verify that deregistering all the patterns works correctly. |
|
| 342 |
tree = execute_in_page( |
|
| 343 |
'''{
|
|
| 344 |
const tree = arguments[0]; |
|
| 345 |
for (const pattern of arguments[1].concat(arguments[2])) {
|
|
| 346 |
pattern_tree.deregister(tree, pattern, 'key'); |
|
| 347 |
pattern_tree.deregister(tree, pattern + '/', 'key'); |
|
| 348 |
} |
|
| 349 |
returnval(tree); |
|
| 350 |
}''', |
|
| 351 |
tree, patterns, bad_patterns) |
|
| 352 |
assert tree == {}
|
|
| 353 |
|
|
| 354 |
# Perform tests with all possible patterns for a complex URL. |
|
| 355 |
url = 'http://settings.query.example.com/google/tries/destroy/adblockers//' |
|
| 356 |
patterns = [ |
|
| 357 |
'http://settings.query.example.com/google/tries/destroy/adblockers', |
|
| 358 |
'http://settings.query.example.com/google/tries/destroy/adblockers/***', |
|
| 359 |
'http://settings.query.example.com/google/tries/destroy/*', |
|
| 360 |
'http://settings.query.example.com/google/tries/destroy/***', |
|
| 361 |
'http://settings.query.example.com/google/tries/**', |
|
| 362 |
'http://settings.query.example.com/google/tries/***', |
|
| 363 |
'http://settings.query.example.com/google/**', |
|
| 364 |
'http://settings.query.example.com/google/***', |
|
| 365 |
'http://settings.query.example.com/**', |
|
| 366 |
'http://settings.query.example.com/***', |
|
| 367 |
|
|
| 368 |
'http://***.settings.query.example.com/google/tries/destroy/adblockers', |
|
| 369 |
'http://***.settings.query.example.com/google/tries/destroy/adblockers/***', |
|
| 370 |
'http://***.settings.query.example.com/google/tries/destroy/*', |
|
| 371 |
'http://***.settings.query.example.com/google/tries/destroy/***', |
|
| 372 |
'http://***.settings.query.example.com/google/tries/**', |
|
| 373 |
'http://***.settings.query.example.com/google/tries/***', |
|
| 374 |
'http://***.settings.query.example.com/google/**', |
|
| 375 |
'http://***.settings.query.example.com/google/***', |
|
| 376 |
'http://***.settings.query.example.com/**', |
|
| 377 |
'http://***.settings.query.example.com/***', |
|
| 378 |
'http://*.query.example.com/google/tries/destroy/adblockers', |
|
| 379 |
'http://*.query.example.com/google/tries/destroy/adblockers/***', |
|
| 380 |
'http://*.query.example.com/google/tries/destroy/*', |
|
| 381 |
'http://*.query.example.com/google/tries/destroy/***', |
|
| 382 |
'http://*.query.example.com/google/tries/**', |
|
| 383 |
'http://*.query.example.com/google/tries/***', |
|
| 384 |
'http://*.query.example.com/google/**', |
|
| 385 |
'http://*.query.example.com/google/***', |
|
| 386 |
'http://*.query.example.com/**', |
|
| 387 |
'http://*.query.example.com/***', |
|
| 388 |
'http://***.query.example.com/google/tries/destroy/adblockers', |
|
| 389 |
'http://***.query.example.com/google/tries/destroy/adblockers/***', |
|
| 390 |
'http://***.query.example.com/google/tries/destroy/*', |
|
| 391 |
'http://***.query.example.com/google/tries/destroy/***', |
|
| 392 |
'http://***.query.example.com/google/tries/**', |
|
| 393 |
'http://***.query.example.com/google/tries/***', |
|
| 394 |
'http://***.query.example.com/google/**', |
|
| 395 |
'http://***.query.example.com/google/***', |
|
| 396 |
'http://***.query.example.com/**', |
|
| 397 |
'http://***.query.example.com/***', |
|
| 398 |
'http://**.example.com/google/tries/destroy/adblockers', |
|
| 399 |
'http://**.example.com/google/tries/destroy/adblockers/***', |
|
| 400 |
'http://**.example.com/google/tries/destroy/*', |
|
| 401 |
'http://**.example.com/google/tries/destroy/***', |
|
| 402 |
'http://**.example.com/google/tries/**', |
|
| 403 |
'http://**.example.com/google/tries/***', |
|
| 404 |
'http://**.example.com/google/**', |
|
| 405 |
'http://**.example.com/google/***', |
|
| 406 |
'http://**.example.com/**', |
|
| 407 |
'http://**.example.com/***', |
|
| 408 |
'http://***.example.com/google/tries/destroy/adblockers', |
|
| 409 |
'http://***.example.com/google/tries/destroy/adblockers/***', |
|
| 410 |
'http://***.example.com/google/tries/destroy/*', |
|
| 411 |
'http://***.example.com/google/tries/destroy/***', |
|
| 412 |
'http://***.example.com/google/tries/**', |
|
| 413 |
'http://***.example.com/google/tries/***', |
|
| 414 |
'http://***.example.com/google/**', |
|
| 415 |
'http://***.example.com/google/***', |
|
| 416 |
'http://***.example.com/**', |
|
| 417 |
'http://***.example.com/***' |
|
| 418 |
] |
|
| 419 |
bad_patterns = [ |
|
| 420 |
'https://settings.query.example.com/google/tries/destroy/adblockers', |
|
| 421 |
'http://settings.query.example.com/google/tries/destroy/adblockers/a', |
|
| 422 |
'http://settings.query.example.com/google/tries/destroy/adblockers/*', |
|
| 423 |
'http://settings.query.example.com/google/tries/destroy/adblockers/**', |
|
| 424 |
'http://settings.query.example.com/google/tries/destroy/a', |
|
| 425 |
'http://settings.query.example.com/google/tries/destroy/**', |
|
| 426 |
'http://settings.query.example.com/google/tries/*', |
|
| 427 |
'http://a.settings.query.example.com/google/tries/destroy/adblockers', |
|
| 428 |
'http://*.settings.query.example.com/google/tries/destroy/adblockers', |
|
| 429 |
'http://**.settings.query.example.com/google/tries/destroy/adblockers', |
|
| 430 |
'http://a.query.example.com/google/tries/destroy/adblockers', |
|
| 431 |
'http://**.query.example.com/google/tries/destroy/adblockers', |
|
| 432 |
'http://*.example.com/google/tries/destroy/adblockers' |
|
| 433 |
] |
|
| 434 |
|
|
| 435 |
expected = [{'key': p + s} for p in patterns for s in ['/', '']]
|
|
| 436 |
|
|
| 437 |
tree, result = execute_in_page( |
|
| 438 |
'''{
|
|
| 439 |
const tree = pattern_tree.make(); |
|
| 440 |
for (const pattern of arguments[0].concat(arguments[1])) {
|
|
| 441 |
pattern_tree.register(tree, pattern, 'key', pattern); |
|
| 442 |
pattern_tree.register(tree, pattern + '/', 'key', pattern + '/'); |
|
| 443 |
} |
|
| 444 |
returnval([tree, [...pattern_tree.search(tree, arguments[2])]]); |
|
| 445 |
}''', |
|
| 446 |
patterns, bad_patterns, url) |
|
| 447 |
assert expected == result |
|
| 448 |
|
|
| 449 |
# Also verify that deregistering all patterns with trailing slash works |
|
| 450 |
# correctly. |
|
| 451 |
expected = [{'key': p} for p in patterns]
|
|
| 452 |
tree, result = execute_in_page( |
|
| 453 |
'''{
|
|
| 454 |
const tree = arguments[0]; |
|
| 455 |
for (const pattern of arguments[1]) |
|
| 456 |
pattern_tree.deregister(tree, pattern + '/', 'key'); |
|
| 457 |
returnval([tree, [...pattern_tree.search(tree, arguments[2])]]); |
|
| 458 |
}''', |
|
| 459 |
tree, patterns, url) |
|
| 460 |
assert expected == result |
|
| 461 |
|
|
| 462 |
# Also verify that deregistering all the patterns works correctly. |
|
| 463 |
tree = execute_in_page( |
|
| 464 |
'''{
|
|
| 465 |
const tree = arguments[0]; |
|
| 466 |
for (const pattern of arguments[1]) |
|
| 467 |
pattern_tree.deregister(tree, pattern, 'key'); |
|
| 468 |
for (const pattern of arguments[2]) {
|
|
| 469 |
pattern_tree.deregister(tree, pattern, 'key'); |
|
| 470 |
pattern_tree.deregister(tree, pattern + '/', 'key'); |
|
| 471 |
} |
|
| 472 |
returnval(tree); |
|
| 473 |
}''', |
|
| 474 |
tree, patterns, bad_patterns) |
|
| 475 |
assert tree == {}
|
|
Also available in: Unified diff
finish implementing more efficient querying of URL patterns
The algorithm is implemented and tested. However, it is yet to be hooked into the actual extension.