Project

General

Profile

Download (18.8 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / test / haketilo_test / unit / test_patterns_query_tree.py @ fd9f2fc4

1
# SPDX-License-Identifier: CC0-1.0
2

    
3
"""
4
Haketilo unit tests - URL patterns
5
"""
6

    
7
# This file is part of Haketilo
8
#
9
# Copyright (C) 2021, Wojtek Kosior
10
#
11
# This program is free software: you can redistribute it and/or modify
12
# it under the terms of the CC0 1.0 Universal License as published by
13
# the Creative Commons Corporation.
14
#
15
# This program is distributed in the hope that it will be useful,
16
# but WITHOUT ANY WARRANTY; without even the implied warranty of
17
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
# CC0 1.0 Universal License for more details.
19

    
20
import pytest
21

    
22
from ..script_loader import load_script
23

    
24
@pytest.mark.get_page('https://gotmyowndoma.in')
def test_modify_branch(execute_in_page):
    """
    patterns_query_tree.js contains Pattern Tree data structure that allows
    arrays of string labels to be mapped to items.
    Verify operations modifying a single branch of such tree work properly.

    The test drives modify_sequence() with two callbacks defined in the page:
    item_adder(), which appends an item to a node's array, and item_remover(),
    which pops the last item and yields null once the array becomes empty.
    Each of them resets and then maintains a counter (items_added /
    items_removed) so that the test can check how many arrays a single
    modify_sequence() call has modified.
    """
    execute_in_page(load_script('common/patterns_query_tree.js'))
    execute_in_page(
        '''
        let items_added;
        let items_removed;

        function _item_adder(item, array)
        {
            items_added++;
            return [...(array || []), item];
        }

        function item_adder(item)
        {
            items_added = 0;
            return array => _item_adder(item, array);
        }

        function _item_remover(array)
        {
            if (array !== null) {
                items_removed++;
                array.pop();
            }
            return (array && array.length > 0) ? array : null;
        }

        function item_remover()
        {
            items_removed = 0;
            return _item_remover;
        }''')

    # Let's construct some tree branch while checking that each addition gives
    # the right result.
    branch = execute_in_page(
        '''{
        const branch = empty_node();
        modify_sequence(branch, ['com', 'example'], item_adder('some_item'));
        returnval(branch);
        }''')
    assert branch == {
        'literal_match': None,
        'wildcard_matches': [None, None, None],
        'children': {
            'com': {
                'literal_match': None,
                'wildcard_matches': [None, None, None],
                'children': {
                    'example': {
                        'literal_match': ['some_item'],
                        'wildcard_matches': [None, None, None],
                        'children': {
                        }
                    }
                }
            }
        }
    }

    # Adding under an already existing sequence should extend its array.
    branch, items_added = execute_in_page(
        '''{
        const branch = arguments[0];
        modify_sequence(branch, ['com', 'example'], item_adder('other_item'));
        returnval([branch, items_added]);
        }''', branch)
    assert items_added == 1
    assert (branch['children']['com']['children']['example']['literal_match']
            == ['some_item', 'other_item'])

    # A sequence ending with a wildcard label ('*', '**' or '***') is expected
    # to be stored twice: in the matching 'wildcard_matches' slot of the parent
    # node and as a literal child of that node. Hence 2 additions per call.
    for i in range(3):
        wildcard = '*' * (i + 1)
        for expected_array in [['third_item'], ['third_item', '4th_item']]:
            branch, items_added = execute_in_page(
                '''{
                const branch = arguments[0];
                modify_sequence(branch, ['com', 'sample', arguments[1]],
                                item_adder(arguments[2]));
                returnval([branch, items_added]);
                }''',
                branch, wildcard, expected_array[-1])
            assert items_added == 2
            sample = branch['children']['com']['children']['sample']
            assert sample['wildcard_matches'][i] == expected_array
            assert (sample['children'][wildcard]['literal_match']
                    == expected_array)

    # A wildcard label that is not the last one in the sequence is expected to
    # be treated as an ordinary label (single addition, plain child nodes).
    branch, items_added = execute_in_page(
        '''{
        const branch = arguments[0];
        modify_sequence(branch, ['org', 'koszko', '***', '123'],
                        item_adder('5th_item'));
        returnval([branch, items_added]);
        }''',
        branch)
    assert items_added == 1
    org_leaf = (branch['children']['org']['children']['koszko']['children']
                ['***']['children']['123'])
    assert org_leaf['literal_match'] == ['5th_item']

    # Let's verify that removing a nonexistent element doesn't modify the tree.
    branch2, items_removed = execute_in_page(
        '''{
        const branch = arguments[0];
        modify_sequence(branch, ['com', 'not', 'registered', '*'],
                        item_remover());
        returnval([branch, items_removed]);
        }''',
        branch)
    assert branch == branch2
    assert items_removed == 0

    # Let's remove all elements in the tree branch while checking that each
    # removal gives the right result.
    branch, items_removed = execute_in_page(
        '''{
        const branch = arguments[0];
        modify_sequence(branch, ['org', 'koszko', '***', '123'],
                        item_remover());
        returnval([branch, items_removed]);
        }''',
        branch)
    assert items_removed == 1
    assert 'org' not in branch['children']

    for i in range(3):
        wildcard = '*' * (i + 1)
        for expected_array in [['third_item'], None]:
            branch, items_removed = execute_in_page(
                '''{
                const branch = arguments[0];
                modify_sequence(branch, ['com', 'sample', arguments[1]],
                                item_remover());
                returnval([branch, items_removed]);
                }''',
                branch, wildcard)
            assert items_removed == 2
            # The .get() defaults below tolerate nodes that are no longer
            # present in the returned tree, so the last removal needs no
            # special-casing here.
            sample = branch['children']['com']['children'].get('sample', {})
            assert (sample.get('wildcard_matches', [None, None, None])[i]
                    == expected_array)
            wildcard_child = sample.get('children', {}).get(wildcard, {})
            assert wildcard_child.get('literal_match') == expected_array

    # 2 items were registered under ['com', 'example']; popping both should
    # leave us with a tree equal to a freshly created empty node.
    for i in range(2):
        branch, items_removed = execute_in_page(
            '''{
            const branch = arguments[0];
            modify_sequence(branch, ['com', 'example'], item_remover());
            returnval([branch, items_removed]);
            }''',
            branch)
        assert items_removed == 1
        if i == 0:
            example = branch['children']['com']['children']['example']
            assert example['literal_match'] == ['some_item']
        else:
            assert branch == {
                'literal_match': None,
                'wildcard_matches': [None, None, None],
                'children': {
                }
            }
194

    
195
@pytest.mark.get_page('https://gotmyowndoma.in')
def test_search_branch(execute_in_page):
    """
    patterns_query_tree.js contains Pattern Tree data structure that allows
    arrays of string labels to be mapped to items.
    Verify searching a single branch of such tree works properly.

    A branch is populated in the page with items registered under various
    label sequences. search_sequence() is then run for a number of queries and
    the arrays it yields are compared, in order and as sets, against the
    expected ones.
    """
    import sys

    execute_in_page(load_script('common/patterns_query_tree.js'))
    execute_in_page(
        '''
        const item_adder = item => (array => [...(array || []), item]);
        ''')

    # Let's construct some tree branch to test on.
    execute_in_page(
        '''
        var branch = empty_node();

        for (const [item, sequence] of [
            ['(root)', []],
            ['***',    ['***']],
            ['**',     ['**']],
            ['*',      ['*']],

            ['a',      ['a']],
            ['A',      ['a']],
            ['b',      ['b']],

            ['a/***',  ['a', '***']],
            ['A/***',  ['a', '***']],
            ['a/**',   ['a', '**']],
            ['A/**',   ['a', '**']],
            ['a/*',    ['a', '*']],
            ['A/*',    ['a', '*']],
            ['a/sth',  ['a', 'sth']],
            ['A/sth',  ['a', 'sth']],

            ['b/***',  ['b', '***']],
            ['b/**',   ['b', '**']],
            ['b/*',    ['b', '*']],
            ['b/sth',  ['b', 'sth']],
        ])
            modify_sequence(branch, sequence, item_adder(item));
        ''')

    # Let's make the actual searches on our testing branch. Each case pairs a
    # queried sequence with the sets of items expected to be yielded, in order.
    for sequence, expected in [
            ([],      [{'(root)'},                            {'***'}]),
            (['a'],   [{'a', 'A'}, {'a/***', 'A/***'}, {'*'}, {'***'}]),
            (['b'],   [{'b'},      {'b/***'},          {'*'}, {'***'}]),
            (['c'],   [                                {'*'}, {'***'}]),
            (['***'], [{'***'},                        {'*'}         ]),
            (['**'],  [{'**'},                         {'*'}, {'***'}]),
            (['*'],   [{'*'},                                 {'***'}]),

            (['a', 'sth'], [{'a/sth', 'A/sth'}, {'a/*', 'A/*'}, {'a/***', 'A/***'}, {'**'}, {'***'}]),
            (['b', 'sth'], [{'b/sth'},          {'b/*'},        {'b/***'},          {'**'}, {'***'}]),
            (['a', 'hts'], [                    {'a/*', 'A/*'}, {'a/***', 'A/***'}, {'**'}, {'***'}]),
            (['b', 'hts'], [                    {'b/*'},        {'b/***'},          {'**'}, {'***'}]),
            (['a', '***'], [{'a/***', 'A/***'}, {'a/*', 'A/*'},                     {'**'}, {'***'}]),
            (['b', '***'], [{'b/***'},          {'b/*'},                            {'**'}, {'***'}]),
            (['a', '**'],  [{'a/**', 'A/**'},   {'a/*', 'A/*'}, {'a/***', 'A/***'}, {'**'}, {'***'}]),
            (['b', '**'],  [{'b/**'},           {'b/*'},        {'b/***'},          {'**'}, {'***'}]),
            (['a', '*'],   [{'a/*', 'A/*'},                     {'a/***', 'A/***'}, {'**'}, {'***'}]),
            (['b', '*'],   [{'b/*'},                            {'b/***'},          {'**'}, {'***'}]),

            (['a', 'c', 'd'], [{'a/**', 'A/**'}, {'a/***', 'A/***'}, {'**'}, {'***'}]),
            (['b', 'c', 'd'], [{'b/**'},         {'b/***'},          {'**'}, {'***'}])
    ]:
        result = execute_in_page(
            '''
            returnval([...search_sequence(branch, arguments[0])]);
            ''',
            sequence)

        try:
            assert len(result) == len(expected)

            for expected_set, result_array in zip(expected, result):
                # Comparing lengths first makes sure the returned array has
                # no duplicates that conversion to a set would hide.
                assert len(expected_set) == len(result_array)
                assert expected_set      == set(result_array)
        except Exception:
            # Report which case failed before letting the error propagate.
            print('sequence:', sequence, '\nexpected:', expected,
                  '\nresult:', result, file=sys.stderr)
            raise
282

    
283
@pytest.mark.get_page('https://gotmyowndoma.in')
def test_pattern_tree(execute_in_page):
    """
    patterns_query_tree.js contains Pattern Tree data structure that allows
    arrays of string labels to be mapped to items.
    Verify operations on entire such tree work properly.

    Two scenarios are exercised, one with a simple URL and one with a complex
    URL. In each, matching and non-matching patterns get registered (both with
    and without a trailing slash), the tree is searched, and patterns are then
    gradually deregistered until the tree is expected to become empty.
    """
    execute_in_page(load_script('common/patterns_query_tree.js'))

    # Page-side snippet used by both scenarios: build a fresh tree, register
    # every pattern from arguments[0] and arguments[1] (with and without a
    # trailing slash) and return the tree along with search results for the
    # URL in arguments[2].
    register_and_search = '''{
        const tree = pattern_tree_make();
        for (const pattern of arguments[0].concat(arguments[1])) {
            pattern_tree_register(tree, pattern,       'key', pattern);
            pattern_tree_register(tree, pattern + '/', 'key', pattern + '/');
        }
        returnval([tree, [...pattern_tree_search(tree, arguments[2])]]);
        }'''

    # Perform tests with all possible patterns for a simple URL.
    simple_url = 'https://example.com'
    matching = [
        'https://example.com',
        'https://example.com/***',
        'https://***.example.com',
        'https://***.example.com/***'
    ]
    non_matching = [
        'http://example.com',
        'https://a.example.com',
        'https://*.example.com',
        'https://**.example.com',
        'https://example.com/a',
        'https://example.com/*',
        'https://example.com/**',
    ]

    tree, found = execute_in_page(register_and_search,
                                  matching, non_matching, simple_url)
    # Only the registrations without trailing slash are expected back.
    assert [{'key': pattern} for pattern in matching] == found

    # Also verify that deregistering half of the good patterns works correctly.
    # Patterns at odd indices get dropped, those at even indices are kept.
    dropped = matching[1::2]
    kept = matching[0::2]
    tree, found = execute_in_page(
        '''{
        const tree = arguments[0];
        for (const pattern of arguments[1]) {
            pattern_tree_deregister(tree, pattern,       'key');
            pattern_tree_deregister(tree, pattern + '/', 'key');
        }
        returnval([tree, [...pattern_tree_search(tree, arguments[2])]]);
        }''',
        tree, dropped, simple_url)
    assert [{'key': pattern} for pattern in kept] == found

    # Also verify that deregistering all the patterns works correctly.
    tree = execute_in_page(
        '''{
        const tree = arguments[0];
        for (const pattern of arguments[1].concat(arguments[2])) {
            pattern_tree_deregister(tree, pattern,       'key');
            pattern_tree_deregister(tree, pattern + '/', 'key');
        }
        returnval(tree);
        }''',
        tree, kept, non_matching)
    assert tree == {}

    # Perform tests with all possible patterns for a complex URL.
    complex_url = \
        'http://settings.query.example.com/google/tries/destroy/adblockers//'
    matching = [
        'http://settings.query.example.com/google/tries/destroy/adblockers',
        'http://settings.query.example.com/google/tries/destroy/adblockers/***',
        'http://settings.query.example.com/google/tries/destroy/*',
        'http://settings.query.example.com/google/tries/destroy/***',
        'http://settings.query.example.com/google/tries/**',
        'http://settings.query.example.com/google/tries/***',
        'http://settings.query.example.com/google/**',
        'http://settings.query.example.com/google/***',
        'http://settings.query.example.com/**',
        'http://settings.query.example.com/***',

        'http://***.settings.query.example.com/google/tries/destroy/adblockers',
        'http://***.settings.query.example.com/google/tries/destroy/adblockers/***',
        'http://***.settings.query.example.com/google/tries/destroy/*',
        'http://***.settings.query.example.com/google/tries/destroy/***',
        'http://***.settings.query.example.com/google/tries/**',
        'http://***.settings.query.example.com/google/tries/***',
        'http://***.settings.query.example.com/google/**',
        'http://***.settings.query.example.com/google/***',
        'http://***.settings.query.example.com/**',
        'http://***.settings.query.example.com/***',
        'http://*.query.example.com/google/tries/destroy/adblockers',
        'http://*.query.example.com/google/tries/destroy/adblockers/***',
        'http://*.query.example.com/google/tries/destroy/*',
        'http://*.query.example.com/google/tries/destroy/***',
        'http://*.query.example.com/google/tries/**',
        'http://*.query.example.com/google/tries/***',
        'http://*.query.example.com/google/**',
        'http://*.query.example.com/google/***',
        'http://*.query.example.com/**',
        'http://*.query.example.com/***',
        'http://***.query.example.com/google/tries/destroy/adblockers',
        'http://***.query.example.com/google/tries/destroy/adblockers/***',
        'http://***.query.example.com/google/tries/destroy/*',
        'http://***.query.example.com/google/tries/destroy/***',
        'http://***.query.example.com/google/tries/**',
        'http://***.query.example.com/google/tries/***',
        'http://***.query.example.com/google/**',
        'http://***.query.example.com/google/***',
        'http://***.query.example.com/**',
        'http://***.query.example.com/***',
        'http://**.example.com/google/tries/destroy/adblockers',
        'http://**.example.com/google/tries/destroy/adblockers/***',
        'http://**.example.com/google/tries/destroy/*',
        'http://**.example.com/google/tries/destroy/***',
        'http://**.example.com/google/tries/**',
        'http://**.example.com/google/tries/***',
        'http://**.example.com/google/**',
        'http://**.example.com/google/***',
        'http://**.example.com/**',
        'http://**.example.com/***',
        'http://***.example.com/google/tries/destroy/adblockers',
        'http://***.example.com/google/tries/destroy/adblockers/***',
        'http://***.example.com/google/tries/destroy/*',
        'http://***.example.com/google/tries/destroy/***',
        'http://***.example.com/google/tries/**',
        'http://***.example.com/google/tries/***',
        'http://***.example.com/google/**',
        'http://***.example.com/google/***',
        'http://***.example.com/**',
        'http://***.example.com/***'
    ]
    non_matching = [
        'https://settings.query.example.com/google/tries/destroy/adblockers',
        'http://settings.query.example.com/google/tries/destroy/adblockers/a',
        'http://settings.query.example.com/google/tries/destroy/adblockers/*',
        'http://settings.query.example.com/google/tries/destroy/adblockers/**',
        'http://settings.query.example.com/google/tries/destroy/a',
        'http://settings.query.example.com/google/tries/destroy/**',
        'http://settings.query.example.com/google/tries/*',
        'http://a.settings.query.example.com/google/tries/destroy/adblockers',
        'http://*.settings.query.example.com/google/tries/destroy/adblockers',
        'http://**.settings.query.example.com/google/tries/destroy/adblockers',
        'http://a.query.example.com/google/tries/destroy/adblockers',
        'http://**.query.example.com/google/tries/destroy/adblockers',
        'http://*.example.com/google/tries/destroy/adblockers'
    ]

    # This time every good pattern is expected twice: first its variant with
    # trailing slash, then the one without it.
    wanted = []
    for pattern in matching:
        wanted.append({'key': pattern + '/'})
        wanted.append({'key': pattern})

    tree, found = execute_in_page(register_and_search,
                                  matching, non_matching, complex_url)
    assert wanted == found

    # Also verify that deregistering all patterns with trailing slash works
    # correctly.
    tree, found = execute_in_page(
        '''{
        const tree = arguments[0];
        for (const pattern of arguments[1])
            pattern_tree_deregister(tree, pattern + '/', 'key');
        returnval([tree, [...pattern_tree_search(tree, arguments[2])]]);
        }''',
        tree, matching, complex_url)
    assert [{'key': pattern} for pattern in matching] == found

    # Also verify that deregistering all the patterns works correctly.
    tree = execute_in_page(
        '''{
        const tree = arguments[0];
        for (const pattern of arguments[1])
            pattern_tree_deregister(tree, pattern,       'key');
        for (const pattern of arguments[2]) {
            pattern_tree_deregister(tree, pattern,       'key');
            pattern_tree_deregister(tree, pattern + '/', 'key');
        }
        returnval(tree);
        }''',
        tree, matching, non_matching)
    assert tree == {}
(15-15/25)