Project

General

Profile

Download (18.9 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / test / unit / test_patterns_query_tree.py @ e1282a63

1
# SPDX-License-Identifier: CC0-1.0
2

    
3
"""
4
Haketilo unit tests - URL patterns
5
"""
6

    
7
# This file is part of Haketilo
8
#
9
# Copyright (C) 2021, Wojtek Kosior
10
#
11
# This program is free software: you can redistribute it and/or modify
12
# it under the terms of the CC0 1.0 Universal License as published by
13
# the Creative Commons Corporation.
14
#
15
# This program is distributed in the hope that it will be useful,
16
# but WITHOUT ANY WARRANTY; without even the implied warranty of
17
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
# CC0 1.0 Universal License for more details.
19

    
20
import pytest
21

    
22
from ..script_loader import load_script
23

    
24
@pytest.fixture(scope="session")
def patterns_tree_code():
    """
    Session-scoped fixture providing the result of calling load_script() on
    'common/patterns_query_tree.js' (with ['common'] as second argument).
    Tests in this module pass the yielded value to execute_in_page().
    """
    script = load_script('common/patterns_query_tree.js', ['common'])
    yield script
27

    
28
def test_modify_branch(execute_in_page, patterns_tree_code):
    """
    patterns_query_tree.js contains Pattern Tree data structure that allows
    arrays of string labels to be mapped to items.
    Verify operations modifying a single branch of such tree work properly.

    The test drives modify_sequence() with two kinds of callbacks defined in
    the page: an "adder" that appends an item to a node's array and a
    "remover" that pops one item (returning null once the array is empty).
    Both kinds count their invocations in page-level variables
    (items_added / items_removed) which are reset each time a new callback
    is requested, so every step can assert how many slots were touched.
    """
    execute_in_page(patterns_tree_code, page='https://gotmyowndoma.in')
    execute_in_page(
        '''
        let items_added;
        let items_removed;

        function _item_adder(item, array)
        {
            items_added++;
            return [...(array || []), item];
        }

        function item_adder(item)
        {
            items_added = 0;
            return array => _item_adder(item, array);
        }

        function _item_remover(array)
        {
            if (array !== null) {
                items_removed++;
                array.pop();
            }
            return (array && array.length > 0) ? array : null;
        }

        function item_remover()
        {
            items_removed = 0;
            return _item_remover;
        }''')

    # Let's construct some tree branch while checking that each addition gives
    # the right result.
    branch = execute_in_page(
        '''{
        const branch = empty_node();
        modify_sequence(branch, ['com', 'example'], item_adder('some_item'));
        returnval(branch);
        }''')
    assert branch == {
        'literal_match': None,
        'wildcard_matches': [None, None, None],
        'children': {
            'com': {
                'literal_match': None,
                'wildcard_matches': [None, None, None],
                'children': {
                    'example': {
                        'literal_match': ['some_item'],
                        'wildcard_matches': [None, None, None],
                        'children': {
                        }
                    }
                }
            }
        }
    }

    # Adding a second item under the same sequence must append to the
    # existing array and invoke the adder exactly once.
    branch, items_added = execute_in_page(
        '''{
        const branch = arguments[0];
        modify_sequence(branch, ['com', 'example'], item_adder('other_item'));
        returnval([branch, items_added]);
        }''', branch)
    assert items_added == 1
    assert branch['children']['com']['children']['example']['literal_match'] \
            == ['some_item', 'other_item']

    # Sequences ending with a wildcard label ('*', '**' or '***'): the adder
    # is expected to run twice and the item must show up both in the parent's
    # wildcard_matches slot and in the wildcard child's literal_match.
    for i in range(3):
        wildcard = '*' * (i + 1)
        for expected_array in [['third_item'], ['third_item', '4th_item']]:
            branch, items_added = execute_in_page(
                '''{
                const branch = arguments[0];
                modify_sequence(branch, ['com', 'sample', arguments[1]],
                                item_adder(arguments[2]));
                returnval([branch, items_added]);
                }''',
                branch, wildcard, expected_array[-1])
            assert items_added == 2
            sample = branch['children']['com']['children']['sample']
            assert sample['wildcard_matches'][i] == expected_array
            assert sample['children'][wildcard]['literal_match'] \
                == expected_array

    # A wildcard label that is not the last one in the sequence: only a
    # single adder invocation is expected.
    branch, items_added = execute_in_page(
        '''{
        const branch = arguments[0];
        modify_sequence(branch, ['org', 'koszko', '***', '123'],
                        item_adder('5th_item'));
        returnval([branch, items_added]);
        }''',
        branch)
    assert items_added == 1
    assert branch['children']['org']['children']['koszko']['children']['***']\
        ['children']['123']['literal_match'] == ['5th_item']

    # Let's verify that removing a nonexistent element doesn't modify the tree.
    branch2, items_removed = execute_in_page(
        '''{
        const branch = arguments[0];
        modify_sequence(branch, ['com', 'not', 'registered', '*'],
                        item_remover());
        returnval([branch, items_removed]);
        }''',
        branch)
    assert branch == branch2
    assert items_removed == 0

    # Let's remove all elements in the tree branch while checking that each
    # removal gives the right result.
    branch, items_removed = execute_in_page(
        '''{
        const branch = arguments[0];
        modify_sequence(branch, ['org', 'koszko', '***', '123'],
                        item_remover());
        returnval([branch, items_removed]);
        }''',
        branch)
    assert items_removed == 1
    # The whole now-empty 'org' subtree is expected to be gone.
    assert 'org' not in branch['children']

    for i in range(3):
        wildcard = '*' * (i + 1)
        for expected_array in [['third_item'], None]:
            branch, items_removed = execute_in_page(
                '''{
                const branch = arguments[0];
                modify_sequence(branch, ['com', 'sample', arguments[1]],
                                item_remover());
                returnval([branch, items_removed]);
                }''',
                branch, wildcard)
            assert items_removed == 2
            # After the last item is removed the wildcard child (and
            # eventually the 'sample' node itself) may no longer be present
            # in the tree. The .get() fallbacks make such missing nodes read
            # as "no match" (None), which is what is expected then.
            sample = branch['children']['com']['children'].get('sample', {})
            assert sample.get('wildcard_matches', [None, None, None])[i] \
                == expected_array
            assert sample.get('children', {}).get(wildcard, {})\
                .get('literal_match') == expected_array

    # Finally remove the two items stored under ['com', 'example']; after the
    # second removal the branch must be back to an empty node.
    for i in range(2):
        branch, items_removed = execute_in_page(
            '''{
            const branch = arguments[0];
            modify_sequence(branch, ['com', 'example'], item_remover());
            returnval([branch, items_removed]);
            }''',
            branch)
        assert items_removed == 1
        if i == 0:
            assert branch['children']['com']['children']['example']\
                ['literal_match'] == ['some_item']
        else:
            assert branch == {
                'literal_match': None,
                'wildcard_matches': [None, None, None],
                'children': {
                }
            }
197

    
198
def test_search_branch(execute_in_page, patterns_tree_code):
    """
    patterns_query_tree.js contains Pattern Tree data structure that allows
    arrays of string labels to be mapped to items.
    Verify searching a single branch of such tree works properly.

    A branch is populated in the page with items registered under literal
    and wildcard sequences. For each searched sequence the arrays yielded by
    search_sequence() are then compared, in order, against the expected
    groups of items.
    """
    execute_in_page(patterns_tree_code, page='https://gotmyowndoma.in')
    execute_in_page(
        '''
        const item_adder = item => (array => [...(array || []), item]);
        ''')

    # Let's construct some tree branch to test on.
    execute_in_page(
        '''
        var branch = empty_node();

        for (const [item, sequence] of [
            ['(root)', []],
            ['***',    ['***']],
            ['**',     ['**']],
            ['*',      ['*']],

            ['a',      ['a']],
            ['A',      ['a']],
            ['b',      ['b']],

            ['a/***',  ['a', '***']],
            ['A/***',  ['a', '***']],
            ['a/**',   ['a', '**']],
            ['A/**',   ['a', '**']],
            ['a/*',    ['a', '*']],
            ['A/*',    ['a', '*']],
            ['a/sth',  ['a', 'sth']],
            ['A/sth',  ['a', 'sth']],

            ['b/***',  ['b', '***']],
            ['b/**',   ['b', '**']],
            ['b/*',    ['b', '*']],
            ['b/sth',  ['b', 'sth']],
        ])
            modify_sequence(branch, sequence, item_adder(item));
        ''')

    # Let's make the actual searches on our testing branch.
    # Each entry pairs a searched sequence with the list of item groups the
    # search is expected to yield, in that order. Groups are sets because the
    # order of items inside a single yielded array is not checked.
    for sequence, expected in [
            ([],      [{'(root)'},                            {'***'}]),
            (['a'],   [{'a', 'A'}, {'a/***', 'A/***'}, {'*'}, {'***'}]),
            (['b'],   [{'b'},      {'b/***'},          {'*'}, {'***'}]),
            (['c'],   [                                {'*'}, {'***'}]),
            (['***'], [{'***'},                        {'*'}         ]),
            (['**'],  [{'**'},                         {'*'}, {'***'}]),
            (['*'],   [{'*'},                                 {'***'}]),

            (['a', 'sth'], [{'a/sth', 'A/sth'}, {'a/*', 'A/*'}, {'a/***', 'A/***'}, {'**'}, {'***'}]),
            (['b', 'sth'], [{'b/sth'},          {'b/*'},        {'b/***'},          {'**'}, {'***'}]),
            (['a', 'hts'], [                    {'a/*', 'A/*'}, {'a/***', 'A/***'}, {'**'}, {'***'}]),
            (['b', 'hts'], [                    {'b/*'},        {'b/***'},          {'**'}, {'***'}]),
            (['a', '***'], [{'a/***', 'A/***'}, {'a/*', 'A/*'},                     {'**'}, {'***'}]),
            (['b', '***'], [{'b/***'},          {'b/*'},                            {'**'}, {'***'}]),
            (['a', '**'],  [{'a/**', 'A/**'},   {'a/*', 'A/*'}, {'a/***', 'A/***'}, {'**'}, {'***'}]),
            (['b', '**'],  [{'b/**'},           {'b/*'},        {'b/***'},          {'**'}, {'***'}]),
            (['a', '*'],   [{'a/*', 'A/*'},                     {'a/***', 'A/***'}, {'**'}, {'***'}]),
            (['b', '*'],   [{'b/*'},                            {'b/***'},          {'**'}, {'***'}]),

            (['a', 'c', 'd'], [{'a/**', 'A/**'}, {'a/***', 'A/***'}, {'**'}, {'***'}]),
            (['b', 'c', 'd'], [{'b/**'},         {'b/***'},          {'**'}, {'***'}])
    ]:
        result = execute_in_page(
            '''
            returnval([...search_sequence(branch, arguments[0])]);
            ''',
            sequence)

        try:
            assert len(result) == len(expected)

            for expected_set, result_array in zip(expected, result):
                # The length check rules out duplicated items, which the set
                # comparison alone would not notice.
                assert len(expected_set) == len(result_array)
                assert expected_set      == set(result_array)
        except Exception:
            # Tell which of the many table entries failed before propagating
            # the original error unchanged.
            import sys
            print('sequence:', sequence, '\nexpected:', expected,
                  '\nresult:', result, file=sys.stderr)
            raise
284

    
285
def test_pattern_tree(execute_in_page, patterns_tree_code):
    """
    patterns_query_tree.js contains Pattern Tree data structure that allows
    arrays of string labels to be mapped to items.
    Verify operations on entire such tree work properly.

    Two scenarios are run, one for a simple URL and one for a complex URL.
    In each, a tree is built from patterns expected to match and patterns
    expected not to match (every pattern registered both as-is and with a
    trailing '/'), the search result is compared with the expected ordered
    list, and patterns are then deregistered until the tree is empty.
    """
    execute_in_page(patterns_tree_code, page='https://gotmyowndoma.in')

    # Page-side snippet used by both scenarios: build a fresh tree from
    # arguments[0] and arguments[1], then search it for arguments[2].
    register_and_search_js = '''{
        const tree = pattern_tree.make();
        for (const pattern of arguments[0].concat(arguments[1])) {
            pattern_tree.register(tree, pattern,       'key', pattern);
            pattern_tree.register(tree, pattern + '/', 'key', pattern + '/');
        }
        returnval([tree, [...pattern_tree.search(tree, arguments[2])]]);
        }'''

    # Perform tests with all possible patterns for a simple URL.
    simple_url = 'https://example.com'
    matching = [
        'https://example.com',
        'https://example.com/***',
        'https://***.example.com',
        'https://***.example.com/***'
    ]
    non_matching = [
        'http://example.com',
        'https://a.example.com',
        'https://*.example.com',
        'https://**.example.com',
        'https://example.com/a',
        'https://example.com/*',
        'https://example.com/**',
    ]

    wanted = [{'key': pattern} for pattern in matching]

    tree, found = execute_in_page(register_and_search_js,
                                  matching, non_matching, simple_url)
    assert wanted == found

    # Also verify that deregistering half of the good patterns works correctly.
    # Patterns at odd indices get removed, those at even indices stay.
    dropped = matching[1::2]
    matching = matching[0::2]
    wanted = [{'key': pattern} for pattern in matching]
    deregister_some_and_search_js = '''{
        const tree = arguments[0];
        for (const pattern of arguments[1]) {
            pattern_tree.deregister(tree, pattern,       'key');
            pattern_tree.deregister(tree, pattern + '/', 'key');
        }
        returnval([tree, [...pattern_tree.search(tree, arguments[2])]]);
        }'''
    tree, found = execute_in_page(deregister_some_and_search_js,
                                  tree, dropped, simple_url)
    assert wanted == found

    # Also verify that deregistering all the patterns works correctly.
    deregister_rest_js = '''{
        const tree = arguments[0];
        for (const pattern of arguments[1].concat(arguments[2])) {
            pattern_tree.deregister(tree, pattern,       'key');
            pattern_tree.deregister(tree, pattern + '/', 'key');
        }
        returnval(tree);
        }'''
    tree = execute_in_page(deregister_rest_js, tree, matching, non_matching)
    assert tree == {}

    # Perform tests with all possible patterns for a complex URL.
    complex_url = \
        'http://settings.query.example.com/google/tries/destroy/adblockers//'
    matching = [
        'http://settings.query.example.com/google/tries/destroy/adblockers',
        'http://settings.query.example.com/google/tries/destroy/adblockers/***',
        'http://settings.query.example.com/google/tries/destroy/*',
        'http://settings.query.example.com/google/tries/destroy/***',
        'http://settings.query.example.com/google/tries/**',
        'http://settings.query.example.com/google/tries/***',
        'http://settings.query.example.com/google/**',
        'http://settings.query.example.com/google/***',
        'http://settings.query.example.com/**',
        'http://settings.query.example.com/***',

        'http://***.settings.query.example.com/google/tries/destroy/adblockers',
        'http://***.settings.query.example.com/google/tries/destroy/adblockers/***',
        'http://***.settings.query.example.com/google/tries/destroy/*',
        'http://***.settings.query.example.com/google/tries/destroy/***',
        'http://***.settings.query.example.com/google/tries/**',
        'http://***.settings.query.example.com/google/tries/***',
        'http://***.settings.query.example.com/google/**',
        'http://***.settings.query.example.com/google/***',
        'http://***.settings.query.example.com/**',
        'http://***.settings.query.example.com/***',
        'http://*.query.example.com/google/tries/destroy/adblockers',
        'http://*.query.example.com/google/tries/destroy/adblockers/***',
        'http://*.query.example.com/google/tries/destroy/*',
        'http://*.query.example.com/google/tries/destroy/***',
        'http://*.query.example.com/google/tries/**',
        'http://*.query.example.com/google/tries/***',
        'http://*.query.example.com/google/**',
        'http://*.query.example.com/google/***',
        'http://*.query.example.com/**',
        'http://*.query.example.com/***',
        'http://***.query.example.com/google/tries/destroy/adblockers',
        'http://***.query.example.com/google/tries/destroy/adblockers/***',
        'http://***.query.example.com/google/tries/destroy/*',
        'http://***.query.example.com/google/tries/destroy/***',
        'http://***.query.example.com/google/tries/**',
        'http://***.query.example.com/google/tries/***',
        'http://***.query.example.com/google/**',
        'http://***.query.example.com/google/***',
        'http://***.query.example.com/**',
        'http://***.query.example.com/***',
        'http://**.example.com/google/tries/destroy/adblockers',
        'http://**.example.com/google/tries/destroy/adblockers/***',
        'http://**.example.com/google/tries/destroy/*',
        'http://**.example.com/google/tries/destroy/***',
        'http://**.example.com/google/tries/**',
        'http://**.example.com/google/tries/***',
        'http://**.example.com/google/**',
        'http://**.example.com/google/***',
        'http://**.example.com/**',
        'http://**.example.com/***',
        'http://***.example.com/google/tries/destroy/adblockers',
        'http://***.example.com/google/tries/destroy/adblockers/***',
        'http://***.example.com/google/tries/destroy/*',
        'http://***.example.com/google/tries/destroy/***',
        'http://***.example.com/google/tries/**',
        'http://***.example.com/google/tries/***',
        'http://***.example.com/google/**',
        'http://***.example.com/google/***',
        'http://***.example.com/**',
        'http://***.example.com/***'
    ]
    non_matching = [
        'https://settings.query.example.com/google/tries/destroy/adblockers',
        'http://settings.query.example.com/google/tries/destroy/adblockers/a',
        'http://settings.query.example.com/google/tries/destroy/adblockers/*',
        'http://settings.query.example.com/google/tries/destroy/adblockers/**',
        'http://settings.query.example.com/google/tries/destroy/a',
        'http://settings.query.example.com/google/tries/destroy/**',
        'http://settings.query.example.com/google/tries/*',
        'http://a.settings.query.example.com/google/tries/destroy/adblockers',
        'http://*.settings.query.example.com/google/tries/destroy/adblockers',
        'http://**.settings.query.example.com/google/tries/destroy/adblockers',
        'http://a.query.example.com/google/tries/destroy/adblockers',
        'http://**.query.example.com/google/tries/destroy/adblockers',
        'http://*.example.com/google/tries/destroy/adblockers'
    ]

    # For this URL every good pattern is expected twice in the result: first
    # its trailing-slash variant, then the plain one.
    wanted = []
    for pattern in matching:
        wanted.append({'key': pattern + '/'})
        wanted.append({'key': pattern})

    tree, found = execute_in_page(register_and_search_js,
                                  matching, non_matching, complex_url)
    assert wanted == found

    # Also verify that deregistering all patterns with trailing slash works
    # correctly.
    wanted = [{'key': pattern} for pattern in matching]
    deregister_slashed_and_search_js = '''{
        const tree = arguments[0];
        for (const pattern of arguments[1])
            pattern_tree.deregister(tree, pattern + '/', 'key');
        returnval([tree, [...pattern_tree.search(tree, arguments[2])]]);
        }'''
    tree, found = execute_in_page(deregister_slashed_and_search_js,
                                  tree, matching, complex_url)
    assert wanted == found

    # Also verify that deregistering all the patterns works correctly.
    # Good patterns only have their plain variant left at this point; bad
    # patterns still have both variants registered.
    deregister_remaining_js = '''{
        const tree = arguments[0];
        for (const pattern of arguments[1])
            pattern_tree.deregister(tree, pattern,       'key');
        for (const pattern of arguments[2]) {
            pattern_tree.deregister(tree, pattern,       'key');
            pattern_tree.deregister(tree, pattern + '/', 'key');
        }
        returnval(tree);
        }'''
    tree = execute_in_page(deregister_remaining_js,
                           tree, matching, non_matching)
    assert tree == {}
(5-5/5)