Project

General

Profile

Download (5.42 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / test / haketilo_test / unit / test_patterns.py @ fd9f2fc4

# SPDX-License-Identifier: CC0-1.0

"""
Haketilo unit tests - URL patterns
"""

# This file is part of Haketilo
#
# Copyright (C) 2021, Wojtek Kosior
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the CC0 1.0 Universal License as published by
# the Creative Commons Corporation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# CC0 1.0 Universal License for more details.

    
20
import pytest
21

    
22
from ..script_loader import load_script
23

    
24
@pytest.mark.get_page('https://gotmyowndoma.in')
def test_regexes(execute_in_page):
    """
    patterns.js contains regexes used for URL parsing.
    Verify they work properly.

    Every regex is exercised through its exec() method inside the page.
    The assertions below expect an indexable sequence of match groups for
    accepted input and None for rejected input.
    """
    execute_in_page(load_script('common/patterns.js'))

    def run_regex(regex_name, text):
        # Produces e.g. 'returnval(proto_regex.exec(arguments[0]));' and
        # passes `text` as the script's first argument.
        script = 'returnval(' + regex_name + '.exec(arguments[0]));'
        return execute_in_page(script, text)

    valid_url = 'https://example.com/a/b?ver=1.2.3#heading2'
    valid_url_rest = 'example.com/a/b?ver=1.2.3#heading2'

    # Test matching of URL protocol.
    match = run_regex('proto_regex', valid_url)
    assert match
    assert match[1] == 'https'
    assert match[2] == valid_url_rest

    match = run_regex('proto_regex', '://bad-url.missing/protocol')
    assert match is None

    # Test matching of http(s) URLs.
    match = run_regex('http_regex', valid_url_rest)
    assert match
    assert match[1] == 'example.com'
    assert match[2] == '/a/b'
    assert match[3] == '?ver=1.2.3'

    match = run_regex('http_regex', 'another.example.com')
    assert match
    assert match[1] == 'another.example.com'
    assert match[2] == ''
    assert match[3] == ''

    match = run_regex('http_regex', '/bad/http/example')
    # Identity check, consistent with the other "no match" assertions here.
    assert match is None

    # Test matching of file URLs.
    match = run_regex('file_regex', '/good/file/example')
    assert match
    assert match[1] == '/good/file/example'

    # Test matching of ftp URLs.
    match = run_regex('ftp_regex', 'example.com/a/b#heading2')
    assert match
    # No 'user@' part in the input, so the first group is expected unset.
    assert match[1] is None
    assert match[2] == 'example.com'
    assert match[3] == '/a/b'

    match = run_regex('ftp_regex', 'some_user@localhost')
    assert match
    assert match[1] == 'some_user@'
    assert match[2] == 'localhost'
    assert match[3] == ''

    match = run_regex('ftp_regex', '@bad.url/')
    assert match is None
89

    
90
@pytest.mark.get_page('https://gotmyowndoma.in')
def test_deconstruct_url(execute_in_page):
    """
    patterns.js contains deconstruct_url() function that handles URL parsing.
    Verify it works properly.

    Well-formed URLs are expected to yield a mapping with 'proto', 'path'
    and 'trailing_slash' keys (plus 'domain' for non-file URLs). Malformed
    URLs are expected to make the injected script fail.
    """
    execute_in_page(load_script('common/patterns.js'))

    def deconstruct(url):
        # Run deconstruct_url() in the page with `url` as its argument.
        return execute_in_page('returnval(deconstruct_url(arguments[0]));',
                               url)

    # The mixed-case host is expected to come back lower-cased.
    deco = deconstruct('https://eXaMpLe.com/a/b?ver=1.2.3#heading2')
    assert deco
    assert deco['trailing_slash'] is False
    assert deco['proto'] == 'https'
    assert deco['domain'] == ['example', 'com']
    assert deco['path'] == ['a', 'b']

    deco = deconstruct('http://**.example.com/')
    assert deco
    assert deco['trailing_slash'] is True
    assert deco['proto'] == 'http'
    assert deco['domain'] == ['**', 'example', 'com']
    assert deco['path'] == []

    # Repeated slashes are expected to collapse; 'user@' is not part of
    # the domain.
    deco = deconstruct('ftp://user@ftp.example.com/all///passwords.txt/')
    assert deco
    assert deco['trailing_slash'] is True
    assert deco['proto'] == 'ftp'
    assert deco['domain'] == ['ftp', 'example', 'com']
    assert deco['path'] == ['all', 'passwords.txt']

    deco = deconstruct('ftp://mirror.edu.pl.eu.org')
    assert deco
    assert deco['trailing_slash'] is False
    assert deco['proto'] == 'ftp'
    assert deco['domain'] == ['mirror', 'edu', 'pl', 'eu', 'org']
    assert deco['path'] == []

    # File URLs carry no domain at all.
    deco = deconstruct('file:///mnt/parabola_chroot///etc/passwd')
    assert deco
    assert deco['trailing_slash'] is False
    assert deco['proto'] == 'file'
    assert deco['path'] == ['mnt', 'parabola_chroot', 'etc', 'passwd']
    assert 'domain' not in deco

    for bad_url in [
            '://bad-url.missing/protocol',
            'http:/example.com/a/b',
            'unknown://example.com/a/b',
            'idontfancypineapple',
            'ftp://@example.org/',
            'https:///some/path/',
            'file://non-absolute/path'
    ]:
        # Only the raised error matters here, so the result is not bound.
        with pytest.raises(Exception, match=r'Error in injected script'):
            deconstruct(bad_url)

    # TODO: at some point we might also consider testing url deconstruction
    # with length limits...
(13-13/25)