Project

General

Profile

Download (6.06 KB) Statistics
| Branch: | Tag: | Revision:

haketilo / test / haketilo_test / unit / test_patterns.py @ aec5c9ae

1
# SPDX-License-Identifier: CC0-1.0
2

    
3
"""
4
Haketilo unit tests - URL patterns
5
"""
6

    
7
# This file is part of Haketilo
8
#
9
# Copyright (C) 2021, Wojtek Kosior
10
#
11
# This program is free software: you can redistribute it and/or modify
12
# it under the terms of the CC0 1.0 Universal License as published by
13
# the Creative Commons Corporation.
14
#
15
# This program is distributed in the hope that it will be useful,
16
# but WITHOUT ANY WARRANTY; without even the implied warranty of
17
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
# CC0 1.0 Universal License for more details.
19

    
20
import pytest
21

    
22
from ..script_loader import load_script
23

    
24
@pytest.mark.get_page('https://gotmyowndoma.in')
def test_regexes(execute_in_page):
    """
    patterns.js contains regexes used for URL parsing.
    Verify they work properly.

    Each regex is exercised by calling its exec() method inside the page.
    A successful match is checked group by group; inputs that must be
    rejected are expected to come back as None.
    """
    execute_in_page(load_script('common/patterns.js'))

    def exec_regex(regex_name, text):
        # Run `<regex_name>.exec(text)` in the page and hand the result back.
        # NOTE(review): relies on returnval() forwarding the JS value to
        # Python - confirm against the execute_in_page fixture.
        script = 'returnval(' + regex_name + '.exec(arguments[0]));'
        return execute_in_page(script, text)

    valid_url = 'https://example.com/a/b?ver=1.2.3#heading2'
    valid_url_rest = 'example.com/a/b?ver=1.2.3#heading2'

    # Test matching of URL protocol.
    match = exec_regex('proto_regex', valid_url)
    assert match
    assert match[1] == 'https'
    assert match[2] == valid_url_rest

    match = exec_regex('proto_regex', '://bad-url.missing/protocol')
    assert match is None

    # Test matching of http(s) URLs.
    match = exec_regex('http_regex', valid_url_rest)
    assert match
    assert match[1] == 'example.com'
    assert match[2] == '/a/b'
    assert match[3] == '?ver=1.2.3'

    # A bare domain yields empty (not missing) path and query groups.
    match = exec_regex('http_regex', 'another.example.com')
    assert match
    assert match[1] == 'another.example.com'
    assert match[2] == ''
    assert match[3] == ''

    match = exec_regex('http_regex', '/bad/http/example')
    assert match is None

    # Test matching of file URLs.
    match = exec_regex('file_regex', '/good/file/example')
    assert match
    assert match[1] == '/good/file/example'

    # Test matching of ftp URLs.
    match = exec_regex('ftp_regex', 'example.com/a/b#heading2')
    assert match
    # No user part in the input, so the first group did not participate.
    assert match[1] is None
    assert match[2] == 'example.com'
    assert match[3] == '/a/b'

    match = exec_regex('ftp_regex', 'some_user@localhost')
    assert match
    assert match[1] == 'some_user@'
    assert match[2] == 'localhost'
    assert match[3] == ''

    match = exec_regex('ftp_regex', '@bad.url/')
    assert match is None
89

    
90
@pytest.mark.get_page('https://gotmyowndoma.in')
def test_deconstruct_url(execute_in_page):
    """
    patterns.js contains deconstruct_url() function that handles URL parsing.
    Verify it works properly.

    Every valid URL is deconstructed, its fields are checked, and the result
    is then fed to reconstruct_url() to verify the normalized URL string.
    Malformed URLs are expected to make the injected script raise.
    """
    execute_in_page(load_script('common/patterns.js'))

    def deconstruct(url):
        # Call deconstruct_url(url) in the page and return its result.
        return execute_in_page('returnval(deconstruct_url(arguments[0]));',
                               url)

    def reconstruct(deconstructed):
        # Call reconstruct_url() in the page on a deconstructed URL object.
        return execute_in_page('returnval(reconstruct_url(arguments[0]));',
                               deconstructed)

    # NOTE(review): the identity checks on 'trailing_slash' below assume the
    # page hands back real booleans - confirm against patterns.js.

    # Mixed-case domain; query and fragment are absent from the rebuilt URL.
    deco = deconstruct('https://eXaMpLe.com/a/b?ver=1.2.3#heading2')
    assert deco
    assert deco['trailing_slash'] is False
    assert deco['proto']          == 'https'
    assert deco['domain']         == ['example', 'com']
    assert deco['path']           == ['a', 'b']

    reco = reconstruct(deco)
    assert reco == 'https://example.com/a/b'

    # Wildcard domain segment and an empty path with a trailing slash.
    deco = deconstruct('http://**.example.com/')
    assert deco
    assert deco['trailing_slash'] is True
    assert deco['proto']          == 'http'
    assert deco['domain']         == ['**', 'example', 'com']
    assert deco['path']           == []

    reco = reconstruct(deco)
    assert reco == 'http://**.example.com/'

    # The user part and the repeated slashes do not survive reconstruction.
    deco = deconstruct('ftp://user@ftp.example.com/all///passwords.txt/')
    assert deco
    assert deco['trailing_slash'] is True
    assert deco['proto']          == 'ftp'
    assert deco['domain']         == ['ftp', 'example', 'com']
    assert deco['path']           == ['all', 'passwords.txt']

    reco = reconstruct(deco)
    assert reco == 'ftp://ftp.example.com/all/passwords.txt/'

    # Multi-label domain without any path.
    deco = deconstruct('ftp://mirror.edu.pl.eu.org')
    assert deco
    assert deco['trailing_slash'] is False
    assert deco['proto']          == 'ftp'
    assert deco['domain']         == ['mirror', 'edu', 'pl', 'eu', 'org']
    assert deco['path']           == []

    reco = reconstruct(deco)
    assert reco == 'ftp://mirror.edu.pl.eu.org'

    # file: URLs have a path only; no 'domain' key is expected at all.
    deco = deconstruct('file:///mnt/parabola_chroot///etc/passwd')
    assert deco
    assert deco['trailing_slash'] is False
    assert deco['proto']          == 'file'
    assert deco['path']           == ['mnt', 'parabola_chroot', 'etc', 'passwd']
    assert 'domain' not in deco

    reco = reconstruct(deco)
    assert reco == 'file:///mnt/parabola_chroot/etc/passwd'

    for bad_url in [
            '://bad-url.missing/protocol',
            'http:/example.com/a/b',
            'unknown://example.com/a/b',
            'idontfancypineapple',
            'ftp://@example.org/',
            'https:///some/path/',
            'file://non-absolute/path'
    ]:
        # Only the raised error matters here; the return value is unused.
        with pytest.raises(Exception, match=r'Error in injected script'):
            deconstruct(bad_url)

    # at some point we might also consider testing url deconstruction with
    # length limits...
(14-14/26)