# SPDX-License-Identifier: AGPL-3.0-or-later

"""
Our helpful little stand-in for the Internet
"""

# This file is part of Haketilo.
#
# Copyright (C) 2021 jahoti <jahoti@tilde.team>
# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
#
# I, Wojtek Kosior, thereby promise not to sue for violation of this
# file's license. Although I request that you do not make use of this code
# in a proprietary program, I am not going to enforce this in court.

from hashlib import sha256
|
31
|
from pathlib import Path
|
32
|
from shutil import rmtree
|
33
|
from threading import Lock
|
34
|
from uuid import uuid4
|
35
|
import json
|
36
|
|
37
|
from .misc_constants import here
|
38
|
from .unit.utils import * # sample repo data
|
39
|
|
40
|
# TODO: instead of having the entire catalog defined here, make it possible to
# add catalog items from within individual test files.
|
42
|
|
43
|
# Registry of scripts to serve, keyed by the hex SHA-256 digest of their text.
served_scripts = {}
# Guards every access to served_scripts.
served_scripts_lock = Lock()

def start_serving_script(script_text):
    """
    Register given script so that it is served at
    https://serve.scrip.ts/?sha256=<script's_sha256_sum>

    The registry is only modified while served_scripts_lock is held.

    Returns the URL at which script will be served.
    """
    sha256sum = sha256(script_text.encode()).hexdigest()
    # The context manager releases the lock even if the assignment raises.
    with served_scripts_lock:
        served_scripts[sha256sum] = script_text

    return f'https://serve.scrip.ts/?sha256={sha256sum}'
|
62
|
|
63
|
def serve_script(command, get_params, post_params):
    """
    info() callback to pass to request-handling code in server.py. Facilitates
    serving scripts that have been registered with start_serving_script().

    The script is looked up by the first value of the 'sha256' query
    parameter. Returns a (status, headers, body) tuple: 200 with the script
    text when the hash is known, 404 with an empty body when the hash is
    unknown or the 'sha256' parameter is missing.
    """
    try:
        requested_hash = get_params['sha256'][0]
    except (KeyError, IndexError):
        # No hash was supplied, so there is nothing that could be served.
        return 404, {}, b''

    with served_scripts_lock:
        script = served_scripts.get(requested_hash)

    if script is None:
        return 404, {}, b''

    return 200, {'Content-Type': 'application/javascript'}, script
|
77
|
|
78
|
def dump_scripts(directory=(Path.cwd() / 'injected_scripts')):
    """
    Write all scripts that have been registered with start_serving_script()
    under the provided directory. If the directory already exists, it is wiped
    beforehand. If it doesn't exist, it is created.

    Each script ends up in a file named after the key it is registered under
    in served_scripts. Note that the default directory is computed once, when
    this function is defined, and not on every call.
    """
    directory = Path(directory)
    rmtree(directory, ignore_errors=True)
    directory.mkdir(parents=True)

    # Taking the lock through a context manager guarantees it gets released
    # even when writing one of the files fails.
    with served_scripts_lock:
        for digest, script in served_scripts.items():
            # Scripts get hashed as UTF-8 when registered, so write them out
            # the same way instead of relying on the locale's encoding.
            with open(directory / digest, 'wt', encoding='utf-8') as file:
                file.write(script)
|
93
|
|
94
|
# Response body of the https://anotherdoma.in/resource/blocked/by/CORS.json
# handler registered in the catalog at the bottom of this file.
some_data = '{"some": "data"}'
|
95
|
|
96
|
# Number of requests handled so far by the https://counterdoma.in handler.
request_counter = 0

def serve_counter(command, get_params, post_params):
    """
    Request handler that bumps the module-wide request counter and reports its
    new value as a JSON object, with headers that forbid caching the response.
    """
    global request_counter
    request_counter = request_counter + 1

    no_cache_headers = {'Cache-Control': 'private, max-age=0, no-store'}
    body = json.dumps({'counter': request_counter})

    return 200, no_cache_headers, body
|
107
|
|
108
|
# Mock a Hydrilla repository.

def make_handler(txt):
    """
    Make a request handler that ignores its arguments and always responds with
    status 200, no extra headers and txt as the body.
    """
    def handler(command, get_params, post_params):
        return 200, {}, txt

    return handler

# Mock files in the repository.
sample_contents = [f'Mi povas manĝi vitron, ĝi ne damaĝas min {i}'
                   for i in range(9)]
sample_hashes = [sha256(c.encode()).hexdigest() for c in sample_contents]

def file_url(hashed):
    """
    Return the mocked repository URL of the file with given SHA-256 hex digest.
    """
    return f'https://hydril.la/file/sha256/{hashed}'

# Every sample file is served under the URL derived from its own hash.
sample_files_catalog = {file_url(h): make_handler(c)
                        for h, c in zip(sample_hashes, sample_contents)}
|
121
|
|
122
|
# Mock resources and mappings in the repository.
sample_resource_templates = []

# Composite resources: each is named after, and depends on, the four
# single-letter resources selected by the given alphabet offsets.
for deps in [(0, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, 9)]:
    letters = [chr(ord('a') + offset) for offset in deps]
    letter_dependencies = [
        {'identifier': f'resource_{letter}'} for letter in letters
    ][:]
    sample_resource_templates.append({
        'id_suffix': ''.join(letters),
        'files_count': deps[0],
        'dependencies': letter_dependencies
    })
del letter_dependencies

# One more resource that depends on every composite resource made above.
suffixes = [template['id_suffix'] for template in sample_resource_templates]
sample_resource_templates.append({
    'id_suffix': '-'.join(suffixes),
    'files_count': 2,
    'dependencies': [
        {'identifier': f'resource_{suffix}'} for suffix in suffixes
    ]
})

# Dependency-free resources 'a' through 'j'; the i-th one has i files.
for i in range(10):
    sample_resource_templates.append({
        'id_suffix': chr(ord('a') + i),
        'files_count': i,
        'dependencies': []
    })
|
146
|
|
147
|
# URL -> handler maps for the mocked resource and mapping definitions.
sample_resources_catalog = {}
sample_mappings_catalog = {}
# Queryable URL -> list of mapping references to report for that URL.
sample_queries = {}

for srt in sample_resource_templates:
    # Build the resource described by this template.
    # NOTE(review): assumes make_sample_resource() returns a fresh dict that
    # has a list under 'version' and a 'type' key - confirm in unit/utils.
    resource = make_sample_resource()
    resource['identifier'] = f'resource_{srt["id_suffix"]}'
    resource['long_name'] = resource['identifier'].upper()
    resource['uuid'] = str(uuid4())
    resource['dependencies'] = srt['dependencies']
    resource['source_copyright'] = []
    resource['scripts'] = []
    for i in range(srt['files_count']):
        file_ref = {'file': f'file_{i}', 'sha256': sample_hashes[i]}
        # Even-numbered files are listed under 'source_copyright',
        # odd-numbered ones under 'scripts'.
        resource[('source_copyright', 'scripts')[i & 1]].append(file_ref)

    # The second version is a copy with the last component incremented.
    resource_versions = [resource['version'], resource['version'].copy()]
    resource_versions[1][-1] += 1

    # Build the mapping that accompanies the resource.
    mapping = make_sample_mapping()
    mapping['identifier'] = f'mapping_{srt["id_suffix"]}'
    mapping['long_name'] = mapping['identifier'].upper()
    mapping['uuid'] = str(uuid4())
    mapping['source_copyright'] = resource['source_copyright']

    mapping_versions = [mapping['version'], mapping['version'].copy()]
    mapping_versions[1][-1] += 1

    # One pattern for the whole suffix and one for each letter in it.
    # dict.fromkeys() drops duplicates like set() would, but keeps the order
    # stable between runs.
    sufs = [srt['id_suffix'], *filter(str.isalpha, srt['id_suffix'])]
    patterns = [f'https://example_{suf}.com/*' for suf in dict.fromkeys(sufs)]
    payloads = {}

    for pat in patterns:
        payloads[pat] = {'identifier': resource['identifier']}

        # Queries for a concrete URL under the pattern report this mapping in
        # its newer version.
        queryable_url = pat.replace('*', 'something')
        sample_queries.setdefault(queryable_url, []).append({
            'identifier': mapping['identifier'],
            'long_name': mapping['long_name'],
            'version': mapping_versions[1]
        })

    mapping['payloads'] = payloads

    for item, versions, item_catalog in [
            (resource, resource_versions, sample_resources_catalog),
            (mapping, mapping_versions, sample_mappings_catalog)
    ]:
        fmt = f'https://hydril.la/{item["type"]}/{item["identifier"]}%s.json'
        # Make 2 versions of each item so that we can test updates. The URL
        # without a version gets overwritten on the second pass and so ends up
        # serving the newer version.
        for ver in versions:
            item['version'] = ver
            for fmt_arg in ('', '/' + item_version_string(item)):
                item_catalog[fmt % fmt_arg] = make_handler(json.dumps(item))
|
204
|
|
205
|
def serve_query(command, get_params, post_params):
    """
    Request handler for the mocked repository's query endpoints. Responds with
    status 200 and a JSON object listing the mappings that sample_queries holds
    for the URL given as the first value of the 'url' query parameter.

    Raises KeyError when the 'url' parameter is missing or when the URL has no
    entry in sample_queries.
    """
    response = {
        '$schema': 'https://hydrilla.koszko.org/schemas/api_query_result-1.schema.json',
        'generated_by': {
            'name': 'human',
            'version': 'sapiens-0.8.15'
        },
        'mappings': sample_queries[get_params['url'][0]]
    }

    return 200, {}, json.dumps(response)

# The same handler answers queries under each of the path prefixes below.
sample_queries_catalog = {f'https://hydril.la/{suf}query': serve_query
                          for suf in ('', '1/', '2/', '3/', '4/')}
|
219
|
|
220
|
# Everything the mocked Internet serves: URL -> either a ready
# (status, headers, body) tuple or a handler function producing one.
catalog = {}

# Plain-HTTP variant of the basic test site.
catalog.update({
    'http://gotmyowndoma.in':
        (302, {'location': 'http://gotmyowndoma.in/index.html'}, None),
    'http://gotmyowndoma.in/':
        (302, {'location': 'http://gotmyowndoma.in/index.html'}, None),
    'http://gotmyowndoma.in/index.html':
        (200, {}, here / 'data' / 'pages' / 'gotmyowndomain.html')
})

# HTTPS variant of the basic test site.
catalog.update({
    'https://gotmyowndoma.in':
        (302, {'location': 'https://gotmyowndoma.in/index.html'}, None),
    'https://gotmyowndoma.in/':
        (302, {'location': 'https://gotmyowndoma.in/index.html'}, None),
    'https://gotmyowndoma.in/index.html':
        (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html'),

    'https://gotmyowndoma.in/scripts_to_block_1.html':
        (200, {}, here / 'data' / 'pages' / 'scripts_to_block_1.html')
})

# Endpoints backed by handler functions.
catalog.update({
    'https://anotherdoma.in/resource/blocked/by/CORS.json':
        lambda command, get_params, post_params: (200, {}, some_data),

    'https://counterdoma.in/': serve_counter,

    'https://serve.scrip.ts/': serve_script
})

# Further sites that all serve the same HTTPS sample page.
catalog.update({
    'https://site.with.scripts.block.ed':
        (302, {'location': 'https://site.with.scripts.block.ed/index.html'}, None),
    'https://site.with.scripts.block.ed/':
        (302, {'location': 'https://site.with.scripts.block.ed/index.html'}, None),
    'https://site.with.scripts.block.ed/index.html':
        (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html'),

    'https://site.with.scripts.allow.ed':
        (302, {'location': 'https://site.with.scripts.allow.ed/index.html'}, None),
    'https://site.with.scripts.allow.ed/':
        (302, {'location': 'https://site.with.scripts.allow.ed/index.html'}, None),
    'https://site.with.scripts.allow.ed/index.html':
        (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html'),

    'https://site.with.paylo.ad':
        (302, {'location': 'https://site.with.paylo.ad/index.html'}, None),
    'https://site.with.paylo.ad/':
        (302, {'location': 'https://site.with.paylo.ad/index.html'}, None),
    'https://site.with.paylo.ad/index.html':
        (200, {}, here / 'data' / 'pages' / 'gotmyowndomain_https.html')
})

# Mocked Hydrilla repository: files, resources, mappings and queries.
catalog.update({
    **sample_files_catalog,
    **sample_resources_catalog,
    **sample_mappings_catalog,
    **sample_queries_catalog
})
|