1
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
2
|
|
3
|
"""
|
4
|
Our helpful little stand-in for the Internet
|
5
|
"""
|
6
|
|
7
|
# This file is part of Haketilo.
|
8
|
#
|
9
|
# Copyright (C) 2021 jahoti <jahoti@tilde.team>
|
10
|
# Copyright (C) 2021 Wojtek Kosior <koszko@koszko.org>
|
11
|
#
|
12
|
# This program is free software: you can redistribute it and/or modify
|
13
|
# it under the terms of the GNU Affero General Public License as
|
14
|
# published by the Free Software Foundation, either version 3 of the
|
15
|
# License, or (at your option) any later version.
|
16
|
#
|
17
|
# This program is distributed in the hope that it will be useful,
|
18
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
19
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
20
|
# GNU Affero General Public License for more details.
|
21
|
#
|
22
|
# You should have received a copy of the GNU Affero General Public License
|
23
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
24
|
#
|
25
|
#
|
26
|
# I, Wojtek Kosior, thereby promise not to sue for violation of this
|
27
|
# file's license. Although I request that you do not make use of this code
|
28
|
# in a proprietary program, I am not going to enforce this in court.
|
29
|
|
30
|
from hashlib import sha256
|
31
|
from pathlib import Path
|
32
|
from shutil import rmtree
|
33
|
from threading import Lock
|
34
|
from uuid import uuid4
|
35
|
import json
|
36
|
import functools as ft
|
37
|
import operator as op
|
38
|
|
39
|
from .misc_constants import here
|
40
|
from .unit.utils import * # sample repo data
|
41
|
|
42
|
# TODO: instead of having the entire catalog defined here, make it possible to
|
43
|
# add catalog items from within individual test files.
|
44
|
|
45
|
# Scripts registered through start_serving_script(), keyed by the hex SHA-256
# digest of their UTF-8 encoded text.
served_scripts = {}
# Held by the functions in this module while they read or modify
# served_scripts.
served_scripts_lock = Lock()

def start_serving_script(script_text):
    """
    Register given script so that it is served at
    https://serve.scrip.ts/?sha256=<script's_sha256_sum>

    Returns the URL at which script will be served.

    The registry is only modified while served_scripts_lock is held, so this
    function may be called concurrently with the other users of that lock.
    """
    sha256sum = sha256(script_text.encode()).hexdigest()

    # The context manager releases the lock even if the assignment raises.
    with served_scripts_lock:
        served_scripts[sha256sum] = script_text

    return f'https://serve.scrip.ts/?sha256={sha256sum}'
|
64
|
|
65
|
def serve_script(command, get_params, post_params):
    """
    info() callback to pass to request-handling code in server.py. Looks up
    the script registered with start_serving_script() under the digest given
    in the 'sha256' GET parameter.

    Returns a (status, headers, body) tuple: 200 with the script text when the
    digest is known, 404 with an empty body otherwise.
    """
    # Only the dictionary lookup needs to happen under the lock.
    with served_scripts_lock:
        script = served_scripts.get(get_params['sha256'][0])

    if script is not None:
        return 200, {'Content-Type': 'application/javascript'}, script

    return 404, {}, b''
|
79
|
|
80
|
def dump_scripts(directory=(Path.cwd() / 'injected_scripts')):
    """
    Write all scripts that have been registered with start_serving_script()
    under the provided directory. If the directory already exists, it is wiped
    beforehand. If it doesn't exist, it is created.

    Each script is written to a file named after its SHA-256 digest. Files are
    written as UTF-8, the same encoding start_serving_script() hashes.

    Note that the default directory is computed once, when this function is
    defined, from the working directory at that time.
    """
    directory = Path(directory)
    rmtree(directory, ignore_errors=True)
    directory.mkdir(parents=True)

    # The context manager guarantees the lock gets released even when writing
    # one of the files fails.
    with served_scripts_lock:
        for sha256sum, script in served_scripts.items():
            with open(directory / sha256sum, 'wt', encoding='utf-8') as file:
                file.write(script)
|
95
|
|
96
|
# JSON text returned by the catalog handler of the CORS-blocked resource.
some_data = '{"some": "data"}'

# used by handler function of https://counterdoma.in
request_counter = 0

def serve_counter(command, get_params, post_params):
    """
    Handler that counts how many times it has been called.

    Every call increments the module-level request_counter and returns a
    (status, headers, body) tuple whose body is the JSON text
    {"counter": <new value>}. The headers forbid caching of the response.
    """
    global request_counter
    request_counter = request_counter + 1

    no_cache_headers = {'Cache-Control': 'private, max-age=0, no-store'}
    body = json.dumps({'counter': request_counter})

    return 200, no_cache_headers, body
|
109
|
|
110
|
# Mock a Hydrilla repository.
|
111
|
|
112
|
def make_handler(txt):
    """
    Return a handler that ignores its 3 arguments and always responds with
    status 200, no extra headers and `txt` as the body.
    """
    def handler(c, g, p):
        return (200, {}, txt)

    return handler

# Mock files in the repository.
sample_contents = [
    f'Mi povas manĝi vitron, ĝi ne damaĝas min {i}'
    for i in range(9)
]
# Hex SHA-256 digests of the UTF-8 encoded contents, in the same order.
sample_hashes = [
    sha256(content.encode()).hexdigest() for content in sample_contents
]

# Builds the URL of a mock file from its hex digest.
file_url = ft.partial(op.concat, 'https://hydril.la/file/sha256/')

# One handler per mock file, keyed by the URL of that file.
sample_files_catalog = {
    file_url(digest): make_handler(content)
    for digest, content in zip(sample_hashes, sample_contents)
}
|
123
|
|
124
|
# Mock resources and mappings in the repository.
|
125
|
sample_resource_templates = []

# Three resources named after 4 consecutive letters each. Every one of them
# depends on the single-letter resources its name is made of.
for deps in [(0, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, 9)]:
    letters = [chr(ord('a') + i) for i in deps]
    sample_resource_templates += [{
        'id_suffix': ''.join(letters),
        'files_count': deps[0],
        'dependencies': [{'identifier': f'resource-{l}'} for l in letters]
    }]

# One resource that depends on all the resources defined so far.
suffixes = [template['id_suffix'] for template in sample_resource_templates]
sample_resource_templates += [{
    'id_suffix': '-'.join(suffixes),
    'files_count': 2,
    'dependencies': [{'identifier': f'resource-{suf}'} for suf in suffixes]
}]

# Ten single-letter resources ('a' through 'j') without dependencies. The i'th
# of them has i files.
sample_resource_templates.extend(
    {'id_suffix': chr(ord('a') + i), 'files_count': i, 'dependencies': []}
    for i in range(10)
)

sample_resource_templates.extend([
    # The one below will generate items with schema still at version 1, so
    # required mappings will be ignored.
    {
        'id_suffix': 'a-w-required-mapping-v1',
        'files_count': 1,
        'dependencies': [],
        'required_mappings': [{'identifier': 'mapping-a'}]
    },
    # Same as above, except that schema version 2 is requested.
    {
        'id_suffix': 'a-w-required-mapping-v2',
        'files_count': 1,
        'dependencies': [],
        'required_mappings': [{'identifier': 'mapping-a'}],
        'schema_ver': '2'
    }
])
|
165
|
|
166
|
# URL -> handler, filled in by the loop below.
sample_resources_catalog = {}
sample_mappings_catalog = {}
# Queried URL -> list of mapping references to report for that URL.
sample_queries = {}

def _current_and_next_version(item):
    """
    Return a 2-element list: the 'version' of `item` and a copy of it with the
    last component increased by one.
    """
    newer = item['version'].copy()
    newer[-1] += 1
    return [item['version'], newer]

for srt in sample_resource_templates:
    # Resource definition derived from the template.
    resource = make_sample_resource()
    resource['identifier'] = f'resource-{srt["id_suffix"]}'
    resource['long_name'] = resource['identifier'].upper()
    resource['uuid'] = str(uuid4())
    resource['dependencies'] = srt['dependencies']
    resource['source_copyright'] = []
    resource['scripts'] = []
    for i in range(srt['files_count']):
        # Files with even index go to 'source_copyright', those with odd
        # index go to 'scripts'.
        resource['scripts' if i & 1 else 'source_copyright'].append(
            {'file': f'file_{i}', 'sha256': sample_hashes[i]}
        )

    resource_versions = _current_and_next_version(resource)

    # Mapping definition that accompanies the resource. It shares the
    # resource's 'source_copyright' list.
    mapping = make_sample_mapping()
    mapping['identifier'] = f'mapping-{srt["id_suffix"]}'
    mapping['long_name'] = mapping['identifier'].upper()
    mapping['uuid'] = str(uuid4())
    mapping['source_copyright'] = resource['source_copyright']

    mapping_versions = _current_and_next_version(mapping)

    # One pattern for the whole suffix and one for each alphabetic character
    # in it; set() drops the duplicates.
    sufs = [srt["id_suffix"], *[l for l in srt["id_suffix"] if l.isalpha()]]
    patterns = [f'https://example_{suf}.com/*' for suf in set(sufs)]
    payloads = {pat: {'identifier': resource['identifier']} for pat in patterns}

    for pat in patterns:
        queryable_url = pat.replace('*', 'something')
        # Queries report the newer of the 2 versions of the mapping.
        sample_queries.setdefault(queryable_url, []).append({
            'identifier': mapping['identifier'],
            'long_name': mapping['long_name'],
            'version': mapping_versions[1]
        })

    mapping['payloads'] = payloads

    if 'required_mappings' in srt:
        for item in (resource, mapping):
            item['required_mappings'] = srt['required_mappings']
    if 'schema_ver' in srt:
        for item in (resource, mapping):
            item['$schema'] = item['$schema'].replace('1', srt['schema_ver'])

    for item, versions, catalog in (
        (resource, resource_versions, sample_resources_catalog),
        (mapping, mapping_versions, sample_mappings_catalog)
    ):
        fmt = f'https://hydril.la/{item["type"]}/{item["identifier"]}%s'
        # Make 2 versions of each item so that we can test updates.
        for ver in versions:
            item['version'] = ver
            # The item gets serialized right away, so later changes to its
            # 'version' do not affect handlers that were already registered.
            catalog[fmt % '.json'] = make_handler(json.dumps(item))
            versioned_path = '/' + item_version_string(item)
            catalog[fmt % versioned_path] = make_handler(json.dumps(item))
|
229
|
|
230
|
def serve_query(command, get_params, post_params):
    """
    Handler of the mock repository's query endpoints.

    Looks up the first value of the 'url' GET parameter in sample_queries and
    returns a (status, headers, body) tuple whose body is the JSON-serialized
    query result listing the mappings recorded for that URL.
    """
    queried_url = get_params['url'][0]
    generator_info = {
        'name': 'human',
        'version': 'sapiens-0.8.15'
    }

    response = {
        '$schema': 'https://hydrilla.koszko.org/schemas/api_query_result-1.schema.json',
        'generated_by': generator_info,
        'mappings': sample_queries[queried_url]
    }

    return 200, {}, json.dumps(response)
|
241
|
|
242
|
# The same query handler is made available under several path prefixes.
sample_queries_catalog = {
    f'https://hydril.la/{suf}query': serve_query
    for suf in ('', '1/', '2/', '3/', '4/')
}

# Directory with the static pages referenced by the catalog.
_pages_dir = here / 'data' / 'pages'

def _site_with_index(origin, page_file):
    """
    Return catalog entries of a site whose root URL (with and without the
    trailing slash) redirects to its index.html, which in turn is answered
    with the given file from the pages directory.
    """
    index_url = f'{origin}/index.html'
    return {
        origin: (302, {'location': index_url}, None),
        f'{origin}/': (302, {'location': index_url}, None),
        index_url: (200, {}, _pages_dir / page_file)
    }

# Maps URLs either to a (status, headers, body) tuple or to a handler function
# taking (command, get_params, post_params).
catalog = {
    **_site_with_index('http://gotmyowndoma.in', 'gotmyowndomain.html'),

    **_site_with_index('https://gotmyowndoma.in', 'gotmyowndomain_https.html'),

    'https://gotmyowndoma.in/scripts_to_block_1.html':
    (200, {}, _pages_dir / 'scripts_to_block_1.html'),
    'https://gotmyowndoma.in/scripts_to_block_2.xml':
    (200, {}, _pages_dir / 'scripts_to_block_2.xml'),

    'https://anotherdoma.in/resource/blocked/by/CORS.json':
    lambda command, get_params, post_params: (200, {}, some_data),

    'https://counterdoma.in/': serve_counter,

    'https://serve.scrip.ts/': serve_script,

    **_site_with_index('https://site.with.scripts.block.ed',
                       'gotmyowndomain_https.html'),

    **_site_with_index('https://site.with.scripts.allow.ed',
                       'gotmyowndomain_https.html'),

    **_site_with_index('https://site.with.paylo.ad',
                       'gotmyowndomain_https.html'),

    **sample_files_catalog,
    **sample_resources_catalog,
    **sample_mappings_catalog,
    **sample_queries_catalog
}
|