1 |
5ac7ec33
|
Wojtek Kosior
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
2 |
|
|
|
3 |
|
|
# Building Hydrilla packages.
|
4 |
|
|
#
|
5 |
|
|
# This file is part of Hydrilla
|
6 |
|
|
#
|
7 |
|
|
# Copyright (C) 2021,2022 Wojtek Kosior
|
8 |
|
|
#
|
9 |
|
|
# This program is free software: you can redistribute it and/or modify
|
10 |
|
|
# it under the terms of the GNU Affero General Public License as
|
11 |
|
|
# published by the Free Software Foundation, either version 3 of the
|
12 |
|
|
# License, or (at your option) any later version.
|
13 |
|
|
#
|
14 |
|
|
# This program is distributed in the hope that it will be useful,
|
15 |
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16 |
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17 |
|
|
# GNU Affero General Public License for more details.
|
18 |
|
|
#
|
19 |
|
|
# You should have received a copy of the GNU Affero General Public License
|
20 |
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
21 |
|
|
#
|
22 |
|
|
#
|
23 |
|
|
# I, Wojtek Kosior, thereby promise not to sue for violation of this
|
24 |
|
|
# file's license. Although I request that you do not make use this code
|
25 |
|
|
# in a proprietary program, I am not going to enforce this in court.
|
26 |
|
|
|
27 |
|
|
|
28 |
|
|
import json
|
29 |
|
|
import re
|
30 |
|
|
import zipfile
|
31 |
|
|
from pathlib import Path
|
32 |
|
|
from hashlib import sha256
|
33 |
|
|
from sys import stderr
|
34 |
|
|
|
35 |
|
|
import jsonschema
|
36 |
|
|
|
37 |
|
|
# Directory holding this module; the bundled JSON schemas live in its
# 'schemas' subdirectory.
here = Path(__file__).resolve().parent

# Parse the schema once, at import time. It is what the contents of a source
# package's index.json get validated against.
with (here / 'schemas' / 'package_source-1.schema.json').open() as schema_file:
    index_json_schema = json.load(schema_file)
|
40 |
|
|
|
41 |
|
|
class FileReferenceError(Exception):
    """
    Raised when a file referenced from a source package's index.json cannot be
    accepted.
    """
|
46 |
|
|
|
47 |
|
|
class ReuseError(Exception):
    """
    Raised when something goes wrong while calling the REUSE tool.
    """
|
51 |
|
|
|
52 |
|
|
strip_comment_re = re.compile(r'''
|
53 |
|
|
^ # match from the beginning of each line
|
54 |
|
|
( # catch the part before '//' comment
|
55 |
|
|
(?: # this group matches either a string or a single out-of-string character
|
56 |
|
|
[^"/] |
|
57 |
|
|
"
|
58 |
|
|
(?: # this group matches any in-a-string character
|
59 |
|
|
[^"\\] | # match any normal character
|
60 |
|
|
\\[^u] | # match any escaped character like '\f' or '\n'
|
61 |
|
|
\\u[a-fA-F0-9]{4} # match an escape
|
62 |
|
|
)*
|
63 |
|
|
"
|
64 |
|
|
)*
|
65 |
|
|
)
|
66 |
|
|
# expect either end-of-line or a comment:
|
67 |
|
|
# * unterminated strings will cause matching to fail
|
68 |
|
|
# * bad comment (with '/' instead of '//') will be indicated by second group
|
69 |
|
|
# having length 1 instead of 2 or 0
|
70 |
|
|
(//?|$)
|
71 |
|
|
''', re.VERBOSE)
|
72 |
|
|
|
73 |
|
|
def strip_json_comments(text):
    """
    Return 'text' (JSON with optional C++-style '//' comments) with the
    comments removed.

    Slashes that appear inside string literals are left alone. A lone slash
    ('/') outside of a string is reported by raising json.JSONDecodeError
    whose position points at that slash. Lines on which a string literal is
    left unterminated are passed through unchanged so that the JSON parser
    can report the problem later; other JSON errors are not detected here
    either.
    """
    kept_lines = []
    line_start = 0  # offset of the current line's first character in 'text'

    for raw_line in text.split('\n'):
        found = strip_comment_re.match(raw_line)

        if found is None:
            # Unterminated string - not our business, keep the line verbatim.
            kept_lines.append(raw_line)
        else:
            code_part, marker = found.groups()
            if marker == '/':
                raise json.JSONDecodeError('bad comment', text,
                                           line_start + len(code_part))
            kept_lines.append(code_part)

        # '+ 1' accounts for the newline character removed by split().
        line_start += len(raw_line) + 1

    return '\n'.join(kept_lines)
|
98 |
|
|
|
99 |
|
|
def normalize_version(ver):
    """
    Return a copy of 'ver' (an array of integers) with trailing zeroes dropped.

    The argument is not modified; a *new* array is always returned.
    """
    # Walk backwards from the end until a non-zero component (or the start of
    # the array) is reached.
    end = len(ver)
    while end > 0 and ver[end - 1] == 0:
        end -= 1

    return ver[:end]
|
111 |
|
|
|
112 |
|
|
class FileBuffer:
    """
    Implement a minimal, write-only file-like object that buffers in memory
    all data written to it.
    """
    def __init__(self):
        """
        Initialize an empty FileBuffer.
        """
        # Chunks of 'bytes' in the order they were written.
        self.chunks = []

    def write(self, b):
        """
        Buffer 'b', return number of bytes buffered.

        'b' is expected to be either a bytes-like object, which gets buffered
        as-is, or a 'str' (instances of 'str' subclasses included), which gets
        encoded as UTF-8 first.
        """
        # isinstance() rather than an exact type check: a 'str' subclass
        # instance must be encoded as well, otherwise get_bytes() would later
        # fail when joining the chunks.
        if isinstance(b, str):
            b = b.encode()
        self.chunks.append(b)
        return len(b)

    def flush(self):
        """
        A no-op mock of file-like object's flush() method.
        """
        pass

    def get_bytes(self):
        """
        Return all data written so far concatenated into a single 'bytes'
        object.
        """
        return b''.join(self.chunks)
|
146 |
|
|
|
147 |
|
|
def generate_spdx_report(root):
    """
    Use REUSE tool to generate an SPDX report for sources under 'root' and
    return the report's contents as 'bytes'.

    'root' shall be an instance of pathlib.Path.

    The tool's 'lint' command is run first. If it returns a non-zero result,
    its output is written to standard error and ReuseError is raised. The
    same happens if the subsequent 'spdx' command returns a non-zero result.

    ReuseError is also raised in case the reuse package cannot be imported.
    """
    # Imported lazily so that 'reuse' is only required when a report is
    # actually requested.
    try:
        from reuse._main import main as reuse_main
    except ModuleNotFoundError as e:
        raise ReuseError("Could not import 'reuse'. Is the tool installed and visible to this Python instance?") from e

    root_arg = str(root)

    lint_output = FileBuffer()
    if reuse_main(args=['--root', root_arg, 'lint'], out=lint_output) != 0:
        stderr.write(lint_output.get_bytes().decode())
        raise ReuseError('Attempt to generate an SPDX report for a REUSE-incompliant package.')

    # A fresh buffer, so that linter's output does not end up in the report.
    spdx_output = FileBuffer()
    if reuse_main(args=['--root', root_arg, 'spdx'], out=spdx_output) != 0:
        stderr.write(spdx_output.get_bytes().decode())
        raise ReuseError("Couldn't generate an SPDX report for package.")

    return spdx_output.get_bytes()
|
176 |
|
|
|
177 |
|
|
class FileRef:
    """Describe a single file of the package, held in memory with its hash."""
    def __init__(self, path: Path, contents: bytes):
        """
        Remember the file's 'path' and 'contents' and compute the hexadecimal
        SHA-256 digest of the latter.

        A new reference is by default marked for inclusion in the zipfile but
        not for inclusion in the distribution.
        """
        self.path = path
        self.contents = contents
        self.contents_hash = sha256(contents).hexdigest()

        self.include_in_zipfile = True
        self.include_in_distribution = False

    def make_ref_dict(self, filename: str):
        """
        Return a dict describing this file, suitable for inclusion in JSON
        definitions. It holds 'filename' and the SHA-256 hash of the file's
        contents.
        """
        return dict(file=filename, sha256=self.contents_hash)
|
197 |
|
|
|
198 |
|
|
class Build:
    """
    Build a Hydrilla package.

    All processing happens when an instance is constructed; the 'write_*'
    methods then only write out the results that are held in memory.
    """
    def __init__(self, srcdir, index_json_path):
        """
        Initialize a build. All files to be included in a distribution package
        are loaded into memory, all data gets validated and all necessary
        computations (e.g. preparing of hashes) are performed.

        'srcdir' and 'index_json_path' are expected to be pathlib.Path objects.
        A relative 'index_json_path' is interpreted relative to 'srcdir'.
        """
        self.srcdir = srcdir.resolve()
        self.files_by_path = {}
        self.resource_list = []
        self.mapping_list = []

        if not index_json_path.is_absolute():
            index_json_path = self.srcdir / index_json_path

        self.index_json_path = index_json_path.resolve()

        # Decode explicitly as UTF-8. The text is re-encoded as UTF-8 below,
        # so decoding it with a different (locale-dependent) encoding would
        # corrupt non-ASCII characters in the packaged copy.
        with open(self.index_json_path, 'rt', encoding='utf-8') as index_file:
            index_json_text = index_file.read()

        index_obj = json.loads(strip_json_comments(index_json_text))

        # Whatever the index file's actual location, it is stored as
        # 'index.json' directly under the source directory. The original text
        # (comments included) is what gets stored.
        packaged_index_path = self.srcdir / 'index.json'
        self.files_by_path[packaged_index_path] = \
            FileRef(packaged_index_path, index_json_text.encode())

        self._process_index_json(index_obj)

    def _process_file(self, filename: str, include_in_distribution: bool=True):
        """
        Resolve 'filename' relative to srcdir, load it to memory (if not loaded
        before), compute its hash and store its information in
        'self.files_by_path'.

        'filename' shall represent a relative path using '/' as a separator.

        if 'include_in_distribution' is True it shall cause the file to not only
        be included in the source package's zipfile, but also written as one of
        built package's files.

        Return file's reference object that can be included in JSON definitions
        of various kinds.

        FileReferenceError is raised if the resolved path lies outside of the
        source directory or is the reserved 'index.json'.
        """
        path = self.srcdir.joinpath(*filename.split('/')).resolve()

        if not path.is_relative_to(self.srcdir):
            raise FileReferenceError(f"Attempt to load '{filename}' which lies outside package source directory.")

        if str(path.relative_to(self.srcdir)) == 'index.json':
            raise FileReferenceError("Attempt to load 'index.json' which is a reserved filename.")

        file_ref = self.files_by_path.get(path)
        if file_ref is None:
            file_ref = FileRef(path, path.read_bytes())
            self.files_by_path[path] = file_ref

        # The flag is only ever switched on here. A file once marked for
        # distribution stays marked even if referenced again with False.
        if include_in_distribution:
            file_ref.include_in_distribution = True

        return file_ref.make_ref_dict(filename)

    def _prepare_source_package_zip(self, root_dir_name: str):
        """
        Create and store in memory a .zip archive containing files needed to
        build this source package.

        'root_dir_name' shall not contain any slashes ('/'). It names the
        directory inside the archive under which all files are placed.

        Return zipfile's sha256 sum's hexstring.
        """
        fb = FileBuffer()
        root_dir_path = Path(root_dir_name)

        def zippath(file_path):
            # Archive member names use '/' regardless of the platform.
            file_path = root_dir_path / file_path.relative_to(self.srcdir)
            return file_path.as_posix()

        with zipfile.ZipFile(fb, 'w') as xpi:
            for file_ref in self.files_by_path.values():
                if file_ref.include_in_zipfile:
                    xpi.writestr(zippath(file_ref.path), file_ref.contents)

        self.source_zip_contents = fb.get_bytes()

        return sha256(self.source_zip_contents).digest().hex()

    def _process_item(self, item_def: dict):
        """
        Process 'item_def' as definition of a resource/mapping and store in
        memory its processed form and files used by it.

        Return a minimal item reference suitable for using in source
        description.
        """
        copy_props = ['type', 'identifier', 'long_name', 'uuid', 'description']
        if 'comment' in item_def:
            copy_props.append('comment')

        if item_def['type'] == 'resource':
            item_list = self.resource_list

            copy_props.append('revision')

            script_file_refs = [self._process_file(f['file'])
                                for f in item_def.get('scripts', [])]

            new_item_obj = {
                'dependencies': item_def.get('dependencies', []),
                'scripts': script_file_refs
            }
        else:
            item_list = self.mapping_list

            # Of each payload's resource reference only the identifier is kept.
            payloads = {pat: {'identifier': res_ref['identifier']}
                        for pat, res_ref
                        in item_def.get('payloads', {}).items()}

            new_item_obj = {
                'payloads': payloads
            }

        new_item_obj.update({p: item_def[p] for p in copy_props})

        new_item_obj['version'] = normalize_version(item_def['version'])
        new_item_obj['api_schema_version'] = [1, 0, 1]
        new_item_obj['source_copyright'] = self.copyright_file_refs
        new_item_obj['source_name'] = self.source_name

        item_list.append(new_item_obj)

        return {prop: new_item_obj[prop]
                for prop in ('type', 'identifier', 'version')}

    def _process_index_json(self, index_obj: dict):
        """
        Process 'index_obj' as contents of source package's index.json and store
        in memory this source package's zipfile as well as package's individual
        files and computed definitions of the source package and items defined
        in it.

        'index_obj' is validated against the package source schema first.
        FileReferenceError is raised if an SPDX report was requested but
        'report.spdx' is not listed among the copyright files.
        """
        jsonschema.validate(index_obj, index_json_schema)

        self.source_name = index_obj['source_name']

        spdx_ref = None
        generate_spdx = index_obj.get('reuse_generate_spdx_report', False)
        if generate_spdx:
            contents = generate_spdx_report(self.srcdir)
            spdx_path = (self.srcdir / 'report.spdx').resolve()
            spdx_ref = FileRef(spdx_path, contents)

            # The report is generated, so it is not a part of the sources.
            spdx_ref.include_in_zipfile = False
            self.files_by_path[spdx_path] = spdx_ref

        # Must happen after the generated report got registered above. This
        # way a 'report.spdx' entry among copyright files refers to the
        # generated report instead of being loaded from disk.
        self.copyright_file_refs = \
            [self._process_file(f['file']) for f in index_obj['copyright']]

        if generate_spdx and not spdx_ref.include_in_distribution:
            raise FileReferenceError("Told to generate 'report.spdx' but 'report.spdx' is not listed among copyright files. Refusing to proceed.")

        item_refs = [self._process_item(d) for d in index_obj['definitions']]

        for file_ref in index_obj.get('additional_files', []):
            self._process_file(file_ref['file'], include_in_distribution=False)

        # The zipfile can only be prepared once all files have been loaded.
        source_archives_obj = {
            'zip' : {
                'sha256': self._prepare_source_package_zip(self.source_name)
            }
        }

        self.source_description = {
            'api_schema_version': [1, 0, 1],
            'source_name': self.source_name,
            'source_copyright': self.copyright_file_refs,
            'upstream_url': index_obj['upstream_url'],
            'definitions': item_refs,
            'source_archives': source_archives_obj
        }

        if 'comment' in index_obj:
            self.source_description['comment'] = index_obj['comment']

    def write_source_package_zip(self, dstpath: Path):
        """
        Write the in-memory .zip archive, containing files needed to build
        this source package, at 'dstpath'.
        """
        with open(dstpath, 'wb') as output:
            output.write(self.source_zip_contents)

    def write_package_files(self, dstpath: Path):
        """
        Write package files under 'dstpath' for distribution.

        The following get written (directories are created as needed):
        * 'file/sha256-<hash>' for each file marked for distribution,
        * 'source/<source_name>.json' and 'source/<source_name>.zip',
        * '<item_type>/<identifier>/<version>' for each resource and mapping.
        """
        file_dir_path = (dstpath / 'file').resolve()
        file_dir_path.mkdir(parents=True, exist_ok=True)

        for file_ref in self.files_by_path.values():
            if file_ref.include_in_distribution:
                file_name = f'sha256-{file_ref.contents_hash}'
                with open(file_dir_path / file_name, 'wb') as output:
                    output.write(file_ref.contents)

        source_dir_path = (dstpath / 'source').resolve()
        source_dir_path.mkdir(parents=True, exist_ok=True)
        source_name = self.source_description["source_name"]

        with open(source_dir_path / f'{source_name}.json', 'wt',
                  encoding='utf-8') as output:
            json.dump(self.source_description, output)

        with open(source_dir_path / f'{source_name}.zip', 'wb') as output:
            output.write(self.source_zip_contents)

        for item_type, item_list in [
                ('resource', self.resource_list),
                ('mapping', self.mapping_list)
        ]:
            item_type_dir_path = (dstpath / item_type).resolve()

            for item_def in item_list:
                item_dir_path = item_type_dir_path / item_def['identifier']
                item_dir_path.mkdir(parents=True, exist_ok=True)

                version = '.'.join([str(n) for n in item_def['version']])
                with open(item_dir_path / version, 'wt',
                          encoding='utf-8') as output:
                    json.dump(item_def, output)
|