Project

General

Profile

Download (13 KB) Statistics
| Branch: | Tag: | Revision:

hydrilla-builder / src / hydrilla / builder / build.py @ 456ad6c0

1
# SPDX-License-Identifier: AGPL-3.0-or-later
2

    
3
# Building Hydrilla packages.
4
#
5
# This file is part of Hydrilla
6
#
7
# Copyright (C) 2022 Wojtek Kosior
8
#
9
# This program is free software: you can redistribute it and/or modify
10
# it under the terms of the GNU Affero General Public License as
11
# published by the Free Software Foundation, either version 3 of the
12
# License, or (at your option) any later version.
13
#
14
# This program is distributed in the hope that it will be useful,
15
# but WITHOUT ANY WARRANTY; without even the implied warranty of
16
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
# GNU Affero General Public License for more details.
18
#
19
# You should have received a copy of the GNU Affero General Public License
20
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
21
#
22
#
23
# I, Wojtek Kosior, thereby promise not to sue for violation of this
24
# file's license. Although I request that you do not make use this code
25
# in a proprietary program, I am not going to enforce this in court.
26

    
27
import json
28
import re
29
import zipfile
30
from pathlib import Path
31
from hashlib import sha256
32
from sys import stderr
33

    
34
import jsonschema
35

    
36
from .. import util
37

    
38
# jsonschema validator for source packages' index.json (schema version 1).
index_validator = util.validator_for('package_source-1.schema.json')
39

    
40
class FileReferenceError(Exception):
    """
    Raised for the various problems concerning files referenced from a
    source package's index.json.
    """
45

    
46
class ReuseError(Exception):
    """
    Raised for the various problems that can occur when invoking the REUSE
    tool.
    """
50

    
51
class FileBuffer:
    """
    Implement a file-like object that buffers data written to it.
    """
    def __init__(self):
        """
        Initialize FileBuffer.
        """
        # Chunks of written data, concatenated on demand by get_bytes().
        self.chunks = []

    def write(self, b):
        """
        Buffer 'b', return number of bytes buffered.

        'b' is expected to be an instance of 'bytes' or 'str', in which case it
        gets encoded as UTF-8.
        """
        # isinstance() (rather than an exact type check) also catches str
        # subclasses, which would otherwise be buffered unencoded and break
        # the b''.join() in get_bytes().
        if isinstance(b, str):
            b = b.encode()
        self.chunks.append(b)
        return len(b)

    def flush(self):
        """
        A no-op mock of file-like object's flush() method.
        """
        pass

    def get_bytes(self):
        """
        Return all data written so far concatenated into a single 'bytes'
        object.
        """
        return b''.join(self.chunks)
85

    
86
def generate_spdx_report(root):
    """
    Use REUSE tool to generate an SPDX report for sources under 'root' and
    return the report's contents as 'bytes'.

    'root' shall be an instance of pathlib.Path.

    In case the directory tree under 'root' does not constitute a
    REUSE-compliant package, linting report is printed to standard output and
    an exception is raised.

    In case the reuse package is not installed, an exception is also raised.
    """
    try:
        from reuse._main import main as reuse_main
    except ModuleNotFoundError as ex:
        # Fix: this exception was previously constructed but never raised,
        # which let execution fall through to a NameError on 'reuse_main'.
        raise ReuseError("Could not import 'reuse'. Is the tool installed and visible to this Python instance?") from ex

    # reuse_main() writes its report into the supplied file-like object and
    # returns a non-zero status on failure.
    mocked_output = FileBuffer()
    if reuse_main(args=['--root', str(root), 'lint'], out=mocked_output) != 0:
        stderr.write(mocked_output.get_bytes().decode())
        raise ReuseError('Attempt to generate an SPDX report for a REUSE-incompliant package.')

    mocked_output = FileBuffer()
    if reuse_main(args=['--root', str(root), 'spdx'], out=mocked_output) != 0:
        stderr.write(mocked_output.get_bytes().decode())
        raise ReuseError("Couldn't generate an SPDX report for package.")

    return mocked_output.get_bytes()
115

    
116
class FileRef:
    """A single file referenced by the package being built."""

    def __init__(self, path: Path, contents: bytes):
        """Record the file's path, raw contents and SHA-256 hexstring."""
        # Flags controlling where this file ends up; callers flip them after
        # construction as needed.
        self.include_in_distribution = False
        self.include_in_zipfile = True

        self.path = path
        self.contents = contents
        self.contents_hash = sha256(contents).hexdigest()

    def make_ref_dict(self, filename: str):
        """
        Represent the file reference through a dict that can be included in
        JSON definitions.
        """
        return dict(file=filename, sha256=self.contents_hash)
136

    
137
class Build:
    """
    Build a Hydrilla package.
    """
    def __init__(self, srcdir, index_json_path):
        """
        Initialize a build. All files to be included in a distribution package
        are loaded into memory, all data gets validated and all necessary
        computations (e.g. preparing of hashes) are performed.

        'srcdir' and 'index_json_path' are expected to be pathlib.Path objects.
        """
        self.srcdir          = srcdir.resolve()
        self.index_json_path = index_json_path
        self.files_by_path   = {}
        self.resource_list   = []
        self.mapping_list    = []

        # A relative index.json path is interpreted relative to srcdir.
        if not index_json_path.is_absolute():
            self.index_json_path = (self.srcdir / self.index_json_path)

        self.index_json_path = self.index_json_path.resolve()

        with open(self.index_json_path, 'rt') as index_file:
            index_json_text = index_file.read()

        index_obj = json.loads(util.strip_json_comments(index_json_text))

        # Register the index under the reserved name 'index.json' so it always
        # lands in the source archive regardless of where it was loaded from.
        self.files_by_path[self.srcdir / 'index.json'] = \
            FileRef(self.srcdir / 'index.json', index_json_text.encode())

        self._process_index_json(index_obj)

    def _process_file(self, filename: str, include_in_distribution: bool=True):
        """
        Resolve 'filename' relative to srcdir, load it to memory (if not loaded
        before), compute its hash and store its information in
        'self.files_by_path'.

        'filename' shall represent a relative path using '/' as a separator.

        if 'include_in_distribution' is True it shall cause the file to not only
        be included in the source package's zipfile, but also written as one of
        built package's files.

        Return file's reference object that can be included in JSON defintions
        of various kinds.
        """
        path = self.srcdir
        for segment in filename.split('/'):
            path /= segment

        path = path.resolve()
        # Guard against path traversal ('..' segments or symlinks escaping the
        # source tree).  Fix: the message previously contained no
        # interpolation; include the offending filename.
        if not path.is_relative_to(self.srcdir):
            raise FileReferenceError(f"Attempt to load '{filename}' which lies outside package source directory.")

        if str(path.relative_to(self.srcdir)) == 'index.json':
            raise FileReferenceError("Attempt to load 'index.json' which is a reserved filename.")

        file_ref = self.files_by_path.get(path)
        if file_ref is None:
            with open(path, 'rb') as file_handle:
                contents = file_handle.read()

            file_ref = FileRef(path, contents)
            self.files_by_path[path] = file_ref

        # A file may be referenced several times; once any reference wants it
        # distributed, it stays distributed.
        if include_in_distribution:
            file_ref.include_in_distribution = True

        return file_ref.make_ref_dict(filename)

    def _prepare_source_package_zip(self, root_dir_name: str):
        """
        Create and store in memory a .zip archive containing files needed to
        build this source package.

        'root_dir_name' shall not contain any slashes ('/').

        Return zipfile's sha256 sum's hexstring.
        """
        fb = FileBuffer()
        root_dir_path = Path(root_dir_name)

        def zippath(file_path):
            # All archive members live under a single root directory named
            # after the source package.
            file_path = root_dir_path / file_path.relative_to(self.srcdir)
            return file_path.as_posix()

        with zipfile.ZipFile(fb, 'w') as xpi:
            for file_ref in self.files_by_path.values():
                if file_ref.include_in_zipfile:
                    xpi.writestr(zippath(file_ref.path), file_ref.contents)

        self.source_zip_contents = fb.get_bytes()

        return sha256(self.source_zip_contents).digest().hex()

    def _process_item(self, item_def: dict):
        """
        Process 'item_def' as definition of a resource/mapping and store in
        memory its processed form and files used by it.

        Return a minimal item reference suitable for using in source
        description.
        """
        # Properties copied verbatim from the definition into the built item.
        copy_props = ['type', 'identifier', 'long_name', 'uuid', 'description']
        if 'comment' in item_def:
            copy_props.append('comment')

        if item_def['type'] == 'resource':
            item_list = self.resource_list

            copy_props.append('revision')

            # Loading script files also registers them for distribution.
            script_file_refs = [self._process_file(f['file'])
                                for f in item_def.get('scripts', [])]

            new_item_obj = {
                'dependencies': item_def.get('dependencies', []),
                'scripts':      script_file_refs
            }
        else:
            item_list = self.mapping_list

            # Only resource identifiers are kept in built mapping payloads.
            payloads = {}
            for pat, res_ref in item_def.get('payloads', {}).items():
                payloads[pat] = {'identifier': res_ref['identifier']}

            new_item_obj = {
                'payloads': payloads
            }

        new_item_obj.update([(p, item_def[p]) for p in copy_props])

        new_item_obj['version'] = util.normalize_version(item_def['version'])
        new_item_obj['api_schema_version'] = [1, 0, 1]
        new_item_obj['source_copyright'] = self.copyright_file_refs
        new_item_obj['source_name'] = self.source_name

        item_list.append(new_item_obj)

        props_in_ref = ('type', 'identifier', 'version', 'long_name')
        return dict([(prop, new_item_obj[prop]) for prop in props_in_ref])

    def _process_index_json(self, index_obj: dict):
        """
        Process 'index_obj' as contents of source package's index.json and store
        in memory this source package's zipfile as well as package's individual
        files and computed definitions of the source package and items defined
        in it.
        """
        index_validator.validate(index_obj)

        self.source_name = index_obj['source_name']

        generate_spdx = index_obj.get('reuse_generate_spdx_report', False)
        if generate_spdx:
            contents  = generate_spdx_report(self.srcdir)
            spdx_path = (self.srcdir / 'report.spdx').resolve()
            spdx_ref  = FileRef(spdx_path, contents)

            # The report is generated, not part of the sources, so it must not
            # recursively end up inside the source zipfile.
            spdx_ref.include_in_zipfile = False
            self.files_by_path[spdx_path] = spdx_ref

        self.copyright_file_refs = \
            [self._process_file(f['file']) for f in index_obj['copyright']]

        # Processing copyright files above flips include_in_distribution on
        # the report's FileRef only if 'report.spdx' was listed there.
        if generate_spdx and not spdx_ref.include_in_distribution:
            raise FileReferenceError("Told to generate 'report.spdx' but 'report.spdx' is not listed among copyright files. Refusing to proceed.")

        item_refs = [self._process_item(d) for d in index_obj['definitions']]

        for file_ref in index_obj.get('additional_files', []):
            self._process_file(file_ref['file'], include_in_distribution=False)

        root_dir_path = Path(self.source_name)

        source_archives_obj = {
            'zip' : {
                'sha256': self._prepare_source_package_zip(root_dir_path)
            }
        }

        self.source_description = {
            'api_schema_version': [1, 0, 1],
            'source_name':        self.source_name,
            'source_copyright':   self.copyright_file_refs,
            'upstream_url':       index_obj['upstream_url'],
            'definitions':        item_refs,
            'source_archives':    source_archives_obj
        }

        if 'comment' in index_obj:
            self.source_description['comment'] = index_obj['comment']

    def write_source_package_zip(self, dstpath: Path):
        """
        Create a .zip archive containing files needed to build this source
        package and write it at 'dstpath'.
        """
        with open(dstpath, 'wb') as output:
            output.write(self.source_zip_contents)

    def write_package_files(self, dstpath: Path):
        """Write package files under 'dstpath' for distribution."""
        # Distributed files are stored content-addressed by their SHA-256.
        file_dir_path = (dstpath / 'file').resolve()
        file_dir_path.mkdir(parents=True, exist_ok=True)

        for file_ref in self.files_by_path.values():
            if file_ref.include_in_distribution:
                file_name = f'sha256-{file_ref.contents_hash}'
                with open(file_dir_path / file_name, 'wb') as output:
                    output.write(file_ref.contents)

        source_dir_path = (dstpath / 'source').resolve()
        source_dir_path.mkdir(parents=True, exist_ok=True)
        source_name = self.source_description["source_name"]

        with open(source_dir_path / f'{source_name}.json', 'wt') as output:
            json.dump(self.source_description, output)

        with open(source_dir_path / f'{source_name}.zip', 'wb') as output:
            output.write(self.source_zip_contents)

        for item_type, item_list in [
                ('resource', self.resource_list),
                ('mapping', self.mapping_list)
        ]:
            item_type_dir_path = (dstpath / item_type).resolve()

            for item_def in item_list:
                item_dir_path = item_type_dir_path / item_def['identifier']
                item_dir_path.mkdir(parents=True, exist_ok=True)

                # Each item version gets its own definition file named after
                # the dotted version string.
                version = '.'.join([str(n) for n in item_def['version']])
                with open(item_dir_path / version, 'wt') as output:
                    json.dump(item_def, output)
(3-3/3)