hydrilla-builder / src / hydrilla / builder / build.py @ bd588eb9

# SPDX-License-Identifier: AGPL-3.0-or-later

# Building Hydrilla packages.
#
# This file is part of Hydrilla
#
# Copyright (C) 2022 Wojtek Kosior
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
#
# I, Wojtek Kosior, thereby promise not to sue for violation of this
# file's license. Although I request that you do not make use this code
# in a proprietary program, I am not going to enforce this in court.

# Enable using with Python 3.7.
from __future__ import annotations

import json
import re
import zipfile
import subprocess
from pathlib import Path, PurePosixPath
from hashlib import sha256
from sys import stderr
from contextlib import contextmanager
from tempfile import TemporaryDirectory, TemporaryFile
from typing import Optional, Iterable, Union

import jsonschema
import click

from .. import util
from . import _version
from . import local_apt
from .piggybacking import Piggybacked
from .common_errors import *

here = Path(__file__).resolve().parent

_ = util.translation(here / 'locales').gettext

index_validator = util.validator_for('package_source-2.schema.json')

schemas_root = 'https://hydrilla.koszko.org/schemas'

generated_by = {
    'name': 'hydrilla.builder',
    'version': _version.version
}

class ReuseError(SubprocessError):
    """
    Exception used to report various problems when calling the REUSE tool.
    """

def generate_spdx_report(root: Path) -> bytes:
    """
    Use the REUSE tool to generate an SPDX report for sources under 'root' and
    return the report's contents as 'bytes'.

    In case the directory tree under 'root' does not constitute a
    REUSE-compliant package, an exception is raised with the linting report
    included in it.

    In case the reuse tool is not installed, an exception is also raised.
    """
    for command in [
            ['reuse', '--root', str(root), 'lint'],
            ['reuse', '--root', str(root), 'spdx']
    ]:
        try:
            cp = subprocess.run(command, capture_output=True, text=True)
        except FileNotFoundError:
            msg = _('couldnt_execute_{}_is_it_installed').format('reuse')
            raise ReuseError(msg)

        if cp.returncode != 0:
            msg = _('command_{}_failed').format(' '.join(command))
            raise ReuseError(msg, cp)

    return cp.stdout.encode()

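# A minimal usage sketch of generate_spdx_report() (the path below is
# hypothetical; the external 'reuse' CLI tool must be installed and the
# directory tree must be REUSE-compliant):
#
#     spdx_document_bytes = generate_spdx_report(Path('/tmp/my-package-source'))
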
class FileRef:
    """Represent a reference to a file in the package."""
    def __init__(self, path: PurePosixPath, contents: bytes) -> None:
        """Initialize FileRef."""
        self.include_in_distribution   = False
        self.include_in_source_archive = True
        self.path                      = path
        self.contents                  = contents

        self.contents_hash = sha256(contents).digest().hex()

    def make_ref_dict(self) -> dict[str, str]:
        """
        Represent the file reference through a dict that can be included in JSON
        definitions.
        """
        return {
            'file':   str(self.path),
            'sha256': self.contents_hash
        }

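# Illustrative shape of the dict produced by FileRef.make_ref_dict() (the
# values below are made up):
#
#     {'file': 'js/content_script.js', 'sha256': '<hex digest of contents>'}
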
@contextmanager
def piggybacked_system(piggyback_def: Optional[dict],
                       piggyback_files: Optional[Path]) \
                       -> Iterable[Piggybacked]:
    """
    Resolve resources from a foreign software packaging system. Optionally, use
    package files (.deb's, etc.) from a specified directory instead of resolving
    and downloading them.
    """
    if piggyback_def is None:
        yield Piggybacked()
    else:
        # apt is the only supported system right now
        assert piggyback_def['system'] == 'apt'

        with local_apt.piggybacked_system(piggyback_def, piggyback_files) \
             as piggybacked:
            yield piggybacked

class Build:
    """
    Build a Hydrilla package.
    """
    def __init__(self, srcdir: Path, index_json_path: Path,
                 piggyback_files: Optional[Path]=None):
        """
        Initialize a build. All files to be included in a distribution package
        are loaded into memory, all data gets validated and all necessary
        computations (e.g. preparing of hashes) are performed.
        """
        self.srcdir          = srcdir.resolve()
        self.piggyback_files = piggyback_files
        # TODO: the piggyback files we set are ignored for now; use them
        if piggyback_files is None:
            piggyback_default_path = \
                srcdir.parent / f'{srcdir.name}.foreign-packages'
            if piggyback_default_path.exists():
                self.piggyback_files = piggyback_default_path
        self.files_by_path   = {}
        self.resource_list   = []
        self.mapping_list    = []

        if not index_json_path.is_absolute():
            index_json_path = (self.srcdir / index_json_path)

        with open(index_json_path, 'rt') as index_file:
            index_json_text = index_file.read()

        index_obj = json.loads(util.strip_json_comments(index_json_text))

        index_desired_path = PurePosixPath('index.json')
        self.files_by_path[index_desired_path] = \
            FileRef(index_desired_path, index_json_text.encode())

        self._process_index_json(index_obj)

    def _process_file(self, filename: Union[str, PurePosixPath],
                      piggybacked: Piggybacked,
                      include_in_distribution: bool=True):
        """
        Resolve 'filename' relative to srcdir, load it to memory (if not loaded
        before), compute its hash and store its information in
        'self.files_by_path'.

        'filename' shall represent a relative path within the package
        directory.

        If 'include_in_distribution' is True it shall cause the file to not
        only be included in the source package's zipfile, but also written as
        one of the built package's files.

        For each file an attempt is made to resolve it using the 'piggybacked'
        object. If a file is found and pulled from a foreign software packaging
        system this way, it gets automatically excluded from inclusion in the
        Hydrilla source package's zipfile.

        Return the file's reference object that can be included in JSON
        definitions of various kinds.
        """
        include_in_source_archive = True

        desired_path = PurePosixPath(filename)
        if '..' in desired_path.parts:
            msg = _('path_contains_double_dot_{}').format(filename)
            raise FileReferenceError(msg)

        path = piggybacked.resolve_file(desired_path)
        if path is None:
            path = (self.srcdir / desired_path).resolve()
            if not path.is_relative_to(self.srcdir):
                raise FileReferenceError(_('loading_{}_outside_package_dir')
                                         .format(filename))

            if str(path.relative_to(self.srcdir)) == 'index.json':
                raise FileReferenceError(_('loading_reserved_index_json'))
        else:
            include_in_source_archive = False

        file_ref = self.files_by_path.get(desired_path)
        if file_ref is None:
            with open(path, 'rb') as file_handle:
                contents = file_handle.read()

            file_ref = FileRef(desired_path, contents)
            self.files_by_path[desired_path] = file_ref

        if include_in_distribution:
            file_ref.include_in_distribution = True

        if not include_in_source_archive:
            file_ref.include_in_source_archive = False

        return file_ref.make_ref_dict()

    def _prepare_source_package_zip(self, source_name: str,
                                    piggybacked: Piggybacked) -> str:
        """
        Create and store in memory a .zip archive containing files needed to
        build this source package.

        'source_name' shall not contain any slashes ('/').

        Return the zipfile's sha256 sum's hexstring.
        """
        tf = TemporaryFile()
        source_dir_path      = PurePosixPath(source_name)
        piggybacked_dir_path = PurePosixPath(f'{source_name}.foreign-packages')

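        # Layout of the archive built below: package source files go under
        # '<source_name>/', while files pulled from a foreign packaging system
        # (e.g. downloaded .deb's) go under '<source_name>.foreign-packages/'.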
        with zipfile.ZipFile(tf, 'w') as zf:
            for file_ref in self.files_by_path.values():
                if file_ref.include_in_source_archive:
                    zf.writestr(str(source_dir_path / file_ref.path),
                                file_ref.contents)

            for desired_path, real_path in piggybacked.archive_files():
                zf.writestr(str(piggybacked_dir_path / desired_path),
                            real_path.read_bytes())

        tf.seek(0)
        self.source_zip_contents = tf.read()

        return sha256(self.source_zip_contents).digest().hex()

    def _process_item(self, item_def: dict, piggybacked: Piggybacked):
        """
        Process 'item_def' as the definition of a resource/mapping and store in
        memory its processed form and the files used by it.

        Return a minimal item reference suitable for use in the source
        description.
        """
        copy_props = ['type', 'identifier', 'long_name', 'description']
        for prop in ('comment', 'uuid'):
            if prop in item_def:
                copy_props.append(prop)

        if item_def['type'] == 'resource':
            item_list = self.resource_list

            copy_props.append('revision')

            script_file_refs = [self._process_file(f['file'], piggybacked)
                                for f in item_def.get('scripts', [])]

            deps = [{'identifier': res_ref['identifier']}
                    for res_ref in item_def.get('dependencies', [])]

            new_item_obj = {
                'dependencies': [*piggybacked.package_must_depend, *deps],
                'scripts':      script_file_refs
            }
        else:
            item_list = self.mapping_list

            payloads = {}
            for pat, res_ref in item_def.get('payloads', {}).items():
                payloads[pat] = {'identifier': res_ref['identifier']}

            new_item_obj = {
                'payloads': payloads
            }

        new_item_obj.update([(p, item_def[p]) for p in copy_props])

        new_item_obj['version'] = util.normalize_version(item_def['version'])
        new_item_obj['$schema'] = f'{schemas_root}/api_{item_def["type"]}_description-1.schema.json'
        new_item_obj['source_copyright'] = self.copyright_file_refs
        new_item_obj['source_name'] = self.source_name
        new_item_obj['generated_by'] = generated_by

        item_list.append(new_item_obj)

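        # Only this subset of the item's properties gets embedded in the
        # source description's 'definitions' list.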
        props_in_ref = ('type', 'identifier', 'version', 'long_name')
        return dict([(prop, new_item_obj[prop]) for prop in props_in_ref])

    def _process_index_json(self, index_obj: dict):
        """
        Process 'index_obj' as the contents of the source package's index.json.
        Store in memory this source package's zipfile, the package's individual
        files and the computed definitions of the source package and the items
        defined in it.
        """
        index_validator.validate(index_obj)
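        # Extract the numeric version from the '$schema' URI; e.g. a URI ending
        # in 'package_source-2.schema.json' yields [2] and one ending in
        # '-1.1.schema.json' yields [1, 1].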
        match = re.match(r'.*-((([1-9][0-9]*|0)\.)+)schema\.json$',
                         index_obj['$schema'])
        self.source_schema_ver = \
            [int(n) for n in filter(None, match.group(1).split('.'))]

        out_schema = f'{schemas_root}/api_source_description-1.schema.json'

        self.source_name = index_obj['source_name']

        generate_spdx = index_obj.get('reuse_generate_spdx_report', False)
        if generate_spdx:
            contents  = generate_spdx_report(self.srcdir)
            spdx_path = PurePosixPath('report.spdx')
            spdx_ref  = FileRef(spdx_path, contents)

            spdx_ref.include_in_source_archive = False
            self.files_by_path[spdx_path] = spdx_ref

        piggyback_def = None
        if self.source_schema_ver >= [1, 1] and 'piggyback_on' in index_obj:
            piggyback_def = index_obj['piggyback_on']

        with piggybacked_system(piggyback_def, self.piggyback_files) \
             as piggybacked:
            copyright_to_process = [
                *(file_ref['file'] for file_ref in index_obj['copyright']),
                *piggybacked.package_license_files
            ]
            self.copyright_file_refs = [self._process_file(f, piggybacked)
                                        for f in copyright_to_process]

            if generate_spdx and not spdx_ref.include_in_distribution:
                raise FileReferenceError(_('report_spdx_not_in_copyright_list'))

            item_refs = [self._process_item(d, piggybacked)
                         for d in index_obj['definitions']]

            for file_ref in index_obj.get('additional_files', []):
                self._process_file(file_ref['file'], piggybacked,
                                   include_in_distribution=False)

            zipfile_sha256 = self._prepare_source_package_zip\
                (self.source_name, piggybacked)

            source_archives_obj = {'zip' : {'sha256': zipfile_sha256}}

        self.source_description = {
            '$schema':            out_schema,
            'source_name':        self.source_name,
            'source_copyright':   self.copyright_file_refs,
            'upstream_url':       index_obj['upstream_url'],
            'definitions':        item_refs,
            'source_archives':    source_archives_obj,
            'generated_by':       generated_by
        }

        if 'comment' in index_obj:
            self.source_description['comment'] = index_obj['comment']

    def write_source_package_zip(self, dstpath: Path):
        """
        Create a .zip archive containing files needed to build this source
        package and write it at 'dstpath'.
        """
        with open(dstpath, 'wb') as output:
            output.write(self.source_zip_contents)

    def write_package_files(self, dstpath: Path):
        """Write package files under 'dstpath' for distribution."""
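        # Output layout written below: distribution file contents go under
        # 'file/sha256/<contents hash>', the source description JSON and the
        # source zipfile under 'source/', and each item definition under
        # '<resource|mapping>/<identifier>/<version>'.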
        file_dir_path = (dstpath / 'file' / 'sha256').resolve()
        file_dir_path.mkdir(parents=True, exist_ok=True)

        for file_ref in self.files_by_path.values():
            if file_ref.include_in_distribution:
                file_path = file_dir_path / file_ref.contents_hash
                file_path.write_bytes(file_ref.contents)

        source_dir_path = (dstpath / 'source').resolve()
        source_dir_path.mkdir(parents=True, exist_ok=True)
        source_name = self.source_description["source_name"]

        with open(source_dir_path / f'{source_name}.json', 'wt') as output:
            json.dump(self.source_description, output)

        with open(source_dir_path / f'{source_name}.zip', 'wb') as output:
            output.write(self.source_zip_contents)

        for item_type, item_list in [
                ('resource', self.resource_list),
                ('mapping', self.mapping_list)
        ]:
            item_type_dir_path = (dstpath / item_type).resolve()

            for item_def in item_list:
                item_dir_path = item_type_dir_path / item_def['identifier']
                item_dir_path.mkdir(parents=True, exist_ok=True)

                version = '.'.join([str(n) for n in item_def['version']])
                with open(item_dir_path / version, 'wt') as output:
                    json.dump(item_def, output)

dir_type = click.Path(exists=True, file_okay=False, resolve_path=True)

@click.command(help=_('build_package_from_srcdir_to_dstdir'))
@click.option('-s', '--srcdir', default='./', type=dir_type, show_default=True,
              help=_('source_directory_to_build_from'))
@click.option('-i', '--index-json', default='index.json', type=click.Path(),
              help=_('path_instead_of_index_json'))
@click.option('-p', '--piggyback-files', type=click.Path(),
              help=_('path_instead_for_piggyback_files'))
@click.option('-d', '--dstdir', type=dir_type, required=True,
              help=_('built_package_files_destination'))
@click.version_option(version=_version.version, prog_name='Hydrilla builder',
                      message=_('%(prog)s_%(version)s_license'),
                      help=_('version_printing'))
def perform(srcdir, index_json, piggyback_files, dstdir):
    """
    Execute Hydrilla builder to turn a source package into a distributable one.

    This command is meant to be the entry point of the hydrilla-builder command
    exported by this package.
    """
    build = Build(Path(srcdir), Path(index_json),
                  piggyback_files and Path(piggyback_files))
    build.write_package_files(Path(dstdir))
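
# Example usage (paths below are hypothetical). Via the CLI entry point:
#
#     hydrilla-builder -s ./my-package -d ./built
#
# or, as a minimal programmatic sketch:
#
#     build = Build(Path('./my-package'), Path('index.json'))
#     build.write_package_files(Path('./built'))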