Project

General

Profile

Download (17.1 KB) Statistics
| Branch: | Tag: | Revision:

hydrilla-builder / src / hydrilla / builder / build.py @ f42f5c19

1
# SPDX-License-Identifier: AGPL-3.0-or-later
2

    
3
# Building Hydrilla packages.
4
#
5
# This file is part of Hydrilla
6
#
7
# Copyright (C) 2022 Wojtek Kosior
8
#
9
# This program is free software: you can redistribute it and/or modify
10
# it under the terms of the GNU Affero General Public License as
11
# published by the Free Software Foundation, either version 3 of the
12
# License, or (at your option) any later version.
13
#
14
# This program is distributed in the hope that it will be useful,
15
# but WITHOUT ANY WARRANTY; without even the implied warranty of
16
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
# GNU Affero General Public License for more details.
18
#
19
# You should have received a copy of the GNU Affero General Public License
20
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
21
#
22
#
23
# I, Wojtek Kosior, thereby promise not to sue for violation of this
24
# file's license. Although I request that you do not make use this code
25
# in a proprietary program, I am not going to enforce this in court.
26

    
27
# Enable using with Python 3.7.
28
from __future__ import annotations
29

    
30
import json
31
import re
32
import zipfile
33
import subprocess
34
from pathlib import Path, PurePosixPath
35
from hashlib import sha256
36
from sys import stderr
37
from contextlib import contextmanager
38
from tempfile import TemporaryDirectory, TemporaryFile
39
from typing import Optional, Iterable, Union
40

    
41
import jsonschema
42
import click
43

    
44
from .. import util
45
from . import _version
46
from . import local_apt
47
from .piggybacking import Piggybacked
48
from .common_errors import *
49

    
50
here = Path(__file__).resolve().parent
51

    
52
_ = util.translation(here / 'locales').gettext
53

    
54
def index_validator(major_schema_version):
    """
    Return an index.json schema validator for the requested major schema
    version series.
    """
    # Each supported major version maps to the exact version string that
    # appears in the corresponding schema file's name.
    exact_versions = {1: '1.0.1', 2: '2'}
    schema_filename = \
        f'package_source-{exact_versions[major_schema_version]}.schema.json'

    return util.validator_for(schema_filename)
62

    
63
# Base URL under which Hydrilla's JSON schemas are published; used when
# constructing '$schema' URLs of generated JSON documents.
schemas_root = 'https://hydrilla.koszko.org/schemas'

# Embedded in generated JSON documents to identify the producing tool.
generated_by = {
    'name': 'hydrilla.builder',
    'version': _version.version
}
69

    
70
class ReuseError(SubprocessError):
    """
    Exception used to report various problems when calling the REUSE tool
    (tool missing or a 'reuse' subcommand exiting with nonzero status).
    """
74

    
75
def generate_spdx_report(root: Path) -> bytes:
    """
    Use REUSE tool to generate an SPDX report for sources under 'root' and
    return the report's contents as 'bytes'.

    In case the directory tree under 'root' does not constitute a
    REUSE-compliant package, an exception is raised with linting report
    included in it.

    In case the reuse tool is not installed, an exception is also raised.
    """
    root_str = str(root)
    # Lint first so that non-compliance is reported before the actual
    # report generation; the report itself comes from the final command.
    commands = (
        ['reuse', '--root', root_str, 'lint'],
        ['reuse', '--root', root_str, 'spdx']
    )

    cp = None
    for command in commands:
        try:
            cp = subprocess.run(command, capture_output=True, text=True)
        except FileNotFoundError:
            raise ReuseError(
                _('couldnt_execute_{}_is_it_installed').format('reuse'))

        if cp.returncode != 0:
            raise ReuseError(_('command_{}_failed').format(' '.join(command)),
                             cp)

    # 'cp' now holds the completed 'reuse spdx' process.
    return cp.stdout.encode()
101

    
102
class FileRef:
    """Represent reference to a file in the package."""

    def __init__(self, path: PurePosixPath, contents: bytes) -> None:
        """
        Store the file's path and raw contents and precompute the SHA256
        hexstring of the contents.
        """
        # Flags controlling where the file ends up; callers flip these
        # after construction as needed.
        self.include_in_distribution = False
        self.include_in_source_archive = True

        self.path = path
        self.contents = contents
        self.contents_hash = sha256(contents).digest().hex()

    def make_ref_dict(self) -> dict[str, str]:
        """
        Represent the file reference through a dict that can be included in
        JSON definitions.
        """
        return {'file': str(self.path), 'sha256': self.contents_hash}
122

    
123
@contextmanager
def piggybacked_system(piggyback_def: Optional[dict],
                       piggyback_files: Optional[Path]) \
                       -> Iterable[Piggybacked]:
    """
    Resolve resources from a foreign software packaging system. Optionally, use
    package files (.deb's, etc.) from a specified directory instead of resolving
    and downloading them.
    """
    # No piggybacking requested - yield an empty resolver and stop.
    if piggyback_def is None:
        yield Piggybacked()
        return

    # apt is the only supported system right now
    assert piggyback_def['system'] == 'apt'

    with local_apt.piggybacked_system(piggyback_def, piggyback_files) \
         as piggybacked:
        yield piggybacked
141

    
142
class Build:
    """
    Build a Hydrilla package.

    Everything happens in memory during __init__: the index.json is loaded
    and validated, referenced files are read and hashed, item (resource/
    mapping) definitions are computed and the source .zip archive is
    prepared.  The write_*() methods only serialize the already-computed
    state to disk.
    """
    def __init__(self, srcdir: Path, index_json_path: Path,
                 piggyback_files: Optional[Path]=None):
        """
        Initialize a build. All files to be included in a distribution package
        are loaded into memory, all data gets validated and all necessary
        computations (e.g. preparing of hashes) are performed.
        """
        self.srcdir          = srcdir.resolve()
        self.piggyback_files = piggyback_files
        # TODO: the piggyback files we set are ignored for now; use them
        if piggyback_files is None:
            # Fall back to a '<srcdir>.foreign-packages' sibling directory
            # when it exists.
            piggyback_default_path = \
                srcdir.parent / f'{srcdir.name}.foreign-packages'
            if piggyback_default_path.exists():
                self.piggyback_files = piggyback_default_path
        # Maps PurePosixPath (path inside the package) -> FileRef.
        self.files_by_path   = {}
        # Processed 'resource' and 'mapping' item definitions, filled in by
        # _process_item().
        self.resource_list   = []
        self.mapping_list    = []

        # A relative index.json path is interpreted relative to srcdir.
        if not index_json_path.is_absolute():
            index_json_path = (self.srcdir / index_json_path)

        index_obj, major = util.load_instance_from_file(index_json_path)

        if major not in (1, 2):
            msg = _('unknown_schema_package_source_{}')\
                .format(index_json_path)
            raise util.UnknownSchemaError(msg)

        # Register index.json itself under its canonical in-package name
        # regardless of where it was actually loaded from.
        index_desired_path = PurePosixPath('index.json')
        self.files_by_path[index_desired_path] = \
            FileRef(index_desired_path, index_json_path.read_bytes())

        self._process_index_json(index_obj, major)

    def _process_file(self, filename: Union[str, PurePosixPath],
                      piggybacked: Piggybacked,
                      include_in_distribution: bool=True):
        """
        Resolve 'filename' relative to srcdir, load it to memory (if not loaded
        before), compute its hash and store its information in
        'self.files_by_path'.

        'filename' shall represent a relative path within package directory.

        if 'include_in_distribution' is True it shall cause the file to not only
        be included in the source package's zipfile, but also written as one of
        built package's files.

        For each file an attempt is made to resolve it using 'piggybacked'
        object. If a file is found and pulled from foreign software packaging
        system this way, it gets automatically excluded from inclusion in
        Hydrilla source package's zipfile.

        Return file's reference object (a dict with 'file' and 'sha256' keys)
        that can be included in JSON definitions of various kinds.
        """
        include_in_source_archive = True

        # Reject path traversal before touching the filesystem.
        desired_path = PurePosixPath(filename)
        if '..' in desired_path.parts:
            msg = _('path_contains_double_dot_{}').format(filename)
            raise FileReferenceError(msg)

        # Piggybacked (foreign-system) files take precedence over srcdir.
        path = piggybacked.resolve_file(desired_path)
        if path is None:
            path = (self.srcdir / desired_path).resolve()
            # NOTE(review): Path.is_relative_to() was added in Python 3.9,
            # yet the file's header claims Python 3.7 compatibility - confirm
            # the supported Python baseline.
            if not path.is_relative_to(self.srcdir):
                raise FileReferenceError(_('loading_{}_outside_package_dir')
                                         .format(filename))

            # index.json was already registered in __init__; a definition
            # must not reference it by name.
            if str(path.relative_to(self.srcdir)) == 'index.json':
                raise FileReferenceError(_('loading_reserved_index_json'))
        else:
            include_in_source_archive = False

        # Load each file only once; later references reuse the same FileRef.
        file_ref = self.files_by_path.get(desired_path)
        if file_ref is None:
            if not path.is_file():
                msg = _('referenced_file_{}_missing').format(desired_path)
                raise FileReferenceError(msg)

            file_ref = FileRef(desired_path, path.read_bytes())
            self.files_by_path[desired_path] = file_ref

        # Flags are only ever widened/narrowed monotonically: once any
        # reference wants distribution, the file stays distributed; once any
        # reference excludes it from the source archive, it stays excluded.
        if include_in_distribution:
            file_ref.include_in_distribution = True

        if not include_in_source_archive:
            file_ref.include_in_source_archive = False

        return file_ref.make_ref_dict()

    def _prepare_source_package_zip(self, source_name: str,
                                    piggybacked: Piggybacked) -> str:
        """
        Create and store in memory a .zip archive containing files needed to
        build this source package.

        'source_name' shall not contain any slashes ('/').

        Return zipfile's sha256 sum's hexstring.
        """
        tf = TemporaryFile()
        # Package files land under '<source_name>/', piggybacked package
        # files under '<source_name>.foreign-packages/'.
        source_dir_path      = PurePosixPath(source_name)
        piggybacked_dir_path = PurePosixPath(f'{source_name}.foreign-packages')

        with zipfile.ZipFile(tf, 'w') as zf:
            for file_ref in self.files_by_path.values():
                if file_ref.include_in_source_archive:
                    zf.writestr(str(source_dir_path / file_ref.path),
                                file_ref.contents)

            for desired_path, real_path in piggybacked.archive_files():
                zf.writestr(str(piggybacked_dir_path / desired_path),
                            real_path.read_bytes())

        # Keep the whole archive in memory so write_*() methods can emit it
        # without re-running the build.
        tf.seek(0)
        self.source_zip_contents = tf.read()

        return sha256(self.source_zip_contents).digest().hex()

    def _process_item(self, item_def: dict, piggybacked: Piggybacked):
        """
        Process 'item_def' as definition of a resource/mapping and store in
        memory its processed form and files used by it.

        Return a minimal item reference suitable for using in source
        description.
        """
        # Properties copied verbatim from the input definition; optional
        # ones are only copied when present.
        copy_props = ['type', 'identifier', 'long_name', 'description']
        for prop in ('comment', 'uuid'):
            if prop in item_def:
                copy_props.append(prop)

        if item_def['type'] == 'resource':
            item_list = self.resource_list

            copy_props.append('revision')

            # Script files get loaded and included in the distribution.
            script_file_refs = [self._process_file(f['file'], piggybacked)
                                for f in item_def.get('scripts', [])]

            deps = [{'identifier': res_ref['identifier']}
                    for res_ref in item_def.get('dependencies', [])]

            new_item_obj = {
                # Dependencies required by the piggybacked system come first.
                'dependencies': [*piggybacked.package_must_depend, *deps],
                'scripts':      script_file_refs
            }
        else:
            item_list = self.mapping_list

            # Keep only the resource identifier for each payload pattern.
            payloads = {}
            for pat, res_ref in item_def.get('payloads', {}).items():
                payloads[pat] = {'identifier': res_ref['identifier']}

            new_item_obj = {
                'payloads': payloads
            }

        new_item_obj.update([(p, item_def[p]) for p in copy_props])

        new_item_obj['version'] = util.normalize_version(item_def['version'])
        new_item_obj['$schema'] = f'{schemas_root}/api_{item_def["type"]}_description-1.schema.json'
        new_item_obj['source_copyright'] = self.copyright_file_refs
        new_item_obj['source_name'] = self.source_name
        new_item_obj['generated_by'] = generated_by

        item_list.append(new_item_obj)

        # Minimal reference used in the source description's 'definitions'.
        props_in_ref = ('type', 'identifier', 'version', 'long_name')
        return dict([(prop, new_item_obj[prop]) for prop in props_in_ref])

    def _process_index_json(self, index_obj: dict,
                            major_schema_version: int) -> None:
        """
        Process 'index_obj' as contents of source package's index.json and store
        in memory this source package's zipfile as well as package's individual
        files and computed definitions of the source package and items defined
        in it.
        """
        index_validator(major_schema_version).validate(index_obj)

        # Extract the full version sequence (e.g. [1, 0, 1]) from the
        # '$schema' URL; schema validation above guarantees the URL matches.
        match = re.match(r'.*-((([1-9][0-9]*|0)\.)+)schema\.json$',
                         index_obj['$schema'])
        self.source_schema_ver = \
            [int(n) for n in filter(None, match.group(1).split('.'))]

        out_schema = f'{schemas_root}/api_source_description-1.schema.json'

        self.source_name = index_obj['source_name']

        generate_spdx = index_obj.get('reuse_generate_spdx_report', False)
        if generate_spdx:
            contents  = generate_spdx_report(self.srcdir)
            spdx_path = PurePosixPath('report.spdx')
            spdx_ref  = FileRef(spdx_path, contents)

            # The report is generated, not a source file, so it never goes
            # into the source archive.
            spdx_ref.include_in_source_archive = False
            self.files_by_path[spdx_path] = spdx_ref

        # 'piggyback_on' only exists from schema version 1.1 onwards.
        piggyback_def = None
        if self.source_schema_ver >= [1, 1] and 'piggyback_on' in index_obj:
            piggyback_def = index_obj['piggyback_on']

        with piggybacked_system(piggyback_def, self.piggyback_files) \
             as piggybacked:
            copyright_to_process = [
                *(file_ref['file'] for file_ref in index_obj['copyright']),
                *piggybacked.package_license_files
            ]
            self.copyright_file_refs = [self._process_file(f, piggybacked)
                                        for f in copyright_to_process]

            # A generated report.spdx must be listed in 'copyright' (which
            # would have set its include_in_distribution flag above).
            if generate_spdx and not spdx_ref.include_in_distribution:
                raise FileReferenceError(_('report_spdx_not_in_copyright_list'))

            item_refs = [self._process_item(d, piggybacked)
                         for d in index_obj['definitions']]

            # Additional files go into the source archive only.
            for file_ref in index_obj.get('additional_files', []):
                self._process_file(file_ref['file'], piggybacked,
                                   include_in_distribution=False)

            zipfile_sha256 = self._prepare_source_package_zip\
                (self.source_name, piggybacked)

            source_archives_obj = {'zip' : {'sha256': zipfile_sha256}}

        self.source_description = {
            '$schema':            out_schema,
            'source_name':        self.source_name,
            'source_copyright':   self.copyright_file_refs,
            'upstream_url':       index_obj['upstream_url'],
            'definitions':        item_refs,
            'source_archives':    source_archives_obj,
            'generated_by':       generated_by
        }

        if 'comment' in index_obj:
            self.source_description['comment'] = index_obj['comment']

    def write_source_package_zip(self, dstpath: Path):
        """
        Create a .zip archive containing files needed to build this source
        package and write it at 'dstpath'.
        """
        with open(dstpath, 'wb') as output:
            output.write(self.source_zip_contents)

    def write_package_files(self, dstpath: Path):
        """Write package files under 'dstpath' for distribution."""
        # Distributed files are content-addressed by their SHA256 hexstring.
        file_dir_path = (dstpath / 'file' / 'sha256').resolve()
        file_dir_path.mkdir(parents=True, exist_ok=True)

        for file_ref in self.files_by_path.values():
            if file_ref.include_in_distribution:
                file_path = file_dir_path / file_ref.contents_hash
                file_path.write_bytes(file_ref.contents)

        source_dir_path = (dstpath / 'source').resolve()
        source_dir_path.mkdir(parents=True, exist_ok=True)
        source_name = self.source_description["source_name"]

        # NOTE(review): 'wt' without an explicit encoding uses the locale's
        # preferred encoding - confirm whether UTF-8 should be forced here.
        with open(source_dir_path / f'{source_name}.json', 'wt') as output:
            json.dump(self.source_description, output)

        with open(source_dir_path / f'{source_name}.zip', 'wb') as output:
            output.write(self.source_zip_contents)

        # One JSON file per item version, under '<type>/<identifier>/'.
        for item_type, item_list in [
                ('resource', self.resource_list),
                ('mapping', self.mapping_list)
        ]:
            item_type_dir_path = (dstpath / item_type).resolve()

            for item_def in item_list:
                item_dir_path = item_type_dir_path / item_def['identifier']
                item_dir_path.mkdir(parents=True, exist_ok=True)

                version = '.'.join([str(n) for n in item_def['version']])
                with open(item_dir_path / version, 'wt') as output:
                    json.dump(item_def, output)
430

    
431
# Reusable click type for options that must name an existing directory.
dir_type = click.Path(exists=True, file_okay=False, resolve_path=True)

@click.command(help=_('build_package_from_srcdir_to_dstdir'))
@click.option('-s', '--srcdir', default='./', type=dir_type, show_default=True,
              help=_('source_directory_to_build_from'))
@click.option('-i', '--index-json', default='index.json', type=click.Path(),
              help=_('path_instead_of_index_json'))
@click.option('-p', '--piggyback-files', type=click.Path(),
              help=_('path_instead_for_piggyback_files'))
@click.option('-d', '--dstdir', type=dir_type, required=True,
              help=_('built_package_files_destination'))
@click.version_option(version=_version.version, prog_name='Hydrilla builder',
                      message=_('%(prog)s_%(version)s_license'),
                      help=_('version_printing'))
def perform(srcdir, index_json, piggyback_files, dstdir):
    """
    Execute Hydrilla builder to turn source package into a distributable one.

    This command is meant to be the entry point of hydrilla-builder command
    exported by this package.
    """
    # piggyback_files is None when '-p' was not given; the 'and' keeps it
    # None instead of constructing Path(None).
    build = Build(Path(srcdir), Path(index_json),
                  piggyback_files and Path(piggyback_files))
    build.write_package_files(Path(dstdir))
(3-3/6)