Project

General

Profile

Download (16.7 KB) Statistics
| Branch: | Tag: | Revision:

hydrilla-builder / src / hydrilla / builder / build.py @ 98687e91

1
# SPDX-License-Identifier: AGPL-3.0-or-later
2

    
3
# Building Hydrilla packages.
4
#
5
# This file is part of Hydrilla
6
#
7
# Copyright (C) 2022 Wojtek Kosior
8
#
9
# This program is free software: you can redistribute it and/or modify
10
# it under the terms of the GNU Affero General Public License as
11
# published by the Free Software Foundation, either version 3 of the
12
# License, or (at your option) any later version.
13
#
14
# This program is distributed in the hope that it will be useful,
15
# but WITHOUT ANY WARRANTY; without even the implied warranty of
16
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
# GNU Affero General Public License for more details.
18
#
19
# You should have received a copy of the GNU Affero General Public License
20
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
21
#
22
#
23
# I, Wojtek Kosior, thereby promise not to sue for violation of this
24
# file's license. Although I request that you do not make use of this
# code in a proprietary program, I am not going to enforce this in court.
26

    
27
# Enable using with Python 3.7.
28
from __future__ import annotations
29

    
30
import json
31
import re
32
import zipfile
33
import subprocess
34
from pathlib import Path, PurePosixPath
35
from hashlib import sha256
36
from sys import stderr
37
from contextlib import contextmanager
38
from tempfile import TemporaryDirectory, TemporaryFile
39
from typing import Optional, Iterable, Union
40

    
41
import jsonschema
42
import click
43

    
44
from .. import util
45
from . import _version
46
from . import local_apt
47
from .piggybacking import Piggybacked
48
from .common_errors import *
49

    
50
# Directory containing this module; used to locate bundled data files.
here = Path(__file__).resolve().parent

# gettext-style translation function loaded from the bundled locales.
_ = util.translation(here / 'locales').gettext

# JSON Schema validator for source packages' index.json files.
index_validator = util.validator_for('package_source-2.schema.json')

# Base URI under which Hydrilla's JSON schemas are published.
schemas_root = 'https://hydrilla.koszko.org/schemas'

# Stamped into every generated JSON definition to identify the producer.
generated_by = {
    'name': 'hydrilla.builder',
    'version': _version.version
}
62

    
63
class ReuseError(SubprocessError):
    """Raised when the REUSE tool cannot be run or exits with an error."""
67

    
68
def generate_spdx_report(root: Path) -> bytes:
    """
    Use REUSE tool to generate an SPDX report for sources under 'root' and
    return the report's contents as 'bytes'.

    The tree is first linted for REUSE compliance; if linting fails, a
    ReuseError carrying the linting output is raised.

    A ReuseError is also raised when the reuse tool is not installed or
    either invocation exits with a non-zero status.
    """
    root_arg = str(root)
    # Lint first so that a non-compliant tree fails before any report is
    # produced; the spdx run's output is what we ultimately return.
    output = ''
    for subcommand in ('lint', 'spdx'):
        command = ['reuse', '--root', root_arg, subcommand]
        try:
            completed = subprocess.run(command, capture_output=True, text=True)
        except FileNotFoundError:
            raise ReuseError(
                _('couldnt_execute_{}_is_it_installed').format('reuse'))

        if completed.returncode != 0:
            raise ReuseError(
                _('command_{}_failed').format(' '.join(command)), completed)

        output = completed.stdout

    return output.encode()
94

    
95
class FileRef:
    """Reference to a single file that may end up in the built package."""

    def __init__(self, path: PurePosixPath, contents: bytes) -> None:
        """Record the file's path and contents and precompute its SHA-256."""
        self.path = path
        self.contents = contents
        self.contents_hash = sha256(contents).hexdigest()
        # Both flags get adjusted later while index.json is being processed.
        self.include_in_distribution = False
        self.include_in_source_archive = True

    def make_ref_dict(self) -> dict[str, str]:
        """
        Return a dict representation of this reference, suitable for
        embedding in generated JSON definitions.
        """
        return {'file': str(self.path), 'sha256': self.contents_hash}
115

    
116
@contextmanager
def piggybacked_system(piggyback_def: Optional[dict],
                       piggyback_files: Optional[Path]) \
                       -> Iterable[Piggybacked]:
    """
    Resolve resources from a foreign software packaging system. Optionally, use
    package files (.deb's, etc.) from a specified directory instead of resolving
    and downloading them.
    """
    # No piggybacking requested - yield an empty resolver.
    if piggyback_def is None:
        yield Piggybacked()
        return

    # apt is the only supported system right now
    assert piggyback_def['system'] == 'apt'

    with local_apt.piggybacked_system(piggyback_def, piggyback_files) \
         as piggybacked:
        yield piggybacked
134

    
135
class Build:
    """
    Build a Hydrilla package.

    All loading, validation and hashing happens in the constructor; the
    write_*() methods only dump the already-computed data to disk.
    """
    def __init__(self, srcdir: Path, index_json_path: Path,
                 piggyback_files: Optional[Path]=None):
        """
        Initialize a build. All files to be included in a distribution package
        are loaded into memory, all data gets validated and all necessary
        computations (e.g. preparing of hashes) are performed.

        'index_json_path' is resolved against 'srcdir' when relative.
        """
        self.srcdir          = srcdir.resolve()
        self.piggyback_files = piggyback_files
        # TODO: the piggyback files we set are ignored for now; use them
        if piggyback_files is None:
            # default to a sibling '<srcdir name>.foreign-packages' directory
            piggyback_default_path = \
                srcdir.parent / f'{srcdir.name}.foreign-packages'
            if piggyback_default_path.exists():
                self.piggyback_files = piggyback_default_path
        # maps PurePosixPath (relative to srcdir) to FileRef
        self.files_by_path   = {}
        self.resource_list   = []
        self.mapping_list    = []

        if not index_json_path.is_absolute():
            index_json_path = (self.srcdir / index_json_path)

        with open(index_json_path, 'rt') as index_file:
            index_json_text = index_file.read()

        index_obj = json.loads(util.strip_json_comments(index_json_text))

        # The original index.json text (comments intact) always lands in the
        # source archive under the reserved name 'index.json'.
        index_desired_path = PurePosixPath('index.json')
        self.files_by_path[index_desired_path] = \
            FileRef(index_desired_path, index_json_text.encode())

        self._process_index_json(index_obj)

    def _process_file(self, filename: Union[str, PurePosixPath],
                      piggybacked: Piggybacked,
                      include_in_distribution: bool=True):
        """
        Resolve 'filename' relative to srcdir, load it to memory (if not loaded
        before), compute its hash and store its information in
        'self.files_by_path'.

        'filename' shall represent a relative path within package directory.

        if 'include_in_distribution' is True it shall cause the file to not only
        be included in the source package's zipfile, but also written as one of
        built package's files.

        For each file an attempt is made to resolve it using 'piggybacked'
        object. If a file is found and pulled from foreign software packaging
        system this way, it gets automatically excluded from inclusion in
        Hydrilla source package's zipfile.

        Raise FileReferenceError if the path escapes the package directory,
        names the reserved 'index.json' or does not refer to a regular file.

        Return file's reference object that can be included in JSON definitions
        of various kinds.
        """
        include_in_source_archive = True

        desired_path = PurePosixPath(filename)
        if '..' in desired_path.parts:
            msg = _('path_contains_double_dot_{}').format(filename)
            raise FileReferenceError(msg)

        path = piggybacked.resolve_file(desired_path)
        if path is None:
            # not piggybacked - the file must come from the package directory
            path = (self.srcdir / desired_path).resolve()
            if not path.is_relative_to(self.srcdir):
                raise FileReferenceError(_('loading_{}_outside_package_dir')
                                         .format(filename))

            if str(path.relative_to(self.srcdir)) == 'index.json':
                raise FileReferenceError(_('loading_reserved_index_json'))
        else:
            # files pulled from a foreign packaging system are not shipped in
            # the source archive
            include_in_source_archive = False

        file_ref = self.files_by_path.get(desired_path)
        if file_ref is None:
            if not path.is_file():
                msg = _('referenced_file_{}_missing').format(desired_path)
                raise FileReferenceError(msg)

            file_ref = FileRef(desired_path, path.read_bytes())
            self.files_by_path[desired_path] = file_ref

        # the flags only ever get promoted here, so multiple references to the
        # same file combine as expected
        if include_in_distribution:
            file_ref.include_in_distribution = True

        if not include_in_source_archive:
            file_ref.include_in_source_archive = False

        return file_ref.make_ref_dict()

    def _prepare_source_package_zip(self, source_name: str,
                                    piggybacked: Piggybacked) -> str:
        """
        Create and store in memory a .zip archive containing files needed to
        build this source package.

        'source_name' shall not contain any slashes ('/').

        Return zipfile's sha256 sum's hexstring.
        """
        tf = TemporaryFile()
        source_dir_path      = PurePosixPath(source_name)
        piggybacked_dir_path = PurePosixPath(f'{source_name}.foreign-packages')

        with zipfile.ZipFile(tf, 'w') as zf:
            for file_ref in self.files_by_path.values():
                if file_ref.include_in_source_archive:
                    zf.writestr(str(source_dir_path / file_ref.path),
                                file_ref.contents)

            # also ship the foreign package files that were used, so the build
            # can be reproduced without re-downloading them
            for desired_path, real_path in piggybacked.archive_files():
                zf.writestr(str(piggybacked_dir_path / desired_path),
                            real_path.read_bytes())

        tf.seek(0)
        self.source_zip_contents = tf.read()

        return sha256(self.source_zip_contents).digest().hex()

    def _process_item(self, item_def: dict, piggybacked: Piggybacked):
        """
        Process 'item_def' as definition of a resource/mapping and store in
        memory its processed form and files used by it.

        Return a minimal item reference suitable for using in source
        description.
        """
        # properties copied verbatim from the source definition
        copy_props = ['type', 'identifier', 'long_name', 'description']
        for prop in ('comment', 'uuid'):
            if prop in item_def:
                copy_props.append(prop)

        if item_def['type'] == 'resource':
            item_list = self.resource_list

            copy_props.append('revision')

            script_file_refs = [self._process_file(f['file'], piggybacked)
                                for f in item_def.get('scripts', [])]

            deps = [{'identifier': res_ref['identifier']}
                    for res_ref in item_def.get('dependencies', [])]

            new_item_obj = {
                'dependencies': [*piggybacked.package_must_depend, *deps],
                'scripts':      script_file_refs
            }
        else:
            item_list = self.mapping_list

            payloads = {}
            for pat, res_ref in item_def.get('payloads', {}).items():
                payloads[pat] = {'identifier': res_ref['identifier']}

            new_item_obj = {
                'payloads': payloads
            }

        new_item_obj.update([(p, item_def[p]) for p in copy_props])

        new_item_obj['version'] = util.normalize_version(item_def['version'])
        new_item_obj['$schema'] = f'{schemas_root}/api_{item_def["type"]}_description-1.schema.json'
        new_item_obj['source_copyright'] = self.copyright_file_refs
        new_item_obj['source_name'] = self.source_name
        new_item_obj['generated_by'] = generated_by

        item_list.append(new_item_obj)

        props_in_ref = ('type', 'identifier', 'version', 'long_name')
        return dict([(prop, new_item_obj[prop]) for prop in props_in_ref])

    def _process_index_json(self, index_obj: dict):
        """
        Process 'index_obj' as contents of source package's index.json and store
        in memory this source package's zipfile as well as package's individual
        files and computed definitions of the source package and items defined
        in it.
        """
        index_validator.validate(index_obj)
        # extract the dotted schema version (e.g. [1, 1]) from the '$schema'
        # URI; the validator has already guaranteed the URI's shape
        match = re.match(r'.*-((([1-9][0-9]*|0)\.)+)schema\.json$',
                         index_obj['$schema'])
        self.source_schema_ver = \
            [int(n) for n in filter(None, match.group(1).split('.'))]

        out_schema = f'{schemas_root}/api_source_description-1.schema.json'

        self.source_name = index_obj['source_name']

        generate_spdx = index_obj.get('reuse_generate_spdx_report', False)
        if generate_spdx:
            contents  = generate_spdx_report(self.srcdir)
            spdx_path = PurePosixPath('report.spdx')
            spdx_ref  = FileRef(spdx_path, contents)

            # the report is generated, not a source file, so keep it out of
            # the source archive
            spdx_ref.include_in_source_archive = False
            self.files_by_path[spdx_path] = spdx_ref

        piggyback_def = None
        # 'piggyback_on' only exists from schema version 1.1 onwards
        if self.source_schema_ver >= [1, 1] and 'piggyback_on' in index_obj:
            piggyback_def = index_obj['piggyback_on']

        with piggybacked_system(piggyback_def, self.piggyback_files) \
             as piggybacked:
            copyright_to_process = [
                *(file_ref['file'] for file_ref in index_obj['copyright']),
                *piggybacked.package_license_files
            ]
            self.copyright_file_refs = [self._process_file(f, piggybacked)
                                        for f in copyright_to_process]

            # the generated report must itself appear in the copyright list
            # (processing it above flips include_in_distribution to True)
            if generate_spdx and not spdx_ref.include_in_distribution:
                raise FileReferenceError(_('report_spdx_not_in_copyright_list'))

            item_refs = [self._process_item(d, piggybacked)
                         for d in index_obj['definitions']]

            # additional files go into the source archive only
            for file_ref in index_obj.get('additional_files', []):
                self._process_file(file_ref['file'], piggybacked,
                                   include_in_distribution=False)

            zipfile_sha256 = self._prepare_source_package_zip\
                (self.source_name, piggybacked)

            source_archives_obj = {'zip' : {'sha256': zipfile_sha256}}

        self.source_description = {
            '$schema':            out_schema,
            'source_name':        self.source_name,
            'source_copyright':   self.copyright_file_refs,
            'upstream_url':       index_obj['upstream_url'],
            'definitions':        item_refs,
            'source_archives':    source_archives_obj,
            'generated_by':       generated_by
        }

        if 'comment' in index_obj:
            self.source_description['comment'] = index_obj['comment']

    def write_source_package_zip(self, dstpath: Path):
        """
        Create a .zip archive containing files needed to build this source
        package and write it at 'dstpath'.
        """
        with open(dstpath, 'wb') as output:
            output.write(self.source_zip_contents)

    def write_package_files(self, dstpath: Path):
        """Write package files under 'dstpath' for distribution."""
        # distributed files are content-addressed: file/sha256/<hexdigest>
        file_dir_path = (dstpath / 'file' / 'sha256').resolve()
        file_dir_path.mkdir(parents=True, exist_ok=True)

        for file_ref in self.files_by_path.values():
            if file_ref.include_in_distribution:
                file_path = file_dir_path / file_ref.contents_hash
                file_path.write_bytes(file_ref.contents)

        source_dir_path = (dstpath / 'source').resolve()
        source_dir_path.mkdir(parents=True, exist_ok=True)
        source_name = self.source_description["source_name"]

        with open(source_dir_path / f'{source_name}.json', 'wt') as output:
            json.dump(self.source_description, output)

        with open(source_dir_path / f'{source_name}.zip', 'wb') as output:
            output.write(self.source_zip_contents)

        for item_type, item_list in [
                ('resource', self.resource_list),
                ('mapping', self.mapping_list)
        ]:
            item_type_dir_path = (dstpath / item_type).resolve()

            for item_def in item_list:
                item_dir_path = item_type_dir_path / item_def['identifier']
                item_dir_path.mkdir(parents=True, exist_ok=True)

                # one JSON file per item, named after its dotted version
                version = '.'.join([str(n) for n in item_def['version']])
                with open(item_dir_path / version, 'wt') as output:
                    json.dump(item_def, output)
419

    
420
dir_type = click.Path(exists=True, file_okay=False, resolve_path=True)
421

    
422
@click.command(help=_('build_package_from_srcdir_to_dstdir'))
@click.option('-s', '--srcdir', default='./', type=dir_type, show_default=True,
              help=_('source_directory_to_build_from'))
@click.option('-i', '--index-json', default='index.json', type=click.Path(),
              help=_('path_instead_of_index_json'))
@click.option('-p', '--piggyback-files', type=click.Path(),
              help=_('path_instead_for_piggyback_files'))
@click.option('-d', '--dstdir', type=dir_type, required=True,
              help=_('built_package_files_destination'))
@click.version_option(version=_version.version, prog_name='Hydrilla builder',
                      message=_('%(prog)s_%(version)s_license'),
                      help=_('version_printing'))
def perform(srcdir, index_json, piggyback_files, dstdir):
    """
    Turn a Hydrilla source package into a distributable one.

    This is the entry point of the hydrilla-builder command exported by this
    package.
    """
    piggyback_path = piggyback_files and Path(piggyback_files)
    build = Build(Path(srcdir), Path(index_json), piggyback_path)
    build.write_package_files(Path(dstdir))
(3-3/6)