From a51aa6dec031adcc991752bd51b4acfabc7fa3e1 Mon Sep 17 00:00:00 2001 From: Ariel D'Alessandro <ariel.dalessandro@collabora.com> Date: Fri, 4 Jun 2021 15:21:17 -0300 Subject: [PATCH] ci-license-scan: Manually check whitelisted files Files whitelisted in debian/apertis/copyright.whitelist are used to instruct the scanner process not to raise an error, which allows the pipeline to succeed and the change to be merged. However, as a side effect, the entries in debian/apertis/copyright with offending licenses are being removed, dropping important information. The BOM generator for Apertis binary packages needs the complete information. The scan-copyright scanner must therefore be called without a whitelist, so that the pipeline keeps license information for all the files present in the source package. This MR adapts the ci-license-scan script to generate license information for all the files, without failing on whitelisted offending ones. Link: https://phabricator.apertis.org/T7878 Signed-off-by: Ariel D'Alessandro <ariel.dalessandro@collabora.com> --- .../overlay/usr/bin/ci-license-scan | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/package-source-builder/overlay/usr/bin/ci-license-scan b/package-source-builder/overlay/usr/bin/ci-license-scan index 8aefd7c..dd705c1 100755 --- a/package-source-builder/overlay/usr/bin/ci-license-scan +++ b/package-source-builder/overlay/usr/bin/ci-license-scan @@ -17,6 +17,7 @@ import yaml import textwrap import unicodedata import fossology +import re # this is necessary to eliminate references in the generated YAML # Perl tools use YAML::Tiny which doesn’t support references. 
@@ -90,7 +91,6 @@ except: import warnings warnings.warn("pathspec not available, copyright ignore patterns will not match exactly") import fnmatch - import re NOTDOUBLESTAR = re.compile(r'\.\*(?!\.\*)') @@ -198,12 +198,19 @@ def gitignore2pat(s): def parse_whitelist(filename: str): """ Attempt to open and parse a whilelist, do nothing if it’s not there + Note that a whitelist can only contain git ignore patterns. """ with Path(filename) as f: if f.is_file(): - return gitignore2pat(f.read_text()) + return gitignore2pat(f.read_text())['ignore']['pattern'] else: - return {} + return [] + +def is_whitelisted(whitelist_patterns, filename): + for whitelist_pattern in whitelist_patterns: + if re.match(whitelist_pattern, filename): + return True + return False def detect_license_for(path, c=None): if c is None: @@ -252,7 +259,7 @@ def reindent_multiline(s: str) -> str: lines = s.split('\n') return '\n '.join([l.lstrip() for l in lines]) -def configure_scanner(whitelists): +def configure_scanner(): """ Merge our metadata with that of the Debian package """ @@ -265,8 +272,6 @@ def configure_scanner(whitelists): pass extra_patterns = [gitignore2pat(builtin_ignores)] - for whitelist in whitelists: - extra_patterns += [parse_whitelist(whitelist)] new_yaml = yaml.safe_dump(merge_scan_patterns(debian_patterns, *extra_patterns), default_style='|') scan_patterns.write_text(new_yaml) copyright_overrides = {} @@ -348,11 +353,13 @@ def main(): if disallowlist: print(f"Disallowed licenses: {disallowlist}") disallowlist = [d.lower() for d in disallowlist] - print(f"Using whitelists: {args.whitelists}") - configure_scanner(args.whitelists) + configure_scanner() Path('debian/apertis/copyright').touch(exist_ok=True) sys.stdout.flush() + print(f"Using whitelists: {args.whitelists}") + whitelist_patterns = [re.compile(p) for whitelist in args.whitelists for p in parse_whitelist(whitelist)] + with open('debian/apertis/copyright.new', 'wt', buffering=1) as f: print('Format: 
https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\n', file=f) scan_copyrights(_out=f) @@ -395,12 +402,15 @@ def main(): copyright, license = detect_license_for(f, debian_copyrights) if copyright is not None and license is not None: if license.synopsis.rstrip('+').lower() in disallowlist: - bad_licenses.add(license.synopsis) + if not is_whitelisted(whitelist_patterns, f): + bad_licenses.add(license.synopsis) fixups.setdefault(copyright, {}).setdefault(license, []).append(p) else: unknown_licensed = True elif p.license.synopsis.rstrip('+').lower() in disallowlist: - bad_licenses.add(p.license.synopsis) + for f in p.files: + if not is_whitelisted(whitelist_patterns, f): + bad_licenses.add(p.license.synopsis) if p.copyright and is_gibberish(p.copyright): p.copyright = 'no-info-found' if fixups: -- GitLab