Skip to content
Snippets Groups Projects
Commit a51aa6de authored by Ariel D'Alessandro
Browse files

ci-license-scan: Manually check whitelisted files

Entries in debian/apertis/copyright.whitelist instruct the scanner
process not to raise an error for the matching files, which allows the
pipeline to succeed and the change to be merged. However, as a side
effect, the entries in debian/apertis/copyright with offending licenses
are removed, dropping important information.

The BOM generator for Apertis binary packages needs the complete
information. The scan-copyright scanner must therefore be called without
a whitelist, so that the pipeline keeps license information for all the
files present in the source package.

This MR adapts the ci-license-scan script to generate license information
for all the files, without failing on whitelisted offending ones.

Link: https://phabricator.apertis.org/T7878



Signed-off-by: Ariel D'Alessandro <ariel.dalessandro@collabora.com>
parent fd3df13c
No related branches found
No related tags found
2 merge requests: !226 "T7878: ci-license-scan: Manually check whitelisted files", !93 "WIP: documentation-builder: Rebase on Apertis instead of Debian Buster"
......@@ -17,6 +17,7 @@ import yaml
import textwrap
import unicodedata
import fossology
import re
# this is necessary to eliminate references in the generated YAML
# Perl tools use YAML::Tiny which doesn’t support references.
......@@ -90,7 +91,6 @@ except:
import warnings
warnings.warn("pathspec not available, copyright ignore patterns will not match exactly")
import fnmatch
import re
NOTDOUBLESTAR = re.compile(r'\.\*(?!\.\*)')
......@@ -198,12 +198,19 @@ def gitignore2pat(s):
def parse_whitelist(filename: str):
    """
    Attempt to open and parse a whitelist; yield no patterns if it's not there.

    Note that a whitelist can only contain git ignore patterns.

    filename: path of the whitelist file to read.

    Returns the value stored under ['ignore']['pattern'] in the result of
    gitignore2pat() applied to the file's text, or an empty list when
    filename does not name an existing regular file.
    """
    # A Path is not a resource that needs closing: using it as a context
    # manager was deprecated in Python 3.11 and removed in 3.13.
    path = Path(filename)
    if not path.is_file():
        return []
    return gitignore2pat(path.read_text())['ignore']['pattern']
def is_whitelisted(whitelist_patterns, filename):
    """
    Tell whether a file name matches at least one whitelist pattern.

    whitelist_patterns: iterable of regular expressions (compiled patterns
        or pattern strings); an empty iterable never matches.
    filename: the file name to test, as a str or an os.PathLike object.

    Returns True if any pattern matches at the start of the file name
    (re.match semantics), False otherwise.
    """
    import os  # local import keeps this function self-contained

    # re.match() only accepts str/bytes, so normalise path-like objects first.
    name = os.fspath(filename)
    return any(re.match(pattern, name) for pattern in whitelist_patterns)
def detect_license_for(path, c=None):
if c is None:
......@@ -252,7 +259,7 @@ def reindent_multiline(s: str) -> str:
lines = s.split('\n')
return '\n '.join([l.lstrip() for l in lines])
def configure_scanner(whitelists):
def configure_scanner():
"""
Merge our metadata with that of the Debian package
"""
......@@ -265,8 +272,6 @@ def configure_scanner(whitelists):
pass
extra_patterns = [gitignore2pat(builtin_ignores)]
for whitelist in whitelists:
extra_patterns += [parse_whitelist(whitelist)]
new_yaml = yaml.safe_dump(merge_scan_patterns(debian_patterns, *extra_patterns), default_style='|')
scan_patterns.write_text(new_yaml)
copyright_overrides = {}
......@@ -348,11 +353,13 @@ def main():
if disallowlist:
print(f"Disallowed licenses: {disallowlist}")
disallowlist = [d.lower() for d in disallowlist]
print(f"Using whitelists: {args.whitelists}")
configure_scanner(args.whitelists)
configure_scanner()
Path('debian/apertis/copyright').touch(exist_ok=True)
sys.stdout.flush()
print(f"Using whitelists: {args.whitelists}")
whitelist_patterns = [re.compile(p) for whitelist in args.whitelists for p in parse_whitelist(whitelist)]
with open('debian/apertis/copyright.new', 'wt', buffering=1) as f:
print('Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\n', file=f)
scan_copyrights(_out=f)
......@@ -395,12 +402,15 @@ def main():
copyright, license = detect_license_for(f, debian_copyrights)
if copyright is not None and license is not None:
if license.synopsis.rstrip('+').lower() in disallowlist:
bad_licenses.add(license.synopsis)
if not is_whitelisted(whitelist_patterns, f):
bad_licenses.add(license.synopsis)
fixups.setdefault(copyright, {}).setdefault(license, []).append(p)
else:
unknown_licensed = True
elif p.license.synopsis.rstrip('+').lower() in disallowlist:
bad_licenses.add(p.license.synopsis)
for f in p.files:
if not is_whitelisted(whitelist_patterns, f):
bad_licenses.add(p.license.synopsis)
if p.copyright and is_gibberish(p.copyright):
p.copyright = 'no-info-found'
if fixups:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment