diff --git a/bin/packaging-data-fetch-downstream b/bin/packaging-data-fetch-downstream index 69b60625f3465726f8ba68064d6d70282af4cebe..0980a5d1f6068fc625f157550432450d6139bdbc 100755 --- a/bin/packaging-data-fetch-downstream +++ b/bin/packaging-data-fetch-downstream @@ -4,7 +4,6 @@ from __future__ import annotations import argparse import fnmatch -import itertools import logging import urllib.parse @@ -153,12 +152,13 @@ class DownstreamFetcher: branch.tags.sort() def fetch_component_license_report(self, projects): - num_worker_threads = 10 + num_worker_threads = 30 def _fetch_license_report(project): for branch in project.branches.values(): - # Not needed really but using it to avoid trigger unneeded http queries - if branch.name.split("/")[-1].split("-")[0] <= "v2021": + release = branch.name.split("/")[-1].split("-")[0] + if release < "v2021": + # prior to v2021 the component was not tracked in git continue logging.debug( f"Checking component for package {project.packagename} on branch {branch.name}" @@ -174,6 +174,9 @@ class DownstreamFetcher: continue component = component.decode("utf-8").strip() branch.component = component + if release < "v2022": + # prior to v2022 the license reports were not enforced + continue logging.debug( f"Checking license report for package {project.packagename} on branch {branch.name}" ) @@ -192,21 +195,6 @@ class DownstreamFetcher: thread_pool(num_worker_threads, _fetch_license_report, projects, num_retries=2) -def check_duplicates(projects, data): - def packagenamefunc(p): - return p.packagename - - for packagename, projects in itertools.groupby( - sorted(projects, key=packagenamefunc), packagenamefunc - ): - paths = [p.path_with_namespace for p in projects] - if len(paths) == 1: - continue - msg = f"Project name '{packagename}' is ambiguous across projects: {', '.join(paths)}" - logging.error(msg) - data["packages"][packagename].setdefault("errors", []).append({"msg": msg}) - - if __name__ == "__main__": parser = argparse.ArgumentParser( description="Fetch data from the GitLab packaging projects" @@ -270,6 +258,4 @@ if __name__ == "__main__": "channels": d.channels, } - check_duplicates(d.projects, data) - yaml.dump(data, args.yaml, width=120, Dumper=yaml.CSafeDumper)