From dd3e81a91d809b28ebd8a513e0572d81b96055f9 Mon Sep 17 00:00:00 2001 From: Emanuele Aina <emanuele.aina@collabora.com> Date: Thu, 4 Nov 2021 22:51:13 +0100 Subject: [PATCH 1/3] fetch-downstream: Drop check_duplicates() Since we moved all git repositories directly under a single `pkg/` group instead of the `pkg/$component/` nested categories, checking for duplicates is no longer relevant. Signed-off-by: Emanuele Aina <emanuele.aina@collabora.com> --- bin/packaging-data-fetch-downstream | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/bin/packaging-data-fetch-downstream b/bin/packaging-data-fetch-downstream index 69b6062..bc33e81 100755 --- a/bin/packaging-data-fetch-downstream +++ b/bin/packaging-data-fetch-downstream @@ -4,7 +4,6 @@ from __future__ import annotations import argparse import fnmatch -import itertools import logging import urllib.parse @@ -192,21 +191,6 @@ class DownstreamFetcher: thread_pool(num_worker_threads, _fetch_license_report, projects, num_retries=2) -def check_duplicates(projects, data): - def packagenamefunc(p): - return p.packagename - - for packagename, projects in itertools.groupby( - sorted(projects, key=packagenamefunc), packagenamefunc - ): - paths = [p.path_with_namespace for p in projects] - if len(paths) == 1: - continue - msg = f"Project name '{packagename}' is ambiguous across projects: {', '.join(paths)}" - logging.error(msg) - data["packages"][packagename].setdefault("errors", []).append({"msg": msg}) - - if __name__ == "__main__": parser = argparse.ArgumentParser( description="Fetch data from the GitLab packaging projects" @@ -270,6 +254,4 @@ if __name__ == "__main__": "channels": d.channels, } - check_duplicates(d.projects, data) - yaml.dump(data, args.yaml, width=120, Dumper=yaml.CSafeDumper) -- GitLab From 33d698eca396348efac6f530df56d6c0d9598c91 Mon Sep 17 00:00:00 2001 From: Emanuele Aina <emanuele.aina@collabora.com> Date: Sat, 6 Nov 2021 11:22:28 +0100 Subject: [PATCH 2/3] fetch-downstream: Fetch 
component for v2021 as well Signed-off-by: Emanuele Aina <emanuele.aina@collabora.com> --- bin/packaging-data-fetch-downstream | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bin/packaging-data-fetch-downstream b/bin/packaging-data-fetch-downstream index bc33e81..d31e61d 100755 --- a/bin/packaging-data-fetch-downstream +++ b/bin/packaging-data-fetch-downstream @@ -156,8 +156,9 @@ class DownstreamFetcher: def _fetch_license_report(project): for branch in project.branches.values(): - # Not needed really but using it to avoid trigger unneeded http queries - if branch.name.split("/")[-1].split("-")[0] <= "v2021": + release = branch.name.split("/")[-1].split("-")[0] + if release < "v2021": + # prior to v2021 the component was not tracked in git continue logging.debug( f"Checking component for package {project.packagename} on branch {branch.name}" @@ -173,6 +174,9 @@ class DownstreamFetcher: continue component = component.decode("utf-8").strip() branch.component = component + if release < "v2022": + # prior to v2022 the license reports were not enforced + continue logging.debug( f"Checking license report for package {project.packagename} on branch {branch.name}" ) -- GitLab From 7059e89b87000ec84e373d615eb89d73fcb4cb6e Mon Sep 17 00:00:00 2001 From: Emanuele Aina <emanuele.aina@collabora.com> Date: Sat, 6 Nov 2021 11:23:22 +0100 Subject: [PATCH 3/3] fetch-downstream: Increase parallelism retrieving component Add more parallel workers when retrieving the component and licensing report from git to speed up the process. 
Signed-off-by: Emanuele Aina <emanuele.aina@collabora.com> --- bin/packaging-data-fetch-downstream | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/packaging-data-fetch-downstream b/bin/packaging-data-fetch-downstream index d31e61d..0980a5d 100755 --- a/bin/packaging-data-fetch-downstream +++ b/bin/packaging-data-fetch-downstream @@ -152,7 +152,7 @@ class DownstreamFetcher: branch.tags.sort() def fetch_component_license_report(self, projects): - num_worker_threads = 10 + num_worker_threads = 30 def _fetch_license_report(project): for branch in project.branches.values(): -- GitLab