From dd3e81a91d809b28ebd8a513e0572d81b96055f9 Mon Sep 17 00:00:00 2001
From: Emanuele Aina <emanuele.aina@collabora.com>
Date: Thu, 4 Nov 2021 22:51:13 +0100
Subject: [PATCH 1/3] fetch-downstream: Drop check_duplicates()

Since we moved all git repositories directly under a single `pkg/` group
instead of the `pkg/$component/` nested categories, checking for
duplicates is no longer relevant.

Signed-off-by: Emanuele Aina <emanuele.aina@collabora.com>
---
 bin/packaging-data-fetch-downstream | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/bin/packaging-data-fetch-downstream b/bin/packaging-data-fetch-downstream
index 69b6062..bc33e81 100755
--- a/bin/packaging-data-fetch-downstream
+++ b/bin/packaging-data-fetch-downstream
@@ -4,7 +4,6 @@ from __future__ import annotations
 
 import argparse
 import fnmatch
-import itertools
 import logging
 import urllib.parse
 
@@ -192,21 +191,6 @@ class DownstreamFetcher:
         thread_pool(num_worker_threads, _fetch_license_report, projects, num_retries=2)
 
 
-def check_duplicates(projects, data):
-    def packagenamefunc(p):
-        return p.packagename
-
-    for packagename, projects in itertools.groupby(
-        sorted(projects, key=packagenamefunc), packagenamefunc
-    ):
-        paths = [p.path_with_namespace for p in projects]
-        if len(paths) == 1:
-            continue
-        msg = f"Project name '{packagename}' is ambiguous across projects: {', '.join(paths)}"
-        logging.error(msg)
-        data["packages"][packagename].setdefault("errors", []).append({"msg": msg})
-
-
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description="Fetch data from the GitLab packaging projects"
@@ -270,6 +254,4 @@ if __name__ == "__main__":
         "channels": d.channels,
     }
 
-    check_duplicates(d.projects, data)
-
     yaml.dump(data, args.yaml, width=120, Dumper=yaml.CSafeDumper)
-- 
GitLab


From 33d698eca396348efac6f530df56d6c0d9598c91 Mon Sep 17 00:00:00 2001
From: Emanuele Aina <emanuele.aina@collabora.com>
Date: Sat, 6 Nov 2021 11:22:28 +0100
Subject: [PATCH 2/3] fetch-downstream: Fetch component for v2021 as well

Signed-off-by: Emanuele Aina <emanuele.aina@collabora.com>
---
 bin/packaging-data-fetch-downstream | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/bin/packaging-data-fetch-downstream b/bin/packaging-data-fetch-downstream
index bc33e81..d31e61d 100755
--- a/bin/packaging-data-fetch-downstream
+++ b/bin/packaging-data-fetch-downstream
@@ -156,8 +156,9 @@ class DownstreamFetcher:
 
         def _fetch_license_report(project):
             for branch in project.branches.values():
-                # Not needed really but using it to avoid trigger unneeded http queries
-                if branch.name.split("/")[-1].split("-")[0] <= "v2021":
+                release = branch.name.split("/")[-1].split("-")[0]
+                if release < "v2021":
+                    # prior to v2021 the component was not tracked in git
                     continue
                 logging.debug(
                     f"Checking component for package {project.packagename} on branch {branch.name}"
@@ -173,6 +174,9 @@ class DownstreamFetcher:
                     continue
                 component = component.decode("utf-8").strip()
                 branch.component = component
+                if release < "v2022":
+                    # prior to v2022 the license reports were not enforced
+                    continue
                 logging.debug(
                     f"Checking license report for package {project.packagename} on branch {branch.name}"
                 )
-- 
GitLab


From 7059e89b87000ec84e373d615eb89d73fcb4cb6e Mon Sep 17 00:00:00 2001
From: Emanuele Aina <emanuele.aina@collabora.com>
Date: Sat, 6 Nov 2021 11:23:22 +0100
Subject: [PATCH 3/3] fetch-downstream: Increase parallelism retrieving
 component

Add more parallel workers when retrieving the component and licensing
report from git to speed up the process.

Signed-off-by: Emanuele Aina <emanuele.aina@collabora.com>
---
 bin/packaging-data-fetch-downstream | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/packaging-data-fetch-downstream b/bin/packaging-data-fetch-downstream
index d31e61d..0980a5d 100755
--- a/bin/packaging-data-fetch-downstream
+++ b/bin/packaging-data-fetch-downstream
@@ -152,7 +152,7 @@ class DownstreamFetcher:
                 branch.tags.sort()
 
     def fetch_component_license_report(self, projects):
-        num_worker_threads = 10
+        num_worker_threads = 30
 
         def _fetch_license_report(project):
             for branch in project.branches.values():
-- 
GitLab