From d4981381ed6669441e32e3a3263252146aeb474c Mon Sep 17 00:00:00 2001
From: Emanuele Aina <emanuele.aina@collabora.com>
Date: Thu, 8 Jul 2021 21:36:48 +0200
Subject: [PATCH] test_urls: Use a set to avoid O(N) membership checks

Not that it makes any noticeable difference, but still. :)
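
For reference, a minimal sketch of the two approaches (the example
link is illustrative, not taken from the script):

    link = "https://example.com/"

    # Old approach: O(N) membership test on a list
    urls = []
    if link not in urls:
        urls.append(link)

    # New approach: O(1) average-case membership and insertion
    urls = set()
    urls.add(link)      # duplicates collapse automatically

    # Converted back to a list at the end, since the later
    # randomisation step (not shown in this hunk) expects a list
    urls = list(urls)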

Signed-off-by: Emanuele Aina <emanuele.aina@collabora.com>
---
 scripts/test_urls.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/test_urls.py b/scripts/test_urls.py
index 96eb1a018..c23157453 100755
--- a/scripts/test_urls.py
+++ b/scripts/test_urls.py
@@ -20,6 +20,8 @@ EXCLUDE = [
     "*://phabricator.apertis.org/T*",   # it's not public anyway :(
 ]
 
+urls = set()
+
 def get_link(url):
     link = url.group("link")
 
@@ -35,8 +37,7 @@ def get_link(url):
         if fnmatch.fnmatch(link, exclude):
             return
 
-    if not link in urls:
-        urls.append(link)
+    urls.add(link)
 
 
 def parse_file(filename):
@@ -56,8 +57,6 @@ def parse_file(filename):
         pattern = re.compile("href=\"(?P<link>.*?)\"")
         doc = pattern.sub(get_link, text)
 
-urls = []
-
 # Parse aliases
 for root, dirs, files in os.walk(sys.argv[1]):
     for file in files:
@@ -67,6 +66,7 @@ for root, dirs, files in os.walk(sys.argv[1]):
             except:
                 print("Failed to parse %s/%s" % (root, file))
 
+urls = list(urls)
 # This seems to trigger issues with sites not wanting us to crawl them
 #urls.sort()
 # So let's randomise the list to see if that helps...
-- 
GitLab