From d4981381ed6669441e32e3a3263252146aeb474c Mon Sep 17 00:00:00 2001
From: Emanuele Aina <emanuele.aina@collabora.com>
Date: Thu, 8 Jul 2021 21:36:48 +0200
Subject: [PATCH] test_urls: Use a set to avoid O(N) insertion

Not that it makes any sensible difference, but still. :)

Signed-off-by: Emanuele Aina <emanuele.aina@collabora.com>
---
 scripts/test_urls.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/test_urls.py b/scripts/test_urls.py
index 96eb1a018..c23157453 100755
--- a/scripts/test_urls.py
+++ b/scripts/test_urls.py
@@ -20,6 +20,8 @@ EXCLUDE = [
     "*://phabricator.apertis.org/T*", # it's not public anyway :(
 ]
 
+urls = set()
+
 
 def get_link(url):
     link = url.group("link")
@@ -35,8 +37,7 @@ def get_link(url):
         if fnmatch.fnmatch(link, exclude):
             return
 
-    if not link in urls:
-        urls.append(link)
+    urls.add(link)
 
 
 def parse_file(filename):
@@ -56,8 +57,6 @@ def parse_file(filename):
     pattern = re.compile("href=\"(?P<link>.*?)\"")
     doc = pattern.sub(get_link, text)
 
-urls = []
-
 # Parse aliases
 for root, dirs, files in os.walk(sys.argv[1]):
     for file in files:
@@ -67,6 +66,7 @@ for root, dirs, files in os.walk(sys.argv[1]):
             except:
                 print("Failed to parse %s/%s" % (root, file))
 
+urls = list(urls)
 # This seems to trigger issues with sites not wanting us to crawl them
 #urls.sort()
 # So let's randomise the list to see if that helps...
-- 
GitLab
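
For readers curious about the complexity claim in the subject line: `if not link in urls` on a list scans every stored element, so deduplicating N links costs O(N^2) overall, while `set.add()` is O(1) amortized, O(N) overall. Below is a minimal standalone micro-benchmark sketch, not part of the patch; the link count, the URL shapes, and the helper names `dedup_list`/`dedup_set` are invented for illustration.

import timeit

N = 10_000
# Generate N links with plenty of duplicates, as crawling HTML pages would.
links = ["https://example.com/page/%d" % (i % 2_000) for i in range(N)]

def dedup_list(items):
    # The old approach: list membership check before each append.
    urls = []
    for link in items:
        if link not in urls:  # scans the whole list: O(len(urls)) per link
            urls.append(link)
    return urls

def dedup_set(items):
    # The patched approach: sets deduplicate on insertion.
    urls = set()
    for link in items:
        urls.add(link)  # hash insertion: O(1) amortized per link
    return urls

# Both approaches keep the same unique links.
assert sorted(dedup_list(links)) == sorted(dedup_set(links))
print("list:", timeit.timeit(lambda: dedup_list(links), number=5))
print("set: ", timeit.timeit(lambda: dedup_set(links), number=5))

Even at this modest N the set version is typically orders of magnitude faster on CPython, though for a script that then hits the network for every URL the difference is indeed negligible, as the commit message concedes. The trailing `urls = list(urls)` in the patch converts back to a list so the later shuffling of the crawl order keeps working.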