diff --git a/scripts/test_urls.py b/scripts/test_urls.py
index 96eb1a018ab896e519e904f0d661df46ffdc5e7d..c23157453c59f20bcc38085717ab752bef3a581d 100755
--- a/scripts/test_urls.py
+++ b/scripts/test_urls.py
@@ -20,6 +20,8 @@ EXCLUDE = [
     "*://phabricator.apertis.org/T*",   # it's not public anyway :(
 ]
 
+urls = set()
+
 def get_link(url):
     link = url.group("link")
 
@@ -35,8 +37,7 @@ def get_link(url):
         if fnmatch.fnmatch(link, exclude):
             return
 
-    if not link in urls:
-        urls.append(link)
+    urls.add(link)
 
 
 def parse_file(filename):
@@ -56,8 +57,6 @@ def parse_file(filename):
         pattern = re.compile("href=\"(?P<link>.*?)\"")
         doc = pattern.sub(get_link, text)
 
-urls = []
-
 # Parse aliases
 for root, dirs, files in os.walk(sys.argv[1]):
     for file in files:
@@ -67,6 +66,7 @@ for root, dirs, files in os.walk(sys.argv[1]):
             except:
                 print("Failed to parse %s/%s" % (root, file))
 
+urls = list(urls)
 # This seems to trigger issues with sites not wanting us to crawl them
 #urls.sort()
 # So let's randomise the list to see if that helps...
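
Below is a minimal standalone sketch of the flow this patch leaves behind: links are
collected into a set (so duplicates are dropped at insert time), then converted back to
a list once parsing is finished so the order can be randomised. The EXCLUDE filtering
and the shuffle call shown here are assumptions reconstructed from the surrounding
comments and context lines, not code visible in these hunks.

    import fnmatch
    import random

    # Glob patterns to skip, as in the script's EXCLUDE list
    EXCLUDE = ["*://phabricator.apertis.org/T*"]

    urls = set()

    def get_link(match):
        """re.sub callback: record each href unless it matches an exclude pattern."""
        link = match.group("link")
        for exclude in EXCLUDE:
            if fnmatch.fnmatch(link, exclude):
                return
        urls.add(link)  # set insertion replaces the old "if not link in urls" check

    # ... parse_file() / os.walk() populate `urls` here ...

    urls = list(urls)        # sets are unordered; convert so the order can be controlled
    random.shuffle(urls)     # assumed: randomise instead of sorting, per the comment above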