Skip to content
Snippets Groups Projects
Commit e1e4a0df authored by Emanuele Aina
Browse files

test_urls: Use a threadpool for the HTTP requests


Speed up the querying by using a threadpool.

Signed-off-by: Emanuele Aina <emanuele.aina@collabora.com>
parent 5dc9655b
No related branches found
No related tags found
1 merge request: !278 "Speedup test_urls"
......@@ -11,6 +11,8 @@ import sys
import traceback
from urllib.parse import urlparse
import urllib3
import asyncio
import concurrent.futures
import time
import fnmatch
import textwrap
......@@ -87,10 +89,7 @@ adapter = requests.adapters.HTTPAdapter(max_retries=3)
session.mount('http://', adapter)
session.mount('https://', adapter)
broken = []
for url in urls:
print("%s : " %(url), end='')
sys.stdout.flush()
def url_check(url):
start = time.perf_counter()
status = None
resp = None
......@@ -116,9 +115,15 @@ for url in urls:
print(f"ERROR(2): {url} {resp.status_code if resp else '-'}\n{e_str}")
end = time.perf_counter()
if not status:
broken.append(url)
print(url, "OK" if status else "FAIL", resp.status_code if resp else "-", f"{end - start:0.4f}s")
return url, status
print(f"Testing {len(urls)} URLs")
with concurrent.futures.ThreadPoolExecutor(max_workers=PARALLEL) as executor:
broken = []
for url, status in executor.map(url_check, urls):
if not status:
broken.append(url)
print(f"Found {len(broken)} broken URLs in {len(urls)} tested:")
for b in broken:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment