Skip to content
Snippets Groups Projects
Commit bddc195e authored by Emanuele Aina
Browse files

test_urls: Ratelimit some origins


The GNOME wiki is unhappy about our requests, be nice.

Signed-off-by: Emanuele Aina <emanuele.aina@collabora.com>
parent aa93fae0
No related branches found
No related tags found
1 merge request: !278 Speedup test_urls
Pipeline #265985 passed with warnings
......@@ -16,6 +16,9 @@ import concurrent.futures
import time
import fnmatch
import textwrap
import itertools
PARALLEL = 50
EXCLUDE = [
"*://lavaphabbridge.apertis.org", #
......@@ -23,6 +26,13 @@ EXCLUDE = [
"*://phabricator.apertis.org/T*", # it's not public anyway :(
]
RATELIMITED = [
"live.gnome.org",
"wiki.gnome.org",
]
RATELIMIT_DELAY_SECONDS = 2
urls = set()
def get_link(url):
......@@ -105,6 +115,7 @@ def url_check(url):
# Some servers aren't setup to handle HEAD requests, so check anything
# that's not got a 200 status code with GET as well.
if not status:
time.sleep(RATELIMIT_DELAY_SECONDS)
resp = None
resp = session.get(url, headers=headers, allow_redirects=True, timeout=60, verify=False)
status = resp.ok
......@@ -118,10 +129,27 @@ def url_check(url):
print(url, "OK" if status else "FAIL", resp.status_code if resp else "-", f"{end - start:0.4f}s")
return url, status
def urls_check(urls):
    """Check a batch of URLs one after another, pausing between requests.

    Each URL is passed to ``url_check``; consecutive checks are separated
    by ``RATELIMIT_DELAY_SECONDS`` so that a batch of URLs on the same
    origin does not hit that origin in quick succession.

    Args:
        urls: Iterable of URL strings to check sequentially.

    Returns:
        A list with the value returned by ``url_check`` for each URL, in
        input order.
    """
    results = []
    for index, url in enumerate(urls):
        if index:
            # Only wait *between* requests: sleeping after the final URL
            # (or after the only URL of a single-element batch) would just
            # keep the worker busy without sparing any server.
            time.sleep(RATELIMIT_DELAY_SECONDS)
        results.append(url_check(url))
    return results
print(f"Testing {len(urls)} URLs")
# Split the URLs into batches: all URLs hosted on a rate-limited origin
# share a single batch keyed by that origin, while every other URL gets a
# one-element batch keyed by the URL itself.
parallel = {}
for url in urls:
    # Parse once and reuse the origin for both the membership test and
    # the batch key.
    origin = urlparse(url).netloc
    if origin in RATELIMITED:
        parallel.setdefault(origin, []).append(url)
    else:
        parallel[url] = [url]
with concurrent.futures.ThreadPoolExecutor(max_workers=PARALLEL) as executor:
broken = []
for url, status in executor.map(url_check, urls):
results = itertools.chain(*executor.map(urls_check, parallel.values()))
for url, status in results:
if not status:
broken.append(url)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment