Skip to content
Snippets Groups Projects

URL checker tweaks

Merged Emanuele Aina requested to merge wip/em/test-url-summary into master
1 file
+ 22
11
Compare changes
  • Side-by-side
  • Inline
+ 22
11
@@ -11,7 +11,14 @@ import sys
import traceback
from urllib.parse import urlparse
import urllib3
import time
import fnmatch
# Glob patterns (fnmatch syntax) for URLs the checker skips entirely:
# matched links are dropped before any HTTP request is made.
EXCLUDE = [
    "*://lavaphabbridge.apertis.org",   # it's slooooooow (bare host)
    "*://lavaphabbridge.apertis.org/*", # it's slooooooow
    "*://phabricator.apertis.org/T*",   # it's not public anyway :(
]
def get_link(url):
link = url.group("link")
@@ -24,6 +31,10 @@ def get_link(url):
url = url._replace(fragment="")
link = url.geturl()
for exclude in EXCLUDE:
if fnmatch.fnmatch(link, exclude):
return
if not link in urls:
urls.append(link)
@@ -70,10 +81,12 @@ headers={
"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
}
broken = 0
broken = []
for url in urls:
print("%s : " %(url), end='')
sys.stdout.flush()
start = time.perf_counter()
status = None
try:
resp = requests.head(url, headers=headers, allow_redirects=True, timeout=60, verify=False)
status = resp.ok
@@ -90,16 +103,14 @@ for url in urls:
resp.close()
except Exception as e:
status = False
end = time.perf_counter()
if status:
print("OK")
else:
print("Fail")
broken += 1
print("Found %d broken URLs in %d tested" %(broken, len(urls)))
if not status:
broken.append(url)
print(url, "OK" if status else "FAIL", f"{end - start:0.4f}s")
if broken:
sys.exit(1)
print(f"Found {len(broken)} broken URLs in {len(urls)} tested:")
for b in broken:
print(" ", b)
sys.exit(0)
sys.exit(1 if broken else 0)
Loading