Remove broken links and fix python script (#1418)

This commit is contained in:
Sitram 2020-10-15 21:50:42 +03:00 committed by GitHub
parent 2f2d3d0d78
commit 29351783e9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 5 additions and 18 deletions

View file

@@ -11,9 +11,9 @@ def parse_links(filename):
with open(filename) as fp:
data = fp.read()
raw_links = re.findall(
-'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
+'((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'\".,<>?«»“”‘’]))',
data)
-links = [raw_link.replace(')', '') for raw_link in raw_links]
+links = [raw_link[0] for raw_link in raw_links]
return links
@@ -22,9 +22,9 @@ def validate_links(links):
print('Validating {} links...'.format(len(links)))
errors = []
for link in links:
-h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=5)
+h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=10)
try:
-resp = h.request(link, 'HEAD')
+resp = h.request(link)
code = int(resp[0]['status'])
# check if status code is a client or server error
if code >= 404: