Faking user agent + handle "no server" and fallback error

This commit is contained in:
Yann Bertrand 2020-11-19 21:50:32 +01:00
parent aef03d1653
commit 7f7a3906e5

View file

@@ -19,35 +19,45 @@ def parse_links(filename):
def validate_links(links):
    """Check every link for a live response and report the failures.

    Each link is requested through httplib2 with a 25 second timeout and
    certificate validation disabled. A failure is printed on its own
    ``ERR:<TAG>`` line as soon as it is detected.

    Args:
        links: Sized collection of URL strings to request.

    Returns:
        True if at least one link failed (HTTP status >= 300, timeout,
        socket error or any other exception), False otherwise.
    """
    print(f'Validating {len(links)} links...')

    # Message fragments of known exceptions and the tag they are reported
    # under; anything else is reported as "UKN". The SSL fragment purposely
    # omits the "(_ssl.c:NNN)" suffix, which changes between Python builds.
    known_failures = (
        ("[SSL: CERTIFICATE_VERIFY_FAILED]", "SSL"),
        ("Content purported to be compressed with gzip but failed to decompress.", "GZP"),
        ("Unable to find the server at", "SRV"),
    )

    has_error = False
    for link in links:
        h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=25)
        try:
            resp = h.request(link, headers={
                # Faking user agent as some hosting services block not-whitelisted UA
                'user-agent': 'Mozilla/5.0'
            })
            code = int(resp[0]['status'])
            # Only statuses below 300 count as a live link.
            if code >= 300:
                has_error = True
                print(f"ERR:CLT:{code} : {link}")
        except (TimeoutError, socket.timeout):
            # Before Python 3.10 socket.timeout is not a TimeoutError, only an
            # OSError subclass, so it must be named here or it would be
            # reported as a generic socket error below.
            has_error = True
            print(f"ERR:TMO: {link}")
        except socket.error as socketerror:
            has_error = True
            print(f"ERR:SOC: {socketerror} : {link}")
        except Exception as e:
            # Every remaining exception marks the run as failed; the tag only
            # tells the reader which kind of failure it was.
            has_error = True
            message = str(e)
            tag = next(
                (label for fragment, label in known_failures if fragment in message),
                "UKN",
            )
            print(f"ERR:{tag}: {e} : {link}")
    return has_error
# Script entry point: validate the links of the file given as first argument.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("No .md file passed")
        sys.exit(1)
    # validate_links() prints every failure itself; only the exit status is
    # decided here. Truthiness is used so any non-empty result means failure.
    if validate_links(parse_links(sys.argv[1])):
        sys.exit(1)