mirror of
https://github.com/public-apis/public-apis.git
synced 2025-05-10 15:46:48 +02:00
Faking user agent + handle "no server" and fallback error
This commit is contained in:
parent
aef03d1653
commit
7f7a3906e5
1 changed file with 26 additions and 16 deletions
|
@@ -19,35 +19,45 @@ def parse_links(filename):
|
||||||
|
|
||||||
def validate_links(links):
|
def validate_links(links):
|
||||||
"""Checks each entry in JSON file for live link"""
|
"""Checks each entry in JSON file for live link"""
|
||||||
print('Validating {} links...'.format(len(links)))
|
print(f'Validating {len(links)} links...')
|
||||||
errors = []
|
hasError = False
|
||||||
for link in links:
|
for link in links:
|
||||||
h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=25)
|
h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=25)
|
||||||
try:
|
try:
|
||||||
resp = h.request(link, headers={'user-agent': 'python-httplib2/0.18.0'})
|
resp = h.request(link, headers={
|
||||||
|
# Faking user agent as some hosting services block not-whitelisted UA
|
||||||
|
'user-agent': 'Mozilla/5.0'
|
||||||
|
})
|
||||||
code = int(resp[0]['status'])
|
code = int(resp[0]['status'])
|
||||||
# check if status code is a client or server error
|
# Checking status code errors
|
||||||
if code >= 404:
|
if (code >= 300):
|
||||||
errors.append('{}: {}'.format(code, link))
|
hasError = True
|
||||||
|
print(f"ERR:CLT:{code} : {link}")
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
errors.append("TMO: " + link)
|
hasError = True
|
||||||
|
print(f"ERR:TMO: {link}")
|
||||||
except socket.error as socketerror:
|
except socket.error as socketerror:
|
||||||
errors.append("SOC: {} : {}".format(socketerror, link))
|
hasError = True
|
||||||
|
print(f"ERR:SOC: {socketerror} : {link}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
hasError = True
|
||||||
# Ignore some exceptions which are not actually errors.
|
# Ignore some exceptions which are not actually errors.
|
||||||
# The list below should be extended with other exceptions in the future if needed
|
# The list below should be extended with other exceptions in the future if needed
|
||||||
if ((-1 != str(e).find("Content purported to be compressed with gzip but failed to decompress.")) and
|
if (-1 != str(e).find("[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:852)")):
|
||||||
(-1 != str(e).find("[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:852)"))) :
|
print(f"ERR:SSL: {e} : {link}")
|
||||||
errors.append("ERR: {} : {}".format(e, link))
|
elif (-1 != str(e).find("Content purported to be compressed with gzip but failed to decompress.")):
|
||||||
return errors
|
print(f"ERR:GZP: {e} : {link}")
|
||||||
|
elif (-1 != str(e).find("Unable to find the server at")):
|
||||||
|
print(f"ERR:SRV: {e} : {link}")
|
||||||
|
else:
|
||||||
|
print(f"ERR:UKN: {e} : {link}")
|
||||||
|
return hasError
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
num_args = len(sys.argv)
|
num_args = len(sys.argv)
|
||||||
if num_args < 2:
|
if num_args < 2:
|
||||||
print("No .md file passed")
|
print("No .md file passed")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
errors = validate_links(parse_links(sys.argv[1]))
|
hasError = validate_links(parse_links(sys.argv[1]))
|
||||||
if len(errors) > 0:
|
if hasError:
|
||||||
for err in errors:
|
|
||||||
print(err)
|
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue