Set host header for link validator (#2409)

This commit is contained in:
Dmytro Khmelenko 2021-10-22 02:48:05 +02:00 committed by GitHub
parent aff6b25f3b
commit 3979c871cd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -51,9 +51,16 @@ def validate_links(links):
for link in links: for link in links:
h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=25) h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=25)
try: try:
# fetching host name, removing leading www
host = link.split('//', 1)[1].split('/', 1)[0]
if host[:3] == 'www':
host = host[4:]
resp = h.request(link, headers={ resp = h.request(link, headers={
# Faking user agent as some hosting services block not-whitelisted UA # Faking user agent as some hosting services block not-whitelisted UA
'user-agent': 'Mozilla/5.0' 'user-agent': 'Mozilla/5.0',
# setting host because Cloudflare returns 403 asking for captcha if host is missing
'host': host
}) })
code = int(resp[0]['status']) code = int(resp[0]['status'])
# Checking status code errors # Checking status code errors