mirror of
https://github.com/public-apis/public-apis.git
synced 2025-05-10 15:46:48 +02:00
Set host header for link validator (#2409)
This commit is contained in:
parent
aff6b25f3b
commit
3979c871cd
1 changed file with 8 additions and 1 deletion
|
@@ -51,9 +51,16 @@ def validate_links(links):
|
||||||
for link in links:
|
for link in links:
|
||||||
h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=25)
|
h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=25)
|
||||||
try:
|
try:
|
||||||
|
# fetching host name, removing leading www
|
||||||
|
host = link.split('//', 1)[1].split('/', 1)[0]
|
||||||
|
if host[:3] == 'www':
|
||||||
|
host = host[4:]
|
||||||
|
|
||||||
resp = h.request(link, headers={
|
resp = h.request(link, headers={
|
||||||
# Faking user agent as some hosting services block not-whitelisted UA
|
# Faking user agent as some hosting services block not-whitelisted UA
|
||||||
'user-agent': 'Mozilla/5.0'
|
'user-agent': 'Mozilla/5.0',
|
||||||
|
# setting host because Cloudflare returns 403 asking for captcha if host is missing
|
||||||
|
'host': host
|
||||||
})
|
})
|
||||||
code = int(resp[0]['status'])
|
code = int(resp[0]['status'])
|
||||||
# Checking status code errors
|
# Checking status code errors
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue