mirror of
https://github.com/public-apis/public-apis.git
synced 2025-05-10 15:46:48 +02:00
Replace Ruby link validation with Python
This commit is contained in:
parent
7fb4f77080
commit
3c12e3a7c9
2 changed files with 53 additions and 81 deletions
53
build/validate_links.py
Executable file
53
build/validate_links.py
Executable file
|
@ -0,0 +1,53 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import httplib2
|
||||||
|
import json
|
||||||
|
import socket
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
def parse_links(filename):
    """Returns a list of links from JSON object

    Loads the JSON document stored at ``filename`` and walks its top-level
    ``'entries'`` list. Each entry must carry a ``'Link'`` key.

    Returns a list with one dict per entry, in file order:
    ``{'link': <the entry's 'Link' value>, 'https': <bool>}`` where ``https``
    is True when the link text starts with ``'https'``.

    Raises whatever ``open``/``json.load`` raise for an unreadable or invalid
    file, and ``KeyError`` when ``'entries'`` or ``'Link'`` is missing.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(filename) as json_file:
        data = json.load(json_file)
    return [
        {
            'link': entry['Link'],
            'https': entry['Link'].startswith('https'),
        }
        for entry in data['entries']
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def validate_links(links):
    """Checks each entry in JSON file for live link

    ``links`` is a list of dicts that each carry a ``'link'`` key (the shape
    produced by ``parse_links``). Every link is requested with an HTTP HEAD
    and a 5 second timeout; certificate validation is disabled.

    Returns a list of error strings, one per failing link, in input order:
    ``'<status>: <link>'`` for a reported status code, ``'TMO: <link>'`` for a
    timeout, ``'SOC: <error> : <link>'`` for any other socket-level error and
    ``'ERR: <error> : <link>'`` for an httplib2-level failure.
    """
    print('Validating {} links...'.format(len(links)))
    errors = []
    for each in links:
        link = each['link']
        h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=5)
        try:
            resp = h.request(link, 'HEAD')
            code = int(resp[0]['status'])
            # Only statuses of 404 and above are reported; 400-403 are let
            # through. NOTE(review): presumably deliberate (hosts that answer
            # HEAD with 401/403 still exist) - confirm before tightening.
            if code >= 404:
                errors.append('{}: {}'.format(code, link))
        except (TimeoutError, socket.timeout):
            # socket.timeout only became an alias of TimeoutError in Python
            # 3.10; naming both keeps timeouts tagged as TMO on older versions.
            errors.append("TMO: " + link)
        except socket.error as socketerror:
            errors.append("SOC: {} : {}".format(socketerror, link))
        except httplib2.HttpLib2Error as httperror:
            # httplib2 raises its own exception hierarchy (for example when a
            # host name cannot be resolved); record it instead of letting a
            # single bad link abort the whole run.
            errors.append("ERR: {} : {}".format(httperror, link))
    return errors
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: the first command-line argument is the JSON file
    # whose links are to be validated.
    if len(sys.argv) <= 1:
        print("No .json file passed")
        sys.exit(1)
    json_path = sys.argv[1]
    failures = validate_links(parse_links(json_path))
    if failures:
        # One failure per line, then a non-zero exit status to signal failure.
        print("\n".join(failures))
        sys.exit(1)
|
||||||
|
|
|
@ -1,81 +0,0 @@
|
||||||
#!/usr/bin/env ruby
|
|
||||||
require 'httparty'
|
|
||||||
require 'ruby-progressbar'
|
|
||||||
require 'uri'
|
|
||||||
# Response codes that count as a live link.
ALLOWED_CODES = [200, 302, 403, 429].freeze
# Links that are never requested and never reported.
ALLOWED_LINKS = ['https://www.yelp.com/developers/documentation/v3'].freeze

# GETs +link+ (10 second timeout) and returns a failure line of the form
# "(TAG): url", or nil when the link is accepted or deliberately ignored.
def check_link(link)
  return nil if ALLOWED_LINKS.include?(link)

  res = HTTParty.get(link, timeout: 10)
  return "(NIL): #{link}" if res.code.nil?
  return "(#{res.code}): #{link}" unless ALLOWED_CODES.include?(res.code)

  nil
rescue HTTParty::RedirectionTooDeep
  "(RTD): #{link}"
rescue Net::ReadTimeout, Net::OpenTimeout
  "(TMO): #{link}"
rescue OpenSSL::SSL::SSLError
  "(SSL): #{link}"
rescue SocketError
  "(SOK): #{link}"
rescue Errno::ECONNREFUSED
  "(CON): #{link}"
rescue Errno::ECONNRESET
  # A reset connection is ignored rather than reported as a failure.
  nil
end

filename = ARGV[0]
# Fail with a readable message instead of a TypeError from opening nil.
abort "usage: #{$PROGRAM_NAME} <file>" if filename.nil?

contents = File.binread(filename)

# Remove trailing ')' from entry URLs
links = URI.extract(contents, %w[http https]).map { |link| link.chomp(')') }

if links.empty?
  puts 'no links to check'
  exit(0)
end

# Count occurrences in one pass; hash insertion order keeps first-seen order.
occurrences = links.each_with_object(Hash.new(0)) { |link, seen| seen[link] += 1 }

# Fail on any duplicate elements
fails = occurrences.select { |_link, times| times > 1 }.keys.map { |link| "(DUP): #{link}" }

# Remove any duplicates from array
links = occurrences.keys

count = 0
total = links.length
progressbar = ProgressBar.create(total: total,
                                 format: '%a %P% | Processed: %c from %C')

# GET each link and check for valid response code from ALLOWED_CODES
links.each do |link|
  count += 1
  failure = check_link(link)
  fails.push(failure) if failure
  # Advance for every link, including skipped ones, so the bar reaches total.
  progressbar.increment
end

puts "#{count}/#{total} links checked"

if fails.empty?
  puts 'all links valid'
  exit(0)
end

puts '-- RESULTS --'
puts fails.sort
exit(1)
|
|
Loading…
Add table
Add a link
Reference in a new issue