mirror of
https://github.com/public-apis/public-apis.git
synced 2025-05-09 15:16:47 +02:00
check for duplicate urls
This commit is contained in:
parent
80e8997d86
commit
5fbf817c1c
1 changed files with 40 additions and 16 deletions
|
@ -1,31 +1,55 @@
|
|||
#!/usr/bin/env ruby
|
||||
require 'faraday'
|
||||
require 'httparty'
|
||||
require 'uri'
|
||||
allowed_codes = [200, 302, 403]
|
||||
args = ARGV
|
||||
filename = args[0]
|
||||
fail_flag = false
|
||||
contents = File.open(filename, 'rb') { |f| f.read }
|
||||
links = URI.extract(contents, ['http', 'https'])
|
||||
dup = links.select{|element| links.count(element) > 1 }
|
||||
if dup.uniq.length > 0
|
||||
dup.uniq.each do |link|
|
||||
if link.end_with?(')')
|
||||
puts link[0...-1]
|
||||
end
|
||||
end
|
||||
exit(1)
|
||||
end
|
||||
links.each do |link|
|
||||
raw_links = URI.extract(contents, ['http', 'https'])
|
||||
# Remove trailing ')' from entry URLs
|
||||
links = []
|
||||
raw_links.each do |link|
|
||||
if link.end_with?(')')
|
||||
link = link[0...-1]
|
||||
links.push(link[0...-1])
|
||||
else
|
||||
links.push(link)
|
||||
end
|
||||
res = Faraday.get(link)
|
||||
if !allowed_codes.include?(res.status)
|
||||
puts "(#{res.status}): #{link}"
|
||||
end
|
||||
# Fail on any duplicate elements
|
||||
dup = links.select{|element| links.count(element) > 1}
|
||||
if dup.uniq.length > 0
|
||||
dup.uniq.each do |e|
|
||||
puts "Duplicate link: #{e}"
|
||||
end
|
||||
fail_flag = true
|
||||
end
|
||||
# Remove any duplicates from array
|
||||
links = links.uniq
|
||||
count = 0
|
||||
total = links.length
|
||||
fails = []
|
||||
# GET each link and check for valid response code from allowed_codes
|
||||
links.each do |link|
|
||||
begin
|
||||
count += 1
|
||||
puts "(#{count}/#{total}) #{link}"
|
||||
res = HTTParty.get(link, timeout: 10)
|
||||
if !allowed_codes.include?(res.code)
|
||||
fails.push("(#{res.code}): #{link}")
|
||||
fail_flag = true
|
||||
else
|
||||
puts "\t(#{res.code})"
|
||||
end
|
||||
rescue
|
||||
puts "FAIL: (#{res.code}) #{link}"
|
||||
fails.push("(#{res.code}): #{link}")
|
||||
fail_flag = true
|
||||
end
|
||||
end
|
||||
fails.each do |e|
|
||||
puts e
|
||||
end
|
||||
if fail_flag
|
||||
exit(1)
|
||||
else
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue