mirror of
https://github.com/public-apis/public-apis.git
synced 2025-05-12 00:26:45 +02:00
check for duplicate urls
This commit is contained in:
parent
80e8997d86
commit
5fbf817c1c
1 changed file with 40 additions and 16 deletions
|
@@ -1,31 +1,55 @@
|
||||||
#!/usr/bin/env ruby
|
#!/usr/bin/env ruby
|
||||||
require 'faraday'
|
require 'httparty'
|
||||||
require 'uri'
|
require 'uri'
|
||||||
allowed_codes = [200, 302, 403]
|
allowed_codes = [200, 302, 403]
|
||||||
args = ARGV
|
args = ARGV
|
||||||
filename = args[0]
|
filename = args[0]
|
||||||
fail_flag = false
|
fail_flag = false
|
||||||
contents = File.open(filename, 'rb') { |f| f.read }
|
contents = File.open(filename, 'rb') { |f| f.read }
|
||||||
links = URI.extract(contents, ['http', 'https'])
|
raw_links = URI.extract(contents, ['http', 'https'])
|
||||||
dup = links.select{|element| links.count(element) > 1 }
|
# Remove trailing ')' from entry URLs
|
||||||
if dup.uniq.length > 0
|
links = []
|
||||||
dup.uniq.each do |link|
|
raw_links.each do |link|
|
||||||
if link.end_with?(')')
|
|
||||||
puts link[0...-1]
|
|
||||||
end
|
|
||||||
end
|
|
||||||
exit(1)
|
|
||||||
end
|
|
||||||
links.each do |link|
|
|
||||||
if link.end_with?(')')
|
if link.end_with?(')')
|
||||||
link = link[0...-1]
|
links.push(link[0...-1])
|
||||||
|
else
|
||||||
|
links.push(link)
|
||||||
end
|
end
|
||||||
res = Faraday.get(link)
|
end
|
||||||
if !allowed_codes.include?(res.status)
|
# Fail on any duplicate elements
|
||||||
puts "(#{res.status}): #{link}"
|
dup = links.select{|element| links.count(element) > 1}
|
||||||
|
if dup.uniq.length > 0
|
||||||
|
dup.uniq.each do |e|
|
||||||
|
puts "Duplicate link: #{e}"
|
||||||
|
end
|
||||||
|
fail_flag = true
|
||||||
|
end
|
||||||
|
# Remove any duplicates from array
|
||||||
|
links = links.uniq
|
||||||
|
count = 0
|
||||||
|
total = links.length
|
||||||
|
fails = []
|
||||||
|
# GET each link and check for valid response code from allowed_codes
|
||||||
|
links.each do |link|
|
||||||
|
begin
|
||||||
|
count += 1
|
||||||
|
puts "(#{count}/#{total}) #{link}"
|
||||||
|
res = HTTParty.get(link, timeout: 10)
|
||||||
|
if !allowed_codes.include?(res.code)
|
||||||
|
fails.push("(#{res.code}): #{link}")
|
||||||
|
fail_flag = true
|
||||||
|
else
|
||||||
|
puts "\t(#{res.code})"
|
||||||
|
end
|
||||||
|
rescue
|
||||||
|
puts "FAIL: (#{res.code}) #{link}"
|
||||||
|
fails.push("(#{res.code}): #{link}")
|
||||||
fail_flag = true
|
fail_flag = true
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
fails.each do |e|
|
||||||
|
puts e
|
||||||
|
end
|
||||||
if fail_flag
|
if fail_flag
|
||||||
exit(1)
|
exit(1)
|
||||||
else
|
else
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue