mirror of
https://github.com/public-apis/public-apis.git
synced 2025-07-31 07:09:50 +02:00
Merge pull request #1593 from marekdano/1592-duplicated-links-check
Add check for checking duplicated links in README file
This commit is contained in:
commit
4a7507cccd
2 changed files with 34 additions and 8 deletions
|
@ -20,7 +20,6 @@ num_segments = 5
|
||||||
|
|
||||||
errors = []
|
errors = []
|
||||||
title_links = []
|
title_links = []
|
||||||
previous_links = []
|
|
||||||
anchor_re = re.compile(anchor + '\s(.+)')
|
anchor_re = re.compile(anchor + '\s(.+)')
|
||||||
section_title_re = re.compile('\*\s\[(.*)\]')
|
section_title_re = re.compile('\*\s\[(.*)\]')
|
||||||
link_re = re.compile('\[(.+)\]\((http.*)\)')
|
link_re = re.compile('\[(.+)\]\((http.*)\)')
|
||||||
|
@ -68,12 +67,6 @@ def check_entry(line_num, segments):
|
||||||
title = title_re_match.group(1)
|
title = title_re_match.group(1)
|
||||||
if title.upper().endswith(' API'):
|
if title.upper().endswith(' API'):
|
||||||
add_error(line_num, 'Title should not end with "... API". Every entry is an API here!')
|
add_error(line_num, 'Title should not end with "... API". Every entry is an API here!')
|
||||||
# do not allow duplicate links
|
|
||||||
link = title_re_match.group(2)
|
|
||||||
if link in previous_links:
|
|
||||||
add_error(line_num, 'Duplicate link - entries should only be included in one section')
|
|
||||||
else:
|
|
||||||
previous_links.append(link)
|
|
||||||
# END Title
|
# END Title
|
||||||
# START Description
|
# START Description
|
||||||
# first character should be capitalized
|
# first character should be capitalized
|
||||||
|
|
|
@ -5,6 +5,12 @@ import re
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
ignored_links = [
|
||||||
|
'https://github.com/public-apis/public-apis/actions?query=workflow%3A%22Run+tests%22',
|
||||||
|
'https://github.com/public-apis/public-apis/workflows/Validate%20links/badge.svg?branch=master',
|
||||||
|
'https://github.com/public-apis/public-apis/actions?query=workflow%3A%22Validate+links%22',
|
||||||
|
'https://github.com/davemachado/public-api',
|
||||||
|
]
|
||||||
|
|
||||||
def parse_links(filename):
|
def parse_links(filename):
|
||||||
"""Returns a list of URLs from text file"""
|
"""Returns a list of URLs from text file"""
|
||||||
|
@ -16,6 +22,30 @@ def parse_links(filename):
|
||||||
links = [raw_link[0] for raw_link in raw_links]
|
links = [raw_link[0] for raw_link in raw_links]
|
||||||
return links
|
return links
|
||||||
|
|
||||||
|
def dup_links(links):
    """Check for duplicated links.

    Links are compared after stripping trailing slashes, and any link
    listed in the module-level ``ignored_links`` is skipped. The outcome
    is printed to stdout.

    Args:
        links: Iterable of URL strings.

    Returns:
        True if at least one link occurs more than once, otherwise False.
    """
    print('Checking for duplicated links...')
    occurrences = {}
    dupes = []

    for link in links:
        link = link.rstrip('/')
        if link in ignored_links:
            continue

        count = occurrences.get(link, 0) + 1
        occurrences[link] = count
        # Record the link only on its second occurrence so that a link
        # repeated three or more times is still reported exactly once.
        if count == 2:
            dupes.append(link)

    if not dupes:
        print("No duplicate links")
        return False

    print(f"Found duplicate links: {dupes}")
    return True
|
||||||
|
|
||||||
def validate_links(links):
|
def validate_links(links):
|
||||||
"""Checks each entry in JSON file for live link"""
|
"""Checks each entry in JSON file for live link"""
|
||||||
|
@ -58,6 +88,9 @@ if __name__ == "__main__":
|
||||||
if num_args < 2:
|
if num_args < 2:
|
||||||
print("No .md file passed")
|
print("No .md file passed")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
hasError = validate_links(parse_links(sys.argv[1]))
|
links = parse_links(sys.argv[1])
|
||||||
|
hasError = dup_links(links)
|
||||||
|
if not hasError:
|
||||||
|
hasError = validate_links(links)
|
||||||
if hasError:
|
if hasError:
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue