Merge pull request #1593 from marekdano/1592-duplicated-links-check

Add check for checking duplicated links in README file
This commit is contained in:
Marek Dano 2021-03-23 18:34:37 +00:00 committed by GitHub
commit 4a7507cccd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 34 additions and 8 deletions

View file

@ -20,7 +20,6 @@ num_segments = 5
errors = []
title_links = []
previous_links = []
# Patterns are written as raw strings so that regex escapes such as \s, \*
# and \[ are not parsed as (invalid) Python string escape sequences.

# The anchor text followed by one whitespace character; group 1 captures
# the rest of the line.
anchor_re = re.compile(anchor + r'\s(.+)')
# Matches "* [text]"; group 1 captures the text inside the brackets.
section_title_re = re.compile(r'\*\s\[(.*)\]')
# Matches a Markdown link "[text](http...)"; group 1 is the link text and
# group 2 is the URL.
link_re = re.compile(r'\[(.+)\]\((http.*)\)')
@ -68,12 +67,6 @@ def check_entry(line_num, segments):
title = title_re_match.group(1)
if title.upper().endswith(' API'):
add_error(line_num, 'Title should not end with "... API". Every entry is an API here!')
# do not allow duplicate links
link = title_re_match.group(2)
if link in previous_links:
add_error(line_num, 'Duplicate link - entries should only be included in one section')
else:
previous_links.append(link)
# END Title
# START Description
# first character should be capitalized

View file

@ -5,6 +5,12 @@ import re
import socket
import sys
# Links that dup_links() skips when looking for duplicates.
# dup_links() strips trailing slashes from a link before testing membership
# here, so entries must be written without a trailing "/" to match.
ignored_links = [
'https://github.com/public-apis/public-apis/actions?query=workflow%3A%22Run+tests%22',
'https://github.com/public-apis/public-apis/workflows/Validate%20links/badge.svg?branch=master',
'https://github.com/public-apis/public-apis/actions?query=workflow%3A%22Validate+links%22',
'https://github.com/davemachado/public-api',
]
def parse_links(filename):
"""Returns a list of URLs from text file"""
@ -16,6 +22,30 @@ def parse_links(filename):
links = [raw_link[0] for raw_link in raw_links]
return links
def dup_links(links):
    """Check for duplicated links.

    Links are compared with trailing slashes removed, so ``http://x.com``
    and ``http://x.com/`` count as the same link. Links listed in the
    module-level ``ignored_links`` are skipped entirely.

    Args:
        links: Iterable of URL strings.

    Returns:
        True if at least one duplicated link was found, otherwise False.
        The outcome is also printed to stdout; every duplicated link is
        listed once, no matter how many times it is repeated.
    """
    print('Checking for duplicated links...')
    ignored = set(ignored_links)  # built once for constant-time lookups
    counts = {}
    dupes = []
    for link in links:
        link = link.rstrip('/')
        if link in ignored:
            continue
        counts[link] = counts.get(link, 0) + 1
        # Record the link only on its second occurrence so a link that is
        # repeated three or more times is still reported just once.
        if counts[link] == 2:
            dupes.append(link)

    if not dupes:
        print("No duplicate links")
        return False
    print(f"Found duplicate links: {dupes}")
    return True
def validate_links(links):
"""Checks each entry in JSON file for live link"""
@ -58,6 +88,9 @@ if __name__ == "__main__":
if num_args < 2:
print("No .md file passed")
sys.exit(1)
hasError = validate_links(parse_links(sys.argv[1]))
links = parse_links(sys.argv[1])
hasError = dup_links(links)
if not hasError:
hasError = validate_links(links)
if hasError:
sys.exit(1)