mirror of
https://github.com/public-apis/public-apis.git
synced 2025-05-15 18:16:50 +02:00
Implement functions to find links in a text/file
This commit is contained in:
parent
e526f867d8
commit
4808d633a1
1 changed file with 42 additions and 0 deletions
42
scripts/validate/links.py
Normal file
42
scripts/validate/links.py
Normal file
|
@ -0,0 +1,42 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import sys
|
||||
import re
|
||||
from typing import List
|
||||
|
||||
|
||||
def find_links_in_text(text: str) -> List[str]:
    """Extract the URLs found in ``text``.

    A candidate starts with ``http://``/``https://``, a ``www.``-style
    prefix, or a lowercase domain followed by a slash. Balanced
    parentheses inside the URL are accepted, and the match may not end on
    trailing punctuation such as ``.``, ``,``, ``)`` or quotes.

    Returns the matches in order of appearance, each with any trailing
    ``/`` characters stripped.
    """

    url_regex = re.compile(r'((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'\".,<>?«»“”‘’]))')

    # Group 1 wraps the entire URL; the inner groups only exist to handle
    # nested parentheses and are not part of the result.
    return [
        found.group(1).rstrip('/')
        for found in url_regex.finditer(text)
    ]
|
||||
|
||||
|
||||
def find_links_in_file(filename: str) -> List[str]:
    """Find links in a text file and return them as a list of URLs.

    The file is read as UTF-8. Only the part of the file starting at the
    first ``## Index`` heading is scanned; if the file has no such
    heading, the whole file is scanned instead.

    Args:
        filename: Path of the text (markdown) file to read.

    Returns:
        The URLs reported by ``find_links_in_text`` for the scanned
        content.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """

    with open(filename, mode='r', encoding='utf-8') as file:
        readme = file.read()

    index_section = readme.find('## Index')
    # str.find returns -1 when the heading is absent; slicing from -1 would
    # keep only the last character of the file and drop every link, so fall
    # back to the start of the file.
    if index_section == -1:
        index_section = 0
    content = readme[index_section:]

    return find_links_in_text(content)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # The path of the markdown file to scan is the first command-line
    # argument; without it there is nothing to do.
    if len(sys.argv) < 2:
        print('No .md file passed')
        sys.exit(1)

    # Collect the links of the given file (the result is not used further
    # in this block).
    links = find_links_in_file(sys.argv[1])
|
Loading…
Add table
Add a link
Reference in a new issue