scrape.py
import requests
from bs4 import BeautifulSoup


def check_links(readme_path, base_url):
    """
    Checks the validity of links in the LeetCode README file.

    Args:
        readme_path (str): The path to the README file.
        base_url (str): The base URL to prepend to relative links.

    Returns:
        None
    """
    with open(readme_path, 'r', encoding='utf-8') as file:
        readme_content = file.read()

    # Extract all anchor tags with an href attribute using BeautifulSoup
    soup = BeautifulSoup(readme_content, 'html.parser')
    links = soup.find_all('a', href=True)

    invalid_links = []
    for link in links:
        url = link['href']
        # Prepend the base URL to relative links
        if not url.startswith('http'):
            url = base_url + url
        try:
            # HEAD request avoids downloading the page body; follow redirects
            # so moved pages still count as valid, and time out rather than
            # hanging on an unresponsive server
            response = requests.head(url, allow_redirects=True, timeout=10)
            if response.status_code == 200:
                print(f"Valid link: {url}")
            else:
                # The server was reached but responded with an error status
                print(f"Invalid link: {url} - Status code: {response.status_code}")
                invalid_links.append(url)
        except requests.exceptions.RequestException as e:
            # The request itself failed, e.g. a network or DNS issue
            print(f"Error checking link: {url} - Error: {e}")

    print("========================================================================================")
    if invalid_links:
        print(f"Found {len(invalid_links)} invalid links:\n" + "\n".join(invalid_links))
    else:
        print("No invalid links :)")


if __name__ == "__main__":
    readme_path = 'README.md'
    base_url = 'https://github.com/jyztintan/Leetcode/blob/main/'
    check_links(readme_path, base_url)
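
Usage (a minimal sketch, assuming the script lives in the repository root next to the README.md it checks; the path and base URL are the defaults hard-coded above):

    python scrape.py

Each link is printed as it is checked, followed by a summary listing any invalid links. Note that since the README is parsed with BeautifulSoup's html.parser, only HTML-style <a href="..."> links are checked, not markdown-style [text](url) links.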