ssg.py
"""static site generation"""
from asyncio import run
from collections import namedtuple
from pathlib import Path
from httpx import AsyncClient
from app import app
Entry = namedtuple("Entry", "path slug")
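# Each Entry pairs the output file to write under static/ ("path") with the
# URL path to request from the app ("slug").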


def api_entry(slug: str):
    """shortcut for constructing a JSON endpoint"""
    return Entry(f"{slug}.json", slug)


def page_entry(slug: str):
    """shortcut for constructing an HTML endpoint"""
    return Entry(f"{slug}.html", slug)


def normal_entry(slug: str):
    """shortcut for constructing an ordinary endpoint"""
    return Entry(slug, slug)
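# For example, api_entry("api/people/list") builds
# Entry(path="api/people/list.json", slug="api/people/list"): the response
# for /api/people/list ends up saved as static/api/people/list.json.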


def find_entries():
    """get all the URLs to scrape"""
    entries = [
        Entry("index.html", ""),
        page_entry("about"),
        normal_entry("robots.txt"),
        normal_entry("common.css"),
        api_entry("api/people/list"),
        api_entry("api/people/dict"),
        # FastAPI's built-in documentation pages
        page_entry("docs"),
        page_entry("redoc"),
        normal_entry("openapi.json"),
    ]
    root = Path("data")
    for path in root.glob("**/*.md"):
        if path.stem == "index":  # university page
            slug = path.relative_to(root).parent.as_posix()
        else:
            slug = path.relative_to(root).with_suffix("").as_posix()
        entries.append(page_entry(slug))
        entries.append(api_entry(f"api/{slug}"))
    return entries
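# Each Markdown source yields both a rendered page and its JSON counterpart:
# data/people/alice.md, say, would produce people/alice.html and
# api/people/alice.json, while a directory's index.md collapses to the
# directory slug itself.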


client = AsyncClient(
    app=app,
    base_url="https://<SSG>",
    headers={"accept-encoding": "identity"},  # save uncompressed bodies
)
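# Passing app= dispatches requests directly to the ASGI app in memory, so no
# server runs and the base_url host is arbitrary. Note that recent httpx
# versions deprecate the app= shortcut; the equivalent there is
# AsyncClient(transport=ASGITransport(app=app), ...).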


def save_one(path: Path, content: bytes, /):
    """save a file after ensuring its directory exists"""
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_bytes(content)


static = Path("static")  # output root for the generated site


async def scrap_one(entry: Entry):
    """fetch one URL and save its response body"""
    response = await client.get(entry.slug)
    save_one(static / entry.path, response.content)
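# (Optional hardening, not in the original: calling
# response.raise_for_status() before saving would fail the build on errors
# instead of writing error pages into static/.)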


async def scrap_all():
    """save all the responses"""
    for entry in find_entries():
        await scrap_one(entry)
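

# The loop above fetches sequentially, which is simple and deterministic. If
# rendering ever dominates the build time, the same entries can be fetched
# concurrently instead. A minimal sketch, assuming the app handles parallel
# requests (scrap_all_concurrent is an illustrative alternative, not part of
# the original flow):
async def scrap_all_concurrent():
    """save all the responses, fetching them concurrently"""
    from asyncio import gather

    await gather(*(scrap_one(entry) for entry in find_entries()))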


if __name__ == "__main__":
    from time import perf_counter

    t = perf_counter()
    run(scrap_all())
    print(f"finished SSG in {perf_counter() - t:.2f}s")