-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDocCompile.py
76 lines (66 loc) · 2.62 KB
/
DocCompile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python3
import glob
import os
import re
import shutil
import subprocess
import sys
from pathlib import Path
# Reference 1: https://note.nkmk.me/mac-pandoc-markdown-pdf-japanese/
# Reference 2: https://weasyprint.readthedocs.io/en/stable/install.html
# Reference 3: https://gist.github.com/dashed/6714393#file-github-pandoc-css
# Reference 4: https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-printToPDF
# Resolve paths from this script's own location (not the current working
# directory) so the build works no matter where it is launched from.
FILE = Path(__file__).name
TOP_FOLDER = Path(__file__).resolve().parent

# First argument handed to util/print-via-chrome.js.
# NOTE(review): presumably the Chrome remote-debugging port -- confirm
# against the helper script.
PORT = 9222

# PDF concatenation script living under /System/Library/Automator.
JOIN_SCRIPT = (
    "/System/Library/Automator/Combine PDF Pages.action"
    "/Contents/Resources/join.py"
)

# CSS rule injected right after the opening <style> tag to force the font.
FONT_OVERRIDE_CSS = "html * {font-family: YuMincho !important; }"

# Applied, in order, to escaped <img> lines found inside a <div>: drop the
# code-block wrappers and turn the escaped markup back into real HTML.
IMG_LINE_REPLACEMENTS = (
    ("<pre>", ""),
    ("<code>", ""),
    ("</pre>", ""),
    ("</code>", ""),
    ("&lt;", "<"),
    ("&quot;", '"'),
    ("&gt;", ">"),
)


def rewrite_html_lines(raw_lines):
    """Return the post-processed lines of a pandoc-generated HTML document.

    Args:
        raw_lines: Iterable of text lines (an open file object works).

    Returns:
        A list of rewritten lines without trailing newlines.

    Rewrites performed:
      * inside a ``<div>``, a line mentioning both ``img`` and ``src`` has
        its ``<pre>``/``<code>`` wrappers removed, its ``&lt;``/``&gt;``/
        ``&quot;`` entities decoded, and ``data-align`` renamed to ``align``;
      * a font-override CSS rule is appended after every ``<style>`` line;
      * any other line containing ``data-align="center"`` gets the attribute
        renamed to ``align``.
    """
    rewritten = []
    in_div = False
    for raw in raw_lines:
        # NOTE(review): strip() also removes leading indentation on lines
        # inside <pre> blocks; kept as-is to preserve the existing output.
        line = raw.strip()
        if "<div" in line:
            in_div = True

        if in_div and "src" in line and "img" in line:
            for old, new in IMG_LINE_REPLACEMENTS:
                line = line.replace(old, new)
            line = line.replace("data-align", "align")
            rewritten.append(line)
        elif "<style>" in line:
            print("try rewrite entities..")
            rewritten.append(line)
            rewritten.append(FONT_OVERRIDE_CSS)
        else:
            if 'data-align="center"' in line:
                line = line.replace("data-align", "align")
            rewritten.append(line)

        # Checked on the rewritten line, and only after the line has been
        # handled, so a one-line <div>...</div> is still treated as in-div.
        if "</div>" in line:
            in_div = False
    return rewritten


def run_command(args):
    """Run an external command without a shell and report success.

    Passing an argument list (instead of a shell string) keeps paths with
    spaces or shell metacharacters intact.

    Args:
        args: Sequence of strings; ``args[0]`` is the executable.

    Returns:
        True when the command ran and exited with status 0, False otherwise.
        Failures are reported on stderr rather than raised.
    """
    try:
        completed = subprocess.run(args, check=False)
    except OSError as err:
        print(f"failed to start {args[0]}: {err}", file=sys.stderr)
        return False
    if completed.returncode != 0:
        print(
            f"{args[0]} exited with status {completed.returncode}",
            file=sys.stderr,
        )
        return False
    return True


def main():
    """Convert every markdown file to PDF, then join the PDFs.

    For each ``markdowns/*.md`` next to this script: render HTML with pandoc
    into ``var/``, post-process it into ``var/<name>_01.html``, and print
    that file to ``var/<name>.pdf`` via ``util/print-via-chrome.js``.
    Finally all ``var/*.pdf`` files are combined, in sorted order, into
    ``combine.pdf`` in the current working directory.
    """
    var_dir = TOP_FOLDER / "var"

    for fn in sorted(glob.glob(f"{TOP_FOLDER}/markdowns/*.md")):
        source = Path(fn)
        # Path.stem removes only the final suffix; the old regex r".md"
        # removed any character followed by "md" anywhere in the name.
        name_no_suffix = source.stem
        html_path = var_dir / f"{name_no_suffix}.html"
        rewritten_path = var_dir / f"{name_no_suffix}_01.html"
        pdf_path = var_dir / f"{name_no_suffix}.pdf"

        # Create the HTML with pandoc.
        pandoc_ok = run_command(
            [
                "pandoc",
                "-s",
                str(source),
                "-f",
                "markdown",
                "--metadata",
                "title=Practical Data Science & Engineering Vol.1",
                "-c",
                str(var_dir / "github-pandoc.css"),
                "-o",
                str(html_path),
            ]
        )
        if not pandoc_ok:
            # Without fresh HTML there is nothing meaningful to print.
            continue

        with open(html_path, encoding="utf-8") as fp:
            rewrite_html = "\n".join(rewrite_html_lines(fp))
        with open(rewritten_path, "w", encoding="utf-8") as fp:
            fp.write(rewrite_html)

        run_command(
            [
                str(TOP_FOLDER / "util" / "print-via-chrome.js"),
                str(PORT),
                str(rewritten_path),
                str(pdf_path),
            ]
        )

    # Join all generated PDFs into a single document.
    pdf_files = sorted(glob.glob(f"{TOP_FOLDER}/var/*.pdf"))
    if not pdf_files:
        print("no PDF files to combine", file=sys.stderr)
        return
    run_command([JOIN_SCRIPT, "-o", "combine.pdf", *pdf_files])


if __name__ == "__main__":
    main()