generated from project-polymorph/trans-digital-cn
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 7 changed files with 99 additions and 91 deletions.
There are no files selected for viewing
File renamed without changes.
Submodule downloader updated from 89c852 to e660ae
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import os | ||
import re | ||
import yaml | ||
import shutil | ||
from urllib.parse import urlparse | ||
|
||
def load_config():
    """Load the URL-to-directory mapping configuration.

    Reads ``.github/url_dir_map_config.yml`` (relative to the current
    working directory) as UTF-8 and parses it with ``yaml.safe_load``.

    Returns:
        The parsed YAML document, exactly as ``yaml.safe_load`` returns it.
    """
    config_path = '.github/url_dir_map_config.yml'
    with open(config_path, 'r', encoding='utf-8') as stream:
        parsed = yaml.safe_load(stream)
    return parsed
|
||
def extract_original_url(content):
    """Return the URL recorded in a ``tcd_original_link`` HTML comment.

    The marker has the form ``<!-- tcd_original_link <url> -->``; only the
    first marker found in *content* is used.

    Args:
        content: Text to search.

    Returns:
        The text captured between the marker name and the closing ``-->``
        (surrounding whitespace excluded), or ``None`` if no marker exists.
    """
    found = re.search(r'<!--\s*tcd_original_link\s+(.*?)\s*-->', content)
    if found is None:
        return None
    return found.group(1)
|
||
def get_target_dir(url, config):
    """Pick the directory name a file should be sorted into.

    Mappings in ``config['url_mappings']`` are tried in order and the first
    match wins. A mapping whose ``domain`` is ``"*"`` matches every URL.
    Other domains match when they appear in the URL's host name on whole
    DNS-label boundaries: ``sina.com`` matches ``news.sina.com`` and
    ``news.sina.com.cn`` but ``163.com`` does not match ``evil163.com``.
    Matching is case-insensitive and ignores any port or credentials.

    Args:
        url: The original article URL, or a falsy value when unknown.
        config: Mapping with a ``'url_mappings'`` list of dicts, each having
            ``'domain'`` and ``'dir'`` keys.

    Returns:
        The ``dir`` of the first matching mapping, or ``"未分类"`` when *url*
        is falsy or nothing matches.
    """
    default_dir = "未分类"
    if not url:
        return default_dir

    try:
        # ``hostname`` is lower-cased and excludes port and userinfo,
        # unlike ``netloc``.
        host = urlparse(url).hostname or ""
    except ValueError:
        # Malformed URL (e.g. broken IPv6 literal): treat it as host-less so
        # that only a wildcard mapping can still claim it.
        host = ""

    # Surround with dots so a pattern can only match complete labels.
    dotted_host = f".{host}."

    for mapping in config['url_mappings']:
        pattern = str(mapping['domain']).lower().strip(".")
        if pattern == "*" or f".{pattern}." in dotted_host:
            return mapping['dir']
    return default_dir
|
||
def get_unique_filename(target_path, filename):
    """Return a path inside *target_path* that does not exist yet.

    The plain ``target_path/filename`` is preferred. If it is taken, a
    numeric suffix is inserted before the extension (``name_1.ext``,
    ``name_2.ext``, ...) until an unused path is found.

    Args:
        target_path: Directory the file is going to be placed in.
        filename: Desired file name (no directory part).

    Returns:
        The first candidate path for which ``os.path.exists`` is false.
    """
    candidate = os.path.join(target_path, filename)
    if not os.path.exists(candidate):
        return candidate

    stem, suffix = os.path.splitext(filename)
    attempt = 1
    while True:
        candidate = os.path.join(target_path, f"{stem}_{attempt}{suffix}")
        if not os.path.exists(candidate):
            return candidate
        attempt += 1
|
||
def main():
    """Sort markdown files under ``workspace`` into per-source directories.

    For every ``*.md`` file that is not already inside one of the target
    directories, the original article URL is read from the file's
    ``tcd_original_link`` marker, mapped to a directory name, and the file
    is moved to ``workspace/<dir>`` (renamed with a numeric suffix if a file
    of that name is already there). One line is printed per moved file.
    """
    config = load_config()
    workspace_dir = 'workspace'

    # Directories that hold already-sorted files. The fallback bucket is
    # added explicitly because get_target_dir can return it even when the
    # config does not list it; otherwise its files would be re-sorted (and
    # renamed with a new suffix) on every run.
    sorted_dir_names = {mapping['dir'] for mapping in config['url_mappings']}
    sorted_dir_names.add("未分类")
    sorted_dir_paths = {
        os.path.normpath(os.path.join(workspace_dir, name))
        for name in sorted_dir_names
    }

    # Create target directories if they don't exist
    for mapping in config['url_mappings']:
        os.makedirs(os.path.join(workspace_dir, mapping['dir']), exist_ok=True)

    # Process markdown files
    for root, dirs, files in os.walk(workspace_dir):
        # Skip files that are already in target directories by pruning those
        # directories from the walk. Comparing real directory paths (rather
        # than searching the file path for the directory name) keeps files
        # whose *name* merely contains a target name from being ignored.
        dirs[:] = [
            name for name in dirs
            if os.path.normpath(os.path.join(root, name)) not in sorted_dir_paths
        ]

        for file in files:
            if not file.endswith('.md'):
                continue

            file_path = os.path.join(root, file)

            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            url = extract_original_url(content)
            target_dir = get_target_dir(url, config)
            target_dir_path = os.path.join(workspace_dir, target_dir)

            # Create target directory if it doesn't exist
            os.makedirs(target_dir_path, exist_ok=True)

            # Get unique filename in target directory
            new_file_path = get_unique_filename(target_dir_path, file)

            # Move file
            shutil.move(file_path, new_file_path)
            print(f"Moved {file} to {target_dir}")
|
||
# Script entry point: run the sorter only when this file is executed directly.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Maps the domain of an article's original URL to the folder (under the
# workspace directory) that the article's markdown file is moved into.
# Entries are tried from top to bottom; the first one that matches wins.
url_mappings:
  - domain: "sina.com"
    dir: "新浪新闻"
  - domain: "sina.cn"
    dir: "新浪新闻"
  - domain: "news.qq.com"
    dir: "腾讯新闻"
  - domain: "163.com"
    dir: "网易新闻"
  - domain: "thepaper.cn"
    dir: "澎湃新闻"
  - domain: "sohu.com"
    dir: "搜狐新闻"
  - domain: "ifeng.com"
    dir: "凤凰网"
  - domain: "chinanews.com"
    dir: "中国新闻网"
  # Intended as the catch-all bucket for URLs matching no entry above.
  # NOTE(review): the sorter script also hard-codes "未分类" as its fallback
  # directory - keep this `dir` value in sync with it.
  - domain: "*"
    dir: "未分类"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.