Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
yunwei37 committed Jan 21, 2025
1 parent 79cec78 commit 3b93970
Show file tree
Hide file tree
Showing 7 changed files with 99 additions and 91 deletions.
2 changes: 1 addition & 1 deletion .github/downloader
76 changes: 76 additions & 0 deletions .github/organize_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import os
import re
import yaml
import shutil
from urllib.parse import urlparse

def load_config():
    """Read the URL-to-directory mapping used to sort downloaded articles.

    Returns:
        The object produced by ``yaml.safe_load`` for
        ``.github/url_dir_map_config.yml`` (path is relative to the current
        working directory).
    """
    with open('.github/url_dir_map_config.yml', 'r', encoding='utf-8') as config_file:
        parsed = yaml.safe_load(config_file)
    return parsed

def extract_original_url(content):
    """Return the URL recorded in a ``tcd_original_link`` HTML comment.

    Args:
        content: Text of a markdown document.

    Returns:
        The text between ``tcd_original_link`` and the closing ``-->`` of the
        first such comment, with surrounding whitespace removed, or ``None``
        when the document contains no such comment.
    """
    found = re.search(r'<!--\s*tcd_original_link\s+(.*?)\s*-->', content)
    if found is None:
        return None
    return found.group(1)

def get_target_dir(url, config):
    """Choose the workspace sub-directory for an article from its source URL.

    Args:
        url: Original article URL, or a falsy value when none was found.
        config: Parsed mapping config; ``config['url_mappings']`` is a list of
            dicts, each with a ``domain`` and a ``dir`` key.

    Returns:
        The ``dir`` of the first mapping whose ``domain`` occurs in the URL's
        host name. When nothing matches, the URL is missing, or the URL
        cannot be parsed, the ``dir`` of the ``"*"`` mapping is returned, or
        ``"未分类"`` if the config has no ``"*"`` entry.
    """
    mappings = config['url_mappings']

    # "*" can never be a substring of a real host name, so it is handled
    # explicitly as the configured fallback rather than as a normal mapping.
    fallback = next(
        (mapping['dir'] for mapping in mappings if mapping['domain'] == '*'),
        "未分类",
    )

    if not url:
        return fallback

    try:
        # ``hostname`` is lower-cased and excludes userinfo and port, so
        # "HTTP://NEWS.SINA.COM/" matches and "http://sina.com@evil.org/"
        # does not.
        host = urlparse(url).hostname or ''
    except ValueError:
        # Malformed authority (e.g. an unbalanced "[" in the host part):
        # treat the article as uncategorised instead of aborting the run.
        return fallback

    for mapping in mappings:
        domain = mapping['domain'].lower()
        # Substring matching is kept from the original so that "sina.com"
        # still covers hosts such as "bj.sina.com.cn".
        if domain != '*' and domain in host:
            return mapping['dir']
    return fallback

def get_unique_filename(target_path, filename):
    """Return a path inside *target_path* that does not exist yet.

    Args:
        target_path: Directory the file is going to be placed in.
        filename: Preferred file name.

    Returns:
        ``target_path/filename`` if that path is free; otherwise the first
        free path of the form ``target_path/<base>_<n><ext>`` with ``n``
        counting up from 1.
    """
    candidate = os.path.join(target_path, filename)
    if not os.path.exists(candidate):
        return candidate

    stem, suffix = os.path.splitext(filename)
    index = 1
    while True:
        candidate = os.path.join(target_path, f"{stem}_{index}{suffix}")
        if not os.path.exists(candidate):
            return candidate
        index += 1

def _inside_any_target_dir(rel_dir, target_dirs):
    """Return True if *rel_dir* contains one of *target_dirs* as whole path component(s)."""
    haystack = os.sep + os.path.normpath(rel_dir) + os.sep
    return any(
        (os.sep + os.path.normpath(target) + os.sep) in haystack
        for target in target_dirs
    )


def main():
    """Sort markdown files under ``workspace`` into per-source directories.

    Each ``.md`` file outside the configured target directories is read, its
    ``tcd_original_link`` URL is mapped to a directory name, and the file is
    moved to ``workspace/<dir>`` under a name that does not clash with an
    existing file. Files that cannot be read as UTF-8 are reported and left
    where they are.
    """
    config = load_config()
    workspace_dir = 'workspace'
    target_dirs = [mapping['dir'] for mapping in config['url_mappings']]

    # Create target directories if they don't exist
    for target in target_dirs:
        os.makedirs(os.path.join(workspace_dir, target), exist_ok=True)

    # Process markdown files
    for root, dirs, files in os.walk(workspace_dir):
        # Skip directories that already are (or live inside) a target
        # directory. The comparison is made on whole path components of the
        # containing directory, so a file or folder whose name merely
        # contains a target name is still processed.
        rel_dir = os.path.relpath(root, workspace_dir)
        if _inside_any_target_dir(rel_dir, target_dirs):
            dirs[:] = []  # everything below is inside the target as well
            continue

        for file in files:
            if not file.endswith('.md'):
                continue

            file_path = os.path.join(root, file)

            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except (OSError, UnicodeDecodeError) as exc:
                # One unreadable file should not abort the whole run.
                print(f"Skipped {file_path}: {exc}")
                continue

            url = extract_original_url(content)
            target_dir = get_target_dir(url, config)
            target_dir_path = os.path.join(workspace_dir, target_dir)

            # Create target directory if it doesn't exist
            os.makedirs(target_dir_path, exist_ok=True)

            # Get unique filename in target directory
            new_file_path = get_unique_filename(target_dir_path, file)

            # Move file
            shutil.move(file_path, new_file_path)
            print(f"Moved {file} to {target_dir}")

# Run the organizer only when this file is executed as a script.
if __name__ == "__main__":
    main()
2 changes: 1 addition & 1 deletion .github/record/2024-12-24/downloads/page.yml
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ www_特朗普称上任第一天就要阻止_“变性妄想”.html:
title: 特朗普称上任第一天就要阻止 “变性妄想”
snippet: 这位将在1月20日就职的美国总统称:“我将签署行政命令,结束切割儿童生殖器的做法,将跨性别者排除在军队之外,并将他们排除在小学、初中和高中之外”。
visited_date: '2025-01-12 18:20:42'
www_12岁时被“快速”接受性别转换_加州医生遭起诉(图).html:
www_12岁时被“快速”接受性别转换_加州医生遭起诉_图_.html:
link: https://www.secretchina.com/news/gb/2024/12/10/1074181.html
md5: 3f3e6d895f11891233c0dd15848970dc
title: 12岁时被“快速”接受性别转换 加州医生遭起诉(图)
Expand Down
19 changes: 19 additions & 0 deletions .github/url_dir_map_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
url_mappings:
  - domain: "sina.com"
    dir: "新浪新闻"
  - domain: "sina.cn"
    dir: "新浪新闻"
  - domain: "news.qq.com"
    dir: "腾讯新闻"
  - domain: "163.com"
    dir: "网易新闻"
  - domain: "thepaper.cn"
    dir: "澎湃新闻"
  - domain: "sohu.com"
    dir: "搜狐新闻"
  - domain: "ifeng.com"
    dir: "凤凰网"
  - domain: "chinanews.com"
    dir: "中国新闻网"
  - domain: "*"
    dir: "未分类"
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@

版权 © 1996 - 2006 SINA Corporation, All Rights Reserved.

<!-- tcd_original_link http://bj.sina.com.cn/t/2006-07-26/104298745.shtml -->

## 摘要与附加信息

<!-- tcd_abstract -->
Expand Down
89 changes: 0 additions & 89 deletions 未分类/news_2015:当青春撞见新时代.md

This file was deleted.

0 comments on commit 3b93970

Please sign in to comment.