-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathunknown_files_with_crc32.py
executable file
·94 lines (86 loc) · 3.19 KB
/
unknown_files_with_crc32.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/python3
import anime
import configparser
import argparse
import re
from typing import Dict, List
def get_config(config_file="weltschmerz.cfg", argv=None):
    """Parse command-line options, taking defaults from a config file.

    Args:
        config_file: Path of the INI file to read. Its ``[client]`` section
            must provide ``database`` and ``log``; they become the defaults
            of ``--database`` and ``--log-file``.
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what callers got before this
            parameter existed.

    Returns:
        The ``argparse.Namespace`` with the attributes ``database``,
        ``log_file``, ``folder``, ``single_crc32_match_only`` and
        ``print_path``.

    Raises:
        OSError: If ``config_file`` cannot be opened.
        configparser.Error: If the file is malformed or lacks the
            ``[client]`` section or one of its required options.
    """
    config = configparser.ConfigParser()
    # Use a context manager so the file handle is closed right after parsing
    # instead of being left for the garbage collector.
    with open(config_file) as handle:
        config.read_file(handle)

    parser = argparse.ArgumentParser(
        description="Return unknown files from the database."
    )
    parser.add_argument(
        "--database",
        help="database to use",
        default=config.get("client", "database"),
    )
    parser.add_argument(
        "--log-file",
        dest="log_file",
        help="logfile to use",
        default=config.get("client", "log"),
    )
    parser.add_argument("--folder", help="folder to query", default=None)
    parser.add_argument(
        "--single-crc32-match-only",
        help="print only files with matching crc32 in filename",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "--print-path",
        help="print path of files with matching crc32 in filename (default is ed2k link)",
        default=False,
        action="store_true",
    )
    return parser.parse_args(argv)
if __name__ == "__main__":
    # Script entry point: report local files that have no counterpart in the
    # File table (matched on the ed2k hash).
    args = get_config()
    dbs = anime.DatabaseSession(args.database, False)

    by_id_directory = re.compile(r".*/by-id/\d\d/\d\d/\d\d")

    def crc_in_name(entry):
        """Tell whether the entry's crc32 is in its filename and it is not in a by-id directory."""
        if entry.hash_crc not in entry.filename.casefold():
            return False
        return by_id_directory.match(entry.directory) is None

    # Outer join plus a NULL test on the joined side keeps only LocalFile rows
    # that have no File row with the same ed2k hash.
    # NOTE(review): `== None` is deliberate; this looks like a SQLAlchemy
    # column expression (rendered as IS NULL), so it must not become `is None`.
    criteria = [anime.File.hash_ed2k == None]  # noqa: E711
    if args.folder:
        # Restrict to one directory; trailing slashes are stripped before the
        # comparison with the stored directory value.
        criteria.append(anime.LocalFile.directory == args.folder.rstrip("/"))

    unknown_files: List[anime.LocalFile] = (
        dbs.session.query(anime.LocalFile)
        .outerjoin(anime.File, anime.LocalFile.hash_ed2k == anime.File.hash_ed2k)
        .filter(*criteria)
        .order_by(anime.LocalFile.directory, anime.LocalFile.filename)
        .all()
    )

    if args.folder:
        # Folder mode: one output line per unknown file.
        if args.single_crc32_match_only:
            unknown_files = [entry for entry in unknown_files if crc_in_name(entry)]
        for entry in unknown_files:
            # NOTE(review): --print-path prints only the filename attribute,
            # not a full path; confirm this is intended.
            print(entry.filename if args.print_path else entry.ed2k_link)
    else:
        # Summary mode: count crc32-tagged unknown files per directory.
        per_directory: Dict[str, int] = {}
        for entry in unknown_files:
            if crc_in_name(entry):
                per_directory[entry.directory] = (
                    per_directory.get(entry.directory, 0) + 1
                )

        # Largest counts first; the list is sorted descending, so the first
        # directory below the threshold ends the report.
        ranked = sorted(per_directory.items(), key=lambda pair: pair[1], reverse=True)
        for folder, filecount in ranked:
            if filecount < 100:
                break
            print(f"{folder}: {filecount}")