-
Notifications
You must be signed in to change notification settings - Fork 4
/
TextToUtf-8.py
42 lines (38 loc) · 1.37 KB
/
TextToUtf-8.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import os
import chardet
import codecs
'''
Batch-convert the .txt files in a folder to UTF-8.
'''
def dirlist(path, allfile):
    """Recursively collect the ``.txt`` files found under *path*.

    Args:
        path: Directory to scan; sub-directories are scanned recursively.
        allfile: List that matching file paths are appended to. It is
            mutated in place.

    Returns:
        The same ``allfile`` list. Each appended path has its backslashes
        replaced by forward slashes.
    """
    # os.listdir returns entries in arbitrary order; sort so that the
    # traversal (and therefore the result order) is deterministic.
    for filename in sorted(os.listdir(path)):
        filepath = os.path.join(path, filename)
        if os.path.isdir(filepath):
            dirlist(filepath, allfile)
        # Match the ".txt" extension (with the dot, case-insensitively) so
        # that names such as "notxt" are skipped and "NAME.TXT" is kept.
        elif filename.lower().endswith(".txt"):
            allfile.append(filepath.replace("\\", "/"))
    return allfile
def EncodesAll(FilesDir):
    """Convert every non-UTF-8 ``.txt`` file under *FilesDir* to UTF-8.

    Each file returned by ``dirlist`` has its encoding guessed by
    ``chardet``. Files not reported as UTF-8 are decoded with the guessed
    encoding and written, re-encoded as UTF-8, to a directory named after
    the file's own directory plus a ``-utf-8`` suffix. Source files are
    never modified; files already reported as UTF-8 are not copied.

    Progress and failures are reported with ``print``; a file that cannot
    be decoded or written is skipped rather than aborting the whole run.

    Args:
        FilesDir: Root directory that is searched recursively.
    """
    for src in dirlist(FilesDir, []):
        src_dir, name = os.path.split(src)
        dst_dir = src_dir + "-utf-8"
        dst = dst_dir + "/" + name
        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)

        # Read the bytes once; they are used both for detection and for
        # decoding, and the handle is closed even if detection raises.
        with open(src, "rb") as f:
            raw = f.read()
        coding = chardet.detect(raw)["encoding"]
        print(coding)

        # chardet reports None when it cannot guess (e.g. an empty file).
        if coding is None:
            print(src + " encoding not detected, skipped")
            continue
        if coding.lower() == "utf-8":
            continue

        # Decode before touching the destination so a failure never leaves
        # an empty or partial output file behind. LookupError covers an
        # encoding name that Python has no codec for.
        try:
            text = raw.decode(coding)
        except (UnicodeDecodeError, LookupError):
            print(src + " " + coding + " read error")
            continue

        # Write bytes so line endings are preserved exactly as in the source.
        try:
            with open(dst, "wb") as fp:
                fp.write(text.encode("utf-8"))
        except OSError:
            print(src + " " + coding + " write error")
            continue

        # The console may be unable to render the path; keep going if so.
        try:
            print(src + " " + coding + " to utf-8 converted!")
        except UnicodeEncodeError:
            print("print error")
if __name__ == "__main__":
    # Script entry point: convert the text files under this fixed folder.
    scripts_root = "M:/oretachinitsubasahanai/scripts-utf-8"
    EncodesAll(scripts_root)