-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtxt_to_emtsv.py
73 lines (56 loc) · 1.94 KB
/
txt_to_emtsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#! /usr/bin/env python3
import requests
import os
from glob import glob
import argparse
from pathlib import Path
from shutil import move
def write(outp, dir):
    """Write every analysis result into its own file inside ``dir``.

    Args:
        outp: Iterable (may be a lazy generator) of ``(file_name, text)``
            pairs.
        dir: Output directory; created together with missing parents.

    Each pair is stored as ``out_<file_name>`` in ``dir``.
    """
    os.makedirs(dir, exist_ok=True)
    for item in outp:
        name, text = item[0], item[1]
        # Encode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding (the analysed text is not ASCII-only).
        with open(os.path.join(dir, 'out_' + name), 'w', encoding='utf-8') as f:
            f.write(text)
def get_args():
    """Parse the command line into an input file list and an output directory.

    Positional arguments are glob patterns; every match is converted to an
    absolute path. When no pattern is given, all ``*.txt`` files of the
    current working directory are used instead.

    Returns:
        dict: ``{'dir': <output directory>, 'files': <list of input paths>}``.
        ``dir`` is the absolute form of ``-d/--directory`` when supplied,
        otherwise the relative default ``'emtsv_outp'``.
    """
    parser = argparse.ArgumentParser()
    # nargs='*' (rather than '+') lets the script run without patterns, which
    # is the only way the "*.txt in the current directory" fallback below can
    # ever be reached.
    parser.add_argument('filepath', help='Path to file', nargs='*')
    parser.add_argument('-d', '--directory', help='Path of output file(s)', nargs='?')
    args = parser.parse_args()

    if args.filepath:
        files = [
            os.path.abspath(match)
            for pattern in args.filepath
            for match in glob(pattern)
        ]
    else:
        files = glob(os.path.join(os.getcwd(), "*.txt"))

    out_dir = os.path.abspath(args.directory) if args.directory else 'emtsv_outp'
    return {'dir': out_dir, 'files': files}
def parse_with_emtsv(fls, url='http://oliphant.nytud.hu:10001/tok/morph/pos/conv-morph/dep/chunk/ner', timeout=None):
    """Upload each file to an emtsv REST endpoint and yield the analyses.

    Args:
        fls: Iterable of input file paths.
        url: Endpoint to POST to. The default is the endpoint this script
            has always used; its path names the requested processing chain
            (tok/morph/pos/conv-morph/dep/chunk/ner).
        timeout: Forwarded to ``requests.post``. ``None`` (the default)
            waits indefinitely, as the original code did.

    Yields:
        tuple: ``(base file name, response text)`` for every file the
        service answered with a successful HTTP status. Files that trigger
        a ``UnicodeEncodeError`` or a non-success status are reported on
        stdout and skipped.
    """
    count = 0
    for fl in fls:
        try:
            # Keep the file open only for the duration of the upload; the
            # handle is closed before control is handed back to the consumer.
            with open(fl) as inp:
                response = requests.post(url, files={'file': inp}, timeout=timeout)
            if not response.ok:
                # Do not let an HTTP error page be saved as analysis output.
                print(fl, 'skipped: HTTP status', response.status_code)
                continue
            count += 1
            print(fl, str(count))
        except UnicodeEncodeError:
            print("hibás fájl")
            continue
        yield (os.path.basename(fl), response.text)
def main():
    """Script entry point: read CLI options, analyse the files, save results."""
    options = get_args()
    # parse_with_emtsv is a generator, so files are analysed one by one
    # while write() consumes the results.
    write(parse_with_emtsv(options['files']), options['dir'])


if __name__ == "__main__":
    main()