-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtesser.py
26 lines (21 loc) · 839 Bytes
/
tesser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
"""
Run OCR on PNG images with pytesseract and save the recognized text.
"""
import os

import pytesseract
from PIL import Image, ImageEnhance, ImageFilter

from utils import get_files_list, write_file, set_encoding, local_config
# Project helper from utils; presumably configures the default text encoding
# before any OCR output is printed or written -- confirm against utils.
set_encoding()

# Input and output locations are read from the local configuration.
SOURCE_PATH = local_config('original_files_path')
OUTPUT_PATH = local_config('processed_files_path')

# NOTE(review): entries are assumed to be file names relative to SOURCE_PATH,
# since each one is joined onto SOURCE_PATH below -- confirm against utils.
files = get_files_list(SOURCE_PATH)

# Disabled preprocessing experiment (median filter, contrast boost, 1-bit
# conversion), kept for reference:
# im = Image.open("./source/img4.png")
# im = im.filter(ImageFilter.MedianFilter())
# enhancer = ImageEnhance.Contrast(im)
# im = enhancer.enhance(2)
# im = im.convert('1')
# im.save('./output/img-out4.jpg')

for image_file_name in files:
    # splitext only looks at the final extension, so names without a dot no
    # longer raise IndexError and names with several dots ("scan.v2.png")
    # are recognised and keep their full stem in the output file name.
    stem, extension = os.path.splitext(image_file_name)
    if extension.lower() != '.png':
        continue

    # The context manager closes the underlying file handle once OCR is
    # done, instead of leaking one open file per processed image.
    with Image.open(os.path.join(SOURCE_PATH, image_file_name)) as image:
        text = pytesseract.image_to_string(image)

    # Parenthesised single-argument form prints identically on Python 2 and
    # is also valid syntax on Python 3.
    print("%s: \n>>%s" % (image_file_name, text))

    # os.path.join works whether or not OUTPUT_PATH ends with a separator.
    write_file(
        os.path.join(OUTPUT_PATH, '[PYTESSER]_' + stem + '.txt'),
        text,
    )