Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend the ability to process image only from disk to from both disk and memory #260

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
14 changes: 10 additions & 4 deletions ocrolib/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,15 +152,18 @@ def isintarray(a):
def isintegerarray(a):
    """Return True when `a.dtype` is a 32- or 64-bit integer type, signed or unsigned."""
    accepted = [dtype(name) for name in ('int32', 'int64', 'uint32', 'uint64')]
    return a.dtype in accepted

@checks(str,pageno=int,_=GRAYSCALE)
@checks({str,object},pageno=int,_=GRAYSCALE)
def read_image_gray(fname,pageno=0):
"""Read an image and returns it as a floating point array.
The optional page number allows images from files containing multiple
images to be addressed. Byte and short arrays are rescaled to
the range 0...1 (unsigned) or -1...1 (signed)."""
if type(fname)==tuple: fname,pageno = fname
assert pageno==0
pil = PIL.Image.open(fname)
if issubclass(type(fname), PIL.Image.Image):
pil = fname
else:
pil = PIL.Image.open(fname)
a = pil2array(pil)
if a.dtype==dtype('uint8'):
a = a/255.0
Expand Down Expand Up @@ -191,13 +194,16 @@ def write_image_gray(fname,image,normalize=0,verbose=0):
im = array2pil(image)
im.save(fname)

# NOTE(review): `object` in the accepted set presumably lets any argument type
# through the check -- confirm against the implementation of `checks`.
@checks({str,object},_=ABINARY2)
def read_image_binary(fname,dtype='i',pageno=0):
    """Read an image and return it as a binary image of the given dtype.

    `fname` is either something `PIL.Image.open` can load (such as a file
    name) or an already loaded `PIL.Image.Image`; a `(fname, pageno)` tuple
    is unpacked first.  Only page 0 is supported.  Arrays with three
    dimensions are reduced by taking the maximum over the last axis, and a
    pixel is set when its value is greater than the midpoint between the
    minimum and maximum value of the image.
    """
    if isinstance(fname,tuple): fname,pageno = fname
    assert pageno==0
    # An in-memory image is used as is; it must not be passed to
    # PIL.Image.open, which expects something to load from.
    if isinstance(fname,PIL.Image.Image):
        pil = fname
    else:
        pil = PIL.Image.open(fname)
    a = pil2array(pil)
    if a.ndim==3: a = amax(a,axis=2)
    return array(a>0.5*(amin(a)+amax(a)),dtype)
Expand Down
58 changes: 5 additions & 53 deletions tests/run-unit
Original file line number Diff line number Diff line change
@@ -1,54 +1,6 @@
#!/usr/bin/env python
#!/bin/bash

import sys

from ocrolib import edist, utils

# Test the levenshtein function and returns 0 if the computed value
# equals the one it should be, otherwise returns 1 for failed tests.
def testLevenshtein(a, b, should):
    """Compare edist.levenshtein(a, b) with `should` and report the outcome.

    Prints an "ok - ..." line when the values are equal and a
    "not ok - ..." line otherwise.  Returns 0 on success and 1 on failure
    so the caller can add the result to a failure count.
    """
    passed = edist.levenshtein(a, b) == should
    status = 'ok' if passed else 'not ok'
    # The call form of print matches the other prints in this script and
    # gives the same output whether print is a statement or a function.
    print('%s - levenshtein(%s, %s) == %s' % (status, a, b, should))
    return 0 if passed else 1


def testXLevenshtein(a, b, context, should):
    """Compare edist.xlevenshtein(a, b, context) with `should` and report the outcome.

    Prints an "ok - ..." line when the values are equal and a
    "not ok - ..." line otherwise.  Returns 0 on success and 1 on failure
    so the caller can add the result to a failure count.
    """
    passed = edist.xlevenshtein(a, b, context) == should
    status = 'ok' if passed else 'not ok'
    # The call form of print matches the other prints in this script and
    # gives the same output whether print is a statement or a function.
    print('%s - xlevenshtein(%s, %s, %s) == %s' % (status, a, b, context, should))
    return 0 if passed else 1


# Running total of failed checks; it becomes the process exit status below.
failed_tests = 0

print('# 1 Test function "levenshtein" in edist.py')
# Each entry is (a, b, expected distance).
levenshtein_cases = [
    ('a', 'a', 0),
    ('', '', 0),
    ('a', '', 1),
    ('', 'a', 1),
    ('aa', 'aaaaaa', 4),
    ('aba', 'bab', 2),
]
for first, second, expected in levenshtein_cases:
    failed_tests += testLevenshtein(first, second, expected)

print('\n# 2 Test function "xlevenshtein" in edist.py')
# Each entry is (a, b, context, expected result of xlevenshtein).
xlevenshtein_cases = [
    ('exccpt', 'except', 1, (1.0, [('ccp', 'cep')])),
    ('exccpt', 'except', 2, (1.0, [('xccpt', 'xcept')])),
    ('exccpt', 'except', 3, (1.0, [('exccpt ', 'except ')])),
    ('exccpt', 'except', 4, (1.0, [(' exccpt ', ' except ')])),
    ('', 'test', 1, (4.0, [])),
    ('aaaaaaaaaaa', 'a', 1, (10.0, [('aaaaaaaaaaa ', 'a__________ ')])),
    ('123 111 456', '132 111 444', 1, (4.0, [('123_ ', '1_32 '), ('456 ', '444 ')])),
]
for first, second, context, expected in xlevenshtein_cases:
    failed_tests += testXLevenshtein(first, second, context, should=expected)

print('\n# 3 utils.sumouter / utils.sumprod')
# These two calls only have to complete without raising; their results are
# not inspected and the "ok" lines are printed unconditionally afterwards.
from pylab import randn
utils.sumouter(randn(10,3),randn(10,4),out=randn(3,4))
print('ok - dimensions of sumouter')
utils.sumprod(randn(11,7),randn(11,7),out=randn(7))
print('ok - dimensions of sumprod')

sys.exit(failed_tests)
# Put the repository root (the parent of this script's directory) on
# PYTHONPATH so the test scripts import the local ocrolib.
BASE=$(dirname "$0")/..
export PYTHONPATH="$BASE":"$PYTHONPATH"

# Run every test script even if an earlier one fails, but exit non-zero
# when any of them failed; otherwise only the last script's status counts.
status=0
python2 "$BASE"/tests/test_read_image.py || status=1
python2 "$BASE"/tests/test-levenshtein.py || status=1
exit $status
54 changes: 54 additions & 0 deletions tests/test-levenshtein.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env python

import sys

from ocrolib import edist, utils

# Test the levenshtein function and returns 0 if the computed value
# equals the one it should be, otherwise returns 1 for failed tests.
def testLevenshtein(a, b, should):
    """Compare edist.levenshtein(a, b) with `should` and report the outcome.

    Prints an "ok - ..." line when the values are equal and a
    "not ok - ..." line otherwise.  Returns 0 on success and 1 on failure
    so the caller can add the result to a failure count.
    """
    passed = edist.levenshtein(a, b) == should
    status = 'ok' if passed else 'not ok'
    # The call form of print matches the other prints in this script and
    # gives the same output whether print is a statement or a function.
    print('%s - levenshtein(%s, %s) == %s' % (status, a, b, should))
    return 0 if passed else 1


def testXLevenshtein(a, b, context, should):
    """Compare edist.xlevenshtein(a, b, context) with `should` and report the outcome.

    Prints an "ok - ..." line when the values are equal and a
    "not ok - ..." line otherwise.  Returns 0 on success and 1 on failure
    so the caller can add the result to a failure count.
    """
    passed = edist.xlevenshtein(a, b, context) == should
    status = 'ok' if passed else 'not ok'
    # The call form of print matches the other prints in this script and
    # gives the same output whether print is a statement or a function.
    print('%s - xlevenshtein(%s, %s, %s) == %s' % (status, a, b, context, should))
    return 0 if passed else 1


# Running total of failed checks; it becomes the process exit status below.
failed_tests = 0

print('# 1 Test function "levenshtein" in edist.py')
# Each entry is (a, b, expected distance).
levenshtein_cases = [
    ('a', 'a', 0),
    ('', '', 0),
    ('a', '', 1),
    ('', 'a', 1),
    ('aa', 'aaaaaa', 4),
    ('aba', 'bab', 2),
]
for first, second, expected in levenshtein_cases:
    failed_tests += testLevenshtein(first, second, expected)

print('\n# 2 Test function "xlevenshtein" in edist.py')
# Each entry is (a, b, context, expected result of xlevenshtein).
xlevenshtein_cases = [
    ('exccpt', 'except', 1, (1.0, [('ccp', 'cep')])),
    ('exccpt', 'except', 2, (1.0, [('xccpt', 'xcept')])),
    ('exccpt', 'except', 3, (1.0, [('exccpt ', 'except ')])),
    ('exccpt', 'except', 4, (1.0, [(' exccpt ', ' except ')])),
    ('', 'test', 1, (4.0, [])),
    ('aaaaaaaaaaa', 'a', 1, (10.0, [('aaaaaaaaaaa ', 'a__________ ')])),
    ('123 111 456', '132 111 444', 1, (4.0, [('123_ ', '1_32 '), ('456 ', '444 ')])),
]
for first, second, context, expected in xlevenshtein_cases:
    failed_tests += testXLevenshtein(first, second, context, should=expected)

print('\n# 3 utils.sumouter / utils.sumprod')
# These two calls only have to complete without raising; their results are
# not inspected and the "ok" lines are printed unconditionally afterwards.
from pylab import randn
utils.sumouter(randn(10,3),randn(10,4),out=randn(3,4))
print('ok - dimensions of sumouter')
utils.sumprod(randn(11,7),randn(11,7),out=randn(7))
print('ok - dimensions of sumprod')

sys.exit(failed_tests)
35 changes: 35 additions & 0 deletions tests/test_read_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import os
import unittest

import numpy
import PIL
import PIL.Image

import ocrolib

# Locate the fixtures next to this test file so the tests also work when
# they are started from a directory other than the repository root.
_TESTS_DIR = os.path.dirname(os.path.abspath(__file__))

### Original image on disk and in memory, used to test read_image_gray()
img_disk = os.path.join(_TESTS_DIR, "testpage.png")
img_mem = PIL.Image.open(img_disk)

### Binarized image on disk and in memory, used to test read_image_binary()
img_bin_disk = os.path.join(_TESTS_DIR, "010030.bin.png")
img_bin_mem = PIL.Image.open(img_bin_disk)

class OcrolibTestCase(unittest.TestCase):
    """
    Checks that read_image_gray() and read_image_binary() from common.py in
    ocrolib give the same result for an image read from disk and for the
    same image passed in from memory.
    """

    def test_read_image_gray(self):
        """
        read_image_gray() must return the same result when given an image
        file name (from disk) and an image object (PIL.Image, from memory).
        The returned object is a numpy ndarray, so the two results are
        compared with numpy's built-in array_equal.
        """
        from_disk = ocrolib.read_image_gray(img_disk)
        from_memory = ocrolib.read_image_gray(img_mem)
        self.assertTrue(numpy.array_equal(from_disk, from_memory))

    def test_read_image_binary(self):
        """
        read_image_binary() must return the same result when given an image
        file name (from disk) and an image object (PIL.Image, from memory).
        """
        from_disk = ocrolib.read_image_binary(img_bin_disk)
        from_memory = ocrolib.read_image_binary(img_bin_mem)
        self.assertTrue(numpy.array_equal(from_disk, from_memory))


# Run the unittest test cases when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()