Skip to content

Commit

Permalink
[REM] obsolete function get_text_objects, Update imports
Browse files Browse the repository at this point in the history
Update the pypdf_table_extraction imports
  • Loading branch information
bosd committed Nov 2, 2024
1 parent e9b891a commit 4fe629e
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 40 deletions.
39 changes: 0 additions & 39 deletions camelot/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1425,45 +1425,6 @@ def get_page_layout(
return layout, dim


def get_text_objects(layout, ltype="char", t=None):
    """Recursively parse a PDF layout to collect PDFMiner layout objects.

    Parameters
    ----------
    layout : object
        PDFMiner LTPage object. Nested children are visited recursively
        through their ``_objs`` attribute.
    ltype : string
        Specify 'char', 'image', 'horizontal_text' or 'vertical_text' to get
        LTChar, LTImage, LTTextLineHorizontal and LTTextLineVertical objects
        respectively.
    t : list, optional
        List that matching objects are appended to in place. A new list is
        created when omitted.

    Returns
    -------
    t : list
        List of PDFMiner objects of the requested type.

    Raises
    ------
    ValueError
        If ``ltype`` is not one of the supported values.
    """
    if ltype == "char":
        LTObject = LTChar  # noqa
    elif ltype == "image":
        LTObject = LTImage  # noqa
    elif ltype == "horizontal_text":
        LTObject = LTTextLineHorizontal  # noqa
    elif ltype == "vertical_text":
        LTObject = LTTextLineVertical  # noqa
    else:
        # Fail loudly instead of hitting an unbound local further down.
        raise ValueError(
            f"Unknown ltype {ltype!r}; expected one of 'char', 'image', "
            "'horizontal_text', 'vertical_text'."
        )

    if t is None:
        t = []

    try:
        children = layout._objs
    except AttributeError:
        # Objects without `_objs` have nothing to descend into.
        return t

    for obj in children:
        if isinstance(obj, LTObject):
            t.append(obj)
        else:
            # Share the accumulator so no temporary lists are built.
            get_text_objects(obj, ltype=ltype, t=t)
    return t


def get_char_and_text_objects(
layout: LTContainer[LTItem],
) -> tuple[list[LTChar], list[LTTextLineHorizontal], list[LTTextLineVertical]]:
Expand Down
4 changes: 3 additions & 1 deletion pypdf_table_extraction/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
from camelot.utils import find_rows_boundaries # noqa F401
from camelot.utils import flag_font_size # noqa F401
from camelot.utils import flavor_to_kwargs # noqa F401
from camelot.utils import get_char_and_text_objects # noqa F401
from camelot.utils import get_char_objects # noqa F401
from camelot.utils import get_image_and_text_objects # noqa F401
from camelot.utils import get_index_closest_point # noqa F401
from camelot.utils import get_page_layout # noqa F401
from camelot.utils import get_rotation # noqa F401
from camelot.utils import get_table_index # noqa F401
from camelot.utils import get_text_objects # noqa F401
from camelot.utils import get_textline_coords # noqa F401
from camelot.utils import is_url # noqa F401
from camelot.utils import lattice_kwargs # noqa F401
Expand Down

0 comments on commit 4fe629e

Please sign in to comment.