From 4fe629e37a43d7e710495427c4a6a20de7d93013 Mon Sep 17 00:00:00 2001 From: bosd Date: Sat, 2 Nov 2024 15:27:32 +0100 Subject: [PATCH] [REM] obsolete function get_text_objects, Update imports Update the pypdf_table_extraction imports --- camelot/utils.py | 39 --------------------------------- pypdf_table_extraction/utils.py | 4 +++- 2 files changed, 3 insertions(+), 40 deletions(-) diff --git a/camelot/utils.py b/camelot/utils.py index 5a5b3b7a..d49b588b 100644 --- a/camelot/utils.py +++ b/camelot/utils.py @@ -1425,45 +1425,6 @@ def get_page_layout( return layout, dim -def get_text_objects(layout, ltype="char", t=None): - """Recursively parses pdf layout to get a list of PDFMiner text objects. - - Parameters - ---------- - layout : object - PDFMiner LTPage object. - ltype : string - Specify 'char', 'lh', 'lv' to get LTChar, LTTextLineHorizontal, - and LTTextLineVertical objects respectively. - t : list - - Returns - ------- - t : list - List of PDFMiner text objects. - - """ - if ltype == "char": - LTObject = LTChar # noqa - elif ltype == "image": - LTObject = LTImage # noqa - elif ltype == "horizontal_text": - LTObject = LTTextLineHorizontal # noqa - elif ltype == "vertical_text": - LTObject = LTTextLineVertical # noqa - if t is None: - t = [] - try: - for obj in layout._objs: - if isinstance(obj, LTObject): # noqa - t.append(obj) - else: - t += get_text_objects(obj, ltype=ltype) - except AttributeError: - pass - return t - - def get_char_and_text_objects( layout: LTContainer[LTItem], ) -> tuple[list[LTChar], list[LTTextLineHorizontal], list[LTTextLineVertical]]: diff --git a/pypdf_table_extraction/utils.py b/pypdf_table_extraction/utils.py index d879b2c3..52ba032e 100644 --- a/pypdf_table_extraction/utils.py +++ b/pypdf_table_extraction/utils.py @@ -17,11 +17,13 @@ from camelot.utils import find_rows_boundaries # noqa F401 from camelot.utils import flag_font_size # noqa F401 from camelot.utils import flavor_to_kwargs # noqa F401 +from camelot.utils import 
get_char_and_text_objects # noqa F401 +from camelot.utils import get_char_objects # noqa F401 +from camelot.utils import get_image_and_text_objects # noqa F401 from camelot.utils import get_index_closest_point # noqa F401 from camelot.utils import get_page_layout # noqa F401 from camelot.utils import get_rotation # noqa F401 from camelot.utils import get_table_index # noqa F401 -from camelot.utils import get_text_objects # noqa F401 from camelot.utils import get_textline_coords # noqa F401 from camelot.utils import is_url # noqa F401 from camelot.utils import lattice_kwargs # noqa F401