Skip to content

Commit

Permalink
fix: Part of the docx document is parsed incorrectly
Browse files: browse the repository at this point in the history
  • Loading branch information
shaohuzhang1 committed Jan 6, 2025
1 parent 00591a5 commit 5171da9
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions apps/common/handle/impl/doc_split_handle.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -113,8 +113,10 @@ class DocSplitHandle(BaseSplitHandle):
def paragraph_to_md(paragraph: Paragraph, doc: Document, images_list, get_image_id):
try:
psn = paragraph.style.name
if psn.startswith('Heading'):
title = "".join(["#" for i in range(int(psn.replace("Heading ", '')))]) + " " + paragraph.text
if psn.startswith('Heading') or psn.startswith('TOC 标题') or psn.startswith('标题'):
title = "".join(["#" for i in range(
int(psn.replace("Heading ", '').replace('TOC 标题', '').replace('标题',
'')))]) + " " + paragraph.text
images = reduce(lambda x, y: [*x, *y],
[get_paragraph_element_images(e, doc, images_list, get_image_id) for e in
paragraph._element],
Expand Down Expand Up @@ -202,4 +204,4 @@ def get_content(self, file, save_image):
return content
except BaseException as e:
traceback.print_exception(e)
return f'{e}'
return f'{e}'

0 comments on commit 5171da9

Please sign in to comment.