Skip to content

Commit

Permalink
fix: Ignore ncx and use lxml-xml option to suppress warning
Browse files Browse the repository at this point in the history
  • Loading branch information
p0n1 committed Jun 28, 2024
1 parent c11d9a3 commit 74d1644
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions audiobook_generator/book_parsers/epub_book_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class EpubBookParser(BaseBookParser):
def __init__(self, config: GeneralConfig):
super().__init__(config)
logger.setLevel(config.log)
- self.book = epub.read_epub(self.config.input_file)
+ self.book = epub.read_epub(self.config.input_file, {"ignore_ncx": True})

def __str__(self) -> str:
return super().__str__()
Expand Down Expand Up @@ -44,7 +44,7 @@ def get_chapters(self, break_string) -> List[Tuple[str, str]]:
chapters = []
for item in self.book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
content = item.get_content()
- soup = BeautifulSoup(content, "lxml")
+ soup = BeautifulSoup(content, "lxml-xml")
raw = soup.get_text(strip=False)
logger.debug(f"Raw text: <{raw[:]}>")

Expand Down

0 comments on commit 74d1644

Please sign in to comment.