From 39ba98c84d6781cf98c2bf831afab2ea73d6acdb Mon Sep 17 00:00:00 2001
From: Andrew White <white.d.andrew@gmail.com>
Date: Mon, 15 Jan 2024 14:20:32 -0800
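Subject: [PATCH] Fix missing chunks for very short texts

Emit the trailing chunk whenever no chunk has been produced yet, so a
document shorter than the overlap still yields one Text instead of an
empty list. In parse_txt, test split_size rather than len(split), which
is the length of the list and not of its contents. In parse_code_txt,
keep splitting with a while loop so the buffer is chunked repeatedly
until it no longer exceeds chunk_chars.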

---
 paperqa/readers.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/paperqa/readers.py b/paperqa/readers.py
index 8a74c328..816e75fc 100644
--- a/paperqa/readers.py
+++ b/paperqa/readers.py
@@ -31,7 +31,7 @@ def parse_pdf_fitz(path: Path, doc: Doc, chunk_chars: int, overlap: int) -> List
             )
             split = split[chunk_chars - overlap :]
             pages = [str(i + 1)]
-    if len(split) > overlap:
+    if len(split) > overlap or len(texts) == 0:
         pg = "-".join([pages[0], pages[-1]])
         texts.append(
             Text(text=split[:chunk_chars], name=f"{doc.docname} pages {pg}", doc=doc)
@@ -64,7 +64,7 @@ def parse_pdf(path: Path, doc: Doc, chunk_chars: int, overlap: int) -> List[Text
             )
             split = split[chunk_chars - overlap :]
             pages = [str(i + 1)]
-    if len(split) > overlap:
+    if len(split) > overlap or len(texts) == 0:
         pg = "-".join([pages[0], pages[-1]])
         texts.append(
             Text(text=split[:chunk_chars], name=f"{doc.docname} pages {pg}", doc=doc)
@@ -112,7 +112,7 @@ def parse_txt(
             )
             split = [split_flat[chunk_chars - overlap :].encode("utf-8")]
             split_size = len(split[0])
-    if len(split) > overlap:
+    if split_size > overlap or len(texts) == 0:
         split_flat = b"".join(split).decode()
         texts.append(
             Text(
@@ -134,7 +134,7 @@ def parse_code_txt(path: Path, doc: Doc, chunk_chars: int, overlap: int) -> List
     with open(path) as f:
         for i, line in enumerate(f):
             split += line
-            if len(split) > chunk_chars:
+            while len(split) > chunk_chars:
                 texts.append(
                     Text(
                         text=split[:chunk_chars],
@@ -144,7 +144,7 @@ def parse_code_txt(path: Path, doc: Doc, chunk_chars: int, overlap: int) -> List
                 )
                 split = split[chunk_chars - overlap :]
                 last_line = i
-    if len(split) > overlap:
+    if len(split) > overlap or len(texts) == 0:
         texts.append(
             Text(
                 text=split[:chunk_chars],