From 436f8e32fb45eb41e248855239d82d44b134310d Mon Sep 17 00:00:00 2001 From: Bhargava Shastry Date: Mon, 9 Oct 2023 11:40:06 +0200 Subject: [PATCH] Fix bug in pinecone utils and add test for docubot build knowledge base method. --- pinecone_utils/pinecone_utils.py | 1 + tests/test_docubot.py | 26 ++++++++++++++++++++++++++ tests/test_text_utils.py | 2 +- 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 tests/test_docubot.py diff --git a/pinecone_utils/pinecone_utils.py b/pinecone_utils/pinecone_utils.py index e4d8a85..7ce0aba 100644 --- a/pinecone_utils/pinecone_utils.py +++ b/pinecone_utils/pinecone_utils.py @@ -56,6 +56,7 @@ def create_vector_store(index_name: str, chunks: List[T]) -> Pinecone: import pinecone from langchain.vectorstores import Pinecone from langchain.embeddings.openai import OpenAIEmbeddings + from text_utils.text_utils import embedding_cost # Prompt user whether they want to continue, quit if they don't while True: diff --git a/tests/test_docubot.py b/tests/test_docubot.py new file mode 100644 index 0000000..a5c3c9d --- /dev/null +++ b/tests/test_docubot.py @@ -0,0 +1,26 @@ +import unittest +from docubot import build_kb +from text_utils.text_utils import tiktoken_len +from langchain.docstore.document import Document + + +class TestDocuBot(unittest.TestCase): + """ + A class for testing the functionality of the DocuBot application. + """ + + def test_build_kb(self): + chunks = build_kb("test_files") + # Test if the function returns a list + self.assertIsInstance(chunks, list) + + # Test if a chunk is a document + self.assertIsInstance(chunks[0], Document) + + # Test if the function returns a list of chunks with at most 512 tokens per chunk + for c in chunks: + self.assertLessEqual(tiktoken_len(c.page_content), 512) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_text_utils.py b/tests/test_text_utils.py index 446072e..952cedc 100644 --- a/tests/test_text_utils.py +++ b/tests/test_text_utils.py @@ -6,7 +6,7 @@ EMBEDDING_COST_PER_TOKEN = 0.0000001 -class TestDocumentLoaders(unittest.TestCase): +class TestTextUtils(unittest.TestCase): def test_tiktoken_len(self): assert tiktoken_len("Hello, world!") == 4 assert tiktoken_len("This is a sentence.") == 5