diff --git a/docs/advanced/adding_data.mdx b/docs/advanced/adding_data.mdx index bb2c71e593..98b30f2b6b 100644 --- a/docs/advanced/adding_data.mdx +++ b/docs/advanced/adding_data.mdx @@ -4,13 +4,12 @@ title: '➕ Adding Data' ## Add Dataset -- This step assumes that you have already created an `app` instance by either using `App`, `OpenSourceApp` or `CustomApp`. We are calling our app instance as `naval_chat_bot` 🤖 +- This step assumes that you have already created an `App`. We are calling our app instance as `naval_chat_bot` 🤖 - Now use `.add` method to add any dataset. ```python -# naval_chat_bot = App() or -# naval_chat_bot = OpenSourceApp() +naval_chat_bot = App() # Embed Online Resources naval_chat_bot.add("https://www.youtube.com/watch?v=3qHkcs3kG44") diff --git a/docs/advanced/app_types.mdx b/docs/advanced/app_types.mdx index 7b094e5983..8ff600af5d 100644 --- a/docs/advanced/app_types.mdx +++ b/docs/advanced/app_types.mdx @@ -4,108 +4,120 @@ title: '📱 App types' ## App Types -We have three types of App. +Embedchain supports a variety of LLMs, embedding functions/models and vector databases. -### App +Our app gives you full control over which components you want to use; you can mix and match them to your heart's content. + + +Out of the box, if you just use `app = App()`, Embedchain uses what we believe to be the best configuration available. This might include paid/proprietary components. Currently, this is: + +* LLM: OpenAI (gpt-3.5-turbo-0613) +* Embedder: OpenAI (text-embedding-ada-002) +* Database: ChromaDB + + +### LLM + +#### Choosing an LLM + +The following LLM providers are supported by Embedchain: +- OPENAI +- ANTHROPIC +- VERTEX_AI +- GPT4ALL +- AZURE_OPENAI +- LLAMA2 + +You can choose one by importing it from `embedchain.llm`. E.g.: ```python from embedchain import App -app = App() +from embedchain.llm.llama2 import Llama2Llm + +app = App(llm=Llama2Llm()) ``` -- `App` uses OpenAI's model, so these are paid models. 
💸 You will be charged for embedding model usage and LLM usage. -- `App` uses OpenAI's embedding model to create embeddings for chunks and ChatGPT API as LLM to get answer given the relevant docs. Make sure that you have an OpenAI account and an API key. If you don't have an API key, you can create one by visiting [this link](https://platform.openai.com/account/api-keys). -- `App` is opinionated. It uses the best embedding model and LLM on the market. -- Once you have the API key, set it in an environment variable called `OPENAI_API_KEY` +#### Configuration -```python -import os -os.environ["OPENAI_API_KEY"] = "sk-xxxx" -``` +The LLMs can be configured by passing an LlmConfig object. -### Llama2App +The config options can be found [here](/advanced/query_configuration#llmconfig) ```python -import os +from embedchain import App +from embedchain.llm.llama2 import Llama2Llm +from embedchain.config import LlmConfig -from embedchain import Llama2App +app = App(llm=Llama2Llm(), llm_config=LlmConfig(number_documents=3, temperature=0)) +``` -os.environ['REPLICATE_API_TOKEN'] = "REPLICATE API TOKEN" +### Embedder -zuck_bot = Llama2App() +#### Choosing an Embedder -# Embed your data -zuck_bot.add("https://www.youtube.com/watch?v=Ff4fRgnuFgQ") -zuck_bot.add("https://en.wikipedia.org/wiki/Mark_Zuckerberg") +The following providers for embedding functions are supported by Embedchain: +- OPENAI +- HUGGING_FACE +- VERTEX_AI +- GPT4ALL +- AZURE_OPENAI -# Nice, your bot is ready now. Start asking questions to your bot. -zuck_bot.query("Who is Mark Zuckerberg?") -# Answer: Mark Zuckerberg is an American internet entrepreneur and business magnate. He is the co-founder and CEO of Facebook. Born in 1984, he dropped out of Harvard University to focus on his social media platform, which has since grown to become one of the largest and most influential technology companies in the world. +You can choose one by importing it from `embedchain.embedder`. 
E.g.: -# Enable web search for your bot -zuck_bot.online = True # enable internet access for the bot -zuck_bot.query("Who owns the new threads app and when it was founded?") -# Answer: Based on the context provided, the new Threads app is owned by Meta, the parent company of Facebook, Instagram, and WhatsApp. -``` +```python +from embedchain import App +from embedchain.embedder.vertexai import VertexAiEmbedder -- `Llama2App` uses Replicate's LLM model, so these are paid models. You can get the `REPLICATE_API_TOKEN` by registering on [their website](https://replicate.com/account). -- `Llama2App` uses OpenAI's embedding model to create embeddings for chunks. Make sure that you have an OpenAI account and an API key. If you don't have an API key, you can create one by visiting [this link](https://platform.openai.com/account/api-keys). +app = App(embedder=VertexAiEmbedder()) +``` +#### Configuration -### OpenSourceApp +The embedders can be configured by passing an EmbedderConfig object. ```python -from embedchain import OpenSourceApp -app = OpenSourceApp() +from embedchain import App +from embedchain.embedder.openai import OpenAIEmbedder +from embedchain.config import EmbedderConfig + +app = App(embedder=OpenAIEmbedder(), embedder_config=EmbedderConfig(model="text-embedding-ada-002")) ``` -- `OpenSourceApp` uses open source embedding and LLM model. It uses `all-MiniLM-L6-v2` from Sentence Transformers library as the embedding model and `gpt4all` as the LLM. -- Here there is no need to setup any api keys. You just need to install embedchain package and these will get automatically installed. 📦 -- Once you have imported and instantiated the app, every functionality from here onwards is the same for either type of app. 📚 -- `OpenSourceApp` is opinionated. It uses the best open source embedding model and LLM on the market. -- extra dependencies are required for this app type. Install them with `pip install --upgrade embedchain[opensource]`. 
+ +You can also pass an `LlmConfig` instance directly to the `query` or `chat` method. +This creates a temporary config for that request alone, so you could, for example, use a different model (from the same provider) or get more context documents for a specific query. + + +### Vector Database + +#### Choosing a Vector Database -### CustomApp +The following vector databases are supported by Embedchain: +- ChromaDB +- Elasticsearch + +You can choose one by importing it from `embedchain.vectordb`. E.g.: ```python -from embedchain import CustomApp -from embedchain.config import (CustomAppConfig, ElasticsearchDBConfig, - EmbedderConfig, LlmConfig) -from embedchain.embedder.vertexai import VertexAiEmbedder -from embedchain.llm.vertex_ai import VertexAiLlm -from embedchain.models import EmbeddingFunctions, Providers -from embedchain.vectordb.elasticsearch import Elasticsearch - -# short -app = CustomApp(llm=VertexAiLlm(), db=Elasticsearch(), embedder=VertexAiEmbedder()) -# with configs -app = CustomApp( - config=CustomAppConfig(log_level="INFO"), - llm=VertexAiLlm(config=LlmConfig(number_documents=5)), - db=Elasticsearch(config=ElasticsearchDBConfig(es_url="...")), - embedder=VertexAiEmbedder(config=EmbedderConfig()), -) +from embedchain import App +from embedchain.vectordb.elasticsearch import ElasticsearchDB + +app = App(db=ElasticsearchDB()) ``` -- `CustomApp` is not opinionated. -- Configuration required. It's for advanced users who want to mix and match different embedding models and LLMs. -- while it's doing that, it's still providing abstractions by allowing you to import Classes from `embedchain.llm`, `embedchain.vectordb`, and `embedchain.embedder`. -- paid and free/open source providers included. -- Once you have imported and instantiated the app, every functionality from here onwards is the same for either type of app. 
📚 -- Following providers are available for an LLM - - OPENAI - - ANTHPROPIC - - VERTEX_AI - - GPT4ALL - - AZURE_OPENAI - - LLAMA2 -- Following embedding functions are available for an embedding function - - OPENAI - - HUGGING_FACE - - VERTEX_AI - - GPT4ALL - - AZURE_OPENAI +#### Configuration + +The vector databases can be configured by passing a specific config object. + +These vary greatly between the different vector databases. + +```python +from embedchain import App +from embedchain.vectordb.elasticsearch import ElasticsearchDB +from embedchain.config import ElasticsearchDBConfig +app = App(db=ElasticsearchDB(), db_config=ElasticsearchDBConfig()) +``` ### PersonApp @@ -123,18 +135,79 @@ import os os.environ["OPENAI_API_KEY"] = "sk-xxxx" ``` -#### Compatibility with other apps +### Full Configuration Examples + +Embedchain previously offered fully configured classes, namely `App`, `OpenSourceApp`, `CustomApp` and `Llama2App`. +We deprecated these apps. The reason for this decision was that it was hard to switch to a different LLM, embedder or vector db, if you one day decided that that's what you want to do. +The new app allows drop-in replacements, such as changing `App(llm=OpenAILlm())` to `App(llm=Llama2Llm())`. + +To make the switch to our new, fully configurable app easier, we provide you with full examples for what the old classes would look like implemented as a new app. +You can swap these in, and if you decide you want to try a different model one day, you don't have to rewrite your whole bot. + +#### App +App without any configuration is still using the best options available, so you can keep using: + +```python +from embedchain import App + +app = App() +``` + +#### OpenSourceApp + +Use this snippet to run an open source app. 
+ +```python +from embedchain import App +from embedchain.llm.gpt4all import GPT4ALLLlm +from embedchain.embedder.gpt4all import GPT4AllEmbedder +from embedchain.vectordb.chroma import ChromaDB + +app = App(llm=GPT4ALLLlm(), embedder=GPT4AllEmbedder(), db=ChromaDB()) +``` + +#### Llama2App +```python +from embedchain import App +from embedchain.llm.llama2 import Llama2Llm + +app = App(llm=Llama2Llm()) +``` + +#### CustomApp + +Every app is a custom app now. +If you were previously using a `CustomApp`, you can now just change it to `App`. + +Here's one example of what you could do by combining everything shown on this page. + +```python +from embedchain import App +from embedchain.config import ElasticsearchDBConfig, EmbedderConfig, LlmConfig +from embedchain.embedder.openai import OpenAIEmbedder +from embedchain.llm.llama2 import Llama2Llm +from embedchain.vectordb.elasticsearch import ElasticsearchDB + +app = App( + llm=Llama2Llm(), + llm_config=LlmConfig(number_documents=3, temperature=0), + embedder=OpenAIEmbedder(), + embedder_config=EmbedderConfig(model="text-embedding-ada-002"), + db=ElasticsearchDB(), + db_config=ElasticsearchDBConfig(), +) +``` + +### Compatibility with other apps - If there is any other app instance in your script or app, you can change the import as ```python from embedchain import App as EmbedChainApp -from embedchain import OpenSourceApp as EmbedChainOSApp from embedchain import PersonApp as EmbedChainPersonApp # or from embedchain import App as ECApp -from embedchain import OpenSourceApp as ECOSApp from embedchain import PersonApp as ECPApp ``` diff --git a/embedchain/apps/Llama2App.py b/embedchain/apps/Llama2App.py index 8b4bf3f2ea..eaf4abb165 100644 --- a/embedchain/apps/Llama2App.py +++ b/embedchain/apps/Llama2App.py @@ -1,15 +1,14 @@ +import logging from typing import Optional -from embedchain.apps.custom_app import CustomApp +from embedchain.apps.app import App from embedchain.config import CustomAppConfig -from 
embedchain.embedder.openai import OpenAIEmbedder from embedchain.helper.json_serializable import register_deserializable from embedchain.llm.llama2 import Llama2Llm -from embedchain.vectordb.chroma import ChromaDB @register_deserializable -class Llama2App(CustomApp): +class Llama2App(App): """ The EmbedChain Llama2App class. @@ -17,17 +16,23 @@ class Llama2App(CustomApp): add(source, data_type): adds the data from the given URL to the vector db. query(query): finds answer to the given query using vector database and LLM. chat(query): finds answer to the given query using vector database and LLM, with conversation history. + + .. deprecated:: 0.0.59 + Use `App` instead. """ def __init__(self, config: CustomAppConfig = None, system_prompt: Optional[str] = None): """ + .. deprecated:: 0.0.59 + Use `App` instead. + :param config: CustomAppConfig instance to load as configuration. Optional. :param system_prompt: System prompt string. Optional. """ - - if config is None: - config = CustomAppConfig() - - super().__init__( - config=config, llm=Llama2Llm(), db=ChromaDB(), embedder=OpenAIEmbedder(), system_prompt=system_prompt + logging.warning( + "DEPRECATION WARNING: Please use `App` instead of `Llama2App`. " + "`Llama2App` will be removed in a future release. " + "Please refer to https://docs.embedchain.ai/advanced/app_types#llama2app for instructions." 
) + + super().__init__(config=config, llm=Llama2Llm(), system_prompt=system_prompt) diff --git a/embedchain/apps/app.py b/embedchain/apps/app.py index 9c9c7bbfc0..9148fec8f0 100644 --- a/embedchain/apps/app.py +++ b/embedchain/apps/app.py @@ -1,11 +1,16 @@ +import logging from typing import Optional from embedchain.config import (AppConfig, BaseEmbedderConfig, BaseLlmConfig, ChromaDbConfig) +from embedchain.config.vectordb.base import BaseVectorDbConfig from embedchain.embedchain import EmbedChain +from embedchain.embedder.base import BaseEmbedder from embedchain.embedder.openai import OpenAIEmbedder from embedchain.helper.json_serializable import register_deserializable +from embedchain.llm.base import BaseLlm from embedchain.llm.openai import OpenAILlm +from embedchain.vectordb.base import BaseVectorDB from embedchain.vectordb.chroma import ChromaDB @@ -23,32 +28,98 @@ class App(EmbedChain): def __init__( self, - config: AppConfig = None, - llm_config: BaseLlmConfig = None, + config: Optional[AppConfig] = None, + llm: BaseLlm = None, + llm_config: Optional[BaseLlmConfig] = None, + db: BaseVectorDB = None, + db_config: Optional[BaseVectorDbConfig] = None, + embedder: BaseEmbedder = None, + embedder_config: Optional[BaseEmbedderConfig] = None, chromadb_config: Optional[ChromaDbConfig] = None, system_prompt: Optional[str] = None, ): """ - Initialize a new `CustomApp` instance. You only have a few choices to make. + Initialize a new `App` instance. - :param config: Config for the app instance. - This is the most basic configuration, that does not fall into the LLM, database or embedder category, - defaults to None - :type config: AppConfig, optional + :param config: Config for the app instance., defaults to None + :type config: Optional[AppConfig], optional + :param llm: LLM Class instance. example: `from embedchain.llm.openai import OpenAILlm`, defaults to OpenAiLlm + :type llm: BaseLlm, optional :param llm_config: Allows you to configure the LLM, e.g. 
how many documents to return, example: `from embedchain.config import LlmConfig`, defaults to None - :type llm_config: BaseLlmConfig, optional - :param chromadb_config: Allows you to configure the vector database, + :type llm_config: Optional[BaseLlmConfig], optional + :param db: The database to use for storing and retrieving embeddings, + example: `from embedchain.vectordb.chroma_db import ChromaDb`, defaults to ChromaDb + :type db: BaseVectorDB, optional + :param db_config: Allows you to configure the vector database, example: `from embedchain.config import ChromaDbConfig`, defaults to None + :type db_config: Optional[BaseVectorDbConfig], optional + :param embedder: The embedder (embedding model and function) use to calculate embeddings. + example: `from embedchain.embedder.gpt4all_embedder import GPT4AllEmbedder`, defaults to OpenAIEmbedder + :type embedder: BaseEmbedder, optional + :param embedder_config: Allows you to configure the Embedder. + example: `from embedchain.config import BaseEmbedderConfig`, defaults to None + :type embedder_config: Optional[BaseEmbedderConfig], optional + :param chromadb_config: Deprecated alias of `db_config`, defaults to None :type chromadb_config: Optional[ChromaDbConfig], optional :param system_prompt: System prompt that will be provided to the LLM as such, defaults to None :type system_prompt: Optional[str], optional + :raises TypeError: LLM, database or embedder or their config is not a valid class instance. """ + # Overwrite deprecated arguments + if chromadb_config: + logging.warning( + "DEPRECATION WARNING: Please use `db_config` argument instead of `chromadb_config`." + "`chromadb_config` will be removed in a future release." + ) + db_config = chromadb_config + + # Type check configs + if config and not isinstance(config, AppConfig): + raise TypeError( + "Config is not a `AppConfig` instance. " + "Please make sure the type is right and that you are passing an instance." 
+ ) + if llm_config and not isinstance(llm_config, BaseLlmConfig): + raise TypeError( + "`llm_config` is not a `BaseLlmConfig` instance. " + "Please make sure the type is right and that you are passing an instance." + ) + if db_config and not isinstance(db_config, BaseVectorDbConfig): + raise TypeError( + "`db_config` is not a `BaseVectorDbConfig` instance. " + "Please make sure the type is right and that you are passing an instance." + ) + if embedder_config and not isinstance(embedder_config, BaseEmbedderConfig): + raise TypeError( + "`embedder_config` is not a `BaseEmbedderConfig` instance. " + "Please make sure the type is right and that you are passing an instance." + ) + + # Assign defaults if config is None: config = AppConfig() + if llm is None: + llm = OpenAILlm(config=llm_config) + if db is None: + db = ChromaDB(config=db_config) + if embedder is None: + embedder = OpenAIEmbedder(config=embedder_config) - llm = OpenAILlm(config=llm_config) - embedder = OpenAIEmbedder(config=BaseEmbedderConfig(model="text-embedding-ada-002")) - database = ChromaDB(config=chromadb_config) - - super().__init__(config, llm, db=database, embedder=embedder, system_prompt=system_prompt) + # Type check assignments + if not isinstance(llm, BaseLlm): + raise TypeError( + "LLM is not a `BaseLlm` instance. " + "Please make sure the type is right and that you are passing an instance." + ) + if not isinstance(db, BaseVectorDB): + raise TypeError( + "Database is not a `BaseVectorDB` instance. " + "Please make sure the type is right and that you are passing an instance." + ) + if not isinstance(embedder, BaseEmbedder): + raise TypeError( + "Embedder is not a `BaseEmbedder` instance. " + "Please make sure the type is right and that you are passing an instance." 
+ ) + super().__init__(config, llm=llm, db=db, embedder=embedder, system_prompt=system_prompt) diff --git a/embedchain/apps/custom_app.py b/embedchain/apps/custom_app.py index 875027bfbc..a72d8e3e77 100644 --- a/embedchain/apps/custom_app.py +++ b/embedchain/apps/custom_app.py @@ -1,7 +1,8 @@ +import logging from typing import Optional +from embedchain.apps.app import App from embedchain.config import CustomAppConfig -from embedchain.embedchain import EmbedChain from embedchain.embedder.base import BaseEmbedder from embedchain.helper.json_serializable import register_deserializable from embedchain.llm.base import BaseLlm @@ -9,7 +10,7 @@ @register_deserializable -class CustomApp(EmbedChain): +class CustomApp(App): """ Embedchain's custom app allows for most flexibility. @@ -19,6 +20,9 @@ class CustomApp(EmbedChain): add(source, data_type): adds the data from the given URL to the vector db. query(query): finds answer to the given query using vector database and LLM. chat(query): finds answer to the given query using vector database and LLM, with conversation history. + + .. deprecated:: 0.0.59 + Use `App` instead. """ def __init__( @@ -32,6 +36,9 @@ def __init__( """ Initialize a new `CustomApp` instance. You have to choose a LLM, database and embedder. + .. deprecated:: 0.0.59 + Use `App` instead. + :param config: Config for the app instance. This is the most basic configuration, that does not fall into the LLM, database or embedder category, defaults to None :type config: Optional[CustomAppConfig], optional @@ -48,36 +55,9 @@ def __init__( :raises ValueError: LLM, database or embedder has not been defined. :raises TypeError: LLM, database or embedder is not a valid class instance. """ - # Config is not required, it has a default - if config is None: - config = CustomAppConfig() - - if llm is None: - raise ValueError("LLM must be provided for custom app. 
Please import from `embedchain.llm`.") - if db is None: - raise ValueError("Database must be provided for custom app. Please import from `embedchain.vectordb`.") - if embedder is None: - raise ValueError("Embedder must be provided for custom app. Please import from `embedchain.embedder`.") - - if not isinstance(config, CustomAppConfig): - raise TypeError( - "Config is not a `CustomAppConfig` instance. " - "Please make sure the type is right and that you are passing an instance." - ) - if not isinstance(llm, BaseLlm): - raise TypeError( - "LLM is not a `BaseLlm` instance. " - "Please make sure the type is right and that you are passing an instance." - ) - if not isinstance(db, BaseVectorDB): - raise TypeError( - "Database is not a `BaseVectorDB` instance. " - "Please make sure the type is right and that you are passing an instance." - ) - if not isinstance(embedder, BaseEmbedder): - raise TypeError( - "Embedder is not a `BaseEmbedder` instance. " - "Please make sure the type is right and that you are passing an instance." - ) - + logging.warning( + "DEPRECATION WARNING: Please use `App` instead of `CustomApp`. " + "`CustomApp` will be removed in a future release. " + "Please refer to https://docs.embedchain.ai/advanced/app_types#opensourceapp for instructions." 
+ ) super().__init__(config=config, llm=llm, db=db, embedder=embedder, system_prompt=system_prompt) diff --git a/embedchain/apps/open_source_app.py b/embedchain/apps/open_source_app.py index 80ece56783..15dbbb0612 100644 --- a/embedchain/apps/open_source_app.py +++ b/embedchain/apps/open_source_app.py @@ -1,9 +1,9 @@ import logging from typing import Optional -from embedchain.config import (BaseEmbedderConfig, BaseLlmConfig, - ChromaDbConfig, OpenSourceAppConfig) -from embedchain.embedchain import EmbedChain +from embedchain.apps.app import App +from embedchain.config import (BaseLlmConfig, ChromaDbConfig, + OpenSourceAppConfig) from embedchain.embedder.gpt4all import GPT4AllEmbedder from embedchain.helper.json_serializable import register_deserializable from embedchain.llm.gpt4all import GPT4ALLLlm @@ -13,7 +13,7 @@ @register_deserializable -class OpenSourceApp(EmbedChain): +class OpenSourceApp(App): """ The embedchain Open Source App. Comes preconfigured with the best open source LLM, embedding model, database. @@ -22,6 +22,9 @@ class OpenSourceApp(EmbedChain): add(source, data_type): adds the data from the given URL to the vector db. query(query): finds answer to the given query using vector database and LLM. chat(query): finds answer to the given query using vector database and LLM, with conversation history. + + .. deprecated:: 0.0.59 + Use `App` instead. """ def __init__( @@ -36,6 +39,9 @@ def __init__( Since it's opinionated you don't have to choose a LLM, database and embedder. However, you can configure those. + .. deprecated:: 0.0.59 + Use `App` instead. + :param config: Config for the app instance. This is the most basic configuration, that does not fall into the LLM, database or embedder category, defaults to None :type config: OpenSourceAppConfig, optional @@ -50,29 +56,16 @@ def __init__( :type system_prompt: Optional[str], optional :raises TypeError: `OpenSourceAppConfig` or `LlmConfig` invalid. 
""" - logging.info("Loading open source embedding model. This may take some time...") # noqa:E501 - if not config: - config = OpenSourceAppConfig() - - if not isinstance(config, OpenSourceAppConfig): - raise TypeError( - "OpenSourceApp needs a OpenSourceAppConfig passed to it. " - "You can import it with `from embedchain.config import OpenSourceAppConfig`" - ) - - if not llm_config: - llm_config = BaseLlmConfig(model="orca-mini-3b.ggmlv3.q4_0.bin") - elif not isinstance(llm_config, BaseLlmConfig): - raise TypeError( - "The LlmConfig passed to OpenSourceApp is invalid. " - "You can import it with `from embedchain.config import LlmConfig`" - ) - elif not llm_config.model: - llm_config.model = "orca-mini-3b.ggmlv3.q4_0.bin" - - llm = GPT4ALLLlm(config=llm_config) - embedder = GPT4AllEmbedder(config=BaseEmbedderConfig(model="all-MiniLM-L6-v2")) - logging.error("Successfully loaded open source embedding model.") - database = ChromaDB(config=chromadb_config) + logging.warning( + "DEPRECATION WARNING: Please use `App` instead of `OpenSourceApp`." + "`OpenSourceApp` will be removed in a future release." + "Please refer to https://docs.embedchain.ai/advanced/app_types#customapp for instructions." 
+ ) - super().__init__(config, llm=llm, db=database, embedder=embedder, system_prompt=system_prompt) + super().__init__( + config=config, + llm=GPT4ALLLlm(config=llm_config), + db=ChromaDB(config=chromadb_config), + embedder=GPT4AllEmbedder(), + system_prompt=system_prompt, + ) diff --git a/tests/apps/test_apps.py b/tests/apps/test_apps.py new file mode 100644 index 0000000000..79a4c85bd5 --- /dev/null +++ b/tests/apps/test_apps.py @@ -0,0 +1,60 @@ +import os +import unittest + +from embedchain import App, CustomApp, Llama2App, OpenSourceApp +from embedchain.config import ChromaDbConfig +from embedchain.embedder.base import BaseEmbedder +from embedchain.llm.base import BaseLlm +from embedchain.vectordb.base import BaseVectorDB +from embedchain.vectordb.chroma import ChromaDB + + +class TestApps(unittest.TestCase): + try: + del os.environ["OPENAI_KEY"] + except KeyError: + pass + + def test_app(self): + app = App() + self.assertIsInstance(app.llm, BaseLlm) + self.assertIsInstance(app.db, BaseVectorDB) + self.assertIsInstance(app.embedder, BaseEmbedder) + + def test_custom_app(self): + app = CustomApp() + self.assertIsInstance(app.llm, BaseLlm) + self.assertIsInstance(app.db, BaseVectorDB) + self.assertIsInstance(app.embedder, BaseEmbedder) + + def test_opensource_app(self): + app = OpenSourceApp() + self.assertIsInstance(app.llm, BaseLlm) + self.assertIsInstance(app.db, BaseVectorDB) + self.assertIsInstance(app.embedder, BaseEmbedder) + + def test_llama2_app(self): + os.environ["REPLICATE_API_TOKEN"] = "-" + app = Llama2App() + self.assertIsInstance(app.llm, BaseLlm) + self.assertIsInstance(app.db, BaseVectorDB) + self.assertIsInstance(app.embedder, BaseEmbedder) + + +class TestConfigForAppComponents(unittest.TestCase): + collection_name = "my-test-collection" + + def test_constructor_config(self): + """ + Test that app can be configured through the app constructor. 
+ """ + app = App(db_config=ChromaDbConfig(collection_name=self.collection_name)) + self.assertEqual(app.db.config.collection_name, self.collection_name) + + def test_component_config(self): + """ + Test that app can also be configured by passing a configured component to the app + """ + database = ChromaDB(config=ChromaDbConfig(collection_name=self.collection_name)) + app = App(db=database) + self.assertEqual(app.db.config.collection_name, self.collection_name)