zylon-ai · imartinez · Jul 18, 2024 · Jul 15, 2024 · Jul 15, 2024 · Jul 16, 2024
diff --git a/fern/docs/pages/installation/concepts.mdx b/fern/docs/pages/installation/concepts.mdx
@@ -38,7 +38,7 @@ will load the configuration from `settings.yaml` and `settings-ollama.yaml`.
 ## About Fully Local Setups
 In order to run PrivateGPT in a fully local setup, you will need to run the LLM, Embeddings and Vector Store locally.
 ### Vector stores
-The vector stores supported (Qdrant, ChromaDB and Postgres) run locally by default.
+The vector stores supported (Qdrant, Milvus, ChromaDB and Postgres) run locally by default.
 ### Embeddings
 For local Embeddings there are two options:
 * (Recommended) You can use the 'ollama' option in PrivateGPT, which will connect to your local Ollama instance. Ollama simplifies a lot the installation of local LLMs.

diff --git a/fern/docs/pages/installation/installation.mdx b/fern/docs/pages/installation/installation.mdx
@@ -32,7 +32,7 @@ Here are the different options available:
 
 - LLM: "llama-cpp", "ollama", "sagemaker", "openai", "openailike", "azopenai"
 - Embeddings: "huggingface", "openai", "sagemaker", "azopenai"
-- Vector stores: "qdrant", "chroma", "postgres"
+- Vector stores: "qdrant", "milvus", "chroma", "postgres"
 - UI: whether or not to enable UI (Gradio) or just go with the API
 
 In order to only install the required dependencies, PrivateGPT offers different `extras` that can be combined during the installation process:

diff --git a/fern/docs/pages/manual/vectordb.mdx b/fern/docs/pages/manual/vectordb.mdx
@@ -1,7 +1,7 @@
 ## Vectorstores
-PrivateGPT supports [Qdrant](https://qdrant.tech/), [Chroma](https://www.trychroma.com/), [PGVector](https://github.com/pgvector/pgvector) and [ClickHouse](https://github.com/ClickHouse/ClickHouse) as vectorstore providers. Qdrant being the default.
+PrivateGPT supports [Qdrant](https://qdrant.tech/), [Milvus](https://milvus.io/), [Chroma](https://www.trychroma.com/), [PGVector](https://github.com/pgvector/pgvector) and [ClickHouse](https://github.com/ClickHouse/ClickHouse) as vectorstore providers. Qdrant being the default.
 
-In order to select one or the other, set the `vectorstore.database` property in the `settings.yaml` file to `qdrant`, `chroma`, `postgres` and `clickhouse`.
+In order to select one or the other, set the `vectorstore.database` property in the `settings.yaml` file to `qdrant`, `milvus`, `chroma`, `postgres` or `clickhouse`.
 
 ```yaml
 vectorstore:
@@ -39,6 +39,24 @@ qdrant:
   path: local_data/private_gpt/qdrant
 ```
 
+### Milvus configuration
+
+To enable Milvus, set the `vectorstore.database` property in the `settings.yaml` file to `milvus` and install the `vector-stores-milvus` extra.
+
+```bash
+poetry install --extras vector-stores-milvus
+```
+
+The available configuration options are:
+| Field        | Description |
+|--------------|-------------|
+| uri     | Defaults to "local_data/private_gpt/milvus_local.db", a local file. You can also point it at a more performant Milvus server running on Docker or Kubernetes.|
+| token          | Access token, used together with a Milvus server running on Docker or Kubernetes. Defaults to an empty string.|
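+| collection_name         | The name of the collection. The bundled `settings.yaml` sets it to "milvus_db"; if omitted, the built-in default is "make_this_parameterizable_per_api_call".|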
+| overwrite    | Whether to overwrite the collection if it already exists. The built-in default is True; the bundled `settings.yaml` sets it to false. |
+
+To obtain a local setup (disk-based database) without running a Milvus server, set the `uri` value in `settings.yaml` to a local file path such as `local_data/private_gpt/milvus_local.db`.
+
 ### Chroma configuration
 
 To enable Chroma, set the `vectorstore.database` property in the `settings.yaml` file to `chroma` and install the `chroma` extra.

diff --git a/poetry.lock b/poetry.lock
diff --git a/private_gpt/components/vector_store/vector_store_component.py b/private_gpt/components/vector_store/vector_store_component.py
@@ -121,6 +121,45 @@ def __init__(self, settings: Settings) -> None:
                         collection_name="make_this_parameterizable_per_api_call",
                     ),  # TODO
                 )
+
+            case "milvus":
+                try:
+                    from llama_index.vector_stores.milvus import (  # type: ignore
+                        MilvusVectorStore,
+                    )
+                except ImportError as e:
+                    raise ImportError(
+                        "Milvus dependencies not found, install with `poetry install --extras vector-stores-milvus`"
+                    ) from e
+
+                if settings.milvus is None:
+                    logger.info(
+                        "Milvus config not found. Using default settings.\n"
+                        "Trying to connect to Milvus at ./milvus_llamaindex.db "
+                        "with collection 'make_this_parameterizable_per_api_call'."
+                    )
+
+                    self.vector_store = typing.cast(
+                        BasePydanticVectorStore,
+                        MilvusVectorStore(
+                            dim=settings.embedding.embed_dim,
+                            collection_name="make_this_parameterizable_per_api_call",
+                            overwrite=True,
+                        ),
+                    )
+
+                else:
+                    self.vector_store = typing.cast(
+                        BasePydanticVectorStore,
+                        MilvusVectorStore(
+                            dim=settings.embedding.embed_dim,
+                            uri=settings.milvus.uri,
+                            token=settings.milvus.token,
+                            collection_name=settings.milvus.collection_name,
+                            overwrite=settings.milvus.overwrite,
+                        ),
+                    )
+
             case "clickhouse":
                 try:
                     from clickhouse_connect import (  # type: ignore

diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
@@ -125,7 +125,7 @@ class LLMSettings(BaseModel):
 
 
 class VectorstoreSettings(BaseModel):
-    database: Literal["chroma", "qdrant", "postgres", "clickhouse"]
+    database: Literal["chroma", "qdrant", "postgres", "clickhouse", "milvus"]
 
 
 class NodeStoreSettings(BaseModel):
@@ -508,6 +508,28 @@ class QdrantSettings(BaseModel):
     )
 
 
+class MilvusSettings(BaseModel):  # Options for the Milvus vector store; every field below has a default.
+    uri: str = Field(  # Location of the Milvus instance; the default is a local database file path.
+        "local_data/private_gpt/milvus_local.db",
+        description="The URI of the Milvus instance. For example: 'local_data/private_gpt/milvus_local.db' for Milvus Lite.",
+    )
+    token: str = Field(  # Access token for the instance; defaults to the empty string.
+        "",
+        description=(
+            "A valid access token to access the specified Milvus instance. "
+            "This can be used as a recommended alternative to setting user and password separately. "
+            "For example: 'root:Milvus', which is the default credential of the root user."
+        ),
+    )
+    collection_name: str = Field(  # Name of the Milvus collection to use.
+        "make_this_parameterizable_per_api_call",
+        description="The name of the collection in Milvus. Default is 'make_this_parameterizable_per_api_call'.",
+    )
+    overwrite: bool = Field(  # NOTE(review): defaults to True; presumably replaces an existing collection on startup - confirm this default is intended.
+        True, description="Overwrite the previous collection schema if it exists."
+    )
+
+
 class Settings(BaseModel):
     server: ServerSettings
     data: DataSettings
@@ -527,6 +549,7 @@ class Settings(BaseModel):
     qdrant: QdrantSettings | None = None
     postgres: PostgresSettings | None = None
     clickhouse: ClickHouseSettings | None = None
+    milvus: MilvusSettings | None = None
 
 
 """

diff --git a/pyproject.toml b/pyproject.toml
@@ -31,6 +31,7 @@ llama-index-embeddings-openai = {version ="^0.1.10", optional = true}
 llama-index-embeddings-azure-openai = {version ="^0.1.10", optional = true}
 llama-index-embeddings-gemini = {version ="^0.1.8", optional = true}
 llama-index-vector-stores-qdrant = {version ="^0.2.10", optional = true}
+llama-index-vector-stores-milvus = {version ="^0.1.20", optional = true}
 llama-index-vector-stores-chroma = {version ="^0.1.10", optional = true}
 llama-index-vector-stores-postgres = {version ="^0.1.11", optional = true}
 llama-index-vector-stores-clickhouse = {version ="^0.1.3", optional = true}
@@ -78,6 +79,7 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 vector-stores-clickhouse = ["llama-index-vector-stores-clickhouse", "clickhouse_connect"]
 vector-stores-chroma = ["llama-index-vector-stores-chroma"]
 vector-stores-postgres = ["llama-index-vector-stores-postgres"]
+vector-stores-milvus = ["llama-index-vector-stores-milvus"]
 storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"]
 rerank-sentence-transformers = ["torch", "sentence-transformers"]
 

diff --git a/settings.yaml b/settings.yaml
@@ -84,6 +84,11 @@ vectorstore:
 nodestore:
   database: simple
 
+milvus:
+  uri: local_data/private_gpt/milvus_local.db
+  collection_name: milvus_db
+  overwrite: false
+
 qdrant:
   path: local_data/private_gpt/qdrant