Skip to content

Commit

Permalink
Add MongoDB as KV storage
Browse files Browse the repository at this point in the history
  • Loading branch information
partoneplay committed Dec 5, 2024
1 parent 6d27401 commit d8ba7c5
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
os.mkdir(WORKING_DIR)
print(f"WorkingDir: {WORKING_DIR}")

# mongo
os.environ["MONGO_URI"] = "mongodb://root:root@localhost:27017/"
os.environ["MONGO_DATABASE"] = "LightRAG"

# neo4j
BATCH_SIZE_NODES = 500
BATCH_SIZE_EDGES = 100
Expand Down Expand Up @@ -38,6 +42,7 @@
texts=texts, embed_model="bge-m3:latest", host="http://127.0.0.1:11434"
),
),
kv_storage="MongoKVStorage",
graph_storage="Neo4JStorage",
vector_storage="MilvusVectorDBStorge",
)
Expand Down
51 changes: 51 additions & 0 deletions lightrag/kg/mongo_impl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import os
from tqdm.asyncio import tqdm as tqdm_async
from dataclasses import dataclass
from pymongo import MongoClient

from lightrag.utils import logger

from lightrag.base import BaseKVStorage


@dataclass
class MongoKVStorage(BaseKVStorage):
def __post_init__(self):
client = MongoClient(
os.environ.get("MONGO_URI", "mongodb://root:root@localhost:27017/")
)
database = client.get_database(os.environ.get("MONGO_DATABASE", "LightRAG"))
self._data = database.get_collection(self.namespace)
logger.info(f"Use MongoDB as KV {self.namespace}")

async def all_keys(self) -> list[str]:
return [x["_id"] for x in self._data.find({}, {"_id": 1})]

async def get_by_id(self, id):
return self._data.find_one({"_id": id})

async def get_by_ids(self, ids, fields=None):
if fields is None:
return list(self._data.find({"_id": {"$in": ids}}))
return list(
self._data.find(
{"_id": {"$in": ids}},
{field: 1 for field in fields},
)
)

async def filter_keys(self, data: list[str]) -> set[str]:
existing_ids = [
str(x["_id"]) for x in self._data.find({"_id": {"$in": data}}, {"_id": 1})
]
return set([s for s in data if s not in existing_ids])

async def upsert(self, data: dict[str, dict]):
for k, v in tqdm_async(data.items(), desc="Upserting"):
self._data.update_one({"_id": k}, {"$set": v}, upsert=True)
data[k]["_id"] = k
return data

async def drop(self):
""" """
pass
3 changes: 3 additions & 0 deletions lightrag/lightrag.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@

from .kg.milvus_impl import MilvusVectorDBStorge

from .kg.mongo_impl import MongoKVStorage

# future KG integrations

# from .kg.ArangoDB_impl import (
Expand Down Expand Up @@ -227,6 +229,7 @@ def _get_storage_class(self) -> Type[BaseGraphStorage]:
# kv storage
"JsonKVStorage": JsonKVStorage,
"OracleKVStorage": OracleKVStorage,
"MongoKVStorage": MongoKVStorage,
# vector storage
"NanoVectorDBStorage": NanoVectorDBStorage,
"OracleVectorDBStorage": OracleVectorDBStorage,
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ ollama
openai
oracledb
pymilvus
pymongo
pyvis
tenacity
# lmdeploy[all]
Expand Down

0 comments on commit d8ba7c5

Please sign in to comment.