diff --git a/chains.py b/chains.py
index 9ad10f40..4a7fc775 100644
--- a/chains.py
+++ b/chains.py
@@ -33,10 +33,18 @@ def load_embedding_model(embedding_model_name: str, logger=BaseLogger(), config=
         )
         dimension = 4096
         logger.info("Embedding: Using Ollama")
-    elif embedding_model_name == "openai":
-        embeddings = OpenAIEmbeddings()
+    elif embedding_model_name == "openai-embedding-3-small":
+        embeddings = OpenAIEmbeddings(
+            model="text-embedding-3-small"
+        )
         dimension = 1536
-        logger.info("Embedding: Using OpenAI")
+        logger.info("Embedding: Using OpenAI text-embedding-3-small")
+    elif embedding_model_name == "openai-embedding-3-large":
+        embeddings = OpenAIEmbeddings(
+            model="text-embedding-3-large"
+        )
+        dimension = 3072
+        logger.info("Embedding: Using OpenAI text-embedding-3-large")
     elif embedding_model_name == "aws":
         embeddings = BedrockEmbeddings()
         dimension = 1536
diff --git a/env.example b/env.example
index 88e33cc3..b5698c57 100644
--- a/env.example
+++ b/env.example
@@ -2,7 +2,8 @@
 # LLM and Embedding Model
 #*****************************************************************
 LLM=llama2 #or any Ollama model tag, gpt-4, gpt-3.5, or claudev2
-EMBEDDING_MODEL=sentence_transformer #or google-genai-embedding-001 openai, ollama, or aws
+EMBEDDING_MODEL=sentence_transformer #or openai-embedding-3-small, openai-embedding-3-large, ollama, or aws
+
 
 #*****************************************************************
 # Neo4j
diff --git a/readme.md b/readme.md
index 28fba3d4..da052f38 100644
--- a/readme.md
+++ b/readme.md
@@ -15,12 +15,11 @@ Available variables:
 | NEO4J_USERNAME | neo4j | REQUIRED - Username for Neo4j database |
 | NEO4J_PASSWORD | password | REQUIRED - Password for Neo4j database |
 | LLM | llama2 | REQUIRED - Can be any Ollama model tag, or gpt-4 or gpt-3.5 or claudev2 |
-| EMBEDDING_MODEL | sentence_transformer | REQUIRED - Can be sentence_transformer, openai, aws, ollama or google-genai-embedding-001|
+| EMBEDDING_MODEL | sentence_transformer | REQUIRED - Can be sentence_transformer, openai-embedding-3-small, openai-embedding-3-large, aws, ollama or google-genai-embedding-001 |
 | AWS_ACCESS_KEY_ID | | REQUIRED - Only if LLM=claudev2 or embedding_model=aws |
 | AWS_SECRET_ACCESS_KEY | | REQUIRED - Only if LLM=claudev2 or embedding_model=aws |
 | AWS_DEFAULT_REGION | | REQUIRED - Only if LLM=claudev2 or embedding_model=aws |
-| OPENAI_API_KEY | | REQUIRED - Only if LLM=gpt-4 or LLM=gpt-3.5 or embedding_model=openai |
-| GOOGLE_API_KEY | | REQUIRED - Only required when using GoogleGenai LLM or embedding model google-genai-embedding-001|
+| OPENAI_API_KEY | | REQUIRED - Only if LLM=gpt-4 or LLM=gpt-3.5 or embedding_model=openai-* |
 | LANGCHAIN_ENDPOINT | "https://api.smith.langchain.com" | OPTIONAL - URL to Langchain Smith API |
 | LANGCHAIN_TRACING_V2 | false | OPTIONAL - Enable Langchain tracing v2 |
 | LANGCHAIN_PROJECT | | OPTIONAL - Langchain project name |