
Commit

Merge branch 'Feature/#959' into Feature/client/raas
vkehfdl1 committed Dec 4, 2024
2 parents 7f63db1 + a76aa29 commit e697484
Showing 10 changed files with 108 additions and 7 deletions.
3 changes: 2 additions & 1 deletion api/requirements.txt
@@ -1,4 +1,5 @@
-AutoRAG[parse]>=0.3.11
+AutoRAG[parse]==0.3.11rc2
+AutoRAG[ko]==0.3.11rc2
 pydantic<2.10.0
 jwt
 quart-cors
2 changes: 1 addition & 1 deletion autorag/autorag/VERSION
@@ -1 +1 @@
-0.3.11
+0.3.11rc2
7 changes: 5 additions & 2 deletions autorag/autorag/cli.py
@@ -75,14 +75,17 @@ def evaluate(config, qa_data_path, corpus_data_path, project_dir, skip_validatio
 @click.option(
     "--project_dir", help="Path to project directory.", type=str, default=None
 )
-def run_api(config_path, host, port, trial_dir, project_dir):
+@click.option(
+    "--remote", help="Run the API server in remote mode.", type=bool, default=False
+)
+def run_api(config_path, host, port, trial_dir, project_dir, remote: bool):
     if trial_dir is None:
         runner = ApiRunner.from_yaml(config_path, project_dir=project_dir)
     else:
         runner = ApiRunner.from_trial_folder(trial_dir)
     logger.info(f"Running API server at {host}:{port}...")
     nest_asyncio.apply()
-    runner.run_api_server(host, port)
+    runner.run_api_server(host, port, remote=remote)


 @click.command()
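Note (not part of the diff): the new `--remote` flag is simply forwarded to `ApiRunner.run_api_server`. A minimal sketch of exercising the same path from Python, assuming a finished trial under `./project/0` (the path, host, and port are hypothetical):

```
import nest_asyncio

from autorag.deploy.api import ApiRunner

# Load a finished trial folder (hypothetical path) and serve it.
runner = ApiRunner.from_trial_folder("./project/0")
nest_asyncio.apply()
# remote=True makes run_api_server open an ngrok tunnel, mirroring the new
# `--remote True` CLI option added above (flag name taken from the diff).
runner.run_api_server("0.0.0.0", 8000, remote=True)
```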
6 changes: 4 additions & 2 deletions autorag/autorag/deploy/api.py
@@ -5,15 +5,14 @@
 from typing import Dict, Optional, List, Union, Literal

 import pandas as pd
-from pyngrok import ngrok
 from quart import Quart, request, jsonify
 from quart.helpers import stream_with_context
 from pydantic import BaseModel, ValidationError

 from autorag.deploy.base import BaseRunner
 from autorag.nodes.generator.base import BaseGenerator
 from autorag.nodes.promptmaker.base import BasePromptMaker
-from autorag.utils import fetch_contents
+from autorag.utils.util import fetch_contents, to_list

 logger = logging.getLogger("AutoRAG")

@@ -255,6 +254,8 @@ def run_api_server(
"""
logger.info(f"Run api server at {host}:{port}")
if remote:
from pyngrok import ngrok

http_tunnel = ngrok.connect(str(port), "http")
public_url = http_tunnel.public_url
logger.info(f"Public API URL: {public_url}")
@@ -278,6 +279,7 @@ def extract_retrieve_passage(self, df: pd.DataFrame) -> List[RetrievedPassage]:
             )[0]
         else:
             start_end_indices = [None] * len(retrieved_ids)
+        start_end_indices = to_list(start_end_indices)
         return list(
             map(
                 lambda content, doc_id, path, metadata, start_end_idx: RetrievedPassage(
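Note (not part of the diff): the two functional changes above are (1) importing pyngrok lazily, only when remote mode is actually requested, and (2) running `start_end_indices` through `to_list` before the `RetrievedPassage` objects are built, presumably because values pulled out of a pandas DataFrame are often numpy arrays rather than plain lists. A minimal sketch using a simplified stand-in for `to_list` (the real helper lives in `autorag.utils.util`):

```
import numpy as np


def to_list_sketch(value):
    # Simplified stand-in for autorag.utils.util.to_list: recursively convert
    # numpy arrays and tuples into plain Python lists so downstream pydantic
    # models and JSON serialization only see native types.
    if isinstance(value, np.ndarray):
        return [to_list_sketch(v) for v in value.tolist()]
    if isinstance(value, (list, tuple)):
        return [to_list_sketch(v) for v in value]
    return value


# Hypothetical value as it might come out of a DataFrame column.
start_end_indices = np.array([[0, 120], [121, 240]])
print(to_list_sketch(start_end_indices))  # [[0, 120], [121, 240]]
```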
2 changes: 1 addition & 1 deletion autorag/autorag/vectordb/milvus.py
@@ -24,7 +24,7 @@ def __init__(
         embedding_model: str,
         collection_name: str,
         embedding_batch: int = 100,
-        similarity_metric: str = "l2",
+        similarity_metric: str = "cosine",
         index_type: str = "IVF_FLAT",
         uri: str = "http://localhost:19530",
         db_name: str = "",
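Note (not part of the diff): with this change, omitting `similarity_metric` now gives cosine similarity instead of L2 distance. A minimal sketch, with the class name and constructor usage assumed from this module (illustration only):

```
from autorag.vectordb.milvus import Milvus  # class name assumed from the module path

# similarity_metric is left unset, so it now defaults to "cosine" rather than "l2".
vectordb = Milvus(
    embedding_model="openai",         # hypothetical embedding model name
    collection_name="my_collection",  # hypothetical collection
    uri="http://localhost:19530",
)
```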
7 changes: 7 additions & 0 deletions docs/source/integration/vectordb/milvus.md
@@ -15,6 +15,10 @@ To use the Milvus vector database, you need to configure it in your YAML configu
    token: ${MILVUS_TOKEN}
    embedding_batch: 50
    similarity_metric: cosine
    index_type: IVF_FLAT
    params:
      nlist: 16384

```

Here is a simple example of a YAML configuration file that uses the Milvus vector database and the OpenAI embedding model:
@@ -29,6 +33,9 @@ vectordb:
    token: ${MILVUS_TOKEN}
    embedding_batch: 50
    similarity_metric: cosine
    index_type: IVF_FLAT
    params:
      nlist: 16384
node_lines:
- node_line_name: retrieve_node_line # Arbitrary node line name
  nodes:
52 changes: 52 additions & 0 deletions tests/autorag/test_deploy.py
@@ -37,6 +37,19 @@ def evaluator():
    yield evaluator


@pytest.fixture
def evaluator_data_gen_by_autorag():
    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as project_dir:
        evaluator = Evaluator(
            os.path.join(resource_dir, "dataset_sample_gen_by_autorag", "qa.parquet"),
            os.path.join(
                resource_dir, "dataset_sample_gen_by_autorag", "corpus.parquet"
            ),
            project_dir=project_dir,
        )
        yield evaluator


@pytest.fixture
def evaluator_trial_done(evaluator):
    evaluator.start_trial(os.path.join(resource_dir, "simple_with_llm.yaml"))
@@ -291,6 +304,45 @@ async def post_to_server_retrieve():
    assert isinstance(passages[0]["score"], float)


def test_runner_api_server2(evaluator_data_gen_by_autorag):
    project_dir = evaluator_data_gen_by_autorag.project_dir
    evaluator_data_gen_by_autorag.start_trial(
        os.path.join(resource_dir, "simple_mock_with_llm.yaml")
    )
    runner = ApiRunner.from_trial_folder(os.path.join(project_dir, "0"))

    client = runner.app.test_client()

    async def post_to_server():
        # Use the TestClient to make a request to the server
        response = await client.post(
            "/v1/run",
            json={
                "query": "What is the best movie in Korea? Have Korea movie ever won Oscar?",
            },
        )
        json_response = await response.get_json()
        return json_response, response.status_code

    nest_asyncio.apply()
    response_json, response_status_code = asyncio.run(post_to_server())
    assert response_status_code == 200
    assert "result" in response_json
    assert "retrieved_passage" in response_json
    answer = response_json["result"]
    assert isinstance(answer, str)
    assert bool(answer)

    retrieved_contents = response_json["retrieved_passage"]
    assert len(retrieved_contents) == 10
    assert isinstance(retrieved_contents[0]["content"], str)
    assert isinstance(retrieved_contents[0]["doc_id"], str)
    assert retrieved_contents[0]["filepath"]
    assert retrieved_contents[0]["file_page"]
    assert retrieved_contents[0]["start_idx"]
    assert retrieved_contents[0]["end_idx"]


@pytest.mark.skip(reason="This test is not working")
def test_runner_api_server_stream(evaluator_trial_done):
    project_dir = evaluator_trial_done.project_dir
2 binary files not shown.
36 changes: 36 additions & 0 deletions tests/resources/simple_mock_with_llm.yaml
@@ -0,0 +1,36 @@
vectordb:
  - name: chroma_default
    db_type: chroma
    client_type: persistent
    embedding_model: mock
    collection_name: openai
    path: ${PROJECT_DIR}/resources/chroma
node_lines:
- node_line_name: retrieve_node_line
  nodes:
    - node_type: retrieval # represents run_node function
      strategy: # essential for every node
        metrics: [retrieval_f1, retrieval_recall]
        top_k: 10 # node param, which adapt to every module in this node.
      modules:
        - module_type: bm25
          bm25_tokenizer: [ porter_stemmer ]
        - module_type: vectordb
          vectordb: chroma_default
          embedding_batch: 50
    - node_type: prompt_maker
      strategy:
        metrics: [ bleu ]
        generator_modules:
          - module_type: llama_index_llm
            llm: mock
      modules:
        - module_type: fstring
          prompt: "Tell me something about the question: {query} \n\n {retrieved_contents}"
    - node_type: generator
      strategy:
        metrics:
          - metric_name: bleu
      modules:
        - module_type: llama_index_llm
          llm: mock
