From 97d332ab6749ebe2515cc07c2fbc3358a743d900 Mon Sep 17 00:00:00 2001
From: writinwaters <cai.keith@gmail.com>
Date: Tue, 22 Oct 2024 17:08:08 +0800
Subject: [PATCH 1/2] DRAFT: Updated python and http api references

---
 api/http_api_reference.md   | 102 ++++++++++++++++++++----------------
 api/python_api_reference.md |  90 +++++++++++++++----------------
 2 files changed, 103 insertions(+), 89 deletions(-)

diff --git a/api/http_api_reference.md b/api/http_api_reference.md
index 2c11fb5507..bba39fa811 100644
--- a/api/http_api_reference.md
+++ b/api/http_api_reference.md
@@ -20,7 +20,7 @@ Creates a dataset.
 ### Request
 
 - Method: POST
-- URL: `http://{address}/api/v1/dataset`
+- URL: `/api/v1/dataset`
 - Headers:
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
@@ -163,7 +163,7 @@ Deletes datasets by ID.
 ### Request
 
 - Method: DELETE
-- URL: `http://{address}/api/v1/dataset`
+- URL: `/api/v1/dataset`
 - Headers:
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
@@ -219,7 +219,7 @@ Updates configurations for a specified dataset.
 ### Request
 
 - Method: PUT
-- URL: `http://{address}/api/v1/dataset/{dataset_id}`
+- URL: `/api/v1/dataset/{dataset_id}`
 - Headers:
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
@@ -243,8 +243,6 @@ curl --request PUT \
   --data '{
   "name": "test",
   "embedding_model": "BAAI/bge-zh-v1.5",
-  "chunk_count": 0,
-  "document_count": 0,
   "parse_method": "naive"
 }'
 ```
@@ -293,14 +291,12 @@ An error response includes a JSON object like the following:
 
 **GET** `/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}`
 
-Lists all datasets?????
-
-Retrieves a list of datasets.
+Lists datasets.
 
 ### Request
 
 - Method: GET
-- URL: `http://{address}/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}`
+- URL: `/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}`
 - Headers:
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 
@@ -407,10 +403,10 @@ Uploads documents to a specified dataset.
 - Method: POST
 - URL: `/api/v1/dataset/{dataset_id}/document`
 - Headers:
-  - 'Content-Type: multipart/form-data'
+  - `'Content-Type: multipart/form-data'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 - Form:
-  - 'file=@{FILE_PATH}'
+  - `'file=@{FILE_PATH}'`
 
 #### Request example
 
@@ -425,9 +421,9 @@ curl --request POST \
 #### Request parameters
 
 - `"dataset_id"`: (*Path parameter*)  
-  The dataset ID.
+  The ID of the dataset to which the documents will be uploaded.
 - `"file"`: (*Body parameter*)  
-  The file to upload.
+  The document???? to upload.
 
 ### Response
 
@@ -459,25 +455,25 @@ Updates configurations for a specified document.
 ### Request
 
 - Method: PUT
-- URL: `http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}`
+- URL: `/api/v1/dataset/{dataset_id}/document/{document_id}`
 - Headers:
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 - Body:
-  - `name`:`string`
-  - `parser_method`:`string`
-  - `parser_config`:`dict`
+  - `"name"`:`string`
+  - `"chunk_method"`:`string`
+  - `"parser_config"`:`dict`
 
 #### Request example
 
 ```bash
 curl --request PUT \
   --url http://{address}/api/v1/dataset/{dataset_id}/info/{document_id} \
-  --header 'Authorization: Bearer {YOUR_ACCESS TOKEN}' \
+  --header 'Authorization: Bearer {YOUR_API_KEY}' \
   --header 'Content-Type: application/json' \
   --data '{
   "name": "manual.txt", 
-  "parser_method": "manual", 
+  "chunk_method": "manual", 
   "parser_config": {"chunk_token_count": 128, "delimiter": "\n!?。；！？", "layout_recognize": true, "task_page_size": 12}
   }'
 
@@ -485,8 +481,24 @@ curl --request PUT \
 
 #### Request parameters
 
-- `"parser_method"`: (*Body parameter*)  
-  Method used to parse the document.  
+- `"name"`: (*Body parameter*), `string`
+- `"chunk_method"`: (*Body parameter*), `string`  
+  The parsing method to apply to the document.  
+  - `"naive"`: General
+  - `"manual`: Manual
+  - `"qa"`: Q&A
+  - `"table"`: Table
+  - `"paper"`: Paper
+  - `"book"`: Book
+  - `"laws"`: Laws
+  - `"presentation"`: Presentation
+  - `"picture"`: Picture
+  - `"one"`: One
+  - `"knowledge_graph"`: Knowledge Graph
+  - `"email"`: Email
+- 
+
+### Returns  
 
 - `"parser_config"`: (*Body parameter*)  
   Configuration object for the parser.  
@@ -525,7 +537,7 @@ Downloads a document from a specified dataset.
 ### Request
 
 - Method: GET
-- URL: `http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}`
+- URL: `/api/v1/dataset/{dataset_id}/document/{document_id}`
 - Headers:
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 - Output:
@@ -570,7 +582,7 @@ An error response includes a JSON object like the following:
 
 **GET** `/api/v1/dataset/{dataset_id}/info?offset={offset}&limit={limit}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}`
 
-Retrieves a list of documents from a specified dataset.
+Lists documents in a specified dataset.
 
 ### Request
 
@@ -670,7 +682,7 @@ Deletes documents by ID.
 ### Request
 
 - Method: DELETE
-- URL: `http://{address}/api/v1/dataset/{dataset_id}/document`
+- URL: `/api/v1/dataset/{dataset_id}/document`
 - Headers:
   - `'Content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
@@ -724,7 +736,7 @@ Parses documents in a specified dataset.
 ### Request
 
 - Method: POST
-- URL: `http://{address}/api/v1/dataset/{dataset_id}/chunk `
+- URL: `/api/v1/dataset/{dataset_id}/chunk `
 - Headers:
   - `'content-Type: application/json'`
   - 'Authorization: Bearer {YOUR_API_KEY}'
@@ -777,7 +789,7 @@ Stops parsing specified documents.
 ### Request
 
 - Method: DELETE
-- URL: `http://{address}/api/v1/dataset/{dataset_id}/chunk`
+- URL: `/api/v1/dataset/{dataset_id}/chunk`
 - Headers:
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
@@ -831,7 +843,7 @@ Adds a chunk to a specified document in a specified dataset.
 ### Request
 
 - Method: POST
-- URL: `http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
+- URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
 - Headers:
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
@@ -896,12 +908,12 @@ An error response includes a JSON object like the following:
 
 **GET** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk?keywords={keywords}&offset={offset}&limit={limit}&id={id}`
 
-Retrieves a list of chunks from a specified document in a specified dataset.
+Lists chunks in a specified document.
 
 ### Request
 
 - Method: GET
-- URL: `http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk?keywords={keywords}&offset={offset}&limit={limit}&id={id}`
+- URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk?keywords={keywords}&offset={offset}&limit={limit}&id={id}`
 - Headers:
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 
@@ -992,7 +1004,7 @@ Deletes chunks by ID.
 ### Request
 
 - Method: DELETE
-- URL: `http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
+- URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
 - Headers:
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
@@ -1046,7 +1058,7 @@ Updates content or configurations for a specified chunk.
 ### Request
 
 - Method: PUT
-- URL: `http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk/{chunk_id}`
+- URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk/{chunk_id}`
 - Headers:
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
@@ -1102,12 +1114,12 @@ An error response includes a JSON object like the following:
 
 **GET** `/api/v1/retrieval`
 
-Retrieval test of a dataset
+Retrieves chunks from specified datasets.
 
 ### Request
 
 - Method: POST
-- URL: `http://{address}/api/v1/retrieval`
+- URL: `/api/v1/retrieval`
 - Headers:
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
@@ -1252,7 +1264,7 @@ Creates a chat assistant.
 ### Request
 
 - Method: POST
-- URL: `http://{address}/api/v1/chat`
+- URL: `/api/v1/chat`
 - Headers:
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
@@ -1486,7 +1498,7 @@ Updates configurations for a specified chat assistant.
 ### Request
 
 - Method: PUT
-- URL: `http://{address}/api/v1/chat/{chat_id}`
+- URL: `/api/v1/chat/{chat_id}`
 - Headers:
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
@@ -1538,7 +1550,7 @@ Deletes chat assistants by ID.
 ### Request
 
 - Method: DELETE
-- URL: `http://{address}/api/v1/chat`
+- URL: `/api/v1/chat`
 - Headers:
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
@@ -1586,16 +1598,16 @@ An error response includes a JSON object like the following:
 
 ---
 
-## List chats (INCONSISTENT WITH THE PYTHON API)
+## List chats
 
-**GET** `/api/v1/chat?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}`
+**GET** `/api/v1/chat?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={chat_name}&id={chat_id}`
 
-Retrieves a list of chat assistants.
+Lists chat assistants.
 
 ### Request
 
 - Method: GET
-- URL: `http://{address}/api/v1/chat?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}`
+- URL: `/api/v1/chat?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={chat_name}&id={chat_id}`
 - Headers:
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 
@@ -1732,7 +1744,7 @@ Create a chat session.
 ### Request
 
 - Method: POST
-- URL: `http://{address}/api/v1/chat/{chat_id}/session`
+- URL: `/api/v1/chat/{chat_id}/session`
 - Headers:
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
@@ -1827,7 +1839,7 @@ Update a chat session
 ### Request
 
 - Method: PUT
-- URL: `http://{address}/api/v1/chat/{chat_id}/session/{session_id}`
+- URL: `/api/v1/chat/{chat_id}/session/{session_id}`
 - Headers:
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
@@ -1882,7 +1894,7 @@ Lists sessions associated with a specified????????????? chat assistant.
 ### Request
 
 - Method: GET
-- URL: `http://{address}/api/v1/chat/{chat_id}/session?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}`
+- URL: `/api/v1/chat/{chat_id}/session?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={session_name}&id={session_id}`
 - Headers:
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 
@@ -1967,7 +1979,7 @@ Deletes sessions by ID.
 ### Request
 
 - Method: DELETE
-- URL: `http://{address}/api/v1/chat/{chat_id}/session`
+- URL: `/api/v1/chat/{chat_id}/session`
 - Headers:
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
@@ -2023,7 +2035,7 @@ Asks a question to start a conversation.
 ### Request
 
 - Method: POST
-- URL: `http://{address}/api/v1/chat/{chat_id}/completion`
+- URL: `/api/v1/chat/{chat_id}/completion`
 - Headers:
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
diff --git a/api/python_api_reference.md b/api/python_api_reference.md
index 96ab1ef45f..36af91890b 100644
--- a/api/python_api_reference.md
+++ b/api/python_api_reference.md
@@ -17,10 +17,9 @@ RAGFlow.create_dataset(
     name: str,
     avatar: str = "",
     description: str = "",
+    embedding_model: str = "BAAI/bge-zh-v1.5",
     language: str = "English",
     permission: str = "me", 
-    document_count: int = 0,
-    chunk_count: int = 0,
     chunk_method: str = "naive",
     parser_config: DataSet.ParserConfig = None
 ) -> DataSet
@@ -143,7 +142,7 @@ RAGFlow.list_datasets(
 ) -> list[DataSet]
 ```
 
-Retrieves a list of datasets.
+Lists datasets.
 
 ### Parameters
 
@@ -296,7 +295,7 @@ Updates configurations for the current document.
 
 A dictionary representing the attributes to update, with the following keys:
 
-- `"name"`: `str` The name of the document to update.
+- `"display_name"`: `str` The name of the document to update.
 - `"parser_config"`: `dict[str, Any]` The parsing configuration for the document:
   - `"chunk_token_count"`: Defaults to `128`.
   - `"layout_recognize"`: Defaults to `True`.
@@ -370,7 +369,7 @@ print(doc)
 Dataset.list_documents(id:str =None, keywords: str=None, offset: int=0, limit:int = 1024,order_by:str = "create_time", desc: bool = True) -> list[Document]
 ```
 
-Retrieves a list of documents from the current dataset.
+Lists documents in the current dataset.
 
 ### Parameters
 
@@ -388,7 +387,7 @@ The starting index for the documents to retrieve. Typically used in confunction
 
 #### limit: `int`
 
-The maximum number of documents to retrieve. Defaults to `1024`. A value of `-1` indicates that all documents should be returned.
+The maximum number of documents to retrieve. Defaults to `1024`.
 
 #### orderby: `str`
 
@@ -412,7 +411,7 @@ A `Document` object contains the following attributes:
 - `name`: The document name. Defaults to `""`.
 - `thumbnail`: The thumbnail image of the document. Defaults to `None`.
 - `knowledgebase_id`: The dataset ID associated with the document. Defaults to `None`.
-- `chunk_method` The chunk method name. Defaults to `""`. ?????naive??????
+- `chunk_method`: The chunk method name. Defaults to `"naive"`.
 - `parser_config`: `ParserConfig` Configuration object for the parser. Defaults to `{"pages": [[1, 1000000]]}`.
 - `source_type`: The source type of the document. Defaults to `"local"`.
 - `type`: Type or category of the document. Defaults to `""`. Reserved for future use.
@@ -425,7 +424,7 @@ A `Document` object contains the following attributes:
 - `process_begin_at`: `datetime` The start time of document processing. Defaults to `None`.
 - `process_duation`: `float` Duration of the processing in seconds. Defaults to `0.0`.
 - `run`: `str` The document's processing status:
-  - `"0"`: UNSTART (default)
+  - `"0"`: UNSTART (default)  ?????????
   - `"1"`: RUNNING
   - `"2"`: CANCEL
   - `"3"`: DONE
@@ -506,9 +505,9 @@ The IDs of the documents to parse.
 rag_object = RAGFlow(api_key="<YOUR_API_KEY>", base_url="http://<YOUR_BASE_URL>:9380")
 dataset = rag_object.create_dataset(name="dataset_name")
 documents = [
-    {'name': 'test1.txt', 'blob': open('./test_data/test1.txt',"rb").read()},
-    {'name': 'test2.txt', 'blob': open('./test_data/test2.txt',"rb").read()},
-    {'name': 'test3.txt', 'blob': open('./test_data/test3.txt',"rb").read()}
+    {'display_name': 'test1.txt', 'blob': open('./test_data/test1.txt',"rb").read()},
+    {'display_name': 'test2.txt', 'blob': open('./test_data/test2.txt',"rb").read()},
+    {'display_name': 'test3.txt', 'blob': open('./test_data/test3.txt',"rb").read()}
 ]
 dataset.upload_documents(documents)
 documents = dataset.list_documents(keywords="test")
@@ -546,9 +545,9 @@ The IDs of the documents for which parsing should be stopped.
 rag_object = RAGFlow(api_key="<YOUR_API_KEY>", base_url="http://<YOUR_BASE_URL>:9380")
 dataset = rag_object.create_dataset(name="dataset_name")
 documents = [
-    {'name': 'test1.txt', 'blob': open('./test_data/test1.txt',"rb").read()},
-    {'name': 'test2.txt', 'blob': open('./test_data/test2.txt',"rb").read()},
-    {'name': 'test3.txt', 'blob': open('./test_data/test3.txt',"rb").read()}
+    {'display_name': 'test1.txt', 'blob': open('./test_data/test1.txt',"rb").read()},
+    {'display_name': 'test2.txt', 'blob': open('./test_data/test2.txt',"rb").read()},
+    {'display_name': 'test3.txt', 'blob': open('./test_data/test3.txt',"rb").read()}
 ]
 dataset.upload_documents(documents)
 documents = dataset.list_documents(keywords="test")
@@ -566,7 +565,7 @@ print("Async bulk parsing cancelled.")
 ## Add chunk
 
 ```python
-Document.add_chunk(content:str) -> Chunk ?????????????????????
+Document.add_chunk(content:str, important_keywords:list[str] = []) -> Chunk
 ```
 
 Adds a chunk to the current document.
@@ -577,7 +576,7 @@ Adds a chunk to the current document.
 
 The text content of the chunk.
 
-#### important_keywords: `list[str]`  ??????????????????????
+#### important_keywords: `list[str]`
 
 The key terms or phrases to tag with the chunk.
 
@@ -588,7 +587,7 @@ The key terms or phrases to tag with the chunk.
 
 A `Chunk` object contains the following attributes:
 
-- `id`: `str` 
+- `id`: `str`
 - `content`: `str` Content of the chunk.
 - `important_keywords`: `list[str]` A list of key terms or phrases to tag with the chunk.
 - `create_time`: `str` The time when the chunk was created (added to the document).
@@ -596,9 +595,9 @@ A `Chunk` object contains the following attributes:
 - `knowledgebase_id`: `str` The ID of the associated dataset.
 - `document_name`: `str` The name of the associated document.
 - `document_id`: `str` The ID of the associated document.
-- `available`: `int`???? The chunk's availability status in the dataset. Value options:
-  - `0`: Unavailable
-  - `1`: Available
+- `available`: `bool` The chunk's availability status in the dataset. Value options:
+  - `False`: Unavailable
+  - `True`: Available
 
 
 ### Examples
@@ -619,26 +618,26 @@ chunk = doc.add_chunk(content="xxxxxxx")
 ## List chunks
 
 ```python
-Document.list_chunks(keywords: str = None, offset: int = 0, limit: int = -1, id : str = None) -> list[Chunk]
+Document.list_chunks(keywords: str = None, offset: int = 1, limit: int = 1024, id : str = None) -> list[Chunk]
 ```
 
-Retrieves a list of chunks from the current document.
+Lists chunks in the current document.
 
 ### Parameters
 
-#### keywords: `str`  
+#### keywords: `str`
   
 The keywords used to match chunk content. Defaults to `None`
 
 #### offset: `int`
 
-The starting index for the chunks to retrieve. Defaults to `1`??????
+The starting index for the chunks to retrieve. Defaults to `1`.
 
-#### limit  
+#### limit: `int`
 
-The maximum number of chunks to retrieve.  Default: `30`?????????
+The maximum number of chunks to retrieve. Defaults to `1024`.
 
-#### id
+#### id: `str`
 
 The ID of the chunk to retrieve. Default: `None`
 
@@ -713,9 +712,9 @@ A dictionary representing the attributes to update, with the following keys:
 
 - `"content"`: `str` Content of the chunk.
 - `"important_keywords"`: `list[str]` A list of key terms or phrases to tag with the chunk.
-- `"available"`: `int` The chunk's availability status in the dataset. Value options:
-  - `0`: Unavailable
-  - `1`: Available
+- `"available"`: `bool` The chunk's availability status in the dataset. Value options:
+  - `False`: Unavailable
+  - `True`: Available
 
 ### Returns
 
@@ -741,10 +740,10 @@ chunk.update({"content":"sdfx..."})
 ## Retrieve chunks
 
 ```python
-RAGFlow.retrieve(question:str="", datasets:list[str]=None, document=list[str]=None, offset:int=1, limit:int=30, similarity_threshold:float=0.2, vector_similarity_weight:float=0.3, top_k:int=1024,rerank_id:str=None,keyword:bool=False,higlight:bool=False) -> list[Chunk]
+RAGFlow.retrieve(question:str="", datasets:list[str]=None, document:list[str]=None, offset:int=1, limit:int=1024, similarity_threshold:float=0.2, vector_similarity_weight:float=0.3, top_k:int=1024,rerank_id:str=None,keyword:bool=False,highlight:bool=False) -> list[Chunk]
 ```
 
-???????
+Retrieves chunks from specified datasets.
 
 ### Parameters
 
@@ -752,21 +751,21 @@ RAGFlow.retrieve(question:str="", datasets:list[str]=None, document=list[str]=No
 
 The user query or query keywords. Defaults to `""`.
 
-#### datasets: `list[str]`, *Required*?????
+#### datasets: `list[str]`, *Required*
 
 The datasets to search from.
 
 #### document: `list[str]`
 
-The documents to search from. `None` means no limitation. Defaults to `None`.
+The documents to search from. Defaults to `None`.
 
 #### offset: `int`
 
-The starting index for the documents to retrieve. Defaults to `0`??????.
+The starting index for the documents to retrieve. Defaults to `1`.
 
 #### limit: `int`
 
-The maximum number of chunks to retrieve. Defaults to `6`.???????????????
+The maximum number of chunks to retrieve. Defaults to `1024`.
 
 #### Similarity_threshold: `float`
 
@@ -786,14 +785,17 @@ The ID of the rerank model. Defaults to `None`.
 
 #### keyword: `bool`
 
-Indicates whether keyword-based matching is enabled:
+Indicates whether to enable keyword-based matching:
 
-- `True`: Enabled.
-- `False`: Disabled (default).
+- `True`: Enable keyword-based matching.
+- `False`: Disable keyword-based matching (default).
 
 #### highlight: `bool`
 
-Specifying whether to enable highlighting of matched terms in the results (True) or not (False).
+Specifies whether to enable highlighting of matched terms in the results:
+
+- `True`: Enable highlighting of matched terms.
+- `False`: Disable highlighting of matched terms (default).
 
 ### Returns
 
@@ -849,15 +851,15 @@ Creates a chat assistant.
 
 The following shows the attributes of a `Chat` object:
 
-#### name: `str`, *Required*????????
+#### name: `str`, *Required*
 
-The name of the chat assistant. Defaults to `"assistant"`.
+The name of the chat assistant.
 
 #### avatar: `str`
 
 Base64 encoding of the avatar. Defaults to `""`.
 
-#### knowledgebases: `list[str]` 
+#### knowledgebases: `list[str]`
 
 The IDs of the associated datasets. Defaults to `[""]`.
 
@@ -1016,7 +1018,7 @@ RAGFlow.list_chats(
 ) -> list[Chat]
 ```
 
-Retrieves a list of chat assistants.
+Lists chat assistants.
 
 ### Parameters
 

From 246a09e57ef7aa07ea4d55f4fc0dac16c9c5de03 Mon Sep 17 00:00:00 2001
From: writinwaters <cai.keith@gmail.com>
Date: Tue, 22 Oct 2024 19:08:26 +0800
Subject: [PATCH 2/2] Minor fixes to HTTP and Python API references

---
 api/http_api_reference.md   | 154 ++++++++++++++++++------------------
 api/python_api_reference.md |  16 ++--
 2 files changed, 84 insertions(+), 86 deletions(-)

diff --git a/api/http_api_reference.md b/api/http_api_reference.md
index bba39fa811..7be7c2f45d 100644
--- a/api/http_api_reference.md
+++ b/api/http_api_reference.md
@@ -415,7 +415,8 @@ curl --request POST \
      --url http://{address}/api/v1/dataset/{dataset_id}/document \
      --header 'Content-Type: multipart/form-data' \
      --header 'Authorization: Bearer {YOUR_API_KEY}' \     
-     --form 'file=@./test.txt'
+     --form 'file=@./test1.txt' \
+     --form 'file=@./test2.pdf'
 ```
 
 #### Request parameters
@@ -423,7 +424,7 @@ curl --request POST \
 - `"dataset_id"`: (*Path parameter*)  
   The ID of the dataset to which the documents will be uploaded.
 - `"file"`: (*Body parameter*)  
-  The document???? to upload.
+  The document to upload.
 
 ### Response
 
@@ -474,7 +475,7 @@ curl --request PUT \
   --data '{
   "name": "manual.txt", 
   "chunk_method": "manual", 
-  "parser_config": {"chunk_token_count": 128, "delimiter": "\n!?。；！？", "layout_recognize": true, "task_page_size": 12}
+  "parser_config": {"chunk_token_count": 128}
   }'
 
 ```
@@ -483,7 +484,7 @@ curl --request PUT \
 
 - `"name"`: (*Body parameter*), `string`
 - `"chunk_method"`: (*Body parameter*), `string`  
-  The parsing method to apply to the document.  
+  The parsing method to apply to the document:  
   - `"naive"`: General
   - `"manual`: Manual
   - `"qa"`: Q&A
@@ -496,16 +497,12 @@ curl --request PUT \
   - `"one"`: One
   - `"knowledge_graph"`: Knowledge Graph
   - `"email"`: Email
-- 
-
-### Returns  
-
-- `"parser_config"`: (*Body parameter*)  
-  Configuration object for the parser.  
-  - If the value is `None`, a dictionary with default values will be generated.
-
-- `"name"`: (*Body parameter*)  
-  Name or title of the document.  
+- `"parser_config"`: (*Body parameter*), `dict[string, Any]`
+  The parsing configuration for the document:  
+  - `"chunk_token_count"`: Defaults to `128`.
+  - `"layout_recognize"`: Defaults to `True`.
+  - `"delimiter"`: Defaults to `"\n!?。；！？"`.
+  - `"task_page_size"`: Defaults to `12`.
 
 ### Response
 
@@ -522,7 +519,7 @@ An error response includes a JSON object like the following:
 ```json
 {
     "code": 102,
-    "message": "The dataset not own the document."
+    "message": "The dataset does not have the document."
 }
 ```
 
@@ -541,7 +538,7 @@ Downloads a document from a specified dataset.
 - Headers:
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 - Output:
-  - '{FILE_NAME}'
+  - `'{FILE_NAME}'`
 
 #### Request example
 
@@ -554,10 +551,10 @@ curl --request GET \
 
 #### Request parameters
 
-- `"dataset_id"`: (*PATH parameter*)
+- `"dataset_id"`: (*Path parameter*)
   The dataset ID.
-- `"documents_id"`: (*PATH parameter*)  
-  The document ID of the file.
+- `"document_id"`: (*Path parameter*)  
+  The ID of the document to download.
 
 ### Response
 
@@ -602,20 +599,22 @@ curl --request GET \
 
 #### Request parameters
 
-- `"dataset_id"`: (*PATH parameter*)  
-  The dataset id
-- `offset`: (*Filter parameter*)  
-  The beginning number of records for paging.
-- `keywords`: (*Filter parameter*)  
-  The keywords matches the search key workds;
-- `limit`: (*Filter parameter*)  
-  Records number to return.
-- `orderby`: (*Filter parameter*)  
-  The field by which the records should be sorted. This specifies the attribute or column used to order the results.
-- `desc`: (*Filter parameter*)  
-  A boolean flag indicating whether the sorting should be in descending order.
-- `id`: (*Filter parameter*)  
-  The ID of the document to retrieve.
+- `"dataset_id"`: (*Path parameter*)  
+  The dataset ID.
+- `"keywords"`: (*Filter parameter*), `string`  
+  The keywords used to match document titles. Defaults to `None`.
+- `"offset"`: (*Filter parameter*), `integer`  
+  The starting index for the documents to retrieve. Typically used in conjunction with `limit`. Defaults to `1`.
+- `"limit"`: (*Filter parameter*), `integer`  
+  The maximum number of documents to retrieve. Defaults to `1024`.
+- `"orderby"`: (*Filter parameter*), `string`  
+  The field by which documents should be sorted. Available options:
+  - `"create_time"` (default)
+  - `"update_time"`
+- `"desc"`: (*Filter parameter*), `bool`  
+  Indicates whether the retrieved documents should be sorted in descending order. Defaults to `True`.
+- `"id"`: (*Filter parameter*), `string`  
+  The ID of the document to retrieve. Defaults to `None`.
 
 ### Response
 
@@ -675,7 +674,7 @@ An error response includes a JSON object like the following:
 
 ## Delete documents
 
-**DELETE** `/api/v1/dataset/{dataset_id}/document `
+**DELETE** `/api/v1/dataset/{dataset_id}/document`
 
 Deletes documents by ID.
 
@@ -687,7 +686,7 @@ Deletes documents by ID.
   - `'Content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 - Body:
-  - `ids`: `list[string]`
+  - `"ids"`: `list[string]`
 
 #### Request example
 
@@ -703,7 +702,7 @@ curl --request DELETE \
 
 #### Request parameters
 
-- `"ids"`: (*Body parameter*)
+- `"ids"`: (*Body parameter*), `list[string]`
   The IDs of the documents to delete.
 
 ### Response
@@ -736,12 +735,12 @@ Parses documents in a specified dataset.
 ### Request
 
 - Method: POST
-- URL: `/api/v1/dataset/{dataset_id}/chunk `
+- URL: `/api/v1/dataset/{dataset_id}/chunk`
 - Headers:
   - `'content-Type: application/json'`
   - 'Authorization: Bearer {YOUR_API_KEY}'
 - Body:
-  - `document_ids`: `list[string]`
+  - `"document_ids"`: `list[string]`
 
 #### Request example
 
@@ -756,7 +755,7 @@ curl --request POST \
 #### Request parameters
 
 - `"dataset_id"`: (*Path parameter*)
-- `"document_ids"`:(*Body parameter*)  
+- `"document_ids"`: (*Body parameter*)  
   The ids of the documents to parse.
 
 ### Response
@@ -794,7 +793,7 @@ Stops parsing specified documents.
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 - Body:
-  - `document_ids`: `list[string]`
+  - `"document_ids"`: `list[string]`
 
 #### Request example
 
@@ -809,7 +808,7 @@ curl --request DELETE \
 #### Request parameters
 
 - `"dataset_id"`: (*Path parameter*)
-- `"document_ids"`:(*Body parameter*)  
+- `"document_ids"`: (*Body parameter*)  
   The IDs of the documents to parse.
 
 ### Response
@@ -848,8 +847,8 @@ Adds a chunk to a specified document in a specified dataset.
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 - Body:
-  - `content`: string
-  - `important_keywords`: `list[string]`
+  - `"content"`: `string`
+  - `"important_keywords"`: `list[string]`
 
 #### Request example
 
@@ -865,9 +864,9 @@ curl --request POST \
 
 #### Request parameters
 
-- `content`:(*Body parameter*)  
+- `"content"`: (*Body parameter*)  
   Contains the main text or information of the chunk.
-- `important_keywords`(*Body parameter*)  
+- `"important_keywords"`: (*Body parameter*)  
   List the key terms or phrases that are significant or central to the chunk's content.
 
 ### Response
@@ -1009,7 +1008,7 @@ Deletes chunks by ID.
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 - Body:
-  - `chunk_ids`: `list[string]`
+  - `"chunk_ids"`: `list[string]`
 
 #### Request example
 
@@ -1025,7 +1024,7 @@ curl --request DELETE \
 
 #### Request parameters
 
-- `"chunk_ids"`:(*Body parameter*)  
+- `"chunk_ids"`: (*Body parameter*)  
   The chunks of the document to delete.
 
 ### Response
@@ -1063,9 +1062,9 @@ Updates content or configurations for a specified chunk.
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 - Body:
-  - `content`: `string`
-  - `important_keywords`: `string`
-  - `available`: `integer`
+  - `"content"`: `string`
+  - `"important_keywords"`: `list[string]`
+  - `"available"`: `integer`
 
 #### Request example
 
@@ -1082,11 +1081,11 @@ curl --request PUT \
 
 #### Request parameters
 
-- `"content"`:(*Body parameter*)  
+- `"content"`: (*Body parameter*)  
   Contains the main text or information of the chunk.
-- `"important_keywords"`:(*Body parameter*)  
+- `"important_keywords"`: (*Body parameter*)  
   Lists the key terms or phrases that are significant or central to the chunk's content.
-- `"available"`:(*Body parameter*)  
+- `"available"`: (*Body parameter*)  
    Indicating the availability status, 0 means unavailable and 1 means available.
 
 ### Response
@@ -1124,17 +1123,17 @@ Retrieves chunks from specified datasets.
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 - Body:
-  - `question`: `string`  
-  - `datasets`: `list[string]`  
-  - `documents`: `list[string]`
-  - `offset`: int  
-  - `limit`: int  
-  - `similarity_threshold`: float  
-  - `vector_similarity_weight`: float  
-  - `top_k`: int  
-  - `rerank_id`: string  
-  - `keyword`: bool  
-  - `highlight`: bool
+  - `"question"`: `string`  
+  - `"datasets"`: `list[string]`  
+  - `"documents"`: `list[string]`
+  - `"offset"`: `integer`  
+  - `"limit"`: `integer`  
+  - `"similarity_threshold"`: `float`  
+  - `"vector_similarity_weight"`: `float`  
+  - `"top_k"`: `integer`  
+  - `"rerank_id"`: `string`  
+  - `"keyword"`: `bool`  
+  - `"highlight"`: `bool`
 
 #### Request example
 
@@ -1271,12 +1270,11 @@ Creates a chat assistant.
 - Body:
   - `"name"`: `string`
   - `"avatar"`: `string`
-  - `"knowledgebases"`: `List[DataSet]`
+  - `"knowledgebases"`: `list[DataSet]`
   - `"id"`: `string`
   - `"llm"`: `LLM`
   - `"prompt"`: `Prompt`
 
-
 #### Request example
 
 ```shell
@@ -1555,7 +1553,7 @@ Deletes chat assistants by ID.
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 - Body:
-  - `ids`: list[string]
+  - `"ids"`: `list[string]`
 
 #### Request example
 
@@ -1749,7 +1747,7 @@ Create a chat session.
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 - Body:
-  - name: `string`
+  - `"name"`: `string`
 
 #### Request example
 
@@ -1844,7 +1842,7 @@ Update a chat session
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 - Body:
-  - `name`: string
+  - `"name"`: `string`
 
 #### Request example
 ```bash
@@ -1860,7 +1858,7 @@ curl --request PUT \
 
 #### Request Parameter
 
-- `name`: (*Body Parameter)  
+- `"name"`: (*Body Parameter*)  
   The name of the created session.  
   - `None`
 
@@ -1984,7 +1982,7 @@ Deletes sessions by ID.
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 - Body:
-  - `ids`: list[string]
+  - `"ids"`: `list[string]`
 
 #### Request example
 
@@ -2001,7 +1999,7 @@ curl --request DELETE \
 
 #### Request Parameters
 
-- `ids`: (*Body Parameter*)  
+- `"ids"`: (*Body Parameter*)  
   IDs of the sessions to delete.
   - `None`
 
@@ -2040,9 +2038,9 @@ Asks a question to start a conversation.
   - `'content-Type: application/json'`
   - `'Authorization: Bearer {YOUR_API_KEY}'`
 - Body:
-  - `question`: `string`
-  - `stream`: `bool`
-  - `session_id`: `string`
+  - `"question"`: `string`
+  - `"stream"`: `bool`
+  - `"session_id"`: `string`
 
 #### Request example
 
@@ -2059,14 +2057,14 @@ curl --request POST \
 
 #### Request Parameters
 
-- `question`:(*Body Parameter*)  
+- `"question"`: (*Body Parameter*)  
   The question you want to ask.  
   - question is required.
   `None`
-- `stream`: (*Body Parameter*)  
+- `"stream"`: (*Body Parameter*)  
   The approach of streaming text generation.  
   `False`
-- `session_id`: (*Body Parameter*)  
+- `"session_id"`: (*Body Parameter*)  
   The ID of session. If not provided, a new session will be generated.
 
 ### Response
diff --git a/api/python_api_reference.md b/api/python_api_reference.md
index 36af91890b..898025a804 100644
--- a/api/python_api_reference.md
+++ b/api/python_api_reference.md
@@ -366,7 +366,7 @@ print(doc)
 ## List documents
 
 ```python
-Dataset.list_documents(id:str =None, keywords: str=None, offset: int=0, limit:int = 1024,order_by:str = "create_time", desc: bool = True) -> list[Document]
+Dataset.list_documents(id: str = None, keywords: str = None, offset: int = 1, limit: int = 1024, orderby: str = "create_time", desc: bool = True) -> list[Document]
 ```
 
 Lists documents in the current dataset.
@@ -383,7 +383,7 @@ The keywords used to match document titles. Defaults to `None`.
 
 #### offset: `int`
 
-The starting index for the documents to retrieve. Typically used in confunction with `limit`. Defaults to `0`.
+The starting index for the documents to retrieve. Typically used in conjunction with `limit`. Defaults to `1`.
 
 #### limit: `int`
 
@@ -424,11 +424,11 @@ A `Document` object contains the following attributes:
 - `process_begin_at`: `datetime` The start time of document processing. Defaults to `None`.
 - `process_duation`: `float` Duration of the processing in seconds. Defaults to `0.0`.
 - `run`: `str` The document's processing status:
-  - `"0"`: UNSTART (default)  ?????????
-  - `"1"`: RUNNING
-  - `"2"`: CANCEL
-  - `"3"`: DONE
-  - `"4"`: FAIL
+  - `"UNSTART"` (default)
+  - `"RUNNING"`
+  - `"CANCEL"`
+  - `"DONE"`
+  - `"FAIL"`
 - `status`: `str` Reserved for future use.
 
 ### Examples
@@ -626,7 +626,7 @@ Lists chunks in the current document.
 ### Parameters
 
 #### keywords: `str`
-  
+
 The keywords used to match chunk content. Defaults to `None`
 
 #### offset: `int`