Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved ayah mode support #103

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions src/ansari/agents/ansari_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class AnsariWorkflow:

"""

def __init__(self, settings, message_logger=None, json_format=False):
def __init__(self, settings, message_logger=None, json_format=False, system_prompt_file=None):
self.settings = settings
sq = SearchQuran(settings.KALEMAT_API_KEY.get_secret_value())
sh = SearchHadith(settings.KALEMAT_API_KEY.get_secret_value())
Expand Down Expand Up @@ -81,7 +81,8 @@ def __init__(self, settings, message_logger=None, json_format=False):
}
self.model = settings.MODEL
self.pm = PromptMgr()
self.sys_msg = self.pm.bind(settings.SYSTEM_PROMPT_FILE_NAME).render()
prompt_file = system_prompt_file or settings.SYSTEM_PROMPT_FILE_NAME
self.sys_msg = self.pm.bind(prompt_file).render()
self.tools = [x.get_tool_description() for x in self.tool_name_to_instance.values()]
self.json_format = json_format
self.message_logger = message_logger
Expand Down Expand Up @@ -149,16 +150,22 @@ def _execute_gen_answer_step(self, step_params, prev_outputs):
search_results = "\n---\n".join(
[prev_outputs[i] for i in step_params["search_results_indices"]],
)
prompt = f"""Using {search_results}, compose a response that:
prompt = f""" Consider the following question: '{step_params["input"]}'

Using the excerpts from tafsirs below, compose a response that:
1. Directly answers the query of the user
2. Matches user's language/tone
3. Adheres to your system instructions as Ansari."""
3. Adheres to your system instructions as Ansari.

{search_results}

Reminder: the key question is: '{step_params["input"]}'.
"""
model_response = litellm.completion(
model=self.model,
messages=[
{"role": "system", "content": self.sys_msg},
{"role": "system", "content": prompt},
{"role": "user", "content": step_params["input"]},
{"role": "user", "content": prompt},
],
stream=False,
timeout=30.0,
Expand Down
19 changes: 13 additions & 6 deletions src/ansari/app/main_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,7 @@ class AyahQuestionRequest(BaseModel):
ayah: int
question: str
augment_question: bool | None = False
use_cache: bool | None = True
apikey: str


Expand All @@ -722,16 +723,20 @@ async def answer_ayah_question(
raise HTTPException(status_code=401, detail="Unauthorized")

try:
# Create AnsariWorkflow instance
logging.debug("Creating AnsariWorkflow instance for {req.surah}:{req.ayah}")
ansari_workflow = AnsariWorkflow(settings)
# Create AnsariWorkflow instance with ayah-specific system prompt
logging.debug(f"Creating Ansari Workflow instance for {req.surah}:{req.ayah}")
ansari_workflow = AnsariWorkflow(
settings,
system_prompt_file=settings.AYAH_SYSTEM_PROMPT_FILE_NAME
)

ayah_id = req.surah * 1000 + req.ayah

# Check if the answer is already stored in the database
stored_answer = db.get_quran_answer(req.surah, req.ayah, req.question)
if stored_answer:
return {"response": stored_answer}
if req.use_cache:
stored_answer = db.get_quran_answer(req.surah, req.ayah, req.question)
if stored_answer:
return {"response": stored_answer}

# Define the workflow steps
workflow_steps = [
Expand All @@ -746,6 +751,8 @@ async def answer_ayah_question(
("gen_query", {"input": req.question, "target_corpus": "tafsir"}),
("gen_answer", {"input": req.question, "search_results_indices": [0]}),
]
# If augment_question is False, skip the query generation step to use
# the original question directly
if not req.augment_question:
workflow_steps.pop(1)

Expand Down
66 changes: 64 additions & 2 deletions src/ansari/app/main_file.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,71 @@
import typer
import logging
from typing import Optional

from ansari.agents import Ansari
from ansari.config import get_settings
from ansari.presenters.file_presenter import FilePresenter
from ansari.presenters.ayah_file_presenter import AyahFilePresenter

logging.basicConfig(
level=logging.DEBUG,
)

def main(
    input_file: str,
    output_file: str,
    ayah_mode: bool = typer.Option(
        False, "--ayah-mode", "-a",
        help="Process input as ayah questions (CSV format: surah:ayah,question)",
    ),
    use_query_generation: bool = typer.Option(
        True, "--use-query-generation", "-q",
        help="Use query generation step in ayah mode",
    ),
    answer_column: str = typer.Option(
        "answer", "--answer-column", "-c",
        help="Name of the column to store answers in the output CSV (ayah mode only)",
    ),
    system_message: Optional[str] = typer.Option(
        None, "--system-message", "-s",
        help="Path to system message file. If not provided, uses default.",
    ),
):
    """
    Process input file and generate answers

    Args:
    input_file: Path to input file
    output_file: Path to output file
    ayah_mode: Whether to process in ayah mode
    use_query_generation: Whether to use query generation
    answer_column: Name of column to store answers
    system_message: The name of the system message file. If not provided, uses default.
    """
    # NOTE: the docstring above is left verbatim because Typer renders a
    # command function's docstring as its --help text.
    settings = get_settings()

    # The override is written to the ayah-specific prompt setting only; the
    # regular SYSTEM_PROMPT_FILE_NAME setting is not touched here.
    if system_message:
        settings.AYAH_SYSTEM_PROMPT_FILE_NAME = system_message

    # Choose the presenter for the requested mode. Both expose
    # present(input_path, output_path).
    if not ayah_mode:
        presenter = FilePresenter(Ansari(settings))
    else:
        presenter = AyahFilePresenter(
            settings=settings,
            use_query_generation=use_query_generation,
            answer_column=answer_column,
        )

    presenter.present(input_file, output_file)

if __name__ == "__main__":
ansari = Ansari(get_settings())
typer.run(FilePresenter(ansari).present)
typer.run(main)
1 change: 1 addition & 0 deletions src/ansari/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ def get_resource_path(filename):
MAX_TOOL_TRIES: int = Field(default=3)
MAX_FAILURES: int = Field(default=1)
SYSTEM_PROMPT_FILE_NAME: str = Field(default="system_msg_tool")
AYAH_SYSTEM_PROMPT_FILE_NAME: str = Field(default="system_msg_ayah")
PROMPT_PATH: str = Field(default=str(get_resource_path("prompts")))

LOGGING_LEVEL: str = Field(default="INFO")
Expand Down
136 changes: 136 additions & 0 deletions src/ansari/presenters/ayah_file_presenter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import csv
import logging
import os
from typing import List, Dict, Tuple

Check failure on line 4 in src/ansari/presenters/ayah_file_presenter.py

View workflow job for this annotation

GitHub Actions / ansari-container-job

Ruff (F401)

src/ansari/presenters/ayah_file_presenter.py:4:20: F401 `typing.List` imported but unused

Check failure on line 4 in src/ansari/presenters/ayah_file_presenter.py

View workflow job for this annotation

GitHub Actions / ansari-container-job

Ruff (F401)

src/ansari/presenters/ayah_file_presenter.py:4:26: F401 `typing.Dict` imported but unused


from ansari.agents.ansari_workflow import AnsariWorkflow
from ansari.config import get_settings

Check failure on line 8 in src/ansari/presenters/ayah_file_presenter.py

View workflow job for this annotation

GitHub Actions / ansari-container-job

Ruff (F401)

src/ansari/presenters/ayah_file_presenter.py:8:27: F401 `ansari.config.get_settings` imported but unused


class AyahFilePresenter:
    """Answer a CSV of ayah questions and write the answers to a new CSV.

    The first non-blank row of the input is treated as the header and is
    written to the output once. In every following row the first column holds
    a ``surah:ayah`` reference and the second column holds the question; any
    further columns are copied through unchanged. The answer (or an
    ``ERROR: ...`` marker when a row fails) is stored in ``answer_column``.
    """

    def __init__(self, settings, use_query_generation: bool = False, answer_column: str = "answer"):
        """Store the configuration used for every processed row.

        Args:
            settings: Settings object handed to each ``AnsariWorkflow``; its
                ``AYAH_SYSTEM_PROMPT_FILE_NAME`` attribute selects the prompt.
            use_query_generation: Whether to add a ``gen_query`` step to the
                workflow of every row.
            answer_column: Header name of the column that receives answers.
        """
        self.settings = settings
        self.use_query_generation = use_query_generation
        self.answer_column = answer_column

    def _parse_ayah_reference(self, ayah_ref: str) -> Tuple[int, int]:
        """Parse a surah:ayah reference into separate numbers.

        Args:
            ayah_ref: String in format "surah:ayah"

        Returns:
            Tuple of (surah_num, ayah_num)

        Raises:
            ValueError: If format is invalid or empty
        """
        if not ayah_ref or not ayah_ref.strip():
            raise ValueError("Empty ayah reference")

        try:
            # Unpacking fails for a wrong number of ':' parts and int() fails
            # for non-numeric parts; both surface as ValueError.
            surah_str, ayah_str = ayah_ref.strip().split(":")
            return int(surah_str), int(ayah_str)
        except ValueError as err:
            raise ValueError(
                f"Invalid ayah reference format: {ayah_ref}. Expected format: surah:ayah (e.g. 1:1)"
            ) from err

    @staticmethod
    def _has_content(row) -> bool:
        """Return True if at least one cell of ``row`` is not blank."""
        return any(cell.strip() for cell in row)

    @staticmethod
    def _store_answer(row, answer_index, value) -> None:
        """Put ``value`` into the answer column of ``row`` (in place).

        ``answer_index`` is the position of a pre-existing answer column, or
        None when the column was newly added and the value is appended.
        """
        if answer_index is None:
            row.append(value)
            return
        # Pad short rows so the value lands under the existing header cell.
        row.extend([""] * (answer_index + 1 - len(row)))
        row[answer_index] = value

    def _build_workflow_steps(self, surah: int, ayah: int, question: str) -> list:
        """Build the workflow steps for one question about one ayah."""
        # Same ayah id encoding as the rest of the ayah mode: surah * 1000 + ayah.
        ayah_id = surah * 1000 + ayah
        steps = [
            (
                "search",
                {
                    "query": question,
                    "tool_name": "search_tafsir",
                    "metadata_filter": f"part.from_ayah_int<={ayah_id} AND part.to_ayah_int>={ayah_id}",
                },
            ),
        ]
        if self.use_query_generation:
            steps.append(("gen_query", {"input": question, "target_corpus": "tafsir"}))
        steps.append(("gen_answer", {"input": question, "search_results_indices": [0]}))
        return steps

    def _answer_row(self, row):
        """Validate one data row, run the workflow for it and return the answer.

        Raises:
            ValueError: If the reference or the question is missing or invalid.
        """
        if len(row) < 2 or not row[0] or not row[1]:
            raise ValueError("Missing required fields in first or second column")

        surah, ayah = self._parse_ayah_reference(row[0])
        question = row[1].strip()

        print(f"Processing surah {surah}, ayah {ayah}, question: {question}")

        # Create a new workflow instance for each question
        workflow = AnsariWorkflow(
            self.settings,
            system_prompt_file=self.settings.AYAH_SYSTEM_PROMPT_FILE_NAME,
        )
        workflow_output = workflow.execute_workflow(self._build_workflow_steps(surah, ayah, question))
        # The answer is the last item in the workflow output
        return workflow_output[-1]

    def present(self, input_file_path: str, output_file_path: str):
        """Read questions from ``input_file_path`` and write answers to ``output_file_path``.

        Problems are logged rather than raised: a failing row is written with
        an ``ERROR: ...`` value in the answer column, and a file-level failure
        aborts processing after being logged. Nothing is written when the
        input is empty or its header has fewer than two columns.

        Args:
            input_file_path: Path of the CSV file to read.
            output_file_path: Path of the CSV file to create or overwrite.
        """
        try:
            # utf-8-sig reads plain UTF-8 as well as files starting with a BOM,
            # which would otherwise end up inside the first header cell.
            with open(input_file_path, newline="", encoding="utf-8-sig") as input_file:
                reader = csv.reader(input_file)

                # The first non-blank row is the header. Reading it from the
                # same reader guarantees it is not processed again as data.
                header = next((row for row in reader if self._has_content(row)), None)
                if header is None:
                    logging.error("Empty input file")
                    return
                if len(header) < 2:
                    logging.error("Input CSV must contain at least two columns")
                    return

                # Reuse an existing answer column, otherwise add one at the end.
                if self.answer_column in header:
                    fieldnames = header
                    answer_index = header.index(self.answer_column)
                else:
                    fieldnames = header + [self.answer_column]
                    answer_index = None

                with open(output_file_path, "w", newline="", encoding="utf-8") as output_file:
                    writer = csv.writer(output_file)
                    writer.writerow(fieldnames)

                    for row in reader:
                        # Skip empty lines
                        if not self._has_content(row):
                            continue

                        try:
                            result = self._answer_row(row)
                        except Exception as e:
                            # Best effort: one bad row must not stop the batch.
                            logging.error(f"Error processing row: {e}")
                            result = f"ERROR: {str(e)}"

                        self._store_answer(row, answer_index, result)
                        writer.writerow(row)
                        # Flush per row so partial results survive an interruption.
                        output_file.flush()

            print(f"Results saved to {os.path.abspath(output_file_path)}")

        except Exception as e:
            logging.error(f"Error processing file: {e}")
            return
30 changes: 30 additions & 0 deletions src/ansari/resources/prompts/system_msg_ayah.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
You are Ansari Qur'an, a multilingual Islamic digital assistant designed to answer
Quran-related questions with accuracy and depth.

Fluent in languages such as Arabic (including transliteration),
Bahasa, Bosnian, French, Turkish, Urdu, and more, you, Ansari,
craft precise, evidence-based responses exclusively from the Sunni tradition.

Here's how you work: You receive an ayah and a question along with the
desired response language and search results from any tafsirs available.
Currently that includes Ibn Kathir.

If you attribute a statement or opinion to a scholar, you will include EXACTLY
the sentence in which the mufassir says so.

If you say there is a hadith that says something, you will include the hadith
EXACTLY as it was in the source text.

Quoting from the source material is highly recommended when attributing
statements or opinions to scholars or hadith, especially when the source text is
weak or unverified.

Crucially, only attribute specific statements or opinions to these scholars if you
have specific referenceable evidence to support that attribution. When referencing
the Quran, you, Ansari, include the ayah number, Arabic text, and translation
(if the user's language is different from Arabic).

The person reading your answer is a well-informed scholar. You may use terms
that an informed scholar would use. Make sure you target your answer at an
informed scholar.

30 changes: 30 additions & 0 deletions src/ansari/resources/prompts/system_msg_ayah_lay.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
You are Ansari Qur'an, a multilingual Islamic digital assistant designed to answer
Quran-related questions with accuracy and depth.

Fluent in languages such as Arabic (including transliteration),
Bahasa, Bosnian, French, Turkish, Urdu, and more, you, Ansari,
craft precise, evidence-based responses exclusively from the Sunni tradition.

Here's how you work: You receive an ayah and a question along with the
desired response language and search results from any tafsirs available.
Currently that includes Ibn Kathir.

If you attribute a statement or opinion to a scholar, you will include EXACTLY
the sentence in which the mufassir says so.

If you say there is a hadith that says something, you will include the hadith
EXACTLY as it was in the source text.

Quoting from the source material is highly recommended when attributing
statements or opinions to scholars or hadith, especially when the source text is
weak or unverified.

Crucially, only attribute specific statements or opinions to these scholars if you
have specific referenceable evidence to support that attribution. When referencing
the Quran, you, Ansari, include the ayah number, Arabic text, and translation
(if the user's language is different from Arabic).

The person reading your answer is a general member of the public who
may or may not be a Muslim. Assume the reader only has a basic knowledge of
Islam.

Loading