change eval dataset, include more robust judging, improved main
heyjustinai committed Jan 15, 2025
1 parent 43a2cbc commit b85811d
Showing 5 changed files with 291 additions and 98 deletions.
recipes/use_cases/prompt-migration/examples/usage.py (5 changes: 2 additions & 3 deletions)
@@ -16,10 +16,9 @@
model_type="openai"
)

# Example evaluation dataset
eval_dataset = [
{"text": "Example text 1", "expected_summary": "Summary 1"},
{"text": "Example text 2", "expected_summary": "Summary 2"},
{"text": "Example text 1", "expected_answer": "Summary 1"},
{"text": "Example text 2", "expected_answer": "Summary 2"},
]

# Migrate prompt
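The rename above only touches the dataset keys, so downstream code has to read "expected_answer" instead of "expected_summary". Below is a minimal sketch of a consumer of that format; it is not part of the commit, and score_output plus the hard-coded prediction are hypothetical stand-ins for a real model call.

# Hypothetical consumer of the eval dataset format shown above (not in the repo).
eval_dataset = [
    {"text": "Example text 1", "expected_answer": "Summary 1"},
    {"text": "Example text 2", "expected_answer": "Summary 2"},
]

def score_output(generated: str, expected: str) -> float:
    # Toy exact-match scorer, for illustration only.
    return float(generated.strip() == expected.strip())

for item in eval_dataset:
    prediction = item["expected_answer"]  # stand-in for a model call on item["text"]
    print(item["text"], score_output(prediction, item["expected_answer"]))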
recipes/use_cases/prompt-migration/main.py (62 changes: 52 additions & 10 deletions)
@@ -14,17 +14,48 @@ def main():
api_key=os.getenv("OPENAI_API_KEY")
)

- # target_lm = dspy.LM(
- #     model="together_ai/togethercomputer/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
- #     api_key=os.getenv("TOGETHER_API_KEY")
- # )
+ target_lm = dspy.LM(
+     model="together_ai/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
+     api_key=os.getenv("TOGETHER_API_KEY")
+ )
# To run it with ollama
# target_lm = dspy.LM('ollama_chat/llama3.2:3b-instruct-fp16', api_base='http://localhost:11434', api_key='')
target_lm = dspy.HFModel(model="gpt2")

# To run it with huggingface
# target_lm = dspy.HFModel(model="gpt2")

engine = PromptMigrationEngine(openai_lm, target_lm)

source_prompt = PromptTemplate(
template="Write a Python function that takes as input a file path to an image, loads the image into memory as a numpy array, then crops the rows and columns around the perimeter if they are darker than a threshold value. Use the mean value of rows and columns to decide if they should be marked for deletion.",
template="""You are an advanced Large Language Model tasked with generating Python code snippets in response to user prompts. Your primary objective is to provide accurate, concise, and well-structured Python functions. Follow these guidelines:
Understand the Context: Analyze the input prompt and identify its category (e.g., API Usage, File Handling, Error Handling).
Generate Code:
Write Python code that directly addresses the user's request.
Ensure the code is syntactically correct, functional, and adheres to Python best practices.
Include necessary imports and handle potential edge cases.
Error Handling:
Include appropriate error handling where applicable (e.g., try-except blocks).
If exceptions occur, provide meaningful error messages.
Readability:
Use clear variable names and include comments where necessary for clarity.
Prioritize readability and maintainability in all generated code.
Complexity Alignment:
Tailor the code's complexity based on the indicated difficulty (e.g., simple, medium, complex).
Ensure that the solution is neither overly simplistic nor unnecessarily complicated.
Prompt Type:
Focus on the code_generation type for creating Python functions.
Avoid deviating from the task unless additional clarification is requested.
Testing and Validity:
Assume the function might be run immediately. Provide code that is ready for use or minimal adaptation.
Highlight any dependencies or external libraries required.
""",
input_variables=["text"],
model_type="openai"
)
@@ -33,20 +64,31 @@ def main():


# To evaluate on a specific subset, use the following:
#summarization_dataset = get_eval_subset(prompt_type="summarization")
+ code_generation_dataset = get_eval_subset(prompt_type="code_generation")
#simple_tasks = get_eval_subset(complexity="simple")
+ evaluator = PromptEvaluator(openai_lm, target_lm)
+
+ metrics = evaluator.evaluate(
+     source_prompt.template, # Same prompt for both
+     source_prompt.template, # Same prompt for both
+     code_generation_dataset
+ )
+
+ print(f"Evaluation metrics:")
+ print(f" Accuracy: {metrics.accuracy:.2f}")
+ print(f" Similarity: {metrics.similarity:.2f}")
+ print(f" Consistency: {metrics.consistency:.2f}")

# Migrate prompt
print("Migrating prompt...")
- migrated_prompt = engine.migrate_prompt(source_prompt, eval_dataset)
+ migrated_prompt = engine.migrate_prompt(source_prompt, code_generation_dataset)

# Evaluate migration
print("Evaluating migration...")
evaluator = PromptEvaluator(openai_lm, target_lm)
metrics = evaluator.evaluate(
source_prompt.template,
migrated_prompt.template,
- eval_dataset
+ code_generation_dataset
)

print(f"\nResults:")
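main.py now selects its data through get_eval_subset(prompt_type=...) and get_eval_subset(complexity=...). The helper itself is defined elsewhere in the recipe; the sketch below is only a guess at its shape, assuming each dataset entry carries "prompt_type" and "complexity" metadata, and is not part of the commit.

# Assumed shape of the subset helper called from main.py; the real
# implementation and the EVAL_DATASET contents live elsewhere in the recipe.
EVAL_DATASET = [
    {"text": "...", "expected_answer": "...",
     "prompt_type": "code_generation", "complexity": "simple"},
]

def get_eval_subset(prompt_type=None, complexity=None):
    subset = EVAL_DATASET
    if prompt_type is not None:
        subset = [ex for ex in subset if ex.get("prompt_type") == prompt_type]
    if complexity is not None:
        subset = [ex for ex in subset if ex.get("complexity") == complexity]
    return subset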
recipes/use_cases/prompt-migration/prompt_migration/engine.py (66 changes: 52 additions & 14 deletions)
@@ -9,30 +9,53 @@ class PromptTemplate:
model_type: str # 'openai' or 'llama'

class PromptMigrationEngine:
- def __init__(self, source_lm: dspy.OpenAI, target_lm: dspy.LM):
+ def __init__(self, source_lm: dspy.LM, target_lm: dspy.LM):
self.source_lm = source_lm
self.target_lm = target_lm
dspy.configure(lm=source_lm)

def _optimize_transformation(self, transformer, eval_dataset):
"""Optimize the transformation using the evaluation dataset."""
class AccuracyMetric:
class PromptQualityMetric:
def __init__(self, source_lm, target_lm):
self.source_lm = source_lm
self.target_lm = target_lm

def __call__(self, example, prediction, trace=None):
- return float(prediction.target == example.expected_output)
+ if not hasattr(prediction, 'target'):
+     return 0.0

+ try:
+     # Get outputs from both models using the prompts
+     source_output = self.source_lm(example.source)
+     target_output = self.target_lm(prediction.target)
+
+     # Compare outputs (basic similarity)
+     from difflib import SequenceMatcher
+     similarity = SequenceMatcher(None,
+                                  str(source_output),
+                                  str(target_output)).ratio()
+     return similarity
+ except Exception as e:
+     print(f"Error in metric: {e}")
+     return 0.0

optimizer = dspy.BootstrapFewShotWithRandomSearch(
- metric=AccuracyMetric(),
- max_bootstrapped_demos=4,
- max_labeled_demos=4,
- num_threads=4
+ metric=PromptQualityMetric(self.source_lm, self.target_lm),
+ max_bootstrapped_demos=2,
+ max_labeled_demos=2,
+ num_threads=1
)

- train_data = [
-     dspy.Example(
-         source=item["text"],
-         expected_output=item["expected_summary"]
-     ).with_inputs("source") for item in eval_dataset
- ]
+ # Prepare training data
+ train_data = []
+ for item in eval_dataset:
+     # Create example with both prompt and expected output
+     example = dspy.Example(
+         source=item["text"],
+         expected_output=item["expected_answer"]
+     ).with_inputs("source")
+     train_data.append(example)

return optimizer.compile(transformer, trainset=train_data)

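The new PromptQualityMetric replaces exact-match accuracy with a text-similarity score between the source model's output and the target model's output. The standalone snippet below is not part of the commit; it only illustrates the difflib call the metric relies on, and the two output strings are made up.

from difflib import SequenceMatcher

# Made-up model outputs, standing in for source_lm(...) and target_lm(...).
source_output = "def crop_image(path):\n    ..."
target_output = "def crop_image(image_path):\n    ..."

# ratio() returns a value in [0, 1]; identical strings score 1.0.
similarity = SequenceMatcher(None, source_output, target_output).ratio()
print(f"similarity: {similarity:.2f}")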
@@ -44,15 +67,26 @@ def migrate_prompt(self,
class PromptTransformation(dspy.Signature):
"""Convert a prompt from one format to another."""
source = dspy.InputField(desc="Source prompt template")
target = dspy.OutputField(desc="Transformed prompt template")
target = dspy.OutputField(desc="Transformed prompt template that maintains functionality while adapting to target model format")

class Transformer(dspy.Module):
def __init__(self):
super().__init__()
self.chain = dspy.ChainOfThought(PromptTransformation)

def forward(self, source):
- return self.chain(source=source)
+ # Add context about the transformation task
+ prompt = f"""
+ Transform this prompt while:
+ 1. Maintaining core functionality
+ 2. Adapting to target model format
+ 3. Preserving input variables
+ 4. Keeping essential instructions
+ Source prompt:
+ {source}
+ """
+ return self.chain(source=prompt)

transformer = Transformer()

@@ -61,6 +95,10 @@ def forward(self, source):

result = transformer(source=source_prompt.template)

+ # Format for target model
+ if source_prompt.model_type == "openai" and "llama" in str(self.target_lm):
+     result.target = f"### Instruction:\n{result.target}\n\n### Response:"

return PromptTemplate(
template=result.target,
input_variables=source_prompt.input_variables,
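migrate_prompt() now post-processes the transformed prompt: when the source template is OpenAI-style and the target looks like a Llama model, it wraps the text in an instruction/response scaffold. The standalone sketch below is not part of the commit; the "llama" substring check mirrors the diff above, and whether this scaffold matches the chat template of a particular Llama variant is an assumption.

def format_for_target(prompt: str, target_model_name: str) -> str:
    # Mirrors the "llama" substring check in the diff above (illustrative only).
    if "llama" in target_model_name.lower():
        return f"### Instruction:\n{prompt}\n\n### Response:"
    return prompt

print(format_for_target("Summarize the following text: {text}",
                        "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo"))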