Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AFLOW: Version 1.0 #1510

Open
wants to merge 96 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 95 commits
Commits
Show all changes
96 commits
Select commit Hold shift + click to select a range
8f1cf58
Update AGS
didiforgithub Jun 29, 2024
fd432fa
Update
didiforgithub Jun 29, 2024
f1ce133
Update he
didiforgithub Jul 1, 2024
4d37664
update ensemble example
didiforgithub Jul 1, 2024
aeac3fe
Update AGS
didiforgithub Jul 4, 2024
86033a1
Update
didiforgithub Jul 9, 2024
4af2315
Update humaneval
didiforgithub Jul 10, 2024
eb97b54
Update
didiforgithub Jul 11, 2024
7fa68d5
Update operator.py
didiforgithub Jul 11, 2024
8a24105
Update
didiforgithub Jul 14, 2024
e0955c5
Update Sota baseline
didiforgithub Jul 16, 2024
89b0c4c
Update
didiforgithub Jul 17, 2024
ca1c8f8
Update
didiforgithub Jul 22, 2024
772d2ae
Update
didiforgithub Jul 25, 2024
eac4b6c
Update GitNore
didiforgithub Jul 26, 2024
3fc3d21
Update
didiforgithub Jul 29, 2024
bdfa6eb
Update
didiforgithub Aug 1, 2024
47470fb
Update
didiforgithub Aug 1, 2024
008c5f0
Update
didiforgithub Aug 6, 2024
c9989c0
Update llm.py
MoshiQAQ Aug 15, 2024
2937d9a
Update 粗糙版本 优化
didiforgithub Aug 23, 2024
02c7c4e
Update 混乱版本
didiforgithub Aug 23, 2024
a3ff254
Update ConText Fill
didiforgithub Aug 23, 2024
1593e98
Update Multi LLM Config & Basic Evaluator
didiforgithub Aug 25, 2024
6a01a67
test for multi llm
didiforgithub Aug 25, 2024
7c2501e
Update
didiforgithub Aug 26, 2024
d97f90f
Update
didiforgithub Aug 26, 2024
c390341
Update Operator Optimize Method.
didiforgithub Sep 2, 2024
ca560a8
合入Eval与Optimize
didiforgithub Sep 8, 2024
4e0a896
提交baseline例子;修改context-fill 格式识别方式
didiforgithub Sep 9, 2024
7ffe68b
重构了Evaluator
didiforgithub Sep 9, 2024
62ffa73
update humaneval baseline & hotpotqa baseline
didiforgithub Sep 10, 2024
c7c34cd
Update human Eval
didiforgithub Sep 10, 2024
0b0a49d
更新 hotpotqa Baseline
didiforgithub Sep 10, 2024
4ce18d7
Update baselines
MoshiQAQ Sep 10, 2024
68e87da
Update Hotpotqa
MoshiQAQ Sep 10, 2024
257b994
Update drop.py
MoshiQAQ Sep 10, 2024
ab11246
添加了cost计算
didiforgithub Sep 10, 2024
445a2e6
Update QA
MoshiQAQ Sep 10, 2024
7f45ef6
Merge branch 'main' of https://github.com/didiforgithub/MetaGPT-MathAI
didiforgithub Sep 10, 2024
bdf865e
Merge branch 'main' of https://github.com/didiforgithub/MetaGPT-MathAI
didiforgithub Sep 10, 2024
1de0653
Update token_counter.py
didiforgithub Sep 10, 2024
f691c5f
Update QA
MoshiQAQ Sep 10, 2024
b9a2d94
更新了xml-compile方法,更新了剩余Baseline
didiforgithub Sep 11, 2024
b805da0
更新了eval BUG,同时更新了新的baseline
didiforgithub Sep 11, 2024
0704f34
更新了eval索引的入口
didiforgithub Sep 11, 2024
53890a5
更新了HotpotQA BenchMark 代码与对应的Self Consistency 实现
didiforgithub Sep 13, 2024
63f3f88
Update for fengwei
didiforgithub Sep 16, 2024
22e8f9d
Update baseline and benchmark; update evaluator
didiforgithub Sep 22, 2024
e3bcedc
Update Benchmark's data
didiforgithub Sep 22, 2024
99a9f7b
update humaneval data path and add baseline data
didiforgithub Sep 22, 2024
c7f44e9
Update
didiforgithub Sep 22, 2024
6a84a9d
Update HumanEval Eval
didiforgithub Sep 24, 2024
8dfe2de
Update
didiforgithub Sep 25, 2024
f14830b
Update optimizer.py
didiforgithub Sep 25, 2024
e8f6186
update
didiforgithub Sep 26, 2024
040a732
Update test_curve.py
didiforgithub Sep 26, 2024
eae3514
Update AFlow
didiforgithub Oct 16, 2024
bb229f2
Update AFlolw
didiforgithub Oct 16, 2024
eea9486
Update Eval
didiforgithub Oct 16, 2024
eab9b84
Change HotpotQA
MoshiQAQ Oct 16, 2024
390b65f
Update HotpotQA
MoshiQAQ Oct 16, 2024
859ee3d
fix test()
MoshiQAQ Oct 16, 2024
cea3473
Update evaluator.py
MoshiQAQ Oct 16, 2024
6aedc4a
Update AFlow
didiforgithub Oct 17, 2024
7c6edce
Update config2.yaml
didiforgithub Oct 17, 2024
d99054a
Merge branch 'main' into main
better629 Oct 17, 2024
2b788b2
Update Annotation to English, And Update Operator.json
didiforgithub Oct 18, 2024
ebcacdd
Update print error
didiforgithub Oct 18, 2024
5d6fa7a
Update readme.md
didiforgithub Oct 18, 2024
17f3cd4
Refactor Evaluator
didiforgithub Oct 18, 2024
6ebf3c4
Update drop.py
MoshiQAQ Oct 19, 2024
478589e
Update action_node.py
MoshiQAQ Oct 19, 2024
ade1068
Update Operator's code
didiforgithub Oct 21, 2024
efa00f8
Update
didiforgithub Oct 21, 2024
c194415
Update mbpp & math's eval
didiforgithub Oct 21, 2024
fe3fca5
Create download_data.py
MoshiQAQ Oct 21, 2024
2d1d7ca
Update Operator & Benchmark
didiforgithub Oct 21, 2024
23eec00
Update operator.py
didiforgithub Oct 21, 2024
d8c7174
Update HotpotQA's init round
didiforgithub Oct 21, 2024
35acb98
Update optimizer.py
didiforgithub Oct 21, 2024
828d187
change context_fill to xml_fill
didiforgithub Oct 22, 2024
66b5239
Update llm.py & handle exception
didiforgithub Oct 22, 2024
0b69ffe
Update download data.py and rm json files
didiforgithub Oct 22, 2024
fcc5e19
mv aflow from example to ext
didiforgithub Oct 22, 2024
5aa62b7
Update
didiforgithub Oct 22, 2024
e575b62
Resolve comment and modify readme
didiforgithub Oct 22, 2024
5775b20
Update Readme
didiforgithub Oct 22, 2024
2ccee33
Update Readme
didiforgithub Oct 22, 2024
462b7d9
Update README.md
didiforgithub Oct 22, 2024
56d0af1
pre-commit modify
didiforgithub Oct 22, 2024
27e942c
update
didiforgithub Oct 22, 2024
8c7cde5
Transform print into logger.info & mv code sanitize to utils.py
didiforgithub Oct 22, 2024
344d87d
Update
didiforgithub Oct 22, 2024
4564b70
Update mbpp.py
didiforgithub Oct 22, 2024
d2f90db
Update readme and better optimizer
didiforgithub Oct 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -188,3 +188,4 @@ cov.xml
*-structure.json
*.dot
.python-version
*.csv
55 changes: 55 additions & 0 deletions examples/aflow/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# AFlow: Automating Agentic Workflow Generation

AFlow is a framework for automatically generating and optimizing Agentic Workflows. It uses Monte Carlo tree search in a code-represented workflow space to find effective workflows, replacing manual development with machine effort. Our approach shows potential to outperform handcrafted workflows on various tasks.

[Read our paper on arXiv](https://arxiv.org/abs/2410.10762)

## Framework Components

- **Node**: Basic unit of LLM invocation. See `metagpt/actions/action_node.py` for a flexible interface to control LLM, temperature, format, and prompt.
- **Operator**: Predefined combinations of Nodes to enhance search efficiency. Encapsulates common operations like Generate, Format, Review, Revise, Ensemble, Test, and Programmer. See `metagpt/ext/aflow/operator.py` for details. You can customize your own Operator by referencing the implementations in this code.
- **Workflow**: A sequence of LLM-invoking nodes connected by edges. Can be represented as graphs, neural networks, or code to express various execution structures. See `metagpt/ext/aflow/workflow.py` for our implementation.
- **Optimizer**: Uses LLMs within a Monte Carlo Tree Search variant to explore and refine workflows. Iteratively selects, expands, evaluates, and updates workflows based on performance. See `metagpt/ext/aflow/scripts/optimizer.py` for details.
- **Evaluator**: Assesses workflow performance on given tasks. Provides feedback to guide the optimization process towards more effective workflows. See `metagpt/ext/aflow/scripts/evaluator.py` for details.

## Datasets

### Experimental Datasets
We conducted experiments on six datasets (HumanEval, MBPP, GSM8K, MATH, HotpotQA, DROP) and provide their evaluation code. The data can be found in this [datasets](https://drive.google.com/uc?export=download&id=1DNoegtZiUhWtvkd2xoIuElmIi4ah7k8e) link, or you can download them using `metagpt/ext/aflow/data/download_data.py`

### Custom Datasets
For custom tasks, you can reference the code in the metagpt/ext/aflow/benchmark folder. Inherit the `BaseBenchmark` class and implement `evaluate_problem`, `calculate_score`, and `get_result_columns` to add your custom dataset benchmark. Then, add your benchmark name in `metagpt/ext/aflow/scripts/evaluator.py` and `metagpt/ext/aflow/scripts/optimizer.py` to find effective workflows for your custom dataset.

## Quick Start

1. Configure your search in `optimize.py`:
- Open `examples/aflow/optimize.py`
- Set the following parameters:
```python
dataset = "HumanEval" # Choose from: "HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP" or your custom dataset name
question_type = "code" # Choose from: "math", "code", "qa"
sample = 4 # Number of samples to use for optimization
check_convergence = True # Whether to check for convergence
optimized_path = "path/to/optimized/workflows" # Path to save optimized workflows, defaults to metagpt/ext/aflow/scripts/optimized
initial_round = 1 # Starting round number
max_rounds = 20 # Maximum number of optimization rounds
```
- Adjust these parameters according to your specific requirements and dataset
2. Set up parameters in `config/config2.yaml` (see `examples/aflow/config2.example.yaml` for reference)
3. Set the operator you want to use in `optimize.py` and in `optimized_path/template/operator.py`, `optimized_path/template/operator.json`. You can reference our implementation to add operators for specific datasets
4. When you first run, you can download the datasets and initial rounds by setting `download(["datasets", "initial_rounds"])` in `examples/aflow/optimize.py`
5. (Optional) Add your custom dataset and corresponding evaluation function following the [Custom Datasets](#custom-datasets) section
6. Run `python examples/aflow/optimize.py` to start the optimization process!

## Citation

If you use AFlow in your research, please cite our paper:

```
@article{zhang2024aflow,
title={AFlow: Automating Agentic Workflow Generation},
author={Zhang, Jiayi and Xiang, Jinyu and Yu, Zhaoyang and Teng, Fengwei and Chen, Xionghui and Chen, Jiaqi and Zhuge, Mingchen and Cheng, Xin and Hong, Sirui and Wang, Jinlin and others},
journal={arXiv preprint arXiv:2410.10762},
year={2024}
}
```
12 changes: 12 additions & 0 deletions examples/aflow/config2.example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
models:
  "<model_name>": # model: "gpt-4-turbo" # or gpt-3.5-turbo
api_type: "openai" # or azure / ollama / groq etc.
base_url: "<your base url>"
api_key: "<your api key>"
temperature: 0
"<model_name>":
api_type: "openai"
base_url: "<your base url>"
api_key: "<your api key>"
temperature: 0
CALC_USAGE: True
60 changes: 60 additions & 0 deletions examples/aflow/optimize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
# @Date : 8/23/2024 20:00 PM
# @Author : didi
# @Desc : Entrance of AFlow.


from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.data.download_data import download
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType

# DatasetType, QuestionType, and OptimizerType definitions
# DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
# QuestionType = Literal["math", "code", "qa"]
# OptimizerType = Literal["Graph", "Test"]

# When you fisrt use, please download the datasets and initial rounds; If you want to get a look of the results, please download the results.
download(["datasets", "initial_rounds"])

# Crucial Parameters
dataset: DatasetType = "GSM8K" # Ensure the type is consistent with DatasetType
sample: int = 4 # Sample Count, which means how many workflows will be resampled from generated workflows
question_type: QuestionType = "code" # Ensure the type is consistent with QuestionType
optimized_path: str = "metagpt/ext/aflow/scripts/optimized" # Optimized Result Save Path
initial_round: int = 1 # Corrected the case from Initial_round to initial_round
max_rounds: int = 20
check_convergence: bool = True

# Config llm model, you can modify `config/config2.yaml` to use more llms.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")

# Config operators.
operators = [
"Custom", # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
# "AnswerGenerate" # It's for qa
# "CustomCodeGenerate", # It's for code
"ScEnsemble", # It's for code, math and qa
# "Test", # It's for code
"Programmer", # It's for math
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

添加注释,详细说明如何复现实验结果

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

为每一个数据添加一个直接可以跑的入口

]

# Create an optimizer instance
optimizer = Optimizer(
dataset=dataset, # Config dataset
question_type=question_type, # Config Question Type
opt_llm_config=claude_llm_config, # Config Optimizer LLM
exec_llm_config=mini_llm_config, # Config Execution LLM
check_convergence=check_convergence, # Whether Early Stop
operators=operators, # Config Operators you want to use
optimized_path=optimized_path, # Config Optimized workflow's file path
sample=sample, # Only Top(sample) rounds will be selected.
initial_round=initial_round, # Optimize from initial round
max_rounds=max_rounds, # The max iteration of AFLOW.
)

if __name__ == "__main__":
# Optimize workflow via setting the optimizer's mode to 'Graph'
optimizer.optimize("Graph")
# Test workflow via setting the optimizer's mode to 'Test'
# optimizer.optimize("Test")
134 changes: 134 additions & 0 deletions metagpt/actions/action_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
we can use typing to extract the type of the node, but we cannot use built-in list to extract.
"""
import ast
import json
import re
import typing
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple, Type, Union
Expand All @@ -18,6 +19,7 @@

from metagpt.actions.action_outcls_registry import register_action_outcls
from metagpt.const import USE_CONFIG_TIMEOUT
from metagpt.ext.aflow.scripts.utils import sanitize
from metagpt.llm import BaseLLM
from metagpt.logs import logger
from metagpt.provider.postprocess.llm_output_postprocess import llm_output_postprocess
Expand All @@ -38,9 +40,17 @@ class ReviseMode(Enum):

TAG = "CONTENT"


class FillMode(Enum):
CODE_FILL = "code_fill"
XML_FILL = "xml_fill"
SINGLE_FILL = "single_fill"


LANGUAGE_CONSTRAINT = "Language: Please use the same language as Human INPUT."
FORMAT_CONSTRAINT = f"Format: output wrapped inside [{TAG}][/{TAG}] like format example, nothing else."


SIMPLE_TEMPLATE = """
## context
{context}
Expand Down Expand Up @@ -471,6 +481,113 @@ async def simple_fill(

return self

def get_field_name(self):
"""
Get the field name from the Pydantic model associated with this ActionNode.
"""
model_class = self.create_class()
fields = model_class.model_fields

# Assuming there's only one field in the model
if len(fields) == 1:
return next(iter(fields))

# If there are multiple fields, we might want to use self.key to find the right one
return self.key

def get_field_names(self):
"""
获取与此ActionNode关联的Pydantic模型的字段名称。
"""
model_class = self.create_class()
return model_class.model_fields.keys()

def get_field_types(self):
"""
获取与此ActionNode关联的Pydantic模型的字段类型。
"""
model_class = self.create_class()
return {field_name: field.annotation for field_name, field in model_class.model_fields.items()}

def xml_compile(self, context):
# TODO 再来一版

field_names = self.get_field_names()
# Construct the example using the field names
examples = []
for field_name in field_names:
examples.append(f"<{field_name}>content</{field_name}>")

# Join all examples into a single string
example_str = "\n".join(examples)
# Add the example to the context
context += f"""
### Response format (must be strictly followed): All content must be enclosed in the given XML tags, ensuring each opening <tag> has a corresponding closing </tag>, with no incomplete or self-closing tags allowed.\n
{example_str}
"""
return context

async def code_fill(self, context, function_name=None, timeout=USE_CONFIG_TIMEOUT):
"""
Fill CodeBlock Using ``` ```
"""
field_name = self.get_field_name()
prompt = context
content = await self.llm.aask(prompt, timeout=timeout)
extracted_code = sanitize(code=content, entrypoint=function_name)
result = {field_name: extracted_code}
return result

async def single_fill(self, context):
field_name = self.get_field_name()
prompt = context
content = await self.llm.aask(prompt)
result = {field_name: content}
return result

async def xml_fill(self, context):
"""
使用XML标签填充上下文并根据字段类型进行转换,包括字符串、整数、布尔值、列表和字典类型
"""
field_names = self.get_field_names()
field_types = self.get_field_types()

extracted_data = {}
content = await self.llm.aask(context)

for field_name in field_names:
pattern = rf"<{field_name}>(.*?)</{field_name}>"
match = re.search(pattern, content, re.DOTALL)
if match:
raw_value = match.group(1).strip()
field_type = field_types.get(field_name)

if field_type == str:
extracted_data[field_name] = raw_value
elif field_type == int:
try:
extracted_data[field_name] = int(raw_value)
except ValueError:
extracted_data[field_name] = 0 # 或者其他默认值
elif field_type == bool:
extracted_data[field_name] = raw_value.lower() in ("true", "yes", "1", "on", "True")
elif field_type == list:
try:
extracted_data[field_name] = eval(raw_value)
if not isinstance(extracted_data[field_name], list):
raise ValueError
except:
extracted_data[field_name] = [] # 默认空列表
elif field_type == dict:
try:
extracted_data[field_name] = eval(raw_value)
if not isinstance(extracted_data[field_name], dict):
raise ValueError
except:
extracted_data[field_name] = {} # 默认空字典

return extracted_data

async def fill(
self,
context,
Expand All @@ -481,6 +598,7 @@ async def fill(
images: Optional[Union[str, list[str]]] = None,
timeout=USE_CONFIG_TIMEOUT,
exclude=[],
function_name: str = None,
):
"""Fill the node(s) with mode.

Expand All @@ -507,6 +625,22 @@ async def fill(
if self.schema:
schema = self.schema

if mode == FillMode.CODE_FILL.value:
result = await self.code_fill(context, function_name, timeout)
self.instruct_content = self.create_class()(**result)
return self

elif mode == FillMode.XML_FILL.value:
context = self.xml_compile(context=self.context)
result = await self.xml_fill(context)
self.instruct_content = self.create_class()(**result)
return self

elif mode == FillMode.SINGLE_FILL.value:
result = await self.single_fill(context)
self.instruct_content = self.create_class()(**result)
return self

if strgy == "simple":
return await self.simple_fill(schema=schema, mode=mode, images=images, timeout=timeout, exclude=exclude)
elif strgy == "complex":
Expand Down
Loading
Loading