feat: Add /iterate command #2927

Open · wants to merge 2 commits into main
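This PR adds an `/iterate` command: instead of sending every in-chat file to the model at once, it applies a single instruction to the files in batches sized to fit the model's context and output token limits, then lints and commits the edited files batch-wise. A minimal usage sketch (the file glob and prompt are illustrative, not from the PR):

```
> /add src/*.py
> /iterate add type hints to all public functions
```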
3 changes: 2 additions & 1 deletion aider/coders/__init__.py
@@ -8,7 +8,7 @@
from .help_coder import HelpCoder
from .udiff_coder import UnifiedDiffCoder
from .wholefile_coder import WholeFileCoder

from .iterate_coder import IterateCoder
# from .single_wholefile_func_coder import SingleWholeFileFunctionCoder

__all__ = [
@@ -17,6 +17,7 @@
Coder,
EditBlockCoder,
EditBlockFencedCoder,
IterateCoder,
WholeFileCoder,
UnifiedDiffCoder,
# SingleWholeFileFunctionCoder,
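With this change the new coder is exported from the package root; a quick sanity check (assuming a dev install of aider):

```python
from aider.coders import IterateCoder  # now listed in __all__
```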
110 changes: 110 additions & 0 deletions aider/coders/iterate_coder.py
@@ -0,0 +1,110 @@
from typing import Tuple, override
import copy

from aider.coders.base_coder import Coder
from aider.coders.base_prompts import CoderPrompts
"""Perform a coding task on multiple files in batches that fit the context and outpot token limits, without sending them all at once."""
class IterateCoder(Coder):
coder : Coder = None
original_kwargs: dict = None
edit_format = "iterate"

def __init__(self, main_model, io, **kwargs):
        super().__init__(main_model, io, **kwargs)
        if 'gpt_prompts' not in kwargs:
            self.gpt_prompts = CoderPrompts()
@override
def run_one(self, user_message, preproc):
if self.coder is None:
            self.coder = Coder.create(main_model=self.main_model, edit_format=self.main_model.edit_format, from_coder=self, **self.original_kwargs)
            self.coder.auto_lint, self.coder.auto_commits = (False, False)
chat_files_with_type_and_length = self.get_chat_files_with_type_and_length()
max_tokens = self.main_model.info.get('max_tokens')
max_context = self.main_model.info['max_input_tokens']
max_output = max_tokens if max_tokens is not None else self.main_model.info['max_output_tokens']
repo_token_count = self.main_model.get_repo_map_tokens()
        history_token_count = sum(tup[0] for tup in self.summarizer.tokenize([msg["content"] for msg in self.done_messages]))
        prev_io = self.io.yes  # shell commands will still need confirmation for each command; this can be overridden by extending InputOutput and overriding confirm_ask.
        self.io.yes = True
        cruncher = self.file_cruncher(max_context, max_output, repo_token_count + history_token_count,
                                      chat_files_with_type_and_length)
        edited_files = self.batch_process(user_message, preproc, cruncher)
        self.io.yes = prev_io
        if not edited_files:
            return
if self.auto_lint:
cruncher.files_to_crunch = [(fname,True,self.main_model.token_count(self.io.read_text(fname))) for fname in edited_files]
self.batch_lint(cruncher,preproc)
if self.auto_commits:
self.batch_commit(edited_files)

def get_chat_files_with_type_and_length(self):
        chat_files_with_type_and_length: list[Tuple[str, bool, int]] = []
        for f in self.abs_fnames:
            chat_files_with_type_and_length.append((f, True, self.main_model.token_count(self.io.read_text(f))))
        for f in self.abs_read_only_fnames:
            chat_files_with_type_and_length.append((f, False, self.main_model.token_count(self.io.read_text(f))))
return chat_files_with_type_and_length

    def batch_process(self, message, preproc, cruncher):
        edited_files = []
for files_to_send_with_types in cruncher:
self.prepare_batch(files_to_send_with_types)
self.coder.run_one(message,preproc)
edited_files.extend(self.coder.aider_edited_files)
self.coder.aider_edited_files = set()
return edited_files

    def prepare_batch(self, files_to_send_with_types: list[Tuple[str, bool]]):
        self.coder.done_messages = copy.deepcopy(self.done_messages)
        self.coder.cur_messages = []
        self.coder.abs_fnames = set(f[0] for f in files_to_send_with_types if f[1])
        self.coder.abs_read_only_fnames = set(f[0] for f in files_to_send_with_types if not f[1])

    def batch_lint(self, cruncher, preproc):
for files_with_type in cruncher:
files = [ft[0] for ft in files_with_type]
lint_msg = self.coder.lint_edited(files)
self.auto_commit(files,context="Ran the linter")
if lint_msg:
ok = self.io.confirm_ask("Attempt to fix lint errors?", subject="batch_lint", allow_never=True)
if ok:
self.coder.done_messages, self.coder.cur_messages = ([],[])
self.coder.run_one(lint_msg,preproc)

    def batch_commit(self, files: list[str]):
self.repo.commit(files)

    class file_cruncher:
        context_tokens: int
        max_context: int
        max_output: int
        files_to_crunch: list[Tuple[str, bool, int]]
        PADDING: int = 50

        def __init__(self, max_context: int, max_output: int, context_tokens, files_to_crunch: list[Tuple[str, bool, int]]):
            self.context_tokens = context_tokens
            self.max_context = max_context
            self.max_output = max_output
            self.files_to_crunch = sorted(files_to_crunch, key=lambda x: x[2])  # smallest files first
        def __iter__(self):
            return self

        def __next__(self):
            """Fit input files + chat history + repo map + files_to_send into the context limit,
            and files_to_send into the output limit.
            Output files are assumed to be half the size of input files."""
            if len(self.files_to_crunch) == 0:
                raise StopIteration
            files_to_send: list[Tuple[str, bool]] = []
            i: int = 0
            total_context = 0
            total_output = 0
            for file_name, type_, length in self.files_to_crunch:
                # Estimated cost per file: its own tokens plus the expected edit,
                # assumed to be half the input size, per the docstring above.
                if length + length / 2 + self.context_tokens + total_context >= self.max_context or length / 2 + total_output >= self.max_output:
                    break
                total_context += length + length / 2 + self.PADDING
                total_output += length / 2 + self.PADDING
                files_to_send.append((file_name, type_))
                i += 1
            if i == 0:  # no file fits the limits, roll the dice and let the user deal with it
                f, t, _ = self.files_to_crunch[0]
                files_to_send.append((f, t))
                i = 1
self.files_to_crunch = self.files_to_crunch[i:]
return files_to_send
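To make the batching behavior concrete, here is a toy walk-through of the `file_cruncher` iterator with made-up numbers (file names, sizes, and limits are illustrative, not from the PR):

```python
from aider.coders.iterate_coder import IterateCoder

# Each tuple is (file_name, is_editable, token_length); the iterator yields
# batches whose estimated context and output footprints stay inside the limits.
cruncher = IterateCoder.file_cruncher(
    max_context=1000,
    max_output=500,
    context_tokens=200,  # repo map + chat history estimate
    files_to_crunch=[
        ("small.py", True, 100),
        ("medium.py", True, 200),
        ("large.py", True, 400),
    ],
)
for batch in cruncher:
    print(batch)
# Expected: [('small.py', True), ('medium.py', True)] then [('large.py', True)],
# since adding large.py to the first batch would push the context estimate past 1000.
```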

4 changes: 3 additions & 1 deletion aider/commands.py
@@ -1059,7 +1059,9 @@ def cmd_help(self, args):
map_mul_no_files=map_mul_no_files,
show_announcements=False,
)

def cmd_iterate(self, args):
"""Iteratively perform the change on files in batches that fit to context and output limits"""
return self._generic_chat_command(args, "iterate")
def cmd_ask(self, args):
"""Ask questions about the code base without editing any files. If no prompt provided, switches to ask mode.""" # noqa
return self._generic_chat_command(args, "ask")
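Beyond the chat command, the new coder can also be constructed directly through `Coder.create`, which is how the test below exercises it. A minimal sketch (model name, file list, and prompt are illustrative):

```python
from aider.coders import Coder
from aider.io import InputOutput
from aider.models import Model

io = InputOutput(yes=True)
coder = Coder.create(
    main_model=Model("gpt-4o-mini"),  # any configured model
    io=io,
    fnames=["a.py", "b.py"],  # files to process in batches
    edit_format="iterate",
)
coder.run(with_message="add docstrings to every public function")
```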
86 changes: 86 additions & 0 deletions tests/basic/test_iterate.py
@@ -0,0 +1,86 @@
import unittest
from pathlib import Path
from re import findall
from unittest.mock import patch
from aider.coders import Coder
from aider.io import InputOutput
from aider.models import Model
from aider.repo import GitRepo
from aider.utils import GitTemporaryDirectory


class TestIterateCoder(unittest.TestCase):
def setUp(self):
self.GPT35 = Model("gpt-3.5-turbo")
self.io = InputOutput(yes=True)
# self.webbrowser_patcher = patch("aider.io.webbrowser.open")
# self.mock_webbrowser = self.webbrowser_patcher.start()

# Get all Python files in aider/coders directory
coders_dir = Path(__file__).parent.parent.parent / "aider" / "coders"
self.files = [str(f) for f in coders_dir.glob("*.py") if f.is_file()]

# Create coder with all files
self.coder = Coder.create(
main_model=self.GPT35,
io=self.io,
fnames=self.files,
edit_format='iterate'
)

    def tearDown(self):
        # self.webbrowser_patcher.stop()
        return

    def test_iterate_resets_history_and_processes_all_files(self):
        """Tests that:
        - Every request retains the chat history from before the /iterate command, but not the history of other iterations.
        - Added files and the chat history from before /iterate remain unmodified.
        - Every file is processed (even if a single file to be sent exceeds the limits on its own) and none is processed twice.
        """
        processed_files: list[str] = []
        original_context: list[dict[str, str]]
        prev_file_names: list[str] = None
# Track messages sent to LLM and files processed
def mock_send(self,messages, model=None, functions=None):
nonlocal original_context
nonlocal processed_files
nonlocal prev_file_names
            for original_message in original_context:
                assert original_message in messages, "Chat history from before the command was not retained."
            # Simulate a response that mentions the filenames sent in this batch
            files_message = [msg['content'] for msg in messages if "*added these files to the chat*" in msg['content']][0]
            file_names = findall(r'.*\n(\S+\.py)\n```.*', files_message)
            for f_name in file_names:
                assert prev_file_names is None or f_name not in prev_file_names, "Files from previous iterations haven't been cleaned up."
            prev_file_names = file_names
processed_files.extend(file_names)
            # Return a minimal response
            self.partial_response_content = "Done."
            self.partial_response_function_call = dict()

with GitTemporaryDirectory():
            # Mock the send method, and stub out linting and committing
            with (
                patch.object(Coder, 'send', new_callable=lambda: mock_send),
                patch.object(Coder, 'lint_edited', lambda *_, **__: None),
                patch.object(GitRepo, 'commit', lambda *_, **__: None),
            ):
                self.coder.coder = Coder.create(
                    main_model=self.coder.main_model,
                    edit_format=self.coder.main_model.edit_format,
                    from_coder=self.coder,
                    **self.coder.original_kwargs,
                )
# Add initial conversation history
original_context = self.coder.done_messages = [
{"role": "user", "content": "Initial conversation"},
{"role": "assistant", "content": "OK"}
]

# Run iterate command
self.coder.run(with_message="Process all files")
# Verify all files were processed
input_basenames = {Path(f).name for f in self.files}
processed_basenames = {Path(f).name for f in processed_files}
missing = input_basenames - processed_basenames
assert not missing, f"Files not processed: {missing}"

# Verify history preservation and structure
assert len(self.coder.done_messages) == 2, "Original chat history was modified"
# Verify final file state
assert len(self.coder.abs_fnames) == len(self.files), "Not all files remained in chat"

if __name__ == "__main__":
unittest.main()
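A note on running the new test locally: `send`, linting, and committing are all mocked, so no LLM calls or git writes should occur; one way to invoke it (assuming a dev install with pytest available):

```
python -m pytest tests/basic/test_iterate.py -q
```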