Gemini #65

Open · wants to merge 2 commits into base: main
6 changes: 6 additions & 0 deletions .gitignore
@@ -1,3 +1,6 @@
# Application specific
.devon.config

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
@@ -191,3 +194,6 @@ dist
.yarn

build

package-lock.json
poetry.lock
1 change: 1 addition & 0 deletions README.md
@@ -108,6 +108,7 @@ Configuring Devon CLI...
? Select the model name:
claude-opus
gpt4-o
gemini-pro
llama-3-70b
❯ ollama/deepseek-coder:6.7b
```
1 change: 0 additions & 1 deletion devon-tui/new

This file was deleted.

32 changes: 23 additions & 9 deletions devon-tui/source/cli.tsx
@@ -46,6 +46,7 @@ const cli = meow(
$ devon start --api_key=YOUR_API_KEY
$ devon start --port 8080 --api_key=YOUR_API_KEY
$ devon start --model=gpt4-o --api_key=YOUR_API_KEY
$ devon start --model=gemini-pro --api_key=YOUR_API_KEY
$ devon start --model=claude-opus --api_key=YOUR_API_KEY
$ devon start --model=llama-3-70b --api_key=YOUR_API_KEY
$ devon start --model=custom --api_base=https://api.example.com --prompt_type=anthropic --api_key=YOUR_API_KEY
@@ -67,7 +68,8 @@ const cli = meow(
            type: 'string',
        },
        debug: {
            type: 'boolean'
            type: 'boolean',
            default: false
        },
    },
},
@@ -85,14 +87,14 @@ const { input } = cli;
if (input[0] === 'configure') {
    // Handle the configure subcommand
    console.log('Configuring Devon CLI...');

    inquirer
        .prompt([
            {
                type: 'list',
                name: 'modelName',
                message: 'Select the model name:',
                choices: ['claude-opus', 'gpt4-o', 'llama-3-70b', 'ollama/deepseek-coder:6.7b', 'custom'],
                choices: ['claude-opus', 'gpt4-o', 'gemini-pro', 'llama-3-70b', 'ollama/deepseek-coder:6.7b', 'custom'],
            },
        ])
        .then((answers) => {
@@ -169,19 +171,31 @@ if (input[0] === 'configure') {
let api_base: string | undefined = undefined
let prompt_type: string | undefined = undefined

if (cli.flags.apiKey){
    api_key = cli.flags['apiKey'];
} else if (process.env['OPENAI_API_KEY']){
if (process.env['OPENAI_API_KEY']){
    api_key = process.env['OPENAI_API_KEY'];
    modelName = "gpt4-o"
} else if (process.env['GEMINI_API_KEY']){
    api_key = process.env['GEMINI_API_KEY'];
    modelName = "gemini-pro"
} else if (process.env['ANTHROPIC_API_KEY']){
    api_key = process.env['ANTHROPIC_API_KEY'];
    modelName = "claude-opus"
} else if (process.env['GROQ_API_KEY']){
    api_key = process.env['GROQ_API_KEY'];
    modelName = "llama-3-70b"
} else if (cli.flags['apiKey']){
    api_key = cli.flags['apiKey'];

    if(api_key != "FOSS") {
        if(cli.flags['model']) {
            modelName = cli.flags['model'] as string;
        } else {
            console.log('Please provide a model name. Allowed values are gpt4-o, gemini-pro, claude-opus, llama-3-70b or ollama.');
            process.exit(1);
        }
    }
} else {
    console.log('Please provide an API key using the --api_key option or by setting OPENAI_API_KEY or ANTHROPIC_API_KEY.');
    console.log('Please provide an API key using the --api_key option or by setting OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY or GROQ_API_KEY.');
    process.exit(1);
}
@@ -218,7 +232,7 @@ if (input[0] === 'configure') {
);
process.exit(1);
}
console.log( ['server', '--port', port.toString(), '--model', modelName as string, '--api_key', api_key as string, '--api_base', api_base as string, '--prompt_type', prompt_type as string])
console.log( ['server', '--port', port.toString(), '--model', modelName as string, '--api_key', api_key as string])

let reset = false

@@ -236,7 +250,7 @@ if (input[0] === 'configure') {

const subProcess = childProcess.spawn(
    'devon_agent',
    ['server', '--port', port.toString(), '--model', modelName as string, '--api_key', api_key as string],
    {
        signal: controller.signal
    },
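A note on the credential logic in cli.tsx above: after this change the CLI checks provider environment variables before the --api_key flag, each variable implying a default model, and an explicit key other than "FOSS" must be paired with an explicit --model. Below is a minimal Python sketch of that resolution order; the function name resolve_credentials and its structure are illustrative only, not part of the PR.

```python
import os
from typing import Optional, Tuple

# Environment variables checked first, each implying a default model
# (mirrors the order in cli.tsx above).
ENV_DEFAULTS = [
    ("OPENAI_API_KEY", "gpt4-o"),
    ("GEMINI_API_KEY", "gemini-pro"),
    ("ANTHROPIC_API_KEY", "claude-opus"),
    ("GROQ_API_KEY", "llama-3-70b"),
]

def resolve_credentials(flag_api_key: Optional[str],
                        flag_model: Optional[str]) -> Tuple[str, Optional[str]]:
    """Return (api_key, model_name) using the same precedence as the CLI."""
    for var, default_model in ENV_DEFAULTS:
        key = os.environ.get(var)
        if key:
            return key, default_model
    if flag_api_key:
        # A non-"FOSS" key passed via --api_key must come with an explicit model.
        if flag_api_key != "FOSS" and not flag_model:
            raise SystemExit(
                "Please provide a model name. Allowed values are gpt4-o, "
                "gemini-pro, claude-opus, llama-3-70b or ollama.")
        return flag_api_key, flag_model
    raise SystemExit(
        "Please provide an API key using the --api_key option or by setting "
        "OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY or GROQ_API_KEY.")
```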
16 changes: 10 additions & 6 deletions devon_agent/__main__.py
@@ -35,9 +35,11 @@ def server(port, model, api_key, prompt_type, api_base):
    app.prompt_type = prompt_type
    app.model = model

    with open(os.path.join(os.getcwd(), ".devon.config"), "r") as f:
        config = f.read()
        app.config = json.loads(config)
    config_path = os.path.join(os.getcwd(), ".devon.config")
    if os.path.exists(config_path):
        with open(config_path, "r") as f:
            config = f.read()
            app.config = json.loads(config)

    uvicorn.run(app, host="0.0.0.0", port=port)

@@ -63,9 +65,11 @@ def headless(model, api_key, prompt_type, api_base, headless):
    app.model = model
    app.headless = headless

    with open(os.path.join(os.getcwd(), ".devon.config"), "r") as f:
        config = f.read()
        app.config = json.loads(config)
    config_path = os.path.join(os.getcwd(), ".devon.config")
    if os.path.exists(config_path):
        with open(config_path, "r") as f:
            config = f.read()
            app.config = json.loads(config)

    agent = TaskAgent(
        name="Devon",
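Both entry points above (server and headless) now repeat the same guarded read of .devon.config, so the agent no longer crashes when the file is absent. A small sketch of how that shared pattern could live in one helper; load_devon_config is a hypothetical name and is not part of the PR.

```python
import json
import os
from typing import Optional

def load_devon_config(cwd: Optional[str] = None) -> Optional[dict]:
    """Return the parsed .devon.config from cwd, or None if it does not exist."""
    config_path = os.path.join(cwd or os.getcwd(), ".devon.config")
    if not os.path.exists(config_path):
        return None
    with open(config_path, "r") as f:
        return json.loads(f.read())

# Sketch of usage in either `server` or `headless`:
# config = load_devon_config()
# if config is not None:
#     app.config = config
```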
40 changes: 35 additions & 5 deletions devon_agent/agents/default/agent.py
@@ -5,15 +5,14 @@
import traceback
from typing import Optional, Tuple

from devon_agent.agents.model import AnthropicModel, GroqModel, ModelArguments, OllamaModel, OpenAiModel
from devon_agent.agents.model import AnthropicModel, GroqModel, ModelArguments, OllamaModel, OpenAiModel, GeminiModel
from devon_agent.agents.default.anthropic_prompts import anthropic_history_to_bash_history, anthropic_last_user_prompt_template_v3, anthropic_system_prompt_template_v3, anthropic_commands_to_command_docs
from devon_agent.agents.default.openai_prompts import openai_last_user_prompt_template_v3, openai_system_prompt_template_v3, openai_commands_to_command_docs
from devon_agent.agents.default.anthropic_prompts import (
    parse_response
)
from devon_agent.agents.default.llama3_prompts import llama3_commands_to_command_docs, llama3_history_to_bash_history, llama3_last_user_prompt_template_v1, llama3_parse_response, llama3_system_prompt_template_v1
from devon_agent.agents.default.gemini_prompts import gemini_commands_to_command_docs, gemini_history_to_bash_history, gemini_last_user_prompt_template_v1, gemini_parse_response, gemini_system_prompt_template_v1
from devon_agent.agents.default.codegemma_prompts import llama3_7b_commands_to_command_docs, llama3_7b_history_to_bash_history, llama3_7b_last_user_prompt_template_v1, llama3_7b_parse_response, llama3_7b_system_prompt_template_v1


from devon_agent.tools.utils import get_cwd

from devon_agent.udiff import Hallucination
@@ -47,6 +46,7 @@ def run(self, session: "Session", observation: str = None): ...
class TaskAgent(Agent):
    default_models = {
        "gpt4-o": OpenAiModel,
        "gemini-pro": GeminiModel,
        "claude-opus": AnthropicModel,
        "llama-3-70b": GroqModel,
        "ollama/deepseek-coder:6.7b": OllamaModel
@@ -56,6 +56,9 @@ class TaskAgent(Agent):
"gpt4-o": {
"prompt_type": "openai",
},
"gemini-pro": {
"prompt_type": "gemini",
},
"claude-opus": {
"prompt_type": "anthropic",
},
@@ -204,6 +207,32 @@ def _prepare_llama3(self, task, editor, session):

messages = [{"role": "user", "content": last_user_prompt}]
return messages, system_prompt

def _prepare_gemini(self, task, editor, session):
time.sleep(3)

command_docs = (
"Custom Commands Documentation:\n"
+ gemini_commands_to_command_docs(
list(session.generate_command_docs().values())
)
+ "\n"
)

history = gemini_history_to_bash_history(self.chat_history)
system_prompt = gemini_system_prompt_template_v1(command_docs)
last_user_prompt = gemini_last_user_prompt_template_v1(
task, history, editor, get_cwd(
{
"session": session,
"environment": session.default_environment,
"state": session.state
}
), session.base_path, self.scratchpad
)

messages = [{"role": "user", "content": last_user_prompt}]
return messages, system_prompt

def _prepare_ollama(self, task, editor, session):
time.sleep(3)
Expand Down Expand Up @@ -262,7 +291,8 @@ def predict(
"anthropic": self._prepare_anthropic,
"openai": self._prepare_openai,
"llama3": self._prepare_llama3,
"ollama": self._prepare_ollama
"ollama": self._prepare_ollama,
"gemini": self._prepare_gemini,
}

        if not self.prompt_type:
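For orientation, the TaskAgent change above wires the new backend in three places: default_models maps the model name to GeminiModel, default_model_configs maps it to the "gemini" prompt type, and predict dispatches that prompt type to _prepare_gemini. The following is a stripped-down sketch of that lookup-and-dispatch flow; the class stub, function bodies, and the standalone predict signature here are placeholders, and only the names taken from the diff are real.

```python
# Placeholder stand-ins; in the PR these come from devon_agent.agents.model
# and the prompt modules.
class GeminiModel:
    def __init__(self, args=None):
        self.args = args

def _prepare_gemini(task, editor, session):
    # In the PR this builds (messages, system_prompt) from the gemini_prompts helpers.
    return [{"role": "user", "content": task}], "system prompt"

default_models = {"gemini-pro": GeminiModel}                      # model name -> backend class
default_model_configs = {"gemini-pro": {"prompt_type": "gemini"}} # model name -> prompt type
prepare_fns = {"gemini": _prepare_gemini}                         # prompt type -> prepare function

def predict(model_name, task, editor=None, session=None, prompt_type=None):
    model = default_models[model_name]()
    prompt_type = prompt_type or default_model_configs[model_name]["prompt_type"]
    messages, system_prompt = prepare_fns[prompt_type](task, editor, session)
    return model, messages, system_prompt

# Example:
# model, messages, system_prompt = predict("gemini-pro", "fix the failing test")
```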
159 changes: 159 additions & 0 deletions devon_agent/agents/default/gemini_prompts.py
@@ -0,0 +1,159 @@
from typing import Dict, List, Union

def gemini_commands_to_command_docs(commands: List[Dict]):
    doc = ""
    for command in commands:
        doc += f"{command['signature']}\n{command['docstring']}\n"
    return doc

def editor_repr(editor):
    return "\n\n".join(f"{file}:\n{editor[file]}" for file in editor)

def gemini_history_to_bash_history(history):
    # self.history.append(
    #     {
    #         "role": "assistant",
    #         "content": output,
    #         "thought": thought,
    #         "action": action,
    #         "agent": self.name,

    bash_history = ""
    for entry in history:
        if entry["role"] == "user":
            result = entry["content"].strip() if entry["content"] else "" + "\n"
            bash_history += f"<RESULT>\n{result}\n</RESULT>"
        elif entry["role"] == "assistant":
            bash_history += f"""
<YOU>
<THOUGHT>{entry['thought']}</THOUGHT>
<COMMAND>
{entry['action'][1:]}
</COMMAND>
</YOU>
"""
    return bash_history

def object_to_xml(data: Union[dict, bool], root="object"):
    xml = f"<{root}>"
    if isinstance(data, dict):
        xml += "".join(object_to_xml(value, key) for key, value in data.items())
    elif isinstance(data, (list, tuple, set)):
        xml += "".join(object_to_xml(item, "item") for item in data)
    else:
        xml += str(data)
    xml += f"</{root}>"
    return xml

def print_tree(directory, level=0, indent=""):
    return "".join(f"\n{indent}├── {name}/" + print_tree(content, level + 1, indent + "│ ") if isinstance(content, dict) else f"\n{indent}├── {name}" for name, content in directory.items())

def gemini_system_prompt_template_v1(command_docs: str):
    return f"""
<SETTING>
You are an autonomous programmer, and you're working directly in the command line with a special interface.

Environment:
- Editor (<EDITOR>): Open, edit, and auto-save code files. Focus on relevant files for each bug fix.
- Terminal: Execute commands to perform actions. Modify failed commands before retrying.
- History (<HISTORY>): Log of previous thoughts and actions. Act as if you've had these thoughts and performed these actions.

Constraints:
- Maintain proper formatting and adhere to the project's coding conventions.
- Keep only relevant files open. Close inactive files.
- Modify failed commands before retrying.
- Use efficient search techniques to locate relevant code elements.
- Verify fixes resolve the original issue before submitting.
- Prioritize general fixes over specific ones.
- Ask for user input when needed for feedback, clarification, or guidance.

</SETTING>
<COMMANDS>
{command_docs}
</COMMANDS>
<RESPONSE FORMAT>
Shell prompt format: <cwd> $
Required fields for each response:
<THOUGHT>
Your reflection, planning, and justification
</THOUGHT>
<SCRATCHPAD>
Information you want to write down
</SCRATCHPAD>
<COMMAND>
A single executable command (no interactive commands)
</COMMAND>
</RESPONSE FORMAT>
"""

def gemini_last_user_prompt_template_v1(issue, history, editor, cwd, root_dir, scratchpad):
    return f"""
<SETTING>
Objective: {issue}

Instructions:
- Edit files and run checks/tests
- Submit with 'submit' when done
- No interactive commands, write scripts instead
</SETTING>
<CONSTRAINTS>
- One command at a time
- Wait for feedback after each command
- Locate classes/functions over files
- Use 'no_op' for thinking time
- Issue title/first line describes it succinctly
</CONSTRAINTS>
<TESTING_TIPS>
- Write unit tests to verify fixes
- Run tests frequently to catch regressions
- Test edge cases and error handling
- Manually verify UI and integration tests
- Ensure tests pass before submitting
</TESTING_TIPS>
<PROBLEM_SOLVING>
- Identify root cause and failure case
- Fix underlying logic bug generally
- Trace error to source
- Identify flawed logic or edge case handling
- Devise robust solution for core problem
- Test fix thoroughly for potential impacts
</PROBLEM_SOLVING>
<EDITING_TIPS>
- Use 'no_op' to pause and think
- Match source lines precisely
- Scroll to lines before changing
- Make one change at a time
- Finish edits before testing
- Access limited to {root_dir}
- Current directory: {cwd}
</EDITING_TIPS>
<HISTORY>
{history}
</HISTORY>
<EDITOR>
{editor}
</EDITOR>
<SCRATCHPAD>
{scratchpad}
</SCRATCHPAD>
<DIRECTORY>
{root_dir}
</DIRECTORY>
<cwd>{cwd}</cwd> $
"""

def gemini_parse_response(response):
    if "<thought>" in response:
        thought = response.split("<thought>")[1].split("</thought>")[0]
        action = response.split("<command>")[1].split("</command>")[0]
        scratchpad = None
        if "<scratchpad>" in response:
            scratchpad = response.split("<scratchpad>")[1].split("</scratchpad>")[0]
    else:
        thought = response.split("<THOUGHT>")[1].split("</THOUGHT>")[0]
        action = response.split("<COMMAND>")[1].split("</COMMAND>")[0]
        scratchpad = None
        if "<SCRATCHPAD>" in response:
            scratchpad = response.split("<SCRATCHPAD>")[1].split("</SCRATCHPAD>")[0]

    return thought, action, scratchpad
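A hand-written usage example for gemini_parse_response: the sample response string below is invented for illustration, but the tag handling matches the function above (upper- and lower-case tags are both accepted).

```python
# The module path is the file added in this PR.
from devon_agent.agents.default.gemini_prompts import gemini_parse_response

sample = """<THOUGHT>Reproduce the bug, then patch the parser.</THOUGHT>
<SCRATCHPAD>failing test: tests/test_parser.py::test_empty_input</SCRATCHPAD>
<COMMAND>
python -m pytest tests/test_parser.py -x
</COMMAND>"""

thought, action, scratchpad = gemini_parse_response(sample)
print(thought)     # Reproduce the bug, then patch the parser.
print(action)      # python -m pytest tests/test_parser.py -x  (surrounded by newlines)
print(scratchpad)  # failing test: tests/test_parser.py::test_empty_input
```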