From 8546b931ae3527ca5711603b96944cf312ffb724 Mon Sep 17 00:00:00 2001
From: Justin
Date: Fri, 27 Oct 2023 12:25:04 -0700
Subject: [PATCH 1/8] wip

---
 pyproject.toml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 90fb5aec325..c88329901e5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -82,8 +82,11 @@ core = ["skema[img2mml]", "skema[isa]", "skema[tr]", "skema[metal]"]
 # see skema/img2mml/render_mml/mathpix_annotator
 annotations = ["matplotlib", "notebook"]
 
+# for LLM use in skema
+llms = ["langchain==0.0.325"]
+
 # all extras
-all = ["skema[core]", "skema[dev]", "skema[doc]", "skema[demo]", "skema[annotations]"]
+all = ["skema[core]", "skema[dev]", "skema[doc]", "skema[demo]", "skema[annotations]", "skema[llms]"]
 
 [tool.setuptools.package-dir]
 "skema.gromet" = "skema/gromet"

From 9bb6c8b598f0bfd40af359e6e3e1a4e7aab2f69a Mon Sep 17 00:00:00 2001
From: Justin
Date: Fri, 27 Oct 2023 13:49:09 -0700
Subject: [PATCH 2/8] added endpoint for LLM dynamics extraction

---
 skema/rest/api.py       |   7 ++
 skema/rest/llm_proxy.py | 138 ++++++++++++++++++++++++++++++++++++++++
 skema/rest/proxies.py   |   3 +-
 3 files changed, 146 insertions(+), 2 deletions(-)
 create mode 100644 skema/rest/llm_proxy.py

diff --git a/skema/rest/api.py b/skema/rest/api.py
index c90080417a3..435bcc56fec 100644
--- a/skema/rest/api.py
+++ b/skema/rest/api.py
@@ -7,6 +7,7 @@
     integrated_text_reading_proxy,
     morae_proxy,
     metal_proxy,
+    llm_proxy,
 )
 from skema.img2mml import eqn2mml
 from skema.skema_py import server as code2fn
@@ -110,6 +111,12 @@
     tags=["morae", "skema-rs"],
 )
 
+app.include_router(
+    llm_proxy.router,
+    prefix="/morae",
+    tags=["morae"],
+)
+
 app.include_router(
     integrated_text_reading_proxy.router,
     prefix="/text-reading",
diff --git a/skema/rest/llm_proxy.py b/skema/rest/llm_proxy.py
new file mode 100644
index 00000000000..c3538e7d4c5
--- /dev/null
+++ b/skema/rest/llm_proxy.py
@@ -0,0 +1,138 @@
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import (
+    ChatPromptTemplate,
+    SystemMessagePromptTemplate,
+    HumanMessagePromptTemplate,
+)
+from langchain.output_parsers import (
+    StructuredOutputParser,
+    ResponseSchema
+)
+from fastapi import APIRouter, FastAPI, File, UploadFile
+from io import BytesIO
+from zipfile import ZipFile
+import requests
+from pathlib import Path
+import json
+from skema.rest.proxies import SKEMA_OPENAI_KEY
+
+router = APIRouter()
+
+class LineSpan:
+    def __init__(self, line_begin, line_end):
+        self.line_begin = line_begin
+        self.line_end = line_end
+
+
+@router.post(
+    "/linespan-given-filepaths-zip",
+    summary=(
+        "Send a zip file containing a code file,"
+        " get a line span of the dynamics back."
+    ),
+)
+async def get_lines_of_model(zip_file: UploadFile = File()) -> LineSpan:
+    """
+    Endpoint for generating a line span containing the dynamics from a zip archive. Currently
+    it expects exactly one Python file in the zip; there can be other files, such as a
+    README.md, but only one .py. Future versions will generalize support to arbitrary zip contents.
+
+    ### Python example
+    ```
+    import requests
+
+    files = {
+        "zip_file": open(zip_path, "rb"),
+    }
+
+    response = requests.post(f"{ENDPOINT}/morae/linespan-given-filepaths-zip", files=files)
+    linespan_json = response.json()
+    ```
+    """
+    files=[]
+    blobs=[]
+    with ZipFile(BytesIO(zip_file.file.read()), "r") as zip:
+        for file in zip.namelist():
+            file_obj = Path(file)
+            if file_obj.suffix in [".py"]:
+                files.append(file)
+                blobs.append(zip.open(file).read())
+
+    # read in the code, for the prompt
+    code = blobs[0].decode("utf-8")  # needs to be a regular string, not a byte string
+    file = files[0]
+    # JSON payload for the FN construction
+    single_snippet_payload = {
+        "files": [file],
+        "blobs": [code],
+    }
+
+    # these are the formatting instructions
+    response_schemas = [
+        ResponseSchema(name="model_function", description="The name of the function that contains the model dynamics")
+    ]
+
+    # for structured output parsing, converts schema to langchain object
+    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
+
+    # for structured output parsing, makes the instructions to be passed as a variable to the prompt template
+    format_instructions = output_parser.get_format_instructions()
+
+    # low temp, as the task is not generative
+    temperature = 0.1
+
+    # initialize the model
+    openai = ChatOpenAI(
+        temperature=temperature,
+        model_name='gpt-3.5-turbo',
+        openai_api_key=SKEMA_OPENAI_KEY
+    )
+
+    # construct the prompts
+    template="You are an assistant that answers questions about code."
+    system_message_prompt = SystemMessagePromptTemplate.from_template(template)
+    human_template="Find the function that contains the model dynamics in {code} \n{format_instructions}"
+    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
+
+    # combining the templates into a chat template
+    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
+
+    # formatting the prompt with input variables
+    formatted_prompt = chat_prompt.format_prompt(code=code, format_instructions = format_instructions).to_messages()
+
+    # running the model
+    output = openai(formatted_prompt)
+
+    # parsing the output
+    try:
+        parsed_output = output_parser.parse(output.content)
+
+        function_name = parsed_output['model_function']
+
+        # Get the FN from it
+        url = "https://api.askem.lum.ai/code2fn/fn-given-filepaths"
+        response_zip = requests.post(url, json=single_snippet_payload)
+
+        # get the metadata entry for the function
+        for entry in response_zip.json()['modules'][0]['fn_array']:
+            try:
+                if entry['b'][0]['name'][0:len(function_name)] == function_name:
+                    metadata_idx = entry['b'][0]['metadata']
+            except:
+                pass  # entry has no named outer box; skip it
+
+        # get the line span using the metadata
+        for (i,metadata) in enumerate(response_zip.json()['modules'][0]['metadata_collection']):
+            if i == (metadata_idx - 1):
+                line_begin = metadata[0]['line_begin']
+                line_end = metadata[0]['line_end']
+    except:
+        print("Failed to parse dynamics")
+        line_begin = 0
+        line_end = 0
+
+    output = LineSpan(line_begin,line_end)
+    return output
+
+
+app = FastAPI()
+app.include_router(router)
\ No newline at end of file
diff --git a/skema/rest/proxies.py b/skema/rest/proxies.py
index 502a355b6c9..2a61fcefd25 100644
--- a/skema/rest/proxies.py
+++ b/skema/rest/proxies.py
@@ -7,7 +7,7 @@
 
 # MORAE etc
 SKEMA_RS_ADDESS = os.environ.get("SKEMA_RS_ADDRESS", "https://skema-rs.askem.lum.ai")
-
+SKEMA_OPENAI_KEY = os.environ.get("SKEMA_OPENAI_KEY", "YOU_FORGOT_TO_SET_SKEMA_OPENAI_KEY")
 # MathJAX service
 SKEMA_MATHJAX_PROTOCOL = os.environ.get("SKEMA_MATHJAX_PROTOCOL", "http://")
@@ -24,4 +24,3 @@
 SKEMA_TR_ADDRESS = os.environ.get("SKEMA_TR_ADDRESS", "http://hopper.sista.arizona.edu")
 OPENAI_KEY = os.environ.get("OPENAI_KEY", "YOU_FORGOT_TO_SET_OPENAI_KEY")
 COSMOS_ADDRESS = os.environ.get("COSMOS_ADDRESS", "https://xdd.wisc.edu/cosmos_service")
-

From 82c1d8219b328344fd6e9ee492d3bb8892988d62 Mon Sep 17 00:00:00 2001
From: Justin
Date: Fri, 27 Oct 2023 14:24:27 -0700
Subject: [PATCH 3/8] made data class pydantic BaseModel

---
 skema/rest/llm_proxy.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/skema/rest/llm_proxy.py b/skema/rest/llm_proxy.py
index c3538e7d4c5..4d1424de7ce 100644
--- a/skema/rest/llm_proxy.py
+++ b/skema/rest/llm_proxy.py
@@ -13,15 +13,14 @@
 from zipfile import ZipFile
 import requests
 from pathlib import Path
-import json
+from pydantic import BaseModel
 from skema.rest.proxies import SKEMA_OPENAI_KEY
 
 router = APIRouter()
 
-class LineSpan:
-    def __init__(self, line_begin, line_end):
-        self.line_begin = line_begin
-        self.line_end = line_end
+class LineSpan(BaseModel):
+    line_begin: int
+    line_end: int
 
 
 @router.post(
     "/linespan-given-filepaths-zip",
@@ -130,7 +129,7 @@ async def get_lines_of_model(zip_file: UploadFile = File()) -> LineSpan:
         line_begin = 0
         line_end = 0
 
-    output = LineSpan(line_begin,line_end)
+    output = LineSpan(line_begin=line_begin,line_end=line_end)
     return output
 

From 477fa9f9e2f77f9fad003bb13b8ec01611ae261f Mon Sep 17 00:00:00 2001
From: Justin
Date: Mon, 30 Oct 2023 15:35:46 -0400
Subject: [PATCH 4/8] updated output class, based on HMI recommendations

---
 skema/rest/llm_proxy.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/skema/rest/llm_proxy.py b/skema/rest/llm_proxy.py
index 4d1424de7ce..8a3b8d015c8 100644
--- a/skema/rest/llm_proxy.py
+++ b/skema/rest/llm_proxy.py
@@ -13,15 +13,22 @@
 from zipfile import ZipFile
 import requests
 from pathlib import Path
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
+from typing import List, Optional
 from skema.rest.proxies import SKEMA_OPENAI_KEY
 
 router = APIRouter()
 
-class LineSpan(BaseModel):
-    line_begin: int
-    line_end: int
+class Dynamics(BaseModel):
+    """
+    Dynamics Data Model for capturing dynamics within a CodeFile.
+    """
+    name: Optional[str] = Field(description="Name of the dynamics section.")
+    description: Optional[str] = Field(description="Description of the dynamics.")
+    block: List[str] = Field(
+        description="A list containing strings indicating the line numbers in the file that contain the dynamics, e.g., ['L205-L213', 'L225-L230']."
+    )
 
 @router.post(
     "/linespan-given-filepaths-zip",
@@ -49,6 +56,7 @@ async def get_lines_of_model(zip_file: UploadFile = File()) -> LineSpan:
     """
     files=[]
     blobs=[]
+    block=[]
     with ZipFile(BytesIO(zip_file.file.read()), "r") as zip:
         for file in zip.namelist():
             file_obj = Path(file)
@@ -129,7 +137,9 @@ async def get_lines_of_model(zip_file: UploadFile = File()) -> LineSpan:
         line_begin = 0
         line_end = 0
 
-    output = LineSpan(line_begin=line_begin,line_end=line_end)
+    block.append(f"L{line_begin}-L{line_end}")
+
+    output = Dynamics(name=None, description=None, block=block)
     return output
 

From b5f7b109d23ced9af8f1f5cabc0d35635c45c6c0 Mon Sep 17 00:00:00 2001
From: Justin
Date: Mon, 30 Oct 2023 16:36:59 -0400
Subject: [PATCH 5/8] added test for LLM prompt

---
 skema/rest/tests/test_llms.py | 57 +++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 skema/rest/tests/test_llms.py

diff --git a/skema/rest/tests/test_llms.py b/skema/rest/tests/test_llms.py
new file mode 100644
index 00000000000..12582500128
--- /dev/null
+++ b/skema/rest/tests/test_llms.py
@@ -0,0 +1,57 @@
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import (
+    ChatPromptTemplate,
+    SystemMessagePromptTemplate,
+    HumanMessagePromptTemplate,
+)
+from langchain.output_parsers import (
+    StructuredOutputParser,
+    ResponseSchema
+)
+from skema.rest.proxies import SKEMA_OPENAI_KEY
+
+def test_prompt_construction():
+    """Tests prompt template instantiation"""
+
+    code = "def sir(\n    s: float, i: float, r: float, beta: float, gamma: float, n: float\n) -> Tuple[float, float, float]:\n    \"\"\"The SIR model, one time step.\"\"\"\n    s_n = (-beta * s * i) + s\n    i_n = (beta * s * i - gamma * i) + i\n    r_n = gamma * i + r\n    scale = n / (s_n + i_n + r_n)\n    return s_n * scale, i_n * scale, r_n * scale"
+
+    # these are the formatting instructions
+    response_schemas = [
+        ResponseSchema(name="model_function", description="The name of the function that contains the model dynamics")
+    ]
+
+    # for structured output parsing, converts schema to langchain object
+    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
+
+    # for structured output parsing, makes the instructions to be passed as a variable to the prompt template
+    format_instructions = output_parser.get_format_instructions()
+
+    # low temp, as the task is not generative
+    temperature = 0.0
+
+    # initialize the model
+    openai = ChatOpenAI(
+        temperature=temperature,
+        model_name='gpt-3.5-turbo',
+        openai_api_key=SKEMA_OPENAI_KEY
+    )
+
+    # construct the prompts
+    template="You are an assistant that answers questions about code."
+    system_message_prompt = SystemMessagePromptTemplate.from_template(template)
+    human_template="Find the function that contains the model dynamics in {code} \n{format_instructions}"
+    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
+
+    # combining the templates into a chat template
+    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
+
+    # formatting the prompt with input variables
+    formatted_prompt = chat_prompt.format_prompt(code=code, format_instructions = format_instructions).to_messages()
+
+    # running the model
+    output = openai(formatted_prompt)
+
+    parsed_output = output_parser.parse(output.content)
+
+    assert isinstance(parsed_output['model_function'], str)
\ No newline at end of file

From d0c81466a82244eb64b91474ea403b57260d5541 Mon Sep 17 00:00:00 2001
From: Justin
Date: Mon, 30 Oct 2023 17:01:38 -0400
Subject: [PATCH 6/8] simplified the test

---
 skema/rest/tests/test_llms.py | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/skema/rest/tests/test_llms.py b/skema/rest/tests/test_llms.py
index 12582500128..4a7c7cd38c1 100644
--- a/skema/rest/tests/test_llms.py
+++ b/skema/rest/tests/test_llms.py
@@ -8,6 +8,7 @@
     StructuredOutputParser,
     ResponseSchema
 )
+import langchain.schema
 from skema.rest.proxies import SKEMA_OPENAI_KEY
 
 def test_prompt_construction():
@@ -27,31 +28,17 @@ def test_prompt_construction():
     # for structured output parsing, makes the instructions to be passed as a variable to the prompt template
     format_instructions = output_parser.get_format_instructions()
 
-    # low temp, as the task is not generative
-    temperature = 0.0
-
-    # initialize the model
-    openai = ChatOpenAI(
-        temperature=temperature,
-        model_name='gpt-3.5-turbo',
-        openai_api_key=SKEMA_OPENAI_KEY
-    )
-
     # construct the prompts
     template="You are an assistant that answers questions about code."
     system_message_prompt = SystemMessagePromptTemplate.from_template(template)
     human_template="Find the function that contains the model dynamics in {code} \n{format_instructions}"
     human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
 
     # combining the templates into a chat template
     chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
 
     # formatting the prompt with input variables
     formatted_prompt = chat_prompt.format_prompt(code=code, format_instructions = format_instructions).to_messages()
 
-    # running the model
-    output = openai(formatted_prompt)
-
-    parsed_output = output_parser.parse(output.content)
-
-    assert isinstance(parsed_output['model_function'], str)
\ No newline at end of file
+    assert isinstance(formatted_prompt[0], langchain.schema.messages.SystemMessage)
+    assert isinstance(formatted_prompt[1], langchain.schema.messages.HumanMessage)
\ No newline at end of file

From 74bc5599b0d41aa5a8d0108b11c3b51042421331 Mon Sep 17 00:00:00 2001
From: Justin
Date: Mon, 30 Oct 2023 17:08:59 -0400
Subject: [PATCH 7/8] mocked model output to test parser

---
 skema/rest/tests/test_llms.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/skema/rest/tests/test_llms.py b/skema/rest/tests/test_llms.py
index 4a7c7cd38c1..ae23823813b 100644
--- a/skema/rest/tests/test_llms.py
+++ b/skema/rest/tests/test_llms.py
@@ -40,5 +40,12 @@ def test_prompt_construction():
     # formatting the prompt with input variables
     formatted_prompt = chat_prompt.format_prompt(code=code, format_instructions = format_instructions).to_messages()
 
+    # mocks the output from the model
+    output_mock = langchain.schema.messages.AIMessage(content='```json\n{\n\t"model_function": "sir"\n}\n```', additional_kwargs={}, example=False)
+
+    parsed_output = output_parser.parse(output_mock.content)
+
+    assert isinstance(parsed_output['model_fuction'], str)
     assert isinstance(formatted_prompt[0], langchain.schema.messages.SystemMessage)
-    assert isinstance(formatted_prompt[1], langchain.schema.messages.HumanMessage)
\ No newline at end of file
+    assert isinstance(formatted_prompt[1], langchain.schema.messages.HumanMessage)
+

From 0fbc0c2f93b6cec5419cb9ebda663d1d707f8620 Mon Sep 17 00:00:00 2001
From: Justin
Date: Mon, 30 Oct 2023 18:45:23 -0400
Subject: [PATCH 8/8] fixed misspelled function name

---
 skema/rest/tests/test_llms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/skema/rest/tests/test_llms.py b/skema/rest/tests/test_llms.py
index ae23823813b..b95691db217 100644
--- a/skema/rest/tests/test_llms.py
+++ b/skema/rest/tests/test_llms.py
@@ -45,7 +45,7 @@ def test_prompt_construction():
 
     parsed_output = output_parser.parse(output_mock.content)
 
-    assert isinstance(parsed_output['model_fuction'], str)
+    assert isinstance(parsed_output['model_function'], str)
     assert isinstance(formatted_prompt[0], langchain.schema.messages.SystemMessage)
     assert isinstance(formatted_prompt[1], langchain.schema.messages.HumanMessage)
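
A minimal sketch of how a client might exercise the new `/morae/linespan-given-filepaths-zip` endpoint (PATCH 2/8) and consume the `Dynamics` response (PATCH 4/8). The service URL, archive name, source filename, and the 1-based inclusive span convention are illustrative assumptions, not part of the patches.

```
import requests

# Assumed local deployment of the skema REST API; adjust to your instance.
ENDPOINT = "http://localhost:8000"

# Assumed archive containing exactly one .py file (other files, e.g. README.md, are allowed).
with open("model.zip", "rb") as fh:
    response = requests.post(
        f"{ENDPOINT}/morae/linespan-given-filepaths-zip",
        files={"zip_file": fh},
    )
response.raise_for_status()

# e.g. {"name": None, "description": None, "block": ["L10-L21"]}
dynamics = response.json()

# Parse the first "L<begin>-L<end>" span and slice the source file with it.
begin, end = (int(part.lstrip("L")) for part in dynamics["block"][0].split("-"))
with open("model.py") as src:  # assumed name of the single .py inside the zip
    lines = src.readlines()
print("".join(lines[begin - 1 : end]))  # assuming 1-based, inclusive line spans
```

Note that a failed extraction is reported as the span `"L0-L0"`, so a client may want to treat that value as "no dynamics found" rather than a real location.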