Skip to content

Commit

Permalink
Merge branch 'main' into vraymond/magit
Browse files Browse the repository at this point in the history
  • Loading branch information
vincentraymond-ua authored Oct 26, 2023
2 parents 7bc2e06 + 03b0d1e commit 858e6f0
Show file tree
Hide file tree
Showing 32 changed files with 1,966 additions and 789 deletions.
27 changes: 26 additions & 1 deletion .github/workflows/tests-and-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,18 @@ jobs:
# python-version: '3.8'
# - os: windows-latest
# python-version: '3.6'
services:
graphdb:
image: "memgraph/memgraph-platform"
ports:
- "7687:7687"
- "3000:3000"
- "7444:7444"
volumes:
- mg_lib:/var/lib/memgraph
- mg_log:/var/log/memgraph
- mg_etc:/etc/memgraph

steps:
# Checkout code
- name: Checkout code
Expand Down Expand Up @@ -112,7 +124,20 @@ jobs:
working-directory: ./skema/skema-rs
run: |
cargo test --verbose --all
- name: Free disk space (aggressively)
# https://github.com/jlumbroso/free-disk-space
uses: jlumbroso/[email protected]
with:
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
# NOTE: this might remove things we actually need.
tool-cache: true

# docs (other)
- name: "Create documentation (other)"
run: |
Expand Down
6 changes: 6 additions & 0 deletions docs/changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ We publish tagged images to dockerhub for each commit made to our primary branch
- [PRs](https://github.com/ml4ai/skema/pulls?q=is%3Apr+is%3Amerged+merged%3A2023-09-04..2023-09-17)
- [resolved issues](https://github.com/ml4ai/skema/issues?q=is%3Aissue+is%3Aclosed+closed%3A2023-09-04..2023-09-17) -->

## [v1.9.4](https://github.com/ml4ai/skema/releases/tag/v1.9.4)

- [PRs](https://github.com/ml4ai/skema/pulls?q=is%3Apr+is%3Amerged+merged%3A2023-10-04..2023-10-16)
- [resolved issues](https://github.com/ml4ai/skema/issues?q=is%3Aissue+is%3Aclosed+closed%3A2023-10-04..2023-10-16)


## [v1.9.3](https://github.com/ml4ai/skema/releases/tag/v1.9.3)

- [PRs](https://github.com/ml4ai/skema/pulls?q=is%3Apr+is%3Amerged+merged%3A2023-09-19..2023-10-02)
Expand Down
11 changes: 5 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -127,14 +127,13 @@ testpaths = [
"skema/program_analysis/tests",
"skema/program_analysis/comment_extractor/tests",
"skema/program_analysis/tree_sitter_parsers/tests",
# TODO: Fix fortran tests
#"skema/program_analysis/CAST/fortran/tests",
"skema/program_analysis/CAST/matlab/tests",
#"skema/program_analysis/CAST/python/tests",
"skema/rest/tests",
# FIXME: skema_py is not being tested
# and should probably be removed
"skema/skema_py/tests"
"skema/skema_py/tests",
"skema/gromet/execution_engine/tests",
# TODO: Fix fortran tests
#"skema/program_analysis/CAST/fortran/tests"
#"skema/program_analysis/CAST/python/tests"
]

# Configuration for Black.
Expand Down
81 changes: 66 additions & 15 deletions skema/gromet/execution_engine/execution_engine.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,26 @@
import yaml
import argparse
import asyncio
import subprocess

from ast import literal_eval
from pathlib import Path
from typing import Any, List, Dict

import torch
from gqlalchemy import Memgraph

from skema.program_analysis.CAST.pythonAST.builtin_map import retrieve_operator
from skema.program_analysis.single_file_ingester import process_file
from skema.gromet.execution_engine.execute import execute_primitive

# TODO: Broken import: from skema.rest.workflows import code_snippets_to_pn_amr
from skema.skema_py.server import System
from skema.gromet.execution_engine.query_runner import QueryRunner
from skema.gromet.execution_engine.symbol_table import SymbolTable
from skema.utils.fold import dictionary_to_gromet_json, del_nulls

SKEMA_BIN = Path(__file__).resolve().parents[2] / "skema-rs" / "skema" / "src" / "bin"


class Execute(torch.autograd.Function):
Expand All @@ -30,11 +37,37 @@ def backward(ctx, grad_output):


class ExecutionEngine:
def __init__(self, host: str, port: str, filename: str):
def __init__(self, host: str, port: str, source_path: str):
self.query_runner = QueryRunner(host, port)
self.symbol_table = SymbolTable()
self.source_path = source_path

# Filename is source path filename minus the extension
self.filename = Path(source_path).stem

# Upload source to Memgraph instance
self.upload_source()

def upload_source(self):
"""Ingest source file and upload Gromet to Memgraph"""

self.filename = filename
# Currently, the Gromet ingester writes the output JSON to the directory where the script is run from.
# Instead, we want to store it alongside the source so that we can upload it to Memgraph.
gromet_collection = process_file(self.source_path)
gromet_name = f"{self.filename}--Gromet-FN-auto.json"
gromet_path = Path(self.source_path).parent / gromet_name
gromet_path.write_text(
dictionary_to_gromet_json(del_nulls(gromet_collection.to_dict()))
)

# The Memgraph database state should be reset before running any queries.
# Unexpected nodes/edges can cause issues with execution.
self.query_runner.run_query("reset_state")

# Upload to memgraph
subprocess.run(
["cargo", "run", "--bin", "gromet2graphdb", str(gromet_path)], cwd=SKEMA_BIN
)

def execute(
self,
Expand Down Expand Up @@ -77,8 +110,8 @@ def visit(self, node):
return self.visit_primitive(node)

def visit_module(self, node):
"Visitor for top-level module"
node_id = node._id
"""Visitor for top-level module"""
node_id = str(node._id)

expressions = self.query_runner.run_query("ordered_expressions", id=node_id)
for expression in expressions:
Expand All @@ -100,6 +133,7 @@ def visit_expression(self, node):
symbol = self.visit(left_hand_side[0])

# The right hand side can be either a LiteralValue, an Expression, or a Primitive
# A Literal
index = {"Primitive": 1, "Expression": 1, "Literal": 2}
right_hand_side = sorted(
right_hand_side, key=lambda node: index[list(node._labels)[0]]
Expand Down Expand Up @@ -132,7 +166,21 @@ def visit_opi(self, node):
return node.name

def visit_literal(self, node):
# Convert to Tensor for execution
def create_dummy_node(value: Dict):
"""Create a dummy gqlalchemy node so that we can pass a LiteralValue to a visitor."""

class DummyNode:
pass

node = DummyNode()
node._id = -1
node._labels = ["Literal"]
node.value = value

# TODO: Update LiteralValue representation for List types
node.value["value"] = str(node.value["value"])

return node

# TODO: Update LiteralValue to remove wrapping "" characters
value = node.value["value"].strip('"')
Expand All @@ -142,15 +190,21 @@ def visit_literal(self, node):
return torch.tensor(int(value), dtype=torch.int)
elif value_type == "AbstractFloat":
return torch.tensor(float(value), dtype=torch.float)
elif value_type == "Complex":
print(
"WARNING: Execution for type Complex not support and will be skipped."
)
elif value_type == "Boolean":
return torch.tensor(value == "True", dtype=torch.bool)
elif value_type == "List":
# TODO - Add support for List
print("WARNING: List LiteralValue not currently supported and will be skipped in computations")
if isinstance(value, str):
return None
list = literal_eval(value)
return [self.visit(create_dummy_node(element)) for element in list]
elif value_type == "Map":
# TODO - Add support for Map
print("WARNING: Map LiteralValue not currently supported and will be skipped in computations")

print("WARNING: Execution for type Map not support and will be skipped.")
elif value_type == "None":
return None

def visit_primitive(self, node):
"""Visitor for :Primitive node type"""
Expand All @@ -174,7 +228,7 @@ def visit_primitive(self, node):

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Parameter Extraction Script")
parser.add_argument("file_name", type=str, help="File name of source to execute")
parser.add_argument("source_path", type=str, help="File path to source to execute")
parser.add_argument(
"--host",
default="localhost",
Expand All @@ -184,14 +238,11 @@ def visit_primitive(self, node):
parser.add_argument(
"--port", default=7687, type=int, help="Port serving the megraph database"
)

args = parser.parse_args()

engine = ExecutionEngine(args.host, args.port, args.file_name)
engine = ExecutionEngine(args.host, args.port, args.source_path)
print(engine.parameter_extraction())

"""TODO: Currently the file already has to be uploaded to memgraph. Add support for uploading the file at runtime."""

""" TODO: New arguments to add with function execution support
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument("--main", action="store_true", help="Extract parameters from the main module")
Expand Down
6 changes: 6 additions & 0 deletions skema/gromet/execution_engine/queries.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
---
reset_state:
"
MATCH (n)
DETACH DELETE (n)
"

module:
"
MATCH (n)-[r*]->(m)
Expand Down
3 changes: 2 additions & 1 deletion skema/gromet/execution_engine/query_runner.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import yaml
import traceback
from pathlib import Path
from gqlalchemy import Memgraph

Expand All @@ -20,7 +21,7 @@ def run_query(
n_or_m: str = "m",
filename: str = None,
function: str = None,
id: int = None,
id: str = None,
):
# Check if query is in query map. Currently we return None if its not found
# TODO: Improve error handling
Expand Down
31 changes: 31 additions & 0 deletions skema/gromet/execution_engine/tests/test_execution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import pytest
import torch
from pathlib import Path
from tempfile import TemporaryDirectory, TemporaryFile

from skema.gromet.execution_engine.execution_engine import ExecutionEngine

MEMGRAPH_CI_HOST = "localhost"
MEMGRAPH_CI_PORT = 7687


@pytest.mark.ci_only
def test_parameter_extraction():
    """Unit test for basic parameter extraction with the execution engine.

    Writes a tiny Python program to a temp directory, runs it through
    ``ExecutionEngine.parameter_extraction``, and checks the extracted
    parameter values. Requires the Memgraph service defined in CI
    (see MEMGRAPH_CI_HOST / MEMGRAPH_CI_PORT), hence the ci_only marker.
    """
    # Renamed from `input` so we don't shadow the builtin.
    source = """
x = 2
y = x+1
z = x+y
"""
    expected_output = {"x": torch.tensor(2), "y": torch.tensor(3), "z": torch.tensor(5)}

    with TemporaryDirectory() as temp:
        source_path = Path(temp) / "test_parameter_extraction.py"
        source_path.write_text(source)

        output = ExecutionEngine(
            host=MEMGRAPH_CI_HOST, port=MEMGRAPH_CI_PORT, source_path=source_path
        ).parameter_extraction()

        # torch.tensor overrides the equality '==' operator, so dict equality
        # compares tensor values element-wise; valid for these scalar tensors.
        assert output == expected_output
22 changes: 21 additions & 1 deletion skema/program_analysis/CAST/fortran/node_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,19 @@
"*",
"**",
"/",
"/="
"/=",
">",
"<",
"<=",
">=",
"only",
"\.not\.",
"\.gt\.",
"\.ge\.",
"\.lt\.",
"\.le\.",
"\.eq\.",
"\.ne\.",
]

class NodeHelper():
Expand Down Expand Up @@ -62,6 +69,19 @@ def get_identifier(self, node: Node) -> str:

return identifier

def remove_comments(node: Node) -> Node:
    """Remove comment nodes from a tree-sitter parse tree.

    Deletes children whose ``type`` is ``"comment"`` and recurses into the
    remaining children. Returns the (mutated) node for convenient chaining.
    """
    # NOTE: tree-sitter Node objects are read-only, so we have to be careful about how we remove comments
    # The below has been carefully designed to work around this restriction.
    # Indices are deleted in descending order so earlier deletions don't shift
    # the positions of the remaining indices to remove.
    # NOTE(review): presumably `node.children` returns a stable, mutable list here;
    # if py-tree-sitter rebuilds the list on each attribute access, the `del` below
    # would mutate a temporary — TODO confirm against the installed tree_sitter version.
    to_remove = sorted([index for index,child in enumerate(node.children) if child.type == "comment"], reverse=True)
    for index in to_remove:
        del node.children[index]

    # Recurse into surviving children, replacing each with its cleaned subtree.
    for i in range(len(node.children)):
        node.children[i] = remove_comments(node.children[i])

    return node

def get_first_child_by_type(node: Node, type: str, recurse=False):
"""Takes in a node and a type string as inputs and returns the first child matching that type. Otherwise, return None
When the recurse argument is set, it will also recursivly search children nodes as well.
Expand Down
15 changes: 9 additions & 6 deletions skema/program_analysis/CAST/fortran/preprocessor/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def preprocess(
"""
# NOTE: The order of preprocessing steps does matter. We have to run the GCC preprocessor before correcting the continuation lines or there could be issues

# TODO: Create single location for generating include base path
source = source_path.read_text()

# Get paths for intermediate products
Expand Down Expand Up @@ -66,18 +67,21 @@ def preprocess(

# Step 2: Correct include directives to remove system references
source = fix_include_directives(source)

# Step 3: Process with gcc c-preprocessor
source = run_c_preprocessor(source, source_path.parent)
include_base_directory = Path(source_path.parent, f"include_{source_path.stem}")
if not include_base_directory.exists():
include_base_directory = include_base_directory.parent
source = run_c_preprocessor(source, include_base_directory)
if out_gcc:
gcc_path.write_text(source)

# Step 4: Prepare for tree-sitter
# This step removes any additional preprocessor directives added or not removed by GCC
source = "\n".join(
["!" + line if line.startswith("#") else line for line in source.splitlines()]
)

# Step 5: Check for unsupported idioms
if out_unsupported:
unsupported_path.write_text(
Expand Down Expand Up @@ -181,7 +185,6 @@ def run_c_preprocessor(source: str, include_base_path: Path) -> str:

def convert_to_free_form(source: str) -> str:
"""If fixed-form Fortran source, convert to free-form"""

def validate_parse_tree(source: str) -> bool:
"""Parse source with tree-sitter and check if an error is returned."""
language = Language(INSTALLED_LANGUAGES_FILEPATH, "fortran")
Expand All @@ -201,7 +204,7 @@ def validate_parse_tree(source: str) -> bool:
)
if validate_parse_tree(free_source):
return free_source

return source


Expand Down
Loading

0 comments on commit 858e6f0

Please sign in to comment.