Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[lldb] Proof of concept data formatter compiler for Python #113734

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

kastiglione
Copy link
Contributor

@kastiglione kastiglione commented Oct 25, 2024

A compiler from Python to the assembly syntax of the lldb data formatter bytecode.

@llvmbot
Copy link
Collaborator

llvmbot commented Oct 25, 2024

@llvm/pr-subscribers-lldb

Author: Dave Lee (kastiglione)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/113734.diff

2 Files Affected:

  • (added) lldb/examples/formatter-bytecode/optional_summary.py (+14)
  • (added) lldb/examples/formatter-bytecode/python_to_assembly.py (+145)
diff --git a/lldb/examples/formatter-bytecode/optional_summary.py b/lldb/examples/formatter-bytecode/optional_summary.py
new file mode 100644
index 00000000000000..68e672d86613d1
--- /dev/null
+++ b/lldb/examples/formatter-bytecode/optional_summary.py
@@ -0,0 +1,14 @@
+def OptionalSummaryProvider(valobj, _):
+    # Summarize an Optional-like value: read `Storage.hasVal`, and when it is
+    # set, return the summary of `Storage.value` cast to the storage type's
+    # first template argument.
+    #
+    # NOTE(review): presumably this is the sample input for
+    # python_to_assembly.py, so it is documented with `#` comments only; a
+    # docstring would be a string-constant statement in the parsed AST.
+
+    # Sentinel passed to GetValueAsUnsigned; presumably what comes back when
+    # the read fails (confirm against the SBValue API). It must differ from
+    # the 0 tested below.
+    failure = 2
+    storage = valobj.GetChildMemberWithName("Storage")
+    hasVal = storage.GetChildMemberWithName("hasVal").GetValueAsUnsigned(failure)
+    if hasVal == failure:
+        return "<could not read Optional>"
+
+    # hasVal == 0: the optional holds no value.
+    if hasVal == 0:
+        return "None"
+
+    # Reinterpret the `value` member as the first template argument type before
+    # asking for its summary.
+    underlying_type = storage.GetType().GetTemplateArgumentType(0)
+    value = storage.GetChildMemberWithName("value")
+    value = value.Cast(underlying_type)
+    return value.GetSummary()
diff --git a/lldb/examples/formatter-bytecode/python_to_assembly.py b/lldb/examples/formatter-bytecode/python_to_assembly.py
new file mode 100755
index 00000000000000..6e2adbe093fdac
--- /dev/null
+++ b/lldb/examples/formatter-bytecode/python_to_assembly.py
@@ -0,0 +1,145 @@
+#!/usr/bin/python3
+
+import ast
+import io
+import sys
+from typing import Any
+
+# Method names the compiler accepts, mapped to the selector that is emitted
+# (followed by `call`) when such a method is invoked on a receiver.
+BUILTINS = {
+    "Cast": "@cast",
+    "GetChildMemberWithName": "@get_child_with_name",
+    "GetSummary": "@get_summary",
+    "GetTemplateArgumentType": "@get_template_argument_type",
+    "GetType": "@get_type",
+    "GetValueAsUnsigned": "@get_value_as_unsigned",
+}
+
+# Python comparison operator node types, mapped to the token emitted after
+# both operands have been visited.
+# NOTE(review): confirm the `=<` and `=>` spellings against the assembler's
+# opcode table; the two are not written symmetrically.
+COMPS = {
+    ast.Eq: "=",
+    ast.NotEq: "!=",
+    ast.Lt: "<",
+    ast.LtE: "=<",
+    ast.Gt: ">",
+    ast.GtE: "=>",
+}
+
+class Compiler(ast.NodeVisitor):
+    # Track the stack index of locals variables.
+    #
+    # This is essentially an ordered dictionary, where the key is an index on
+    # the stack, and the value is the name of the variable whose value is at
+    # that index.
+    #
+    # Ex: `locals[0]` is the name of the first value pushed on the stack, etc.
+    locals: list[str]
+
+    buffer: io.StringIO
+    final_buffer: io.StringIO
+
+    def __init__(self) -> None:
+        self.locals = []
+        self.buffer = io.StringIO()
+        self.final_buffer = io.StringIO()
+
+    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
+        # Initialize `locals` with the (positional) arguments.
+        self.locals = [arg.arg for arg in node.args.args]
+        self.generic_visit(node)
+        self.locals.clear()
+
+    def visit_Compare(self, node: ast.Compare) -> None:
+        self.visit(node.left)
+        # XXX: Does not handle multiple comparisons, ex: `0 < x < 10`
+        self.visit(node.comparators[0])
+        self._output(COMPS[type(node.ops[0])])
+
+    def visit_If(self, node: ast.If) -> None:
+        self.visit(node.test)
+
+        # Does the body `return`?
+        has_return = any(isinstance(x, ast.Return) for x in node.body)
+
+        self._output("{")
+        self._visit_each(node.body)
+        if not node.orelse and not has_return:
+            # No else, and no early exit: a simple `if`
+            self._output("} if")
+            return
+
+        self._output("}")
+        if node.orelse:
+            # Handle else.
+            self._output("{")
+            self._visit_each(node.orelse)
+            self._output("} ifelse")
+        elif has_return:
+            # Convert early exit into an `ifelse`.
+            self._output("{")
+            self._output("} ifelse", final=True)
+
+    def visit_Constant(self, node: ast.Constant) -> None:
+        if isinstance(node.value, str):
+            self._output(f'"{node.value}"')
+        elif isinstance(node.value, bool):
+            self._output(int(node.value))
+        else:
+            self._output(node.value)
+
+    def visit_Call(self, node: ast.Call) -> None:
+        if isinstance(node.func, ast.Attribute):
+            # The receiver is the left hande side of the dot.
+            receiver = node.func.value
+            method = node.func.attr
+            if selector := BUILTINS.get(method):
+                # Visit the method's receiver to have its value on the stack.
+                self.visit(receiver)
+                # Visit the args to position them on the stack.
+                self._visit_each(node.args)
+                self._output(f"{selector} call")
+            else:
+                # TODO: fail
+                print(f"error: unsupported method {node.func.attr}", file=sys.stderr)
+
+    def visit_Assign(self, node: ast.Assign) -> None:
+        # Visit RHS first, putting values on the stack.
+        self.visit(node.value)
+        # Determine the name(s). Either a single Name, or a Tuple of Names.
+        target = node.targets[0]
+        if isinstance(target, ast.Name):
+            names = [target.id]
+        elif isinstance(target, ast.Tuple):
+            # These tuple elements are Name nodes.
+            names = [x.id for x in target.elts]
+
+        # Forget any previous bindings of these names.
+        # Their values are orphaned on the stack.
+        for local in self.locals:
+            if local in names:
+                old_idx = self.locals.index(local)
+                self.locals[old_idx] = ""
+
+        self.locals.extend(names)
+
+    def visit_Name(self, node: ast.Name) -> None:
+        idx = self.locals.index(node.id)
+        self._output(f"{idx} pick # {node.id}")
+
+    def _visit_each(self, nodes: list[ast.AST]) -> None:
+        for child in nodes:
+            self.visit(child)
+
+    def _output(self, x: Any, final: bool = False) -> None:
+        dest = self.final_buffer if final else self.buffer
+        print(x, file=dest)
+
+    @property
+    def output(self) -> str:
+        return compiler.buffer.getvalue() + compiler.final_buffer.getvalue()
+
+
+if __name__ == "__main__":
+    with open(sys.argv[1]) as f:
+        root = ast.parse(f.read())
+    compiler = Compiler()
+    compiler.visit(root)
+    print(compiler.output)

Copy link

⚠️ Python code formatter, darker found issues in your code. ⚠️

You can test this locally with the following command:
darker --check --diff -r eb9f4756bc3daaa4b19f4f46521dc05180814de4...57223942e91c47d0a61b148a65247cd9cbb16496 lldb/examples/formatter-bytecode/optional_summary.py lldb/examples/formatter-bytecode/python_to_assembly.py
View the diff from darker here.
--- python_to_assembly.py	2024-10-25 19:56:12.000000 +0000
+++ python_to_assembly.py	2024-10-25 19:59:11.920862 +0000
@@ -20,10 +20,11 @@
     ast.Lt: "<",
     ast.LtE: "=<",
     ast.Gt: ">",
     ast.GtE: "=>",
 }
+
 
 class Compiler(ast.NodeVisitor):
     # Track the stack index of locals variables.
     #
     # This is essentially an ordered dictionary, where the key is an index on

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants