Skip to content

Commit

Permalink
feat: mappings from bytecode to contract name (#429)
Browse files Browse the repository at this point in the history
  • Loading branch information
daejunpark authored Jan 7, 2025
1 parent 6fe84ee commit c705e37
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/halmos/cheatcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def create_calldata_generic(ex, sevm, contract_name, filename=None, include_view
The contract is identified by its contract name and optional filename.
TODO: provide variants that require only the contract address.
"""
contract_json = BuildOut().get_by_name(contract_name, filename)[0]
contract_json = BuildOut().get_by_name(contract_name, filename)

abi = get_abi(contract_json)
methodIdentifiers = contract_json["methodIdentifiers"]
Expand Down
107 changes: 105 additions & 2 deletions src/halmos/mapper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from collections import defaultdict
from dataclasses import dataclass, field
from typing import Any, Optional
from typing import Any, ForwardRef, Optional

from .exceptions import HalmosException
from .logs import warn

SELECTOR_FIELDS = {
"VariableDeclaration": "functionSelector",
Expand Down Expand Up @@ -83,21 +84,123 @@ class BuildOut(metaclass=SingletonMeta):
def __init__(self):
self._build_out_map: dict = None
self._build_out_map_reverse: dict = None
self._build_out_map_code: dict = None

def set_build_out(self, build_out_map: dict):
if self._build_out_map is build_out_map:
return

self._build_out_map = build_out_map
self._build_out_map_reverse = None
self._build_out_map_code = None

def create_build_out_map_reverse(self):
# create reverse mapping
self._build_out_map_reverse = defaultdict(dict)
for filename, file_map in self._build_out_map.items():
for contract_name, contract_map in file_map.items():
for contract_name, (contract_map, _, _) in file_map.items():
self._build_out_map_reverse[contract_name][filename] = contract_map

def get_placeholders(self, deployed):
"""
Extracts a list of placeholders from the deployedBytecode json entry.
Each placeholder is represented as a tuple of the starting and ending indices.
These placeholders are used to mark positions for immutables or library addresses within the bytecode.
The list of placeholders is sorted based on their starting index.
The method ensures that:
- There are no overlapping placeholders.
- The ending index of the last placeholder does not exceed the length of the bytecode.
"""
placeholders = []

for links in deployed.get("immutableReferences", {}).values():
for link in links:
start = link["start"]
end = start + link["length"]
placeholders.append((start, end))

for libs in deployed.get("linkReferences", {}).values():
for links in libs.values():
for link in links:
start = link["start"]
end = start + link["length"]
placeholders.append((start, end))

placeholders = sorted(placeholders, key=lambda x: x[0])

# sanity check
last = 0
for start, end in placeholders:
if not (last <= start and start < end):
raise ValueError("invalid placeholders")
last = end
if last > len(deployed["object"][2:]) // 2:
raise ValueError("invalid placeholders")

return placeholders

def create_build_out_map_code(self):
"""
Creates a mapping between deployed bytecode and contract names.
This mapping utilizes deployed bytecode because it retains its original length,
unlike creation bytecode which can be expanded by constructor arguments.
Since compile-time bytecode may contain placeholders for immutables or library addresses,
the actual deployed bytecode can differ from its compile-time code.
To accommodate this, the mapping is constructed from bytecode size to a list of tuples containing bytecode and contract name.
When querying this mapping, it first retrieves a list of tuples based on their size,
and then iterates through the list, comparing each bytecode.
"""
self._build_out_map_code = defaultdict(list)

for filename, file_map in self._build_out_map.items():
for contract_name, (contract_map, _, _) in file_map.items():
deployed = contract_map["deployedBytecode"]
hexcode = deployed["object"][2:] # drop '0x' prefix
if not hexcode:
continue

size = len(hexcode) // 2 # byte length
placeholders = self.get_placeholders(deployed)
code_data = (hexcode, placeholders, contract_name, filename)
self._build_out_map_code[size].append(code_data)

def get_by_code(self, bytecode: ForwardRef("ByteVec")) -> tuple:
"""
Return the contract name and file name of the given deployed bytecode.
"""
if not self._build_out_map_code:
self.create_build_out_map_code()

# compares the deployed bytecode with the compile-time hexcode, excluding placeholders from the comparison
def eq_except_placeholders(hexcode: str, placeholders):
last = 0
for start, end in placeholders:
if not eq_bytes(bytecode[last:start], hexcode[2 * last : 2 * start]):
return False
last = end
return eq_bytes(bytecode[last:], hexcode[2 * last :])

def eq_bytes(bytecode: ForwardRef("ByteVec"), hexcode: str):
bytecode = bytecode.unwrap()
if not isinstance(bytecode, bytes):
# non-concrete bytecode chunk cannot be equal to hexcode
return False
# bytes.fromhex() should not fail, because the given hexcode chunk does not contain placeholder characters
return bytecode == bytes.fromhex(hexcode)

for code_data in self._build_out_map_code[len(bytecode)]:
hexcode, placeholders, contract_name, filename = code_data
if eq_except_placeholders(hexcode, placeholders):
return (contract_name, filename)

warn(f"unknown deployed bytecode: {bytecode}")
return (None, None)

def get_by_name(self, contract_name: str, filename: str = None) -> dict:
"""
Return the build output json for the given contract name.
Expand Down
19 changes: 17 additions & 2 deletions src/halmos/sevm.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
warn,
warn_code,
)
from .mapper import BuildOut
from .utils import (
EVM,
Address,
Expand Down Expand Up @@ -500,7 +501,12 @@ class Contract:
_next_pc: dict[int, int]
_jumpdests: tuple[set] | None

def __init__(self, code: ByteVec | None = None) -> None:
contract_name: str | None
filename: str | None

def __init__(
self, code: ByteVec | None = None, contract_name=None, filename=None
) -> None:
if not isinstance(code, ByteVec):
code = ByteVec(code)

Expand All @@ -519,6 +525,9 @@ def __init__(self, code: ByteVec | None = None) -> None:
self._next_pc = dict()
self._jumpdests = None

self.contract_name = contract_name
self.filename = filename

def __deepcopy__(self, memo):
# the class is essentially immutable (the only mutable fields are caches)
# so we can return the object itself instead of creating a new copy
Expand Down Expand Up @@ -2451,8 +2460,14 @@ def callback(new_ex: Exec, stack, step_id):
return

elif subcall.output.error is None:
deployed_bytecode = subcall.output.data

# retrieve contract name
(contract_name, filename) = BuildOut().get_by_code(deployed_bytecode)

# new contract code, will revert if data is None
new_ex.set_code(new_addr, Contract(subcall.output.data))
new_code = Contract(deployed_bytecode, contract_name, filename)
new_ex.set_code(new_addr, new_code)

# push new address to stack
new_ex.st.push(uint256(new_addr))
Expand Down

0 comments on commit c705e37

Please sign in to comment.