From 574a30d8e832b12f168ab22e98061b2872da53b7 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Mon, 4 Apr 2022 19:01:06 -0600 Subject: [PATCH 01/22] adding instruction feature extractors --- .github/mypy/mypy.ini | 3 + capa/features/extractors/dotnet/__init__.py | 0 capa/features/extractors/dotnet/helpers.py | 71 +++++++++++++++ capa/features/extractors/dotnet/insn.py | 99 +++++++++++++++++++++ 4 files changed, 173 insertions(+) create mode 100644 capa/features/extractors/dotnet/__init__.py create mode 100644 capa/features/extractors/dotnet/helpers.py create mode 100644 capa/features/extractors/dotnet/insn.py diff --git a/.github/mypy/mypy.ini b/.github/mypy/mypy.ini index 6d177d40a..3d22b05f7 100644 --- a/.github/mypy/mypy.ini +++ b/.github/mypy/mypy.ini @@ -74,3 +74,6 @@ ignore_missing_imports = True [mypy-elftools.*] ignore_missing_imports = True + +[mypy-dncil.*] +ignore_missing_imports = True \ No newline at end of file diff --git a/capa/features/extractors/dotnet/__init__.py b/capa/features/extractors/dotnet/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/capa/features/extractors/dotnet/helpers.py b/capa/features/extractors/dotnet/helpers.py new file mode 100644 index 000000000..8bd1ef9c8 --- /dev/null +++ b/capa/features/extractors/dotnet/helpers.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from dnfile.mdtable import MemberRefRow + from dnfile.mdtable import MethodDefRow + from dnfile import dnPE + +import dnfile +from dnfile.enums import MetadataTables +from dncil.cil.body import CilMethodBody +from dncil.clr.token import Token, InvalidToken +from dncil.cil.body.reader import CilMethodBodyReaderBase + +# key indexes to dotnet metadata tables +DOTNET_META_TABLES_BY_INDEX = {table.value: table.name for table in MetadataTables} + + +class DnfileMethodBodyReader(CilMethodBodyReaderBase): + def __init__(self, pe: dnfile.dnPE, row: MethodDefRow): + """ """ + self.pe = pe + self.rva = self.pe.get_offset_from_rva(row.Rva) + + def read(self, n): + """ """ + data = self.pe.get_data(self.pe.get_rva_from_offset(self.rva), n) + self.rva += n + return data + + def tell(self): + """ """ + return self.rva + + def seek(self, rva): + """ """ + self.rva = rva + + def get_token(self, value, is_str=False): + """ """ + token = Token(value) + + if is_str: + return self.pe.net.user_strings.get_us(token.rid).value + + table_name = DOTNET_META_TABLES_BY_INDEX.get(token.table, "") + if not table_name: + # table_index is not valid + return InvalidToken(token.value) + + table = getattr(self.pe.net.mdtables, table_name, None) + if table is None: + # table index is valid but table is not present + return InvalidToken(token.value) + + try: + return table.rows[token.rid - 1] + except IndexError: + # table index is valid but row index is not valid + return InvalidToken(token.value) + + +def read_dotnet_method_body(pe: dnPE, row: MethodDefRow) -> CilMethodBody: + """ """ + return CilMethodBody(DnfileMethodBodyReader(pe, row)) + + +def get_imported_class_name(row: MemberRefRow) -> str: + """ """ + return f"{row.Class.row.TypeNamespace}.{row.Class.row.TypeName}" diff --git a/capa/features/extractors/dotnet/insn.py b/capa/features/extractors/dotnet/insn.py new file mode 100644 index 000000000..3272d2d41 --- /dev/null +++ b/capa/features/extractors/dotnet/insn.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, List, Tuple, Union, Callable, Generator + +if TYPE_CHECKING: + from dncil.cil.instruction import Instruction + from dncil.cil.body import CilMethodBody + +import dncil +import dnfile +from dncil.cil.error import MethodBodyFormatError +from dncil.cil.opcode import OpCodes + +import capa.features.extractors.helpers +import capa.features.extractors.dotnet.helpers +from capa.features.insn import API, Number +from capa.features.common import String + + +def extract_insn_api_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[API, int], None, None]: + """parse instruction API features + + see https://www.ntcore.com/files/dotnetformat.htm + + 10 - MemberRef Table + Each row represents an imported method. + Class (index into the TypeRef, ModuleRef, MethodDef, TypeSpec or TypeDef tables) + 01 - TypeRef Table + Each row represents an imported class, its namespace and the assembly which contains it. + TypeName (index into String heap) + TypeNamespace (index into String heap) + """ + if insn.opcode in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): + if isinstance(insn.operand, dnfile.mdtable.MemberRefRow): + if isinstance(insn.operand.Class.row, (dnfile.mdtable.TypeRefRow,)): + class_name = capa.features.extractors.dotnet.helpers.get_imported_class_name(insn.operand) + method_name = insn.operand.Name + yield API(f"{class_name}::{method_name}"), insn.offset + + +def extract_insn_number_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[Number, int], None, None]: + """parse instruction number features""" + if insn.is_ldc(): + yield Number(insn.get_ldc()), insn.offset + + +def extract_insn_string_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[String, int], None, None]: + """parse instruction string features""" + if insn.is_ldstr(): + yield String(insn.operand), insn.offset + + +def extract_features( + f: CilMethodBody, insn: Instruction +) -> Generator[Tuple[Union[API, String, Number], int], None, None]: + """extract instruction features""" + for inst_handler in INSTRUCTION_HANDLERS: + for (feature, ea) in inst_handler(f, insn): + yield feature, ea + + +INSTRUCTION_HANDLERS = ( + extract_insn_api_features, + extract_insn_number_features, + extract_insn_string_features, +) + + +def main(args): + """ """ + dn = dnfile.dnPE(args.path) + + features = [] + for row in dn.net.mdtables.MethodDef: + if row.ImplFlags.miIL: + try: + body = read_dotnet_method_body(dn, row) + except MethodBodyFormatError as e: + print(e) + continue + + for insn in body.instructions: + features.extend(list(extract_features(body, insn))) + + import pprint + + pprint.pprint(features) + + +if __name__ == "__main__": + """ """ + import argparse + + from capa.features.extractors.dotnet.helpers import read_dotnet_method_body + + parser = argparse.ArgumentParser(prog="parse instruction features from .NET PE") + parser.add_argument("path", type=str, help="full path to .NET PE") + + main(parser.parse_args()) From 69474978aa3a927fccd58a45936c4105740b69de Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Tue, 5 Apr 2022 12:14:30 -0600 Subject: [PATCH 02/22] adding support to parse imports --- capa/features/extractors/dotnet/helpers.py | 139 ++++++++++++++++----- capa/features/extractors/dotnet/insn.py | 73 ++++++----- 2 files changed, 150 insertions(+), 62 deletions(-) diff --git a/capa/features/extractors/dotnet/helpers.py b/capa/features/extractors/dotnet/helpers.py index 8bd1ef9c8..df2eb7a35 100644 --- a/capa/features/extractors/dotnet/helpers.py +++ b/capa/features/extractors/dotnet/helpers.py @@ -1,6 +1,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Tuple, Generator, Any, Dict +from itertools import chain if TYPE_CHECKING: from dnfile.mdtable import MemberRefRow @@ -10,7 +11,7 @@ import dnfile from dnfile.enums import MetadataTables from dncil.cil.body import CilMethodBody -from dncil.clr.token import Token, InvalidToken +from dncil.clr.token import Token, StringToken, InvalidToken from dncil.cil.body.reader import CilMethodBodyReaderBase # key indexes to dotnet metadata tables @@ -18,54 +19,128 @@ class DnfileMethodBodyReader(CilMethodBodyReaderBase): - def __init__(self, pe: dnfile.dnPE, row: MethodDefRow): + def __init__(self, pe: dnPE, row: MethodDefRow): """ """ - self.pe = pe - self.rva = self.pe.get_offset_from_rva(row.Rva) + self.pe: dnPE = pe + self.offset: int = self.pe.get_offset_from_rva(row.Rva) - def read(self, n): + def read(self, n: int) -> bytes: """ """ - data = self.pe.get_data(self.pe.get_rva_from_offset(self.rva), n) - self.rva += n + data: bytes = self.pe.get_data(self.pe.get_rva_from_offset(self.offset), n) + self.offset += n return data - def tell(self): + def tell(self) -> int: """ """ - return self.rva + return self.offset - def seek(self, rva): + def seek(self, offset: int) -> int: """ """ - self.rva = rva + self.offset = offset + return self.offset - def get_token(self, value, is_str=False): - """ """ - token = Token(value) - if is_str: - return self.pe.net.user_strings.get_us(token.rid).value +def make_token(table: int, rid: int) -> int: + """ """ + return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK) + - table_name = DOTNET_META_TABLES_BY_INDEX.get(token.table, "") - if not table_name: - # table_index is not valid - return InvalidToken(token.value) +def resolve_token(pe: dnPE, token: Token) -> Any: + """ """ + if isinstance(token, StringToken): + return pe.net.user_strings.get_us(token.rid).value - table = getattr(self.pe.net.mdtables, table_name, None) - if table is None: - # table index is valid but table is not present - return InvalidToken(token.value) + table_name: str = DOTNET_META_TABLES_BY_INDEX.get(token.table, "") + if not table_name: + # table_index is not valid + return InvalidToken(token.value) - try: - return table.rows[token.rid - 1] - except IndexError: - # table index is valid but row index is not valid - return InvalidToken(token.value) + table: Any = getattr(pe.net.mdtables, table_name, None) + if table is None: + # table index is valid but table is not present + return InvalidToken(token.value) + try: + return table.rows[token.rid - 1] + except IndexError: + # table index is valid but row index is not valid + return InvalidToken(token.value) -def read_dotnet_method_body(pe: dnPE, row: MethodDefRow) -> CilMethodBody: + +def get_method_body(pe: dnPE, row: MethodDefRow) -> CilMethodBody: """ """ return CilMethodBody(DnfileMethodBodyReader(pe, row)) -def get_imported_class_name(row: MemberRefRow) -> str: +def get_class_import_name(row: MemberRefRow) -> str: """ """ return f"{row.Class.row.TypeNamespace}.{row.Class.row.TypeName}" + + +def get_class_imports(pe: dnPE) -> Generator[Tuple[int, str], None, None]: + """parse class imports + + see https://www.ntcore.com/files/dotnetformat.htm + + 10 - MemberRef Table + Each row represents an imported method + Class (index into the TypeRef, ModuleRef, MethodDef, TypeSpec or TypeDef tables) + Name (index into String heap) + 01 - TypeRef Table + Each row represents an imported class, its namespace and the assembly which contains it + TypeName (index into String heap) + TypeNamespace (index into String heap) + """ + if not hasattr(pe.net.mdtables, "MemberRef"): + return + + for (rid, row) in enumerate(pe.net.mdtables.MemberRef): + if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow,)): + continue + + class_imp = f"{get_class_import_name(row)}::{row.Name}" + token = make_token(MetadataTables.MemberRef.value, rid + 1) + + yield token, class_imp + + +def get_native_imports(pe: dnPE) -> Generator[Tuple[int, str], None, None]: + """parse native imports + + see https://www.ntcore.com/files/dotnetformat.htm + + 28 - ImplMap Table + ImplMap table holds information about unmanaged methods that can be reached from managed code, using PInvoke dispatch + MemberForwarded (index into the Field or MethodDef table; more precisely, a MemberForwarded coded index) + ImportName (index into the String heap) + ImportScope (index into the ModuleRef table) + """ + if not hasattr(pe.net.mdtables, "ImplMap"): + return + + for row in pe.net.mdtables.ImplMap: + dll: str = row.ImportScope.row.Name + symbol: str = row.ImportName + + # like Kernel32.dll + if dll and "." in dll: + dll = dll.split(".")[0].lower() + + # like kernel32.CreateFileA + native_imp: str = f"{dll}.{symbol}" + + # ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the + # name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded + # MethodDef table token to help us later record native import method calls made from CIL + member_forwarded_token = make_token(row.MemberForwarded.table.number, row.MemberForwarded.row_index) + + yield member_forwarded_token, native_imp + + +def get_imports(pe: dnPE) -> Dict[int, str]: + """ """ + imps: Dict[int, str] = {} + + for (token, imp) in chain(get_class_imports(pe), get_native_imports(pe)): + imps[token] = imp + return imps diff --git a/capa/features/extractors/dotnet/insn.py b/capa/features/extractors/dotnet/insn.py index 3272d2d41..417c7234f 100644 --- a/capa/features/extractors/dotnet/insn.py +++ b/capa/features/extractors/dotnet/insn.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, List, Tuple, Union, Callable, Generator +from typing import TYPE_CHECKING, Dict, List, Tuple, Union, Callable, Generator, Any if TYPE_CHECKING: from dncil.cil.instruction import Instruction @@ -17,25 +17,28 @@ from capa.features.common import String +def get_imports(ctx): + """ """ + if "imports_cache" not in ctx: + ctx["imports_cache"] = capa.features.extractors.dotnet.helpers.get_imports(ctx["pe"]) + return ctx["imports_cache"] + + def extract_insn_api_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[API, int], None, None]: - """parse instruction API features - - see https://www.ntcore.com/files/dotnetformat.htm - - 10 - MemberRef Table - Each row represents an imported method. - Class (index into the TypeRef, ModuleRef, MethodDef, TypeSpec or TypeDef tables) - 01 - TypeRef Table - Each row represents an imported class, its namespace and the assembly which contains it. - TypeName (index into String heap) - TypeNamespace (index into String heap) - """ - if insn.opcode in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): - if isinstance(insn.operand, dnfile.mdtable.MemberRefRow): - if isinstance(insn.operand.Class.row, (dnfile.mdtable.TypeRefRow,)): - class_name = capa.features.extractors.dotnet.helpers.get_imported_class_name(insn.operand) - method_name = insn.operand.Name - yield API(f"{class_name}::{method_name}"), insn.offset + """parse instruction API features""" + if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): + return + + name = get_imports(f.ctx).get(insn.operand.value, "") + if not name: + return + + if "::" in name: + yield API(name), insn.offset + else: + dll, _, symbol = name.rpartition(".") + for name_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield API(name_variant), insn.offset def extract_insn_number_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[Number, int], None, None]: @@ -47,7 +50,8 @@ def extract_insn_number_features(f: CilMethodBody, insn: Instruction) -> Generat def extract_insn_string_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[String, int], None, None]: """parse instruction string features""" if insn.is_ldstr(): - yield String(insn.operand), insn.offset + user_string = capa.features.extractors.dotnet.helpers.resolve_token(f.ctx["pe"], insn.operand) + yield String(user_string), insn.offset def extract_features( @@ -68,16 +72,25 @@ def extract_features( def main(args): """ """ - dn = dnfile.dnPE(args.path) + pe: dnPE = dnfile.dnPE(args.path) + + # data structure shared across functions yielded here. + # useful for caching analysis relevant across a single workspace. + ctx = {} + ctx["pe"] = pe + + features: List[Any] = [] + for row in pe.net.mdtables.MethodDef: + if not row.ImplFlags.miIL or any((row.Flags.mdAbstract, row.Flags.mdPinvokeImpl)): + continue + + try: + body: CilMethodBody = get_method_body(pe, row) + except MethodBodyFormatError as e: + print(e) + continue - features = [] - for row in dn.net.mdtables.MethodDef: - if row.ImplFlags.miIL: - try: - body = read_dotnet_method_body(dn, row) - except MethodBodyFormatError as e: - print(e) - continue + setattr(body, "ctx", ctx) for insn in body.instructions: features.extend(list(extract_features(body, insn))) @@ -91,7 +104,7 @@ def main(args): """ """ import argparse - from capa.features.extractors.dotnet.helpers import read_dotnet_method_body + from capa.features.extractors.dotnet.helpers import get_method_body parser = argparse.ArgumentParser(prog="parse instruction features from .NET PE") parser.add_argument("path", type=str, help="full path to .NET PE") From e3c749d357d8497feb81ea72963eae53ce4cf54f Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Wed, 6 Apr 2022 09:11:47 -0600 Subject: [PATCH 03/22] move API name normalization to helper function --- capa/features/extractors/helpers.py | 3 +++ capa/features/insn.py | 5 ----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/capa/features/extractors/helpers.py b/capa/features/extractors/helpers.py index f6d91333b..24eee27bd 100644 --- a/capa/features/extractors/helpers.py +++ b/capa/features/extractors/helpers.py @@ -52,6 +52,9 @@ def generate_symbols(dll: str, symbol: str) -> Iterator[str]: - CreateFileA - CreateFile """ + # normalize dll name + dll = dll.lower() + # kernel32.CreateFileA yield "%s.%s" % (dll, symbol) diff --git a/capa/features/insn.py b/capa/features/insn.py index 85ef9a399..4843d95c4 100644 --- a/capa/features/insn.py +++ b/capa/features/insn.py @@ -12,11 +12,6 @@ class API(Feature): def __init__(self, name: str, description=None): - # Downcase library name if given - if "." in name: - modname, _, impname = name.rpartition(".") - name = modname.lower() + "." + impname - super(API, self).__init__(name, description=description) From efd8b300dabde212f25bcff41889c242db6fb3bc Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Wed, 6 Apr 2022 10:56:03 -0600 Subject: [PATCH 04/22] adding dnfile feature extractor --- capa/features/extractors/dotnet/extractor.py | 59 ++++++++++++++++++ capa/features/extractors/dotnet/helpers.py | 34 ++++++++--- capa/features/extractors/dotnet/insn.py | 63 ++++++++++---------- 3 files changed, 118 insertions(+), 38 deletions(-) create mode 100644 capa/features/extractors/dotnet/extractor.py diff --git a/capa/features/extractors/dotnet/extractor.py b/capa/features/extractors/dotnet/extractor.py new file mode 100644 index 000000000..3c9f49b42 --- /dev/null +++ b/capa/features/extractors/dotnet/extractor.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from dnfile import dnPE + +import dncil +import dnfile + +import capa.features.extractors.dotnet.file +import capa.features.extractors.dotnet.insn +import capa.features.extractors.dotnet.function + +from capa.features.extractors.dotnet import get_dotnet_methods +from capa.features.extractors.base_extractor import FeatureExtractor + + +class DnfileFeatureExtractor(FeatureExtractor): + def __init__(self, path: str): + super(DnfileFeatureExtractor, self).__init__() + self.global_features = [] + + self.pe: dnPE = dnfile.dnPE(path) + + def get_base_address(self): + raise NotImplementedError() + + def extract_global_features(self): + yield from self.global_features + + def extract_file_features(self): + raise NotImplementedError() + + def get_functions(self): + ctx = {} + ctx["pe"] = self.pe + + for method in get_dotnet_methods(self.pe): + setattr(method, "ctx", ctx) + yield method + + def extract_function_features(self, f): + raise NotImplementedError() + + def get_basic_blocks(self, f): + # we don't support basic blocks for dotnet and treat each method as one large basic block + return f + + def extract_basic_block_features(self, f, bb): + # we don't support basic block features for dotnet + return + + def get_instructions(self, f, bb): + # we don't support basic blocks for dotnet and treat each method as one large basic block + yield from f.instructions + + def extract_insn_features(self, f, bb, insn): + yield from capa.features.extractors.dotnet.insn.extract_features(f, bb, insn) \ No newline at end of file diff --git a/capa/features/extractors/dotnet/helpers.py b/capa/features/extractors/dotnet/helpers.py index df2eb7a35..d9013300b 100644 --- a/capa/features/extractors/dotnet/helpers.py +++ b/capa/features/extractors/dotnet/helpers.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Tuple, Generator, Any, Dict +from typing import TYPE_CHECKING, Any, Dict, Tuple, Generator from itertools import chain if TYPE_CHECKING: @@ -40,12 +40,12 @@ def seek(self, offset: int) -> int: return self.offset -def make_token(table: int, rid: int) -> int: +def generate_dotnet_token(table: int, rid: int) -> int: """ """ return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK) -def resolve_token(pe: dnPE, token: Token) -> Any: +def resolve_dotnet_token(pe: dnPE, token: Token) -> Any: """ """ if isinstance(token, StringToken): return pe.net.user_strings.get_us(token.rid).value @@ -67,7 +67,7 @@ def resolve_token(pe: dnPE, token: Token) -> Any: return InvalidToken(token.value) -def get_method_body(pe: dnPE, row: MethodDefRow) -> CilMethodBody: +def read_dotnet_method_body(pe: dnPE, row: MethodDefRow) -> CilMethodBody: """ """ return CilMethodBody(DnfileMethodBodyReader(pe, row)) @@ -99,7 +99,7 @@ def get_class_imports(pe: dnPE) -> Generator[Tuple[int, str], None, None]: continue class_imp = f"{get_class_import_name(row)}::{row.Name}" - token = make_token(MetadataTables.MemberRef.value, rid + 1) + token = generate_dotnet_token(MetadataTables.MemberRef.value, rid + 1) yield token, class_imp @@ -132,15 +132,35 @@ def get_native_imports(pe: dnPE) -> Generator[Tuple[int, str], None, None]: # ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the # name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded # MethodDef table token to help us later record native import method calls made from CIL - member_forwarded_token = make_token(row.MemberForwarded.table.number, row.MemberForwarded.row_index) + member_forwarded_token = generate_dotnet_token(row.MemberForwarded.table.number, row.MemberForwarded.row_index) yield member_forwarded_token, native_imp -def get_imports(pe: dnPE) -> Dict[int, str]: +def get_dotnet_imports(pe: dnPE) -> Dict[int, str]: """ """ imps: Dict[int, str] = {} for (token, imp) in chain(get_class_imports(pe), get_native_imports(pe)): imps[token] = imp + return imps + + +def get_dotnet_methods(pe: dnPE) -> Generator[CilMethodBody, None, None]: + """read managed methods from MethodDef table""" + if not hasattr(pe.net.mdtables, "MethodDef"): + return + + for row in pe.net.mdtables.MethodDef: + if not row.ImplFlags.miIL or any((row.Flags.mdAbstract, row.Flags.mdPinvokeImpl)): + # skip methods that do not have a method body + continue + + try: + body: CilMethodBody = read_dotnet_method_body(pe, row) + except MethodBodyFormatError: + # TODO: logging? + continue + + yield body diff --git a/capa/features/extractors/dotnet/insn.py b/capa/features/extractors/dotnet/insn.py index 417c7234f..1d0c21a6e 100644 --- a/capa/features/extractors/dotnet/insn.py +++ b/capa/features/extractors/dotnet/insn.py @@ -1,30 +1,30 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Dict, List, Tuple, Union, Callable, Generator, Any +from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union, Generator if TYPE_CHECKING: from dncil.cil.instruction import Instruction from dncil.cil.body import CilMethodBody -import dncil -import dnfile -from dncil.cil.error import MethodBodyFormatError +from dncil.clr.token import StringToken from dncil.cil.opcode import OpCodes import capa.features.extractors.helpers -import capa.features.extractors.dotnet.helpers from capa.features.insn import API, Number from capa.features.common import String +from capa.features.extractors.dotnet.helpers import get_dotnet_imports def get_imports(ctx): """ """ if "imports_cache" not in ctx: - ctx["imports_cache"] = capa.features.extractors.dotnet.helpers.get_imports(ctx["pe"]) + ctx["imports_cache"] = get_dotnet_imports(ctx["pe"]) return ctx["imports_cache"] -def extract_insn_api_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[API, int], None, None]: +def extract_insn_api_features( + f: CilMethodBody, bb: CilMethodBody, insn: Instruction +) -> Generator[Tuple[API, int], None, None]: """parse instruction API features""" if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): return @@ -34,32 +34,43 @@ def extract_insn_api_features(f: CilMethodBody, insn: Instruction) -> Generator[ return if "::" in name: + # like System.IO.File::OpenRead yield API(name), insn.offset else: + # like kernel32.CreateFileA dll, _, symbol = name.rpartition(".") for name_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol): yield API(name_variant), insn.offset -def extract_insn_number_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[Number, int], None, None]: +def extract_insn_number_features( + f: CilMethodBody, bb: CilMethodBody, insn: Instruction +) -> Generator[Tuple[Number, int], None, None]: """parse instruction number features""" if insn.is_ldc(): yield Number(insn.get_ldc()), insn.offset -def extract_insn_string_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[String, int], None, None]: +def extract_insn_string_features( + f: CilMethodBody, bb: CilMethodBody, insn: Instruction +) -> Generator[Tuple[String, int], None, None]: """parse instruction string features""" - if insn.is_ldstr(): - user_string = capa.features.extractors.dotnet.helpers.resolve_token(f.ctx["pe"], insn.operand) - yield String(user_string), insn.offset + if not insn.is_ldstr(): + return + + if not isinstance(insn.operand, StringToken): + return + + user_string = f.ctx["pe"].net.user_strings.get_us(insn.operand.rid).value + yield String(user_string), insn.offset def extract_features( - f: CilMethodBody, insn: Instruction + f: CilMethodBody, bb: CilMethodBody, insn: Instruction ) -> Generator[Tuple[Union[API, String, Number], int], None, None]: """extract instruction features""" for inst_handler in INSTRUCTION_HANDLERS: - for (feature, ea) in inst_handler(f, insn): + for (feature, ea) in inst_handler(f, bb, insn): yield feature, ea @@ -74,26 +85,14 @@ def main(args): """ """ pe: dnPE = dnfile.dnPE(args.path) - # data structure shared across functions yielded here. - # useful for caching analysis relevant across a single workspace. ctx = {} ctx["pe"] = pe features: List[Any] = [] - for row in pe.net.mdtables.MethodDef: - if not row.ImplFlags.miIL or any((row.Flags.mdAbstract, row.Flags.mdPinvokeImpl)): - continue - - try: - body: CilMethodBody = get_method_body(pe, row) - except MethodBodyFormatError as e: - print(e) - continue - - setattr(body, "ctx", ctx) - - for insn in body.instructions: - features.extend(list(extract_features(body, insn))) + for method in get_dotnet_methods(pe): + setattr(method, "ctx", ctx) + for insn in method.instructions: + features.extend(list(extract_features(method, method, insn))) import pprint @@ -104,7 +103,9 @@ def main(args): """ """ import argparse - from capa.features.extractors.dotnet.helpers import get_method_body + import dnfile + + from capa.features.extractors.dotnet.helpers import get_dotnet_methods parser = argparse.ArgumentParser(prog="parse instruction features from .NET PE") parser.add_argument("path", type=str, help="full path to .NET PE") From 656776f26ef04d3c413a6a62e55d3cd0e85c7cdc Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Wed, 6 Apr 2022 15:01:27 -0600 Subject: [PATCH 05/22] dotnet feature extractor cleanup --- capa/features/extractors/dnfile_.py | 29 +++++--- capa/features/extractors/dotnet/extractor.py | 42 +++++------ capa/features/extractors/dotnet/file.py | 43 +++++++++++ capa/features/extractors/dotnet/helpers.py | 76 +++++++++----------- capa/features/extractors/dotnet/insn.py | 54 +++----------- capa/main.py | 5 +- scripts/show-features.py | 6 ++ setup.py | 1 + 8 files changed, 140 insertions(+), 116 deletions(-) create mode 100644 capa/features/extractors/dotnet/file.py diff --git a/capa/features/extractors/dnfile_.py b/capa/features/extractors/dnfile_.py index 715e8a5f2..ae71e1906 100644 --- a/capa/features/extractors/dnfile_.py +++ b/capa/features/extractors/dnfile_.py @@ -4,21 +4,36 @@ import dnfile import pefile +import capa.features.extractors.helpers +from capa.features.file import Import from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature from capa.features.extractors.base_extractor import FeatureExtractor +from capa.features.extractors.dotnet.helpers import get_dotnet_imports logger = logging.getLogger(__name__) -def extract_file_format(**kwargs): +def extract_file_format(**kwargs) -> Iterator[Tuple[Format, int]]: yield Format(FORMAT_DOTNET), 0x0 -def extract_file_os(**kwargs): +def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, int]]: + for (token, imp) in get_dotnet_imports(pe).items(): + if "::" in imp: + # like System.IO.File::OpenRead + yield Import(imp), token + else: + # like kernel32.CreateFileA + dll, _, symbol = imp.rpartition(".") + for symbol_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield Import(symbol_variant), token + + +def extract_file_os(**kwargs) -> Iterator[Tuple[OS, int]]: yield OS(OS_ANY), 0x0 -def extract_file_arch(pe, **kwargs): +def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, int]]: # to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020 # .NET 4.5 added option: any CPU, 32-bit preferred if pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE: @@ -36,11 +51,9 @@ def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: FILE_HANDLERS = ( - # extract_file_export_names, - # extract_file_import_names, - # extract_file_section_names, - # extract_file_strings, - # extract_file_function_names, + extract_file_import_names, + # TODO extract_file_strings, + # TODO extract_file_function_names, extract_file_format, ) diff --git a/capa/features/extractors/dotnet/extractor.py b/capa/features/extractors/dotnet/extractor.py index 3c9f49b42..63c6ef6e7 100644 --- a/capa/features/extractors/dotnet/extractor.py +++ b/capa/features/extractors/dotnet/extractor.py @@ -1,59 +1,61 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, List, Tuple if TYPE_CHECKING: - from dnfile import dnPE + from capa.features.common import Feature -import dncil import dnfile +import capa.features.extractors import capa.features.extractors.dotnet.file import capa.features.extractors.dotnet.insn -import capa.features.extractors.dotnet.function - -from capa.features.extractors.dotnet import get_dotnet_methods from capa.features.extractors.base_extractor import FeatureExtractor +from capa.features.extractors.dotnet.helpers import get_dotnet_methods class DnfileFeatureExtractor(FeatureExtractor): def __init__(self, path: str): super(DnfileFeatureExtractor, self).__init__() - self.global_features = [] + self.pe: dnfile.dnPE = dnfile.dnPE(path) - self.pe: dnPE = dnfile.dnPE(path) + # pre-compute these because we'll yield them at *every* scope. + self.global_features: List[Tuple[Feature, int]] = [] + self.global_features.extend(capa.features.extractors.dnfile_.extract_file_os(pe=self.pe)) + self.global_features.extend(capa.features.extractors.dnfile_.extract_file_arch(pe=self.pe)) def get_base_address(self): - raise NotImplementedError() + return 0x0 def extract_global_features(self): yield from self.global_features def extract_file_features(self): - raise NotImplementedError() + yield from capa.features.extractors.dotnet.file.extract_features(self.pe) def get_functions(self): ctx = {} ctx["pe"] = self.pe - for method in get_dotnet_methods(self.pe): - setattr(method, "ctx", ctx) - yield method + for f in get_dotnet_methods(self.pe): + setattr(f, "ctx", ctx) + yield f def extract_function_features(self, f): - raise NotImplementedError() + # TODO + yield from [] def get_basic_blocks(self, f): - # we don't support basic blocks for dotnet and treat each method as one large basic block - return f + # each dotnet method is considered 1 basic block + yield f def extract_basic_block_features(self, f, bb): - # we don't support basic block features for dotnet - return + # we don't support basic block features + yield from [] def get_instructions(self, f, bb): - # we don't support basic blocks for dotnet and treat each method as one large basic block + # each dotnet method is considered 1 basic block yield from f.instructions def extract_insn_features(self, f, bb, insn): - yield from capa.features.extractors.dotnet.insn.extract_features(f, bb, insn) \ No newline at end of file + yield from capa.features.extractors.dotnet.insn.extract_features(f, bb, insn) diff --git a/capa/features/extractors/dotnet/file.py b/capa/features/extractors/dotnet/file.py new file mode 100644 index 000000000..81901899c --- /dev/null +++ b/capa/features/extractors/dotnet/file.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, List, Tuple, Iterator + +if TYPE_CHECKING: + import dnfile + from capa.features.common import Feature + +import capa.features.extractors.helpers +from capa.features.file import Import +from capa.features.common import FORMAT_DOTNET, Format +from capa.features.extractors.dotnet.helpers import get_dotnet_imports + + +def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, int]]: + """extract file imports""" + for (token, imp) in get_dotnet_imports(pe).items(): + if "::" in imp: + # like System.IO.File::OpenRead + yield Import(imp), token + else: + # like kernel32.CreateFileA + dll, _, symbol = imp.rpartition(".") + for symbol_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield Import(symbol_variant), token + + +def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, int]]: + yield Format(FORMAT_DOTNET), 0x0 + + +def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: + for file_handler in FILE_HANDLERS: + for (feature, token) in file_handler(pe): + yield feature, token + + +FILE_HANDLERS = ( + extract_file_import_names, + # TODO extract_file_strings, + # TODO extract_file_function_names, + extract_file_format, +) diff --git a/capa/features/extractors/dotnet/helpers.py b/capa/features/extractors/dotnet/helpers.py index d9013300b..eae00292e 100644 --- a/capa/features/extractors/dotnet/helpers.py +++ b/capa/features/extractors/dotnet/helpers.py @@ -1,52 +1,42 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Dict, Tuple, Generator +from typing import Any, Dict, Tuple, Iterator from itertools import chain -if TYPE_CHECKING: - from dnfile.mdtable import MemberRefRow - from dnfile.mdtable import MethodDefRow - from dnfile import dnPE - import dnfile -from dnfile.enums import MetadataTables from dncil.cil.body import CilMethodBody +from dncil.cil.error import MethodBodyFormatError from dncil.clr.token import Token, StringToken, InvalidToken from dncil.cil.body.reader import CilMethodBodyReaderBase # key indexes to dotnet metadata tables -DOTNET_META_TABLES_BY_INDEX = {table.value: table.name for table in MetadataTables} +DOTNET_META_TABLES_BY_INDEX = {table.value: table.name for table in dnfile.enums.MetadataTables} class DnfileMethodBodyReader(CilMethodBodyReaderBase): - def __init__(self, pe: dnPE, row: MethodDefRow): - """ """ - self.pe: dnPE = pe + def __init__(self, pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow): + self.pe: dnfile.dnPE = pe self.offset: int = self.pe.get_offset_from_rva(row.Rva) def read(self, n: int) -> bytes: - """ """ data: bytes = self.pe.get_data(self.pe.get_rva_from_offset(self.offset), n) self.offset += n return data def tell(self) -> int: - """ """ return self.offset def seek(self, offset: int) -> int: - """ """ self.offset = offset return self.offset def generate_dotnet_token(table: int, rid: int) -> int: - """ """ return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK) -def resolve_dotnet_token(pe: dnPE, token: Token) -> Any: - """ """ +def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Any: + """map generic token to string or table row""" if isinstance(token, StringToken): return pe.net.user_strings.get_us(token.rid).value @@ -67,18 +57,21 @@ def resolve_dotnet_token(pe: dnPE, token: Token) -> Any: return InvalidToken(token.value) -def read_dotnet_method_body(pe: dnPE, row: MethodDefRow) -> CilMethodBody: - """ """ +def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) -> CilMethodBody: + """read dotnet method body""" return CilMethodBody(DnfileMethodBodyReader(pe, row)) -def get_class_import_name(row: MemberRefRow) -> str: - """ """ +def get_class_import_name(row: dnfile.mdtable.MemberRefRow) -> str: + """get class import name from TypeRef table""" + if not isinstance(row.Class.row, dnfile.mdtable.TypeRefRow): + return "" + # like System.IO.File return f"{row.Class.row.TypeNamespace}.{row.Class.row.TypeName}" -def get_class_imports(pe: dnPE) -> Generator[Tuple[int, str], None, None]: - """parse class imports +def get_class_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: + """get class imports from MemberRef table see https://www.ntcore.com/files/dotnetformat.htm @@ -98,14 +91,15 @@ def get_class_imports(pe: dnPE) -> Generator[Tuple[int, str], None, None]: if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow,)): continue - class_imp = f"{get_class_import_name(row)}::{row.Name}" - token = generate_dotnet_token(MetadataTables.MemberRef.value, rid + 1) + token = generate_dotnet_token(dnfile.enums.MetadataTables.MemberRef.value, rid + 1) + # like System.IO.File::OpenRead + imp = f"{get_class_import_name(row)}::{row.Name}" - yield token, class_imp + yield token, imp -def get_native_imports(pe: dnPE) -> Generator[Tuple[int, str], None, None]: - """parse native imports +def get_native_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: + """get native p/invoke calls from ImplMap table see https://www.ntcore.com/files/dotnetformat.htm @@ -122,23 +116,23 @@ def get_native_imports(pe: dnPE) -> Generator[Tuple[int, str], None, None]: dll: str = row.ImportScope.row.Name symbol: str = row.ImportName + # ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the + # name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded + # MethodDef table token to help us later record native import method calls made from CIL + token: int = generate_dotnet_token(row.MemberForwarded.table.number, row.MemberForwarded.row_index) + # like Kernel32.dll if dll and "." in dll: - dll = dll.split(".")[0].lower() + dll = dll.split(".")[0] # like kernel32.CreateFileA - native_imp: str = f"{dll}.{symbol}" - - # ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the - # name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded - # MethodDef table token to help us later record native import method calls made from CIL - member_forwarded_token = generate_dotnet_token(row.MemberForwarded.table.number, row.MemberForwarded.row_index) + imp: str = f"{dll}.{symbol}" - yield member_forwarded_token, native_imp + yield token, imp -def get_dotnet_imports(pe: dnPE) -> Dict[int, str]: - """ """ +def get_dotnet_imports(pe: dnfile.dnPE) -> Dict[int, str]: + """get class imports and native p/invoke calls""" imps: Dict[int, str] = {} for (token, imp) in chain(get_class_imports(pe), get_native_imports(pe)): @@ -147,8 +141,8 @@ def get_dotnet_imports(pe: dnPE) -> Dict[int, str]: return imps -def get_dotnet_methods(pe: dnPE) -> Generator[CilMethodBody, None, None]: - """read managed methods from MethodDef table""" +def get_dotnet_methods(pe: dnfile.dnPE) -> Iterator[CilMethodBody]: + """get managed methods from MethodDef table""" if not hasattr(pe.net.mdtables, "MethodDef"): return @@ -160,7 +154,7 @@ def get_dotnet_methods(pe: dnPE) -> Generator[CilMethodBody, None, None]: try: body: CilMethodBody = read_dotnet_method_body(pe, row) except MethodBodyFormatError: - # TODO: logging? + # TODO continue yield body diff --git a/capa/features/extractors/dotnet/insn.py b/capa/features/extractors/dotnet/insn.py index 1d0c21a6e..2fedde733 100644 --- a/capa/features/extractors/dotnet/insn.py +++ b/capa/features/extractors/dotnet/insn.py @@ -1,10 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union, Generator +from typing import TYPE_CHECKING, Dict, Tuple, Iterator if TYPE_CHECKING: from dncil.cil.instruction import Instruction from dncil.cil.body import CilMethodBody + from capa.features.common import Feature from dncil.clr.token import StringToken from dncil.cil.opcode import OpCodes @@ -15,16 +16,13 @@ from capa.features.extractors.dotnet.helpers import get_dotnet_imports -def get_imports(ctx): - """ """ +def get_imports(ctx: Dict) -> Dict: if "imports_cache" not in ctx: ctx["imports_cache"] = get_dotnet_imports(ctx["pe"]) return ctx["imports_cache"] -def extract_insn_api_features( - f: CilMethodBody, bb: CilMethodBody, insn: Instruction -) -> Generator[Tuple[API, int], None, None]: +def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[API, int]]: """parse instruction API features""" if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): return @@ -45,7 +43,7 @@ def extract_insn_api_features( def extract_insn_number_features( f: CilMethodBody, bb: CilMethodBody, insn: Instruction -) -> Generator[Tuple[Number, int], None, None]: +) -> Iterator[Tuple[Number, int]]: """parse instruction number features""" if insn.is_ldc(): yield Number(insn.get_ldc()), insn.offset @@ -53,7 +51,7 @@ def extract_insn_number_features( def extract_insn_string_features( f: CilMethodBody, bb: CilMethodBody, insn: Instruction -) -> Generator[Tuple[String, int], None, None]: +) -> Iterator[Tuple[String, int]]: """parse instruction string features""" if not insn.is_ldstr(): return @@ -65,13 +63,11 @@ def extract_insn_string_features( yield String(user_string), insn.offset -def extract_features( - f: CilMethodBody, bb: CilMethodBody, insn: Instruction -) -> Generator[Tuple[Union[API, String, Number], int], None, None]: +def extract_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[Feature, int]]: """extract instruction features""" for inst_handler in INSTRUCTION_HANDLERS: - for (feature, ea) in inst_handler(f, bb, insn): - yield feature, ea + for (feature, offset) in inst_handler(f, bb, insn): + yield feature, offset INSTRUCTION_HANDLERS = ( @@ -79,35 +75,3 @@ def extract_features( extract_insn_number_features, extract_insn_string_features, ) - - -def main(args): - """ """ - pe: dnPE = dnfile.dnPE(args.path) - - ctx = {} - ctx["pe"] = pe - - features: List[Any] = [] - for method in get_dotnet_methods(pe): - setattr(method, "ctx", ctx) - for insn in method.instructions: - features.extend(list(extract_features(method, method, insn))) - - import pprint - - pprint.pprint(features) - - -if __name__ == "__main__": - """ """ - import argparse - - import dnfile - - from capa.features.extractors.dotnet.helpers import get_dotnet_methods - - parser = argparse.ArgumentParser(prog="parse instruction features from .NET PE") - parser.add_argument("path", type=str, help="full path to .NET PE") - - main(parser.parse_args()) diff --git a/capa/main.py b/capa/main.py index 7499afc3a..4e527787a 100644 --- a/capa/main.py +++ b/capa/main.py @@ -506,8 +506,9 @@ def get_extractor( raise UnsupportedOSError() if format_ == FORMAT_DOTNET: - # TODO return capa.features.extractors.dotnet.extractor.DnFeatureExtractor(...) - raise NotImplementedError("DnFeatureExtractor") + import capa.features.extractors.dotnet.extractor + + return capa.features.extractors.dotnet.extractor.DnfileFeatureExtractor(path) if backend == "smda": from smda.SmdaConfig import SmdaConfig diff --git a/scripts/show-features.py b/scripts/show-features.py index a070f653b..134d1df65 100644 --- a/scripts/show-features.py +++ b/scripts/show-features.py @@ -122,6 +122,12 @@ def main(argv=None): log_unsupported_runtime_error() return -1 + for feature, va in extractor.extract_global_features(): + if va: + print("global: 0x%08x: %s" % (va, feature)) + else: + print("global: 0x00000000: %s" % (feature)) + if not args.function: for feature, va in extractor.extract_file_features(): if va: diff --git a/setup.py b/setup.py index f07ae6c98..5d8f802ff 100644 --- a/setup.py +++ b/setup.py @@ -27,6 +27,7 @@ "pefile==2021.9.3", "pyelftools==0.28", "dnfile==0.10.0", + "dncil==1.0.0", ] # this sets __version__ From f9f5b291fb441ad6673eea4a4d2d5d1670dba593 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 7 Apr 2022 12:57:42 -0600 Subject: [PATCH 06/22] adding guard rails to #US stream reads --- capa/features/extractors/dotnet/helpers.py | 18 ++++++++++++++++-- capa/features/extractors/dotnet/insn.py | 9 ++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/capa/features/extractors/dotnet/helpers.py b/capa/features/extractors/dotnet/helpers.py index eae00292e..e063bd463 100644 --- a/capa/features/extractors/dotnet/helpers.py +++ b/capa/features/extractors/dotnet/helpers.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Dict, Tuple, Iterator +from typing import Any, Dict, Tuple, Iterator, Optional from itertools import chain import dnfile @@ -38,7 +38,10 @@ def generate_dotnet_token(table: int, rid: int) -> int: def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Any: """map generic token to string or table row""" if isinstance(token, StringToken): - return pe.net.user_strings.get_us(token.rid).value + user_string: Optional[str] = read_dotnet_user_string(pe, token) + if user_string is None: + return InvalidToken(token.value) + return user_string table_name: str = DOTNET_META_TABLES_BY_INDEX.get(token.table, "") if not table_name: @@ -62,6 +65,17 @@ def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) - return CilMethodBody(DnfileMethodBodyReader(pe, row)) +def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str]: + """read user string from #US stream""" + try: + user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid) + except UnicodeDecodeError: + return None + if user_string is None: + return None + return user_string.value + + def get_class_import_name(row: dnfile.mdtable.MemberRefRow) -> str: """get class import name from TypeRef table""" if not isinstance(row.Class.row, dnfile.mdtable.TypeRefRow): diff --git a/capa/features/extractors/dotnet/insn.py b/capa/features/extractors/dotnet/insn.py index 2fedde733..eed38ad3f 100644 --- a/capa/features/extractors/dotnet/insn.py +++ b/capa/features/extractors/dotnet/insn.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Dict, Tuple, Iterator +from typing import TYPE_CHECKING, Dict, Tuple, Iterator, Optional if TYPE_CHECKING: from dncil.cil.instruction import Instruction @@ -13,7 +13,7 @@ import capa.features.extractors.helpers from capa.features.insn import API, Number from capa.features.common import String -from capa.features.extractors.dotnet.helpers import get_dotnet_imports +from capa.features.extractors.dotnet.helpers import get_dotnet_imports, read_dotnet_user_string def get_imports(ctx: Dict) -> Dict: @@ -59,7 +59,10 @@ def extract_insn_string_features( if not isinstance(insn.operand, StringToken): return - user_string = f.ctx["pe"].net.user_strings.get_us(insn.operand.rid).value + user_string: Optional[str] = read_dotnet_user_string(f.ctx["pe"], insn.operand) + if user_string is None: + return + yield String(user_string), insn.offset From 6cd5f279d256e49656763e2c0899244e98c5b517 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 7 Apr 2022 14:22:57 -0600 Subject: [PATCH 07/22] update function names to get un/managed dotnet imports --- capa/features/extractors/dnfile_.py | 5 ++-- capa/features/extractors/dotnet/file.py | 5 ++-- capa/features/extractors/dotnet/helpers.py | 35 ++++++++-------------- capa/features/extractors/dotnet/insn.py | 12 ++++++-- 4 files changed, 29 insertions(+), 28 deletions(-) diff --git a/capa/features/extractors/dnfile_.py b/capa/features/extractors/dnfile_.py index ae71e1906..75d3c97c9 100644 --- a/capa/features/extractors/dnfile_.py +++ b/capa/features/extractors/dnfile_.py @@ -1,5 +1,6 @@ import logging from typing import Tuple, Iterator +from itertools import chain import dnfile import pefile @@ -8,7 +9,7 @@ from capa.features.file import Import from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature from capa.features.extractors.base_extractor import FeatureExtractor -from capa.features.extractors.dotnet.helpers import get_dotnet_imports +from capa.features.extractors.dotnet.helpers import get_dotnet_managed_imports, get_dotnet_unmanaged_imports logger = logging.getLogger(__name__) @@ -18,7 +19,7 @@ def extract_file_format(**kwargs) -> Iterator[Tuple[Format, int]]: def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, int]]: - for (token, imp) in get_dotnet_imports(pe).items(): + for (token, imp) in chain(get_dotnet_managed_imports(pe), get_dotnet_unmanaged_imports(pe)): if "::" in imp: # like System.IO.File::OpenRead yield Import(imp), token diff --git a/capa/features/extractors/dotnet/file.py b/capa/features/extractors/dotnet/file.py index 81901899c..b191a706b 100644 --- a/capa/features/extractors/dotnet/file.py +++ b/capa/features/extractors/dotnet/file.py @@ -1,6 +1,7 @@ from __future__ import annotations from typing import TYPE_CHECKING, List, Tuple, Iterator +from itertools import chain if TYPE_CHECKING: import dnfile @@ -9,12 +10,12 @@ import capa.features.extractors.helpers from capa.features.file import Import from capa.features.common import FORMAT_DOTNET, Format -from capa.features.extractors.dotnet.helpers import get_dotnet_imports +from capa.features.extractors.dotnet.helpers import get_dotnet_managed_imports, get_dotnet_unmanaged_imports def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, int]]: """extract file imports""" - for (token, imp) in get_dotnet_imports(pe).items(): + for (token, imp) in chain(get_dotnet_managed_imports(pe), get_dotnet_unmanaged_imports(pe)): if "::" in imp: # like System.IO.File::OpenRead yield Import(imp), token diff --git a/capa/features/extractors/dotnet/helpers.py b/capa/features/extractors/dotnet/helpers.py index e063bd463..a320f9e1e 100644 --- a/capa/features/extractors/dotnet/helpers.py +++ b/capa/features/extractors/dotnet/helpers.py @@ -1,7 +1,6 @@ from __future__ import annotations -from typing import Any, Dict, Tuple, Iterator, Optional -from itertools import chain +from typing import Any, Tuple, Iterator, Optional import dnfile from dncil.cil.body import CilMethodBody @@ -60,9 +59,13 @@ def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Any: return InvalidToken(token.value) -def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) -> CilMethodBody: +def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) -> Optional[CilMethodBody]: """read dotnet method body""" - return CilMethodBody(DnfileMethodBodyReader(pe, row)) + try: + return CilMethodBody(DnfileMethodBodyReader(pe, row)) + except MethodBodyFormatError as e: + print(e) + return None def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str]: @@ -84,8 +87,8 @@ def get_class_import_name(row: dnfile.mdtable.MemberRefRow) -> str: return f"{row.Class.row.TypeNamespace}.{row.Class.row.TypeName}" -def get_class_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: - """get class imports from MemberRef table +def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: + """get managed imports from MemberRef table see https://www.ntcore.com/files/dotnetformat.htm @@ -112,8 +115,8 @@ def get_class_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: yield token, imp -def get_native_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: - """get native p/invoke calls from ImplMap table +def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: + """get unmanaged imports from ImplMap table see https://www.ntcore.com/files/dotnetformat.htm @@ -145,16 +148,6 @@ def get_native_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: yield token, imp -def get_dotnet_imports(pe: dnfile.dnPE) -> Dict[int, str]: - """get class imports and native p/invoke calls""" - imps: Dict[int, str] = {} - - for (token, imp) in chain(get_class_imports(pe), get_native_imports(pe)): - imps[token] = imp - - return imps - - def get_dotnet_methods(pe: dnfile.dnPE) -> Iterator[CilMethodBody]: """get managed methods from MethodDef table""" if not hasattr(pe.net.mdtables, "MethodDef"): @@ -165,10 +158,8 @@ def get_dotnet_methods(pe: dnfile.dnPE) -> Iterator[CilMethodBody]: # skip methods that do not have a method body continue - try: - body: CilMethodBody = read_dotnet_method_body(pe, row) - except MethodBodyFormatError: - # TODO + body: Optional[CilMethodBody] = read_dotnet_method_body(pe, row) + if body is None: continue yield body diff --git a/capa/features/extractors/dotnet/insn.py b/capa/features/extractors/dotnet/insn.py index eed38ad3f..f947cb50f 100644 --- a/capa/features/extractors/dotnet/insn.py +++ b/capa/features/extractors/dotnet/insn.py @@ -1,6 +1,7 @@ from __future__ import annotations from typing import TYPE_CHECKING, Dict, Tuple, Iterator, Optional +from itertools import chain if TYPE_CHECKING: from dncil.cil.instruction import Instruction @@ -13,12 +14,19 @@ import capa.features.extractors.helpers from capa.features.insn import API, Number from capa.features.common import String -from capa.features.extractors.dotnet.helpers import get_dotnet_imports, read_dotnet_user_string +from capa.features.extractors.dotnet.helpers import ( + read_dotnet_user_string, + get_dotnet_managed_imports, + get_dotnet_unmanaged_imports, +) def get_imports(ctx: Dict) -> Dict: if "imports_cache" not in ctx: - ctx["imports_cache"] = get_dotnet_imports(ctx["pe"]) + ctx["imports_cache"] = { + token: imp + for (token, imp) in chain(get_dotnet_managed_imports(ctx["pe"]), get_dotnet_unmanaged_imports(ctx["pe"])) + } return ctx["imports_cache"] From ab8384a59298dbc9e0d2cdd5279d334d4e41a4d3 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 7 Apr 2022 14:28:27 -0600 Subject: [PATCH 08/22] use dnfile_ extractor for file format --- capa/features/extractors/dotnet/file.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/capa/features/extractors/dotnet/file.py b/capa/features/extractors/dotnet/file.py index b191a706b..d2bcb790b 100644 --- a/capa/features/extractors/dotnet/file.py +++ b/capa/features/extractors/dotnet/file.py @@ -7,6 +7,7 @@ import dnfile from capa.features.common import Feature +import capa.features.extractors import capa.features.extractors.helpers from capa.features.file import Import from capa.features.common import FORMAT_DOTNET, Format @@ -27,7 +28,7 @@ def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, int]]: def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, int]]: - yield Format(FORMAT_DOTNET), 0x0 + yield from capa.features.extractors.dnfile_.extract_file_format() def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: From a921b838ad3228ca2f20526cd1bbc8a2f96fd206 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 7 Apr 2022 14:46:31 -0600 Subject: [PATCH 09/22] PR updateS --- capa/features/extractors/dotnet/extractor.py | 3 ++- capa/features/extractors/dotnet/helpers.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/capa/features/extractors/dotnet/extractor.py b/capa/features/extractors/dotnet/extractor.py index 63c6ef6e7..a4e56ec3c 100644 --- a/capa/features/extractors/dotnet/extractor.py +++ b/capa/features/extractors/dotnet/extractor.py @@ -34,6 +34,8 @@ def extract_file_features(self): yield from capa.features.extractors.dotnet.file.extract_features(self.pe) def get_functions(self): + # data structure shared across functions yielded here. + # useful for caching analysis relevant across a single workspace. ctx = {} ctx["pe"] = self.pe @@ -54,7 +56,6 @@ def extract_basic_block_features(self, f, bb): yield from [] def get_instructions(self, f, bb): - # each dotnet method is considered 1 basic block yield from f.instructions def extract_insn_features(self, f, bb, insn): diff --git a/capa/features/extractors/dotnet/helpers.py b/capa/features/extractors/dotnet/helpers.py index a320f9e1e..1b2b815ef 100644 --- a/capa/features/extractors/dotnet/helpers.py +++ b/capa/features/extractors/dotnet/helpers.py @@ -63,8 +63,8 @@ def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) - """read dotnet method body""" try: return CilMethodBody(DnfileMethodBodyReader(pe, row)) - except MethodBodyFormatError as e: - print(e) + except MethodBodyFormatError: + # TODO return None From 45392c5091acd3b2e1a878369571f63d477f9419 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 7 Apr 2022 15:22:10 -0600 Subject: [PATCH 10/22] add debug message for MethodBodyFormat errors --- capa/features/extractors/dotnet/helpers.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/capa/features/extractors/dotnet/helpers.py b/capa/features/extractors/dotnet/helpers.py index 1b2b815ef..944b9b3e1 100644 --- a/capa/features/extractors/dotnet/helpers.py +++ b/capa/features/extractors/dotnet/helpers.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging from typing import Any, Tuple, Iterator, Optional import dnfile @@ -8,6 +9,8 @@ from dncil.clr.token import Token, StringToken, InvalidToken from dncil.cil.body.reader import CilMethodBodyReaderBase +logger = logging.getLogger(__name__) + # key indexes to dotnet metadata tables DOTNET_META_TABLES_BY_INDEX = {table.value: table.name for table in dnfile.enums.MetadataTables} @@ -63,8 +66,8 @@ def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) - """read dotnet method body""" try: return CilMethodBody(DnfileMethodBodyReader(pe, row)) - except MethodBodyFormatError: - # TODO + except MethodBodyFormatError as e: + logger.warn("bad MethodDef row @ 0x%08x (%s)" % (row.Rva, e)) return None From 0a5d99aa4799e5c4fd871ad31eb2c162fcb2bb65 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 7 Apr 2022 15:29:27 -0600 Subject: [PATCH 11/22] additional typing --- capa/features/extractors/dotnet/helpers.py | 4 ++-- capa/features/extractors/dotnet/insn.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/capa/features/extractors/dotnet/helpers.py b/capa/features/extractors/dotnet/helpers.py index 944b9b3e1..2b30d208b 100644 --- a/capa/features/extractors/dotnet/helpers.py +++ b/capa/features/extractors/dotnet/helpers.py @@ -111,9 +111,9 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow,)): continue - token = generate_dotnet_token(dnfile.enums.MetadataTables.MemberRef.value, rid + 1) + token: int = generate_dotnet_token(dnfile.enums.MetadataTables.MemberRef.value, rid + 1) # like System.IO.File::OpenRead - imp = f"{get_class_import_name(row)}::{row.Name}" + imp: str = f"{get_class_import_name(row)}::{row.Name}" yield token, imp diff --git a/capa/features/extractors/dotnet/insn.py b/capa/features/extractors/dotnet/insn.py index f947cb50f..015090369 100644 --- a/capa/features/extractors/dotnet/insn.py +++ b/capa/features/extractors/dotnet/insn.py @@ -35,7 +35,7 @@ def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruc if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): return - name = get_imports(f.ctx).get(insn.operand.value, "") + name: str = get_imports(f.ctx).get(insn.operand.value, "") if not name: return From 6cfaccc2fa94de15669beabd25a52ab14e1d8250 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 7 Apr 2022 15:36:38 -0600 Subject: [PATCH 12/22] removing get_class_import_name --- capa/features/extractors/dotnet/helpers.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/capa/features/extractors/dotnet/helpers.py b/capa/features/extractors/dotnet/helpers.py index 2b30d208b..d5b5a4abe 100644 --- a/capa/features/extractors/dotnet/helpers.py +++ b/capa/features/extractors/dotnet/helpers.py @@ -82,14 +82,6 @@ def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str return user_string.value -def get_class_import_name(row: dnfile.mdtable.MemberRefRow) -> str: - """get class import name from TypeRef table""" - if not isinstance(row.Class.row, dnfile.mdtable.TypeRefRow): - return "" - # like System.IO.File - return f"{row.Class.row.TypeNamespace}.{row.Class.row.TypeName}" - - def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: """get managed imports from MemberRef table @@ -113,7 +105,7 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: token: int = generate_dotnet_token(dnfile.enums.MetadataTables.MemberRef.value, rid + 1) # like System.IO.File::OpenRead - imp: str = f"{get_class_import_name(row)}::{row.Name}" + imp: str = f"{row.Class.row.TypeNamespace}.{row.Class.row.TypeName}::{row.Name}" yield token, imp From c2c54d3e82e678ff39771b2de2bb60fa4e0763a3 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 8 Apr 2022 08:41:58 -0600 Subject: [PATCH 13/22] reuse code from dnfile_ to extract file imports --- capa/features/extractors/dotnet/file.py | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/capa/features/extractors/dotnet/file.py b/capa/features/extractors/dotnet/file.py index d2bcb790b..535e07055 100644 --- a/capa/features/extractors/dotnet/file.py +++ b/capa/features/extractors/dotnet/file.py @@ -1,34 +1,21 @@ from __future__ import annotations -from typing import TYPE_CHECKING, List, Tuple, Iterator -from itertools import chain +from typing import TYPE_CHECKING, Tuple, Iterator if TYPE_CHECKING: import dnfile - from capa.features.common import Feature + from capa.features.common import Feature, Format + from capa.features.file import Import import capa.features.extractors -import capa.features.extractors.helpers -from capa.features.file import Import -from capa.features.common import FORMAT_DOTNET, Format -from capa.features.extractors.dotnet.helpers import get_dotnet_managed_imports, get_dotnet_unmanaged_imports def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, int]]: - """extract file imports""" - for (token, imp) in chain(get_dotnet_managed_imports(pe), get_dotnet_unmanaged_imports(pe)): - if "::" in imp: - # like System.IO.File::OpenRead - yield Import(imp), token - else: - # like kernel32.CreateFileA - dll, _, symbol = imp.rpartition(".") - for symbol_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol): - yield Import(symbol_variant), token + yield from capa.features.extractors.dnfile_.extract_file_import_names(pe) def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, int]]: - yield from capa.features.extractors.dnfile_.extract_file_format() + yield from capa.features.extractors.dnfile_.extract_file_format(pe=pe) def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: From 30c599ac38e8d2a478efeffd7d43f9dc82708354 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 8 Apr 2022 08:56:16 -0600 Subject: [PATCH 14/22] rename generate_dotnet_token to be more descriptive --- capa/features/extractors/dotnet/helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/capa/features/extractors/dotnet/helpers.py b/capa/features/extractors/dotnet/helpers.py index d5b5a4abe..586f51664 100644 --- a/capa/features/extractors/dotnet/helpers.py +++ b/capa/features/extractors/dotnet/helpers.py @@ -33,7 +33,7 @@ def seek(self, offset: int) -> int: return self.offset -def generate_dotnet_token(table: int, rid: int) -> int: +def calculate_dotnet_token_value(table: int, rid: int) -> int: return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK) @@ -103,7 +103,7 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow,)): continue - token: int = generate_dotnet_token(dnfile.enums.MetadataTables.MemberRef.value, rid + 1) + token: int = calculate_dotnet_token_value(dnfile.enums.MetadataTables.MemberRef.value, rid + 1) # like System.IO.File::OpenRead imp: str = f"{row.Class.row.TypeNamespace}.{row.Class.row.TypeName}::{row.Name}" @@ -131,7 +131,7 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: # ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the # name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded # MethodDef table token to help us later record native import method calls made from CIL - token: int = generate_dotnet_token(row.MemberForwarded.table.number, row.MemberForwarded.row_index) + token: int = calculate_dotnet_token_value(row.MemberForwarded.table.number, row.MemberForwarded.row_index) # like Kernel32.dll if dll and "." in dll: From 48b5abd434f3c853f2fc70f6493aaf9bb34a2ab3 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 8 Apr 2022 09:03:00 -0600 Subject: [PATCH 15/22] renaming get_dotnet_methods to be more descriptive --- capa/features/extractors/dotnet/extractor.py | 4 ++-- capa/features/extractors/dotnet/helpers.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/capa/features/extractors/dotnet/extractor.py b/capa/features/extractors/dotnet/extractor.py index a4e56ec3c..5c5bd517d 100644 --- a/capa/features/extractors/dotnet/extractor.py +++ b/capa/features/extractors/dotnet/extractor.py @@ -11,7 +11,7 @@ import capa.features.extractors.dotnet.file import capa.features.extractors.dotnet.insn from capa.features.extractors.base_extractor import FeatureExtractor -from capa.features.extractors.dotnet.helpers import get_dotnet_methods +from capa.features.extractors.dotnet.helpers import get_dotnet_managed_method_bodies class DnfileFeatureExtractor(FeatureExtractor): @@ -39,7 +39,7 @@ def get_functions(self): ctx = {} ctx["pe"] = self.pe - for f in get_dotnet_methods(self.pe): + for f in get_dotnet_managed_method_bodies(self.pe): setattr(f, "ctx", ctx) yield f diff --git a/capa/features/extractors/dotnet/helpers.py b/capa/features/extractors/dotnet/helpers.py index 586f51664..6fab56688 100644 --- a/capa/features/extractors/dotnet/helpers.py +++ b/capa/features/extractors/dotnet/helpers.py @@ -143,7 +143,7 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: yield token, imp -def get_dotnet_methods(pe: dnfile.dnPE) -> Iterator[CilMethodBody]: +def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody]: """get managed methods from MethodDef table""" if not hasattr(pe.net.mdtables, "MethodDef"): return From 6c499df48376a79e1e4189bd0fd27e80b7d6f54c Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 8 Apr 2022 09:11:55 -0600 Subject: [PATCH 16/22] adding debug message for string decode errors --- capa/features/extractors/dotnet/helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/capa/features/extractors/dotnet/helpers.py b/capa/features/extractors/dotnet/helpers.py index 6fab56688..cad15895d 100644 --- a/capa/features/extractors/dotnet/helpers.py +++ b/capa/features/extractors/dotnet/helpers.py @@ -75,7 +75,8 @@ def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str """read user string from #US stream""" try: user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid) - except UnicodeDecodeError: + except UnicodeDecodeError as e: + logger.warn("failed to decode #US stream index 0x%08x (%s)" % (token.rid, e)) return None if user_string is None: return None From 61366fca2fbe6dcac778b0f09358b6b9f4df2165 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 8 Apr 2022 09:15:52 -0600 Subject: [PATCH 17/22] updating logging messages --- capa/features/extractors/dotnet/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/dotnet/helpers.py b/capa/features/extractors/dotnet/helpers.py index cad15895d..a4bc04e8a 100644 --- a/capa/features/extractors/dotnet/helpers.py +++ b/capa/features/extractors/dotnet/helpers.py @@ -67,7 +67,7 @@ def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) - try: return CilMethodBody(DnfileMethodBodyReader(pe, row)) except MethodBodyFormatError as e: - logger.warn("bad MethodDef row @ 0x%08x (%s)" % (row.Rva, e)) + logger.warn("failed to parse managed method body @ 0x%08x (%s)" % (row.Rva, e)) return None From 21a35da726e543a31524afc2d1fdee02641904cd Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 8 Apr 2022 11:06:19 -0600 Subject: [PATCH 18/22] rename dnfile_ to dotnetfile --- .../extractors/{dotnet => dnfile}/__init__.py | 0 .../{dotnet => dnfile}/extractor.py | 14 ++++++------ .../extractors/{dotnet => dnfile}/file.py | 4 ++-- .../extractors/{dotnet => dnfile}/helpers.py | 0 .../extractors/{dotnet => dnfile}/insn.py | 2 +- .../extractors/{dnfile_.py => dotnetfile.py} | 22 +++++++++---------- capa/main.py | 14 ++++++------ tests/fixtures.py | 14 ++++++------ ...eatures.py => test_dotnetfile_features.py} | 18 +++++++-------- 9 files changed, 44 insertions(+), 44 deletions(-) rename capa/features/extractors/{dotnet => dnfile}/__init__.py (100%) rename capa/features/extractors/{dotnet => dnfile}/extractor.py (76%) rename capa/features/extractors/{dotnet => dnfile}/file.py (82%) rename capa/features/extractors/{dotnet => dnfile}/helpers.py (100%) rename capa/features/extractors/{dotnet => dnfile}/insn.py (98%) rename capa/features/extractors/{dnfile_.py => dotnetfile.py} (78%) rename tests/{test_dotnet_features.py => test_dotnetfile_features.py} (59%) diff --git a/capa/features/extractors/dotnet/__init__.py b/capa/features/extractors/dnfile/__init__.py similarity index 100% rename from capa/features/extractors/dotnet/__init__.py rename to capa/features/extractors/dnfile/__init__.py diff --git a/capa/features/extractors/dotnet/extractor.py b/capa/features/extractors/dnfile/extractor.py similarity index 76% rename from capa/features/extractors/dotnet/extractor.py rename to capa/features/extractors/dnfile/extractor.py index 5c5bd517d..68607ce3d 100644 --- a/capa/features/extractors/dotnet/extractor.py +++ b/capa/features/extractors/dnfile/extractor.py @@ -8,10 +8,10 @@ import dnfile import capa.features.extractors -import capa.features.extractors.dotnet.file -import capa.features.extractors.dotnet.insn +import capa.features.extractors.dnfile.file +import capa.features.extractors.dnfile.insn from capa.features.extractors.base_extractor import FeatureExtractor -from capa.features.extractors.dotnet.helpers import get_dotnet_managed_method_bodies +from capa.features.extractors.dnfile.helpers import get_dotnet_managed_method_bodies class DnfileFeatureExtractor(FeatureExtractor): @@ -21,8 +21,8 @@ def __init__(self, path: str): # pre-compute these because we'll yield them at *every* scope. self.global_features: List[Tuple[Feature, int]] = [] - self.global_features.extend(capa.features.extractors.dnfile_.extract_file_os(pe=self.pe)) - self.global_features.extend(capa.features.extractors.dnfile_.extract_file_arch(pe=self.pe)) + self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe)) + self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe)) def get_base_address(self): return 0x0 @@ -31,7 +31,7 @@ def extract_global_features(self): yield from self.global_features def extract_file_features(self): - yield from capa.features.extractors.dotnet.file.extract_features(self.pe) + yield from capa.features.extractors.dnfile.file.extract_features(self.pe) def get_functions(self): # data structure shared across functions yielded here. @@ -59,4 +59,4 @@ def get_instructions(self, f, bb): yield from f.instructions def extract_insn_features(self, f, bb, insn): - yield from capa.features.extractors.dotnet.insn.extract_features(f, bb, insn) + yield from capa.features.extractors.dnfile.insn.extract_features(f, bb, insn) diff --git a/capa/features/extractors/dotnet/file.py b/capa/features/extractors/dnfile/file.py similarity index 82% rename from capa/features/extractors/dotnet/file.py rename to capa/features/extractors/dnfile/file.py index 535e07055..9ffbaed3a 100644 --- a/capa/features/extractors/dotnet/file.py +++ b/capa/features/extractors/dnfile/file.py @@ -11,11 +11,11 @@ def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, int]]: - yield from capa.features.extractors.dnfile_.extract_file_import_names(pe) + yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe) def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, int]]: - yield from capa.features.extractors.dnfile_.extract_file_format(pe=pe) + yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe) def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: diff --git a/capa/features/extractors/dotnet/helpers.py b/capa/features/extractors/dnfile/helpers.py similarity index 100% rename from capa/features/extractors/dotnet/helpers.py rename to capa/features/extractors/dnfile/helpers.py diff --git a/capa/features/extractors/dotnet/insn.py b/capa/features/extractors/dnfile/insn.py similarity index 98% rename from capa/features/extractors/dotnet/insn.py rename to capa/features/extractors/dnfile/insn.py index 015090369..7dff46d41 100644 --- a/capa/features/extractors/dotnet/insn.py +++ b/capa/features/extractors/dnfile/insn.py @@ -14,7 +14,7 @@ import capa.features.extractors.helpers from capa.features.insn import API, Number from capa.features.common import String -from capa.features.extractors.dotnet.helpers import ( +from capa.features.extractors.dnfile.helpers import ( read_dotnet_user_string, get_dotnet_managed_imports, get_dotnet_unmanaged_imports, diff --git a/capa/features/extractors/dnfile_.py b/capa/features/extractors/dotnetfile.py similarity index 78% rename from capa/features/extractors/dnfile_.py rename to capa/features/extractors/dotnetfile.py index ac08d34a0..a9a2c6001 100644 --- a/capa/features/extractors/dnfile_.py +++ b/capa/features/extractors/dotnetfile.py @@ -9,7 +9,7 @@ from capa.features.file import Import from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature from capa.features.extractors.base_extractor import FeatureExtractor -from capa.features.extractors.dotnet.helpers import get_dotnet_managed_imports, get_dotnet_unmanaged_imports +from capa.features.extractors.dnfile.helpers import get_dotnet_managed_imports, get_dotnet_unmanaged_imports logger = logging.getLogger(__name__) @@ -71,9 +71,9 @@ def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: ) -class DnfileFeatureExtractor(FeatureExtractor): +class DotnetFileFeatureExtractor(FeatureExtractor): def __init__(self, path: str): - super(DnfileFeatureExtractor, self).__init__() + super(DotnetFileFeatureExtractor, self).__init__() self.path: str = path self.pe: dnfile.dnPE = dnfile.dnPE(path) @@ -105,25 +105,25 @@ def get_meta_version_string(self) -> str: return self.pe.net.metadata.struct.Version.rstrip(b"\x00").decode("utf-8") def get_functions(self): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") def extract_function_features(self, f): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") def get_basic_blocks(self, f): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") def extract_basic_block_features(self, f, bb): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") def get_instructions(self, f, bb): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") def extract_insn_features(self, f, bb, insn): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") def is_library_function(self, va): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") def get_function_name(self, va): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") diff --git a/capa/main.py b/capa/main.py index e728b26bc..016f3fd0b 100644 --- a/capa/main.py +++ b/capa/main.py @@ -41,8 +41,8 @@ import capa.features.extractors import capa.features.extractors.common import capa.features.extractors.pefile -import capa.features.extractors.dnfile_ import capa.features.extractors.elffile +import capa.features.extractors.dotnetfile from capa.rules import Rule, Scope, RuleSet from capa.engine import FeatureSet, MatchResults from capa.helpers import ( @@ -506,9 +506,9 @@ def get_extractor( raise UnsupportedOSError() if format_ == FORMAT_DOTNET: - import capa.features.extractors.dotnet.extractor + import capa.features.extractors.dnfile.extractor - return capa.features.extractors.dotnet.extractor.DnfileFeatureExtractor(path) + return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path) if backend == "smda": from smda.SmdaConfig import SmdaConfig @@ -549,9 +549,9 @@ def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]: if format_ == capa.features.extractors.common.FORMAT_PE: file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample)) - dnfile_extractor = capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample) - if dnfile_extractor.is_dotnet_file(): - file_extractors.append(dnfile_extractor) + dotnetfile_extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(sample) + if dotnetfile_extractor.is_dotnet_file(): + file_extractors.append(dotnetfile_extractor) elif format_ == capa.features.extractors.common.FORMAT_ELF: file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample)) @@ -1060,7 +1060,7 @@ def main(argv=None): logger.debug("file limitation short circuit, won't analyze fully.") return E_FILE_LIMITATION - if isinstance(file_extractor, capa.features.extractors.dnfile_.DnfileFeatureExtractor): + if isinstance(file_extractor, capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor): format_ = FORMAT_DOTNET if format_ == FORMAT_FREEZE: diff --git a/tests/fixtures.py b/tests/fixtures.py index d7d7f4b8a..c71a181cc 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -136,10 +136,10 @@ def get_pefile_extractor(path): return capa.features.extractors.pefile.PefileFeatureExtractor(path) -def get_dnfile_extractor(path): - import capa.features.extractors.dnfile_ +def get_dotnetfile_extractor(path): + import capa.features.extractors.dotnetfile - return capa.features.extractors.dnfile_.DnfileFeatureExtractor(path) + return capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(path) def extract_global_features(extractor): @@ -781,10 +781,10 @@ def pingtaest_extractor(): @pytest.fixture -def b9f5b_dnfile_extractor(): - return get_dnfile_extractor(get_data_path_by_name("b9f5b")) +def b9f5b_dotnetfile_extractor(): + return get_dotnetfile_extractor(get_data_path_by_name("b9f5b")) @pytest.fixture -def mixed_mode_64_dnfile_extractor(): - return get_dnfile_extractor(get_data_path_by_name("mixed-mode-64")) +def mixed_mode_64_dotnetfile_extractor(): + return get_dotnetfile_extractor(get_data_path_by_name("mixed-mode-64")) diff --git a/tests/test_dotnet_features.py b/tests/test_dotnetfile_features.py similarity index 59% rename from tests/test_dotnet_features.py rename to tests/test_dotnetfile_features.py index b6714419f..15677c875 100644 --- a/tests/test_dotnet_features.py +++ b/tests/test_dotnetfile_features.py @@ -17,21 +17,21 @@ fixtures.FEATURE_PRESENCE_TESTS_DOTNET, indirect=["sample", "scope"], ) -def test_dnfile_features(sample, scope, feature, expected): - fixtures.do_test_feature_presence(fixtures.get_dnfile_extractor, sample, scope, feature, expected) +def test_dotnetfile_features(sample, scope, feature, expected): + fixtures.do_test_feature_presence(fixtures.get_dotnetfile_extractor, sample, scope, feature, expected) @parametrize( "extractor,function,expected", [ - ("b9f5b_dnfile_extractor", "is_dotnet_file", True), - ("b9f5b_dnfile_extractor", "is_mixed_mode", False), - ("mixed_mode_64_dnfile_extractor", "is_mixed_mode", True), - ("b9f5b_dnfile_extractor", "get_entry_point", 0x6000007), - ("b9f5b_dnfile_extractor", "get_runtime_version", (2, 5)), - ("b9f5b_dnfile_extractor", "get_meta_version_string", "v2.0.50727"), + ("b9f5b_dotnetfile_extractor", "is_dotnet_file", True), + ("b9f5b_dotnetfile_extractor", "is_mixed_mode", False), + ("mixed_mode_64_dotnetfile_extractor", "is_mixed_mode", True), + ("b9f5b_dotnetfile_extractor", "get_entry_point", 0x6000007), + ("b9f5b_dotnetfile_extractor", "get_runtime_version", (2, 5)), + ("b9f5b_dotnetfile_extractor", "get_meta_version_string", "v2.0.50727"), ], ) -def test_dnfile_extractor(request, extractor, function, expected): +def test_dotnetfile_extractor(request, extractor, function, expected): extractor_function = getattr(request.getfixturevalue(extractor), function) assert extractor_function() == expected From 7509bb73d11bc49ff5428bd2e4708fc63f39768e Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 8 Apr 2022 12:23:57 -0600 Subject: [PATCH 19/22] adding tests --- tests/fixtures.py | 43 +++++++++++++++++++++++++++++++ tests/test_dnfile_features.py | 30 +++++++++++++++++++++ tests/test_dotnetfile_features.py | 6 +++++ 3 files changed, 79 insertions(+) create mode 100644 tests/test_dnfile_features.py diff --git a/tests/fixtures.py b/tests/fixtures.py index c71a181cc..108cebf15 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -142,6 +142,13 @@ def get_dotnetfile_extractor(path): return capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(path) +@lru_cache(maxsize=1) +def get_dnfile_extractor(path): + import capa.features.extractors.dnfile.extractor + + return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path) + + def extract_global_features(extractor): features = collections.defaultdict(set) for feature, va in extractor.extract_global_features(): @@ -244,6 +251,10 @@ def get_data_path_by_name(name): return os.path.join(CD, "data", "b9f5bd514485fb06da39beff051b9fdc.exe_") elif name.startswith("mixed-mode-64"): return os.path.join(DNFILE_TESTFILES, "mixed-mode", "ModuleCode", "bin", "ModuleCode_amd64.exe") + elif name.startswith("hello-world"): + return os.path.join(DNFILE_TESTFILES, "hello-world", "hello-world.exe") + elif name.startswith("_1c444"): + return os.path.join(CD, "data", "dotnet", "1c444ebeba24dcba8628b7dfe5fec7c6.exe_") else: raise ValueError("unexpected sample fixture: %s" % name) @@ -660,6 +671,25 @@ def parametrize(params, values, **kwargs): ("mixed-mode-64", "file", Arch(ARCH_I386), False), ("b9f5b", "file", OS(OS_ANY), True), ("b9f5b", "file", Format(FORMAT_DOTNET), True), + ("hello-world", "function=0x250", capa.features.common.String("Hello World!"), True), + ("hello-world", "function=0x250, bb=0x250, insn=0x252", capa.features.common.String("Hello World!"), True), + ("hello-world", "function=0x250", capa.features.insn.API("System.Console::WriteLine"), True), + ("hello-world", "file", capa.features.file.Import("System.Console::WriteLine"), True), + ("_1c444", "file", capa.features.file.Import("gdi32.CreateCompatibleBitmap"), True), + ("_1c444", "file", capa.features.file.Import("CreateCompatibleBitmap"), True), + ("_1c444", "file", capa.features.file.Import("gdi32::CreateCompatibleBitmap"), False), + ("_1c444", "function=0x1F68", capa.features.insn.API("GetWindowDC"), True), + ("_1c444", "function=0x1F68", capa.features.insn.API("user32.GetWindowDC"), True), + ("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True), + ("_1c444", "function=0x1F68", capa.features.insn.Number(0x0), True), + ("_1c444", "function=0x1F68", capa.features.insn.Number(0x1), False), + ( + "_1c444", + "function=0x1F68, bb=0x1F68, insn=0x1FF9", + capa.features.insn.API("System.Drawing.Image::FromHbitmap"), + True, + ), + ("_1c444", "function=0x1F68, bb=0x1F68, insn=0x1FF9", capa.features.insn.API("FromHbitmap"), False), ], # order tests by (file, item) # so that our LRU cache is most effective. @@ -681,6 +711,9 @@ def parametrize(params, values, **kwargs): ] +FEATURE_COUNT_TESTS_DOTNET = [] + + def do_test_feature_presence(get_extractor, sample, scope, feature, expected): extractor = get_extractor(sample) features = scope(extractor) @@ -788,3 +821,13 @@ def b9f5b_dotnetfile_extractor(): @pytest.fixture def mixed_mode_64_dotnetfile_extractor(): return get_dotnetfile_extractor(get_data_path_by_name("mixed-mode-64")) + + +@pytest.fixture +def hello_world_dnfile_extractor(): + return get_dnfile_extractor(get_data_path_by_name("hello-world")) + + +@pytest.fixture +def _1c444_dnfile_extractor(): + return get_dnfile_extractor(get_data_path_by_name("1c444...")) diff --git a/tests/test_dnfile_features.py b/tests/test_dnfile_features.py new file mode 100644 index 000000000..0ae391702 --- /dev/null +++ b/tests/test_dnfile_features.py @@ -0,0 +1,30 @@ +# Copyright (C) 2020 FireEye, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +import pytest +import fixtures +from fixtures import * +from fixtures import parametrize + + +@parametrize( + "sample,scope,feature,expected", + fixtures.FEATURE_PRESENCE_TESTS_DOTNET, + indirect=["sample", "scope"], +) +def test_dnfile_features(sample, scope, feature, expected): + fixtures.do_test_feature_presence(fixtures.get_dnfile_extractor, sample, scope, feature, expected) + + +@parametrize( + "sample,scope,feature,expected", + fixtures.FEATURE_COUNT_TESTS_DOTNET, + indirect=["sample", "scope"], +) +def test_dnfile_feature_counts(sample, scope, feature, expected): + fixtures.do_test_feature_count(fixtures.get_dnfile_extractor, sample, scope, feature, expected) diff --git a/tests/test_dotnetfile_features.py b/tests/test_dotnetfile_features.py index 15677c875..01d2e8a96 100644 --- a/tests/test_dotnetfile_features.py +++ b/tests/test_dotnetfile_features.py @@ -18,6 +18,12 @@ indirect=["sample", "scope"], ) def test_dotnetfile_features(sample, scope, feature, expected): + if scope.__name__ != "file": + pytest.xfail("pefile only extracts file scope features") + + if isinstance(feature, capa.features.file.FunctionName): + pytest.xfail("pefile doesn't extract function names") + fixtures.do_test_feature_presence(fixtures.get_dotnetfile_extractor, sample, scope, feature, expected) From 60214d24ece5508efb1d7b5781071df0042ed4ac Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 8 Apr 2022 12:29:02 -0600 Subject: [PATCH 20/22] updating file headers --- capa/features/extractors/dnfile/extractor.py | 8 ++++++++ capa/features/extractors/dnfile/file.py | 8 ++++++++ capa/features/extractors/dnfile/helpers.py | 8 ++++++++ capa/features/extractors/dnfile/insn.py | 8 ++++++++ tests/test_dnfile_features.py | 2 +- 5 files changed, 33 insertions(+), 1 deletion(-) diff --git a/capa/features/extractors/dnfile/extractor.py b/capa/features/extractors/dnfile/extractor.py index 68607ce3d..f82364a21 100644 --- a/capa/features/extractors/dnfile/extractor.py +++ b/capa/features/extractors/dnfile/extractor.py @@ -1,3 +1,11 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + from __future__ import annotations from typing import TYPE_CHECKING, Any, List, Tuple diff --git a/capa/features/extractors/dnfile/file.py b/capa/features/extractors/dnfile/file.py index 9ffbaed3a..99e2643ce 100644 --- a/capa/features/extractors/dnfile/file.py +++ b/capa/features/extractors/dnfile/file.py @@ -1,3 +1,11 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + from __future__ import annotations from typing import TYPE_CHECKING, Tuple, Iterator diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index a4bc04e8a..c73044622 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -1,3 +1,11 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + from __future__ import annotations import logging diff --git a/capa/features/extractors/dnfile/insn.py b/capa/features/extractors/dnfile/insn.py index 7dff46d41..262b97798 100644 --- a/capa/features/extractors/dnfile/insn.py +++ b/capa/features/extractors/dnfile/insn.py @@ -1,3 +1,11 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + from __future__ import annotations from typing import TYPE_CHECKING, Dict, Tuple, Iterator, Optional diff --git a/tests/test_dnfile_features.py b/tests/test_dnfile_features.py index 0ae391702..76540468f 100644 --- a/tests/test_dnfile_features.py +++ b/tests/test_dnfile_features.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020 FireEye, Inc. All Rights Reserved. +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: [package root]/LICENSE.txt From 29410504d9a18cc0fc68c96cd017aeaaf8d9fbea Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 8 Apr 2022 12:38:41 -0600 Subject: [PATCH 21/22] adding mypy ignore --- tests/fixtures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 108cebf15..7f5abc732 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -711,7 +711,7 @@ def parametrize(params, values, **kwargs): ] -FEATURE_COUNT_TESTS_DOTNET = [] +FEATURE_COUNT_TESTS_DOTNET = [] # type: ignore def do_test_feature_presence(get_extractor, sample, scope, feature, expected): From 48b8110b3e33dd3a57eb8ea0fe66daa538e4d2f7 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 8 Apr 2022 13:01:05 -0600 Subject: [PATCH 22/22] fix typo in test file --- tests/test_dotnetfile_features.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_dotnetfile_features.py b/tests/test_dotnetfile_features.py index 01d2e8a96..28d63cdbe 100644 --- a/tests/test_dotnetfile_features.py +++ b/tests/test_dotnetfile_features.py @@ -19,10 +19,10 @@ ) def test_dotnetfile_features(sample, scope, feature, expected): if scope.__name__ != "file": - pytest.xfail("pefile only extracts file scope features") + pytest.xfail("dotnetfile only extracts file scope features") if isinstance(feature, capa.features.file.FunctionName): - pytest.xfail("pefile doesn't extract function names") + pytest.xfail("dotnetfile doesn't extract function names") fixtures.do_test_feature_presence(fixtures.get_dotnetfile_extractor, sample, scope, feature, expected)