diff --git a/.github/mypy/mypy.ini b/.github/mypy/mypy.ini index 6d177d40a..3d22b05f7 100644 --- a/.github/mypy/mypy.ini +++ b/.github/mypy/mypy.ini @@ -74,3 +74,6 @@ ignore_missing_imports = True [mypy-elftools.*] ignore_missing_imports = True + +[mypy-dncil.*] +ignore_missing_imports = True \ No newline at end of file diff --git a/capa/features/extractors/dnfile/__init__.py b/capa/features/extractors/dnfile/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/capa/features/extractors/dnfile/extractor.py b/capa/features/extractors/dnfile/extractor.py new file mode 100644 index 000000000..f82364a21 --- /dev/null +++ b/capa/features/extractors/dnfile/extractor.py @@ -0,0 +1,70 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, List, Tuple + +if TYPE_CHECKING: + from capa.features.common import Feature + +import dnfile + +import capa.features.extractors +import capa.features.extractors.dnfile.file +import capa.features.extractors.dnfile.insn +from capa.features.extractors.base_extractor import FeatureExtractor +from capa.features.extractors.dnfile.helpers import get_dotnet_managed_method_bodies + + +class DnfileFeatureExtractor(FeatureExtractor): + def __init__(self, path: str): + super(DnfileFeatureExtractor, self).__init__() + self.pe: dnfile.dnPE = dnfile.dnPE(path) + + # pre-compute these because we'll yield them at *every* scope. + self.global_features: List[Tuple[Feature, int]] = [] + self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe)) + self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe)) + + def get_base_address(self): + return 0x0 + + def extract_global_features(self): + yield from self.global_features + + def extract_file_features(self): + yield from capa.features.extractors.dnfile.file.extract_features(self.pe) + + def get_functions(self): + # data structure shared across functions yielded here. + # useful for caching analysis relevant across a single workspace. + ctx = {} + ctx["pe"] = self.pe + + for f in get_dotnet_managed_method_bodies(self.pe): + setattr(f, "ctx", ctx) + yield f + + def extract_function_features(self, f): + # TODO + yield from [] + + def get_basic_blocks(self, f): + # each dotnet method is considered 1 basic block + yield f + + def extract_basic_block_features(self, f, bb): + # we don't support basic block features + yield from [] + + def get_instructions(self, f, bb): + yield from f.instructions + + def extract_insn_features(self, f, bb, insn): + yield from capa.features.extractors.dnfile.insn.extract_features(f, bb, insn) diff --git a/capa/features/extractors/dnfile/file.py b/capa/features/extractors/dnfile/file.py new file mode 100644 index 000000000..99e2643ce --- /dev/null +++ b/capa/features/extractors/dnfile/file.py @@ -0,0 +1,40 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +from __future__ import annotations + +from typing import TYPE_CHECKING, Tuple, Iterator + +if TYPE_CHECKING: + import dnfile + from capa.features.common import Feature, Format + from capa.features.file import Import + +import capa.features.extractors + + +def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, int]]: + yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe) + + +def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, int]]: + yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe) + + +def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: + for file_handler in FILE_HANDLERS: + for (feature, token) in file_handler(pe): + yield feature, token + + +FILE_HANDLERS = ( + extract_file_import_names, + # TODO extract_file_strings, + # TODO extract_file_function_names, + extract_file_format, +) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py new file mode 100644 index 000000000..c73044622 --- /dev/null +++ b/capa/features/extractors/dnfile/helpers.py @@ -0,0 +1,169 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +from __future__ import annotations + +import logging +from typing import Any, Tuple, Iterator, Optional + +import dnfile +from dncil.cil.body import CilMethodBody +from dncil.cil.error import MethodBodyFormatError +from dncil.clr.token import Token, StringToken, InvalidToken +from dncil.cil.body.reader import CilMethodBodyReaderBase + +logger = logging.getLogger(__name__) + +# key indexes to dotnet metadata tables +DOTNET_META_TABLES_BY_INDEX = {table.value: table.name for table in dnfile.enums.MetadataTables} + + +class DnfileMethodBodyReader(CilMethodBodyReaderBase): + def __init__(self, pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow): + self.pe: dnfile.dnPE = pe + self.offset: int = self.pe.get_offset_from_rva(row.Rva) + + def read(self, n: int) -> bytes: + data: bytes = self.pe.get_data(self.pe.get_rva_from_offset(self.offset), n) + self.offset += n + return data + + def tell(self) -> int: + return self.offset + + def seek(self, offset: int) -> int: + self.offset = offset + return self.offset + + +def calculate_dotnet_token_value(table: int, rid: int) -> int: + return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK) + + +def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Any: + """map generic token to string or table row""" + if isinstance(token, StringToken): + user_string: Optional[str] = read_dotnet_user_string(pe, token) + if user_string is None: + return InvalidToken(token.value) + return user_string + + table_name: str = DOTNET_META_TABLES_BY_INDEX.get(token.table, "") + if not table_name: + # table_index is not valid + return InvalidToken(token.value) + + table: Any = getattr(pe.net.mdtables, table_name, None) + if table is None: + # table index is valid but table is not present + return InvalidToken(token.value) + + try: + return table.rows[token.rid - 1] + except IndexError: + # table index is valid but row index is not valid + return InvalidToken(token.value) + + +def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) -> Optional[CilMethodBody]: + """read dotnet method body""" + try: + return CilMethodBody(DnfileMethodBodyReader(pe, row)) + except MethodBodyFormatError as e: + logger.warn("failed to parse managed method body @ 0x%08x (%s)" % (row.Rva, e)) + return None + + +def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str]: + """read user string from #US stream""" + try: + user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid) + except UnicodeDecodeError as e: + logger.warn("failed to decode #US stream index 0x%08x (%s)" % (token.rid, e)) + return None + if user_string is None: + return None + return user_string.value + + +def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: + """get managed imports from MemberRef table + + see https://www.ntcore.com/files/dotnetformat.htm + + 10 - MemberRef Table + Each row represents an imported method + Class (index into the TypeRef, ModuleRef, MethodDef, TypeSpec or TypeDef tables) + Name (index into String heap) + 01 - TypeRef Table + Each row represents an imported class, its namespace and the assembly which contains it + TypeName (index into String heap) + TypeNamespace (index into String heap) + """ + if not hasattr(pe.net.mdtables, "MemberRef"): + return + + for (rid, row) in enumerate(pe.net.mdtables.MemberRef): + if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow,)): + continue + + token: int = calculate_dotnet_token_value(dnfile.enums.MetadataTables.MemberRef.value, rid + 1) + # like System.IO.File::OpenRead + imp: str = f"{row.Class.row.TypeNamespace}.{row.Class.row.TypeName}::{row.Name}" + + yield token, imp + + +def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: + """get unmanaged imports from ImplMap table + + see https://www.ntcore.com/files/dotnetformat.htm + + 28 - ImplMap Table + ImplMap table holds information about unmanaged methods that can be reached from managed code, using PInvoke dispatch + MemberForwarded (index into the Field or MethodDef table; more precisely, a MemberForwarded coded index) + ImportName (index into the String heap) + ImportScope (index into the ModuleRef table) + """ + if not hasattr(pe.net.mdtables, "ImplMap"): + return + + for row in pe.net.mdtables.ImplMap: + dll: str = row.ImportScope.row.Name + symbol: str = row.ImportName + + # ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the + # name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded + # MethodDef table token to help us later record native import method calls made from CIL + token: int = calculate_dotnet_token_value(row.MemberForwarded.table.number, row.MemberForwarded.row_index) + + # like Kernel32.dll + if dll and "." in dll: + dll = dll.split(".")[0] + + # like kernel32.CreateFileA + imp: str = f"{dll}.{symbol}" + + yield token, imp + + +def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody]: + """get managed methods from MethodDef table""" + if not hasattr(pe.net.mdtables, "MethodDef"): + return + + for row in pe.net.mdtables.MethodDef: + if not row.ImplFlags.miIL or any((row.Flags.mdAbstract, row.Flags.mdPinvokeImpl)): + # skip methods that do not have a method body + continue + + body: Optional[CilMethodBody] = read_dotnet_method_body(pe, row) + if body is None: + continue + + yield body diff --git a/capa/features/extractors/dnfile/insn.py b/capa/features/extractors/dnfile/insn.py new file mode 100644 index 000000000..262b97798 --- /dev/null +++ b/capa/features/extractors/dnfile/insn.py @@ -0,0 +1,96 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +from __future__ import annotations + +from typing import TYPE_CHECKING, Dict, Tuple, Iterator, Optional +from itertools import chain + +if TYPE_CHECKING: + from dncil.cil.instruction import Instruction + from dncil.cil.body import CilMethodBody + from capa.features.common import Feature + +from dncil.clr.token import StringToken +from dncil.cil.opcode import OpCodes + +import capa.features.extractors.helpers +from capa.features.insn import API, Number +from capa.features.common import String +from capa.features.extractors.dnfile.helpers import ( + read_dotnet_user_string, + get_dotnet_managed_imports, + get_dotnet_unmanaged_imports, +) + + +def get_imports(ctx: Dict) -> Dict: + if "imports_cache" not in ctx: + ctx["imports_cache"] = { + token: imp + for (token, imp) in chain(get_dotnet_managed_imports(ctx["pe"]), get_dotnet_unmanaged_imports(ctx["pe"])) + } + return ctx["imports_cache"] + + +def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[API, int]]: + """parse instruction API features""" + if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): + return + + name: str = get_imports(f.ctx).get(insn.operand.value, "") + if not name: + return + + if "::" in name: + # like System.IO.File::OpenRead + yield API(name), insn.offset + else: + # like kernel32.CreateFileA + dll, _, symbol = name.rpartition(".") + for name_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield API(name_variant), insn.offset + + +def extract_insn_number_features( + f: CilMethodBody, bb: CilMethodBody, insn: Instruction +) -> Iterator[Tuple[Number, int]]: + """parse instruction number features""" + if insn.is_ldc(): + yield Number(insn.get_ldc()), insn.offset + + +def extract_insn_string_features( + f: CilMethodBody, bb: CilMethodBody, insn: Instruction +) -> Iterator[Tuple[String, int]]: + """parse instruction string features""" + if not insn.is_ldstr(): + return + + if not isinstance(insn.operand, StringToken): + return + + user_string: Optional[str] = read_dotnet_user_string(f.ctx["pe"], insn.operand) + if user_string is None: + return + + yield String(user_string), insn.offset + + +def extract_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[Feature, int]]: + """extract instruction features""" + for inst_handler in INSTRUCTION_HANDLERS: + for (feature, offset) in inst_handler(f, bb, insn): + yield feature, offset + + +INSTRUCTION_HANDLERS = ( + extract_insn_api_features, + extract_insn_number_features, + extract_insn_string_features, +) diff --git a/capa/features/extractors/dnfile_.py b/capa/features/extractors/dotnetfile.py similarity index 56% rename from capa/features/extractors/dnfile_.py rename to capa/features/extractors/dotnetfile.py index af9befcc9..a9a2c6001 100644 --- a/capa/features/extractors/dnfile_.py +++ b/capa/features/extractors/dotnetfile.py @@ -1,24 +1,40 @@ import logging from typing import Tuple, Iterator +from itertools import chain import dnfile import pefile +import capa.features.extractors.helpers +from capa.features.file import Import from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature from capa.features.extractors.base_extractor import FeatureExtractor +from capa.features.extractors.dnfile.helpers import get_dotnet_managed_imports, get_dotnet_unmanaged_imports logger = logging.getLogger(__name__) -def extract_file_format(**kwargs): +def extract_file_format(**kwargs) -> Iterator[Tuple[Format, int]]: yield Format(FORMAT_DOTNET), 0x0 -def extract_file_os(**kwargs): +def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, int]]: + for (token, imp) in chain(get_dotnet_managed_imports(pe), get_dotnet_unmanaged_imports(pe)): + if "::" in imp: + # like System.IO.File::OpenRead + yield Import(imp), token + else: + # like kernel32.CreateFileA + dll, _, symbol = imp.rpartition(".") + for symbol_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol): + yield Import(symbol_variant), token + + +def extract_file_os(**kwargs) -> Iterator[Tuple[OS, int]]: yield OS(OS_ANY), 0x0 -def extract_file_arch(pe, **kwargs): +def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, int]]: # to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020 # .NET 4.5 added option: any CPU, 32-bit preferred if pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE: @@ -36,11 +52,9 @@ def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: FILE_HANDLERS = ( - # extract_file_export_names, - # extract_file_import_names, - # extract_file_section_names, - # extract_file_strings, - # extract_file_function_names, + extract_file_import_names, + # TODO extract_file_strings, + # TODO extract_file_function_names, extract_file_format, ) @@ -57,9 +71,9 @@ def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: ) -class DnfileFeatureExtractor(FeatureExtractor): +class DotnetFileFeatureExtractor(FeatureExtractor): def __init__(self, path: str): - super(DnfileFeatureExtractor, self).__init__() + super(DotnetFileFeatureExtractor, self).__init__() self.path: str = path self.pe: dnfile.dnPE = dnfile.dnPE(path) @@ -91,25 +105,25 @@ def get_meta_version_string(self) -> str: return self.pe.net.metadata.struct.Version.rstrip(b"\x00").decode("utf-8") def get_functions(self): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") def extract_function_features(self, f): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") def get_basic_blocks(self, f): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") def extract_basic_block_features(self, f, bb): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") def get_instructions(self, f, bb): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") def extract_insn_features(self, f, bb, insn): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") def is_library_function(self, va): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") def get_function_name(self, va): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") diff --git a/capa/features/extractors/helpers.py b/capa/features/extractors/helpers.py index 8593a5bbd..8fe920c58 100644 --- a/capa/features/extractors/helpers.py +++ b/capa/features/extractors/helpers.py @@ -51,6 +51,9 @@ def generate_symbols(dll: str, symbol: str) -> Iterator[str]: - CreateFileA - CreateFile """ + # normalize dll name + dll = dll.lower() + # kernel32.CreateFileA yield "%s.%s" % (dll, symbol) diff --git a/capa/features/insn.py b/capa/features/insn.py index 00a549f14..bb8924eee 100644 --- a/capa/features/insn.py +++ b/capa/features/insn.py @@ -13,11 +13,6 @@ class API(Feature): def __init__(self, name: str, description=None): - # Downcase library name if given - if "." in name: - modname, _, impname = name.rpartition(".") - name = modname.lower() + "." + impname - super(API, self).__init__(name, description=description) diff --git a/capa/main.py b/capa/main.py index 5b306c7c4..016f3fd0b 100644 --- a/capa/main.py +++ b/capa/main.py @@ -41,8 +41,8 @@ import capa.features.extractors import capa.features.extractors.common import capa.features.extractors.pefile -import capa.features.extractors.dnfile_ import capa.features.extractors.elffile +import capa.features.extractors.dotnetfile from capa.rules import Rule, Scope, RuleSet from capa.engine import FeatureSet, MatchResults from capa.helpers import ( @@ -506,8 +506,9 @@ def get_extractor( raise UnsupportedOSError() if format_ == FORMAT_DOTNET: - # TODO return capa.features.extractors.dotnet.extractor.DnFeatureExtractor(...) - raise NotImplementedError("DnFeatureExtractor") + import capa.features.extractors.dnfile.extractor + + return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path) if backend == "smda": from smda.SmdaConfig import SmdaConfig @@ -548,9 +549,9 @@ def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]: if format_ == capa.features.extractors.common.FORMAT_PE: file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample)) - dnfile_extractor = capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample) - if dnfile_extractor.is_dotnet_file(): - file_extractors.append(dnfile_extractor) + dotnetfile_extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(sample) + if dotnetfile_extractor.is_dotnet_file(): + file_extractors.append(dotnetfile_extractor) elif format_ == capa.features.extractors.common.FORMAT_ELF: file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample)) @@ -1059,7 +1060,7 @@ def main(argv=None): logger.debug("file limitation short circuit, won't analyze fully.") return E_FILE_LIMITATION - if isinstance(file_extractor, capa.features.extractors.dnfile_.DnfileFeatureExtractor): + if isinstance(file_extractor, capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor): format_ = FORMAT_DOTNET if format_ == FORMAT_FREEZE: diff --git a/scripts/show-features.py b/scripts/show-features.py index a070f653b..134d1df65 100644 --- a/scripts/show-features.py +++ b/scripts/show-features.py @@ -122,6 +122,12 @@ def main(argv=None): log_unsupported_runtime_error() return -1 + for feature, va in extractor.extract_global_features(): + if va: + print("global: 0x%08x: %s" % (va, feature)) + else: + print("global: 0x00000000: %s" % (feature)) + if not args.function: for feature, va in extractor.extract_file_features(): if va: diff --git a/setup.py b/setup.py index 64e5f03e7..9dea50430 100644 --- a/setup.py +++ b/setup.py @@ -27,6 +27,7 @@ "pefile==2021.9.3", "pyelftools==0.28", "dnfile==0.10.0", + "dncil==1.0.0", ] # this sets __version__ diff --git a/tests/fixtures.py b/tests/fixtures.py index d7d7f4b8a..7f5abc732 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -136,10 +136,17 @@ def get_pefile_extractor(path): return capa.features.extractors.pefile.PefileFeatureExtractor(path) +def get_dotnetfile_extractor(path): + import capa.features.extractors.dotnetfile + + return capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(path) + + +@lru_cache(maxsize=1) def get_dnfile_extractor(path): - import capa.features.extractors.dnfile_ + import capa.features.extractors.dnfile.extractor - return capa.features.extractors.dnfile_.DnfileFeatureExtractor(path) + return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path) def extract_global_features(extractor): @@ -244,6 +251,10 @@ def get_data_path_by_name(name): return os.path.join(CD, "data", "b9f5bd514485fb06da39beff051b9fdc.exe_") elif name.startswith("mixed-mode-64"): return os.path.join(DNFILE_TESTFILES, "mixed-mode", "ModuleCode", "bin", "ModuleCode_amd64.exe") + elif name.startswith("hello-world"): + return os.path.join(DNFILE_TESTFILES, "hello-world", "hello-world.exe") + elif name.startswith("_1c444"): + return os.path.join(CD, "data", "dotnet", "1c444ebeba24dcba8628b7dfe5fec7c6.exe_") else: raise ValueError("unexpected sample fixture: %s" % name) @@ -660,6 +671,25 @@ def parametrize(params, values, **kwargs): ("mixed-mode-64", "file", Arch(ARCH_I386), False), ("b9f5b", "file", OS(OS_ANY), True), ("b9f5b", "file", Format(FORMAT_DOTNET), True), + ("hello-world", "function=0x250", capa.features.common.String("Hello World!"), True), + ("hello-world", "function=0x250, bb=0x250, insn=0x252", capa.features.common.String("Hello World!"), True), + ("hello-world", "function=0x250", capa.features.insn.API("System.Console::WriteLine"), True), + ("hello-world", "file", capa.features.file.Import("System.Console::WriteLine"), True), + ("_1c444", "file", capa.features.file.Import("gdi32.CreateCompatibleBitmap"), True), + ("_1c444", "file", capa.features.file.Import("CreateCompatibleBitmap"), True), + ("_1c444", "file", capa.features.file.Import("gdi32::CreateCompatibleBitmap"), False), + ("_1c444", "function=0x1F68", capa.features.insn.API("GetWindowDC"), True), + ("_1c444", "function=0x1F68", capa.features.insn.API("user32.GetWindowDC"), True), + ("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True), + ("_1c444", "function=0x1F68", capa.features.insn.Number(0x0), True), + ("_1c444", "function=0x1F68", capa.features.insn.Number(0x1), False), + ( + "_1c444", + "function=0x1F68, bb=0x1F68, insn=0x1FF9", + capa.features.insn.API("System.Drawing.Image::FromHbitmap"), + True, + ), + ("_1c444", "function=0x1F68, bb=0x1F68, insn=0x1FF9", capa.features.insn.API("FromHbitmap"), False), ], # order tests by (file, item) # so that our LRU cache is most effective. @@ -681,6 +711,9 @@ def parametrize(params, values, **kwargs): ] +FEATURE_COUNT_TESTS_DOTNET = [] # type: ignore + + def do_test_feature_presence(get_extractor, sample, scope, feature, expected): extractor = get_extractor(sample) features = scope(extractor) @@ -781,10 +814,20 @@ def pingtaest_extractor(): @pytest.fixture -def b9f5b_dnfile_extractor(): - return get_dnfile_extractor(get_data_path_by_name("b9f5b")) +def b9f5b_dotnetfile_extractor(): + return get_dotnetfile_extractor(get_data_path_by_name("b9f5b")) + + +@pytest.fixture +def mixed_mode_64_dotnetfile_extractor(): + return get_dotnetfile_extractor(get_data_path_by_name("mixed-mode-64")) + + +@pytest.fixture +def hello_world_dnfile_extractor(): + return get_dnfile_extractor(get_data_path_by_name("hello-world")) @pytest.fixture -def mixed_mode_64_dnfile_extractor(): - return get_dnfile_extractor(get_data_path_by_name("mixed-mode-64")) +def _1c444_dnfile_extractor(): + return get_dnfile_extractor(get_data_path_by_name("1c444...")) diff --git a/tests/test_dotnet_features.py b/tests/test_dnfile_features.py similarity index 60% rename from tests/test_dotnet_features.py rename to tests/test_dnfile_features.py index b6714419f..76540468f 100644 --- a/tests/test_dotnet_features.py +++ b/tests/test_dnfile_features.py @@ -22,16 +22,9 @@ def test_dnfile_features(sample, scope, feature, expected): @parametrize( - "extractor,function,expected", - [ - ("b9f5b_dnfile_extractor", "is_dotnet_file", True), - ("b9f5b_dnfile_extractor", "is_mixed_mode", False), - ("mixed_mode_64_dnfile_extractor", "is_mixed_mode", True), - ("b9f5b_dnfile_extractor", "get_entry_point", 0x6000007), - ("b9f5b_dnfile_extractor", "get_runtime_version", (2, 5)), - ("b9f5b_dnfile_extractor", "get_meta_version_string", "v2.0.50727"), - ], + "sample,scope,feature,expected", + fixtures.FEATURE_COUNT_TESTS_DOTNET, + indirect=["sample", "scope"], ) -def test_dnfile_extractor(request, extractor, function, expected): - extractor_function = getattr(request.getfixturevalue(extractor), function) - assert extractor_function() == expected +def test_dnfile_feature_counts(sample, scope, feature, expected): + fixtures.do_test_feature_count(fixtures.get_dnfile_extractor, sample, scope, feature, expected) diff --git a/tests/test_dotnetfile_features.py b/tests/test_dotnetfile_features.py new file mode 100644 index 000000000..28d63cdbe --- /dev/null +++ b/tests/test_dotnetfile_features.py @@ -0,0 +1,43 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +import pytest +import fixtures +from fixtures import * +from fixtures import parametrize + + +@parametrize( + "sample,scope,feature,expected", + fixtures.FEATURE_PRESENCE_TESTS_DOTNET, + indirect=["sample", "scope"], +) +def test_dotnetfile_features(sample, scope, feature, expected): + if scope.__name__ != "file": + pytest.xfail("dotnetfile only extracts file scope features") + + if isinstance(feature, capa.features.file.FunctionName): + pytest.xfail("dotnetfile doesn't extract function names") + + fixtures.do_test_feature_presence(fixtures.get_dotnetfile_extractor, sample, scope, feature, expected) + + +@parametrize( + "extractor,function,expected", + [ + ("b9f5b_dotnetfile_extractor", "is_dotnet_file", True), + ("b9f5b_dotnetfile_extractor", "is_mixed_mode", False), + ("mixed_mode_64_dotnetfile_extractor", "is_mixed_mode", True), + ("b9f5b_dotnetfile_extractor", "get_entry_point", 0x6000007), + ("b9f5b_dotnetfile_extractor", "get_runtime_version", (2, 5)), + ("b9f5b_dotnetfile_extractor", "get_meta_version_string", "v2.0.50727"), + ], +) +def test_dotnetfile_extractor(request, extractor, function, expected): + extractor_function = getattr(request.getfixturevalue(extractor), function) + assert extractor_function() == expected