From 0094c0966476134f953be9559e6f82e4593d482b Mon Sep 17 00:00:00 2001 From: BitsOfBinary <48534004+BitsOfBinary@users.noreply.github.com> Date: Fri, 7 Apr 2023 22:24:09 +0100 Subject: [PATCH] Create index_of_coincidence functions --- docs/modules/math.rst | 16 ++++++++ libyara/modules/math/math.c | 74 +++++++++++++++++++++++++++++++++++++ tests/test-math.c | 18 +++++++++ 3 files changed, 108 insertions(+) diff --git a/docs/modules/math.rst b/docs/modules/math.rst index b2baf4fb35..a5ff5f2d20 100644 --- a/docs/modules/math.rst +++ b/docs/modules/math.rst @@ -184,3 +184,19 @@ file and create signatures based on those results. *Example: math.to_string(32, 16) == "20"* *Example: math.to_string(-1, 16) == "ffffffffffffffff"* + +.. c:function:: index_of_coincidence(offset, size) + + Returns the index of coincidence for *size* bytes starting at *offset*. When scanning a + running process the *offset* argument should be a virtual address within + the process address space. The returned value is a float. + + Note: English text has an index of coincidence around 0.068. + + *Example: math.index_of_coincidence(0, filesize) >= 0.05* + +.. c:function:: index_of_coincidence(string) + + Returns the index of coincidence for the given string. 
+
+    *Example: math.index_of_coincidence("dummy") >= 0.05*
diff --git a/libyara/modules/math/math.c b/libyara/modules/math/math.c
index 98193c4a63..723539070c 100644
--- a/libyara/modules/math/math.c
+++ b/libyara/modules/math/math.c
@@ -758,6 +758,92 @@ define_function(to_string_base)
   return_string(&str);
 }
 
+#define IOC_ALPHABET_SIZE 26
+
+// Computes the index of coincidence from per-letter occurrence counts, where
+// counts[0] is the number of 'a'/'A' seen and counts[25] the number of 'z'/'Z':
+//
+//   IoC = sum(f * (f - 1)) / (N * (N - 1)),  with N = sum(f)
+//
+// The arithmetic is done in double because the integer products wrap around
+// for inputs with more than 65536 letters when size_t is 32 bits wide. Returns
+// 0.0 when fewer than two letters were counted, where the formula is undefined.
+static double ioc_from_letter_counts(const uint64_t counts[IOC_ALPHABET_SIZE])
+{
+  size_t i;
+  double total = 0.0;
+  double coincidences = 0.0;
+
+  for (i = 0; i < IOC_ALPHABET_SIZE; i++)
+  {
+    double f = (double) counts[i];
+
+    total += f;
+    coincidences += f * (f - 1.0);
+  }
+
+  if (total < 2.0)
+    return 0.0;
+
+  return coincidences / (total * (total - 1.0));
+}
+
+// index_of_coincidence(offset, size): index of coincidence of the ASCII letters
+// (case-insensitive) found in the scanned data range. Bytes that are not ASCII
+// letters are ignored. Returns undefined when get_distribution() yields NULL.
+define_function(data_index_of_coincidence)
+{
+  size_t i;
+  double ioc;
+  uint64_t counts[IOC_ALPHABET_SIZE];
+
+  int64_t offset = integer_argument(1);
+  int64_t length = integer_argument(2);
+
+  YR_SCAN_CONTEXT* context = yr_scan_context();
+
+  uint32_t* data = get_distribution(offset, length, context);
+
+  if (data == NULL)
+    return_float(YR_UNDEFINED);
+
+  // Fold upper and lower case together; widen before adding so the sum of two
+  // 32-bit counters cannot wrap.
+  for (i = 0; i < IOC_ALPHABET_SIZE; i++)
+    counts[i] = (uint64_t) data['a' + i] + (uint64_t) data['A' + i];
+
+  yr_free(data);
+
+  ioc = ioc_from_letter_counts(counts);
+
+  return_float(ioc);
+}
+
+// index_of_coincidence(string): index of coincidence of the ASCII letters
+// (case-insensitive) in the given string. Other characters are ignored.
+define_function(string_index_of_coincidence)
+{
+  size_t i;
+  double ioc;
+  uint64_t counts[IOC_ALPHABET_SIZE] = {0};
+
+  SIZED_STRING* s = sized_string_argument(1);
+
+  for (i = 0; i < s->length; i++)
+  {
+    uint8_t c = (uint8_t) s->c_string[i];
+
+    if (c >= 'A' && c <= 'Z')
+      counts[c - 'A']++;
+    else if (c >= 'a' && c <= 'z')
+      counts[c - 'a']++;
+  }
+
+  ioc = ioc_from_letter_counts(counts);
+
+  return_float(ioc);
+}
+
 begin_declarations
   declare_float("MEAN_BYTES");
   declare_function("in_range", "fff", "i", in_range);
@@ -783,6 +869,8 @@ begin_declarations
   declare_function("mode", "", "i", mode_global);
   declare_function("to_string", "i", "s", to_string);
   declare_function("to_string", "ii", "s", to_string_base);
+  declare_function("index_of_coincidence", "ii", "f", data_index_of_coincidence);
+  declare_function("index_of_coincidence", "s", "f",
string_index_of_coincidence); end_declarations int module_initialize(YR_MODULE* module) diff --git a/tests/test-math.c b/tests/test-math.c index 89dcabcb8f..cd958283dc 100644 --- a/tests/test-math.c +++ b/tests/test-math.c @@ -283,6 +283,24 @@ int main(int argc, char** argv) not defined(math.to_string(32, 9)) \ }", NULL); + + assert_true_rule_blob( + "import \"math\" \ + rule test { \ + condition: \ + math.index_of_coincidence(3, 43) > 0.021848 and \ + math.index_of_coincidence(3, 43) < 0.021850 \ + }", + "AAAThe quick brown fox jumps over the lazy dogAAA"); + + assert_true_rule( + "import \"math\" \ + rule test { \ + condition: \ + math.index_of_coincidence(\"The quick brown fox jumps over the lazy dog\") > 0.021848 and \ + math.index_of_coincidence(\"The quick brown fox jumps over the lazy dog\") < 0.021850 \ + }", + NULL); yr_finalize();