diff --git a/.ci/fuzz.sh b/.ci/fuzz.sh new file mode 100755 index 000000000..c59786d4b --- /dev/null +++ b/.ci/fuzz.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +set -e -u -o pipefail + +# check the existence of the clang toolchain (set -e alone would exit silently) +command -v clang &> /dev/null || { echo "error: clang not found in PATH" >&2; exit 1; } + +# compile +make clean +clang \ + -g -O1 \ + -fsanitize=fuzzer,address,undefined \ + -include src/common.h \ + -D RV32_FEATURE_EXT_F=0 \ + -D RV32_FEATURE_SDL=0 \ + -D DEFAULT_STACK_ADDR=0xFFFFE000 \ + -D DEFAULT_ARGS_ADDR=0xFFFFF000 \ + -D FUZZER \ + -o build/rv32emu_fuzz \ + src/fuzz-target.cc \ + src/map.c \ + src/utils.c \ + src/decode.c \ + src/io.c \ + src/syscall.c \ + src/emulate.c \ + src/riscv.c \ + src/elf.c \ + src/cache.c \ + src/mpool.c \ + src/main.c + +# populate the initial CORPUS for the fuzzer using valid elf +mkdir -p build/fuzz/CORPUS_DIR +cp build/*.elf build/fuzz/CORPUS_DIR + +# execute +./build/rv32emu_fuzz build/fuzz/CORPUS_DIR -timeout=3 -max_total_time=1200 diff --git a/.codacy.yml b/.codacy.yml new file mode 100644 index 000000000..5e91213f6 --- /dev/null +++ b/.codacy.yml @@ -0,0 +1,5 @@ +exclude_paths: + - ".github/**" + - "build/**" + - "docs/**" + - "tests/**" \ No newline at end of file diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 000000000..f8d1e4b8b --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,17 @@ +name: Fuzz Test + +on: [push, pull_request] + +jobs: + rv32emu: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - name: install-dependencies + run: | + sudo apt-get update + sudo apt-get install -y clang + shell: bash + - name: Run fuzzer + run: .ci/fuzz.sh + shell: bash diff --git a/.gitignore b/.gitignore index 212a53248..069bf9a55 100644 --- a/.gitignore +++ b/.gitignore @@ -6,18 +6,28 @@ build/id1/ build/gfx.wad build/doomrc toolchain/ +.vscode # built objects build/.config build/rv32emu +build/rv32emu_fuzz build/arch-test build/mini-gdbstub build/softfloat build/cache/ build/map/ build/path/ 
+build/fuzz/ *.o *.o.d tests/**/*.elf tests/arch-test-target/config.ini __pycache__/ + +# fuzzer +crash-* +leak-* +timeout-* +fuzz.elf +*.log diff --git a/Dockerfile b/Dockerfile index 8f1676e50..ce3efc0a4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ LABEL maintainer="henrybear327@gmail.com" # Install packages required for the emulator to compile and execute correctly RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y \ - libsdl2-dev libsdl2-mixer-dev python3-pip git + libsdl2-dev libsdl2-mixer-dev python3-pip git clang RUN python3 -m pip install git+https://github.com/riscv/riscof diff --git a/Makefile b/Makefile index dc0ea912b..6b59a81e7 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,7 @@ include mk/toolchain.mk OUT ?= build BIN := $(OUT)/rv32emu +FUZZ_BIN := $(OUT)/rv32emu_fuzz CONFIG_FILE := $(OUT)/.config -include $(CONFIG_FILE) @@ -214,7 +215,7 @@ endif endif clean: - $(RM) $(BIN) $(OBJS) $(HIST_BIN) $(HIST_OBJS) $(deps) $(CACHE_OUT) + $(RM) $(BIN) $(FUZZ_BIN) $(OBJS) $(HIST_BIN) $(HIST_OBJS) $(deps) $(CACHE_OUT) distclean: clean -$(RM) $(DOOM_DATA) $(QUAKE_DATA) $(RM) -r $(OUT)/id1 diff --git a/docs/fuzzer.md b/docs/fuzzer.md new file mode 100644 index 000000000..823fc2367 --- /dev/null +++ b/docs/fuzzer.md @@ -0,0 +1,23 @@ +# Fuzzing + +We use [libFuzzer](https://llvm.org/docs/LibFuzzer.html), LLVM's coverage-guided fuzzing engine. + +The fuzz target does not use structure-aware input generation. Instead, we rely +on libFuzzer's built-in mutations of the raw input bytes. + +The initial seeds are all the ELF files in the `build` directory. + +## Execution + +The script compiles the emulator and links it with libFuzzer, prepares the seed corpus, and runs the fuzzer. + +- `.ci/fuzz.sh` + +## References + +> Inspired by the fuzzer from [libriscv](https://github.com/fwsGonzo/libriscv/tree/master/fuzz). 
+ +- [LLVM official LibFuzzer documentation](https://llvm.org/docs/LibFuzzer.html#corpus) +- [Chromium - Getting started with LibFuzzer](https://chromium.googlesource.com/chromium/src/+/refs/heads/main/testing/libfuzzer/getting_started_with_libfuzzer.md) +- [Fuzzing tutorial](https://github.com/google/fuzzing/blob/master/tutorial/libFuzzerTutorial.md) +- [UBSAN](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html) diff --git a/src/elf.c b/src/elf.c index 0d579c5eb..d077af3b8 100644 --- a/src/elf.c +++ b/src/elf.c @@ -97,7 +97,7 @@ void elf_delete(elf_t *e) /* release a loaded ELF file */ static void release(elf_t *e) { -#if !defined(USE_MMAP) +#if !defined(USE_MMAP) && !defined(FUZZER) free(e->raw_data); #endif @@ -291,18 +291,36 @@ bool elf_load(elf_t *e, riscv_t *rv, memory_t *mem) return true; } +#ifdef FUZZER +bool elf_open(elf_t *e, uint8_t *data, size_t len) +#else bool elf_open(elf_t *e, const char *input) +#endif { /* free previous memory */ if (e->raw_data) release(e); - + +#ifndef FUZZER char *path = sanitize_path(input); if (!path) { return false; } +#endif -#if defined(USE_MMAP) +#if defined(FUZZER) + if (!data || !len) { + /* if the fuzzer sent in an empty buffer, we don't proceed further */ + return false; + } + + /* get file size */ + e->raw_size = len; + + /* borrow the caller's buffer without copying it; release() skips free() + * in FUZZER builds, so the buffer must never be freed here either */ + e->raw_data = (uint8_t *) data; +#elif defined(USE_MMAP) int fd = open(path, O_RDONLY); if (fd < 0) { free(path); @@ -324,7 +342,6 @@ bool elf_open(elf_t *e, const char *input) return false; } close(fd); - #else /* fallback to standard I/O text stream */ FILE *f = fopen(path, "rb"); if (!f) { @@ -357,16 +374,28 @@ bool elf_open(elf_t *e, const char *input) #endif /* USE_MMAP */ /* point to the header */ + /* reject inputs too short to hold an ELF header before it is read */ + if (sizeof(struct Elf32_Ehdr) > e->raw_size) { + release(e); +#ifndef FUZZER + free(path); +#endif + return false; + } e->hdr = (const struct Elf32_Ehdr *) e->raw_data; /* check it is a valid ELF file */ if (!is_valid(e)) { release(e); +#ifndef FUZZER free(path); +#endif return 
false; } +#ifndef FUZZER free(path); +#endif return true; } diff --git a/src/elf.h b/src/elf.h index d3de2617e..fa949d79a 100644 --- a/src/elf.h +++ b/src/elf.h @@ -133,7 +133,11 @@ elf_t *elf_new(); void elf_delete(elf_t *e); /* Open an ELF file from specified path */ +#ifdef FUZZER +bool elf_open(elf_t *e, uint8_t *data, size_t len); +#else bool elf_open(elf_t *e, const char *path); +#endif /* Find a symbol entry */ const struct Elf32_Sym *elf_get_symbol(elf_t *e, const char *name); diff --git a/src/fuzz-target.cc b/src/fuzz-target.cc new file mode 100644 index 000000000..7cbfcd73d --- /dev/null +++ b/src/fuzz-target.cc @@ -0,0 +1,76 @@ +#include +#include +#include +#include "riscv.h" + +const int max_cycles = 5000; +const char *fake_rv32emu_name = "./fake_rv32emu"; +const char *fake_elf_name = "fake_elf"; + +/* In order to be able to inspect a coredump we want to crash on every ASAN + * error. + */ +extern "C" void __asan_on_error() +{ + abort(); +} +extern "C" void __msan_on_error() +{ + abort(); +} + +static void fuzz_elf_loader(const uint8_t *data, size_t len) +{ + int argc = 1 + 2 * 3 + 1; + char **args = (char **) malloc(sizeof(char *) * argc); + + char *arg0 = (char *) malloc(strlen(fake_rv32emu_name) + 1); + strncpy(arg0, fake_rv32emu_name, strlen(fake_rv32emu_name) + 1); + args[0] = arg0; + + char *arg1 = (char *) malloc(3); + strncpy(arg1, "-s", 3); + args[1] = arg1; + args[2] = (char *) data; + + char *arg3 = (char *) malloc(3); + strncpy(arg3, "-l", 3); + args[3] = arg3; + char *len_str = (char *) malloc(20 + 1); /* LLONG_MIN in base 10 has 20 chars */ + sprintf(len_str, "%zu", len); + args[4] = len_str; + + char *arg5 = (char *) malloc(3); + strncpy(arg5, "-k", 3); + args[5] = arg5; + char *max_cycles_str = + (char *) malloc(11 + 1); /* INT_MIN in base 10 has 11 chars */ + sprintf(max_cycles_str, "%d", max_cycles); + args[6] = max_cycles_str; + + char *arg7 = (char *) malloc(strlen(fake_elf_name) + 1); + strncpy(arg7, fake_elf_name,
 strlen(fake_elf_name) + 1); + args[7] = arg7; + + int ret = rv_init_and_execute_elf(argc, args); + if (ret == 0) { + fprintf(stderr, "Executed successfully\n"); + } else { + fprintf(stderr, "Executed with failure\n"); + } + + free(arg0); + free(arg1); + free(arg3); + free(len_str); + free(arg5); + free(max_cycles_str); + free(arg7); + free(args); +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t len) +{ + fuzz_elf_loader(data, len); + return 0; /* values other than 0 and -1 are reserved by libFuzzer */ +} diff --git a/src/main.c b/src/main.c index 7f35034c4..91d005bed 100644 --- a/src/main.c +++ b/src/main.c @@ -34,6 +34,18 @@ static bool opt_quiet_outputs = false; /* target executable */ static const char *opt_prog_name = "a.out"; +#ifdef FUZZER +/* ELF input as string (for fuzzing) */ +static bool opt_elf_string = false; +static uint8_t *elf_string = NULL; + +static bool opt_elf_strlen = false; +static int elf_strlen = 0; + +static bool opt_max_execution_cycles = false; +static int max_execution_cycles = 0; +#endif + /* target argc and argv */ static int prog_argc; static char **prog_args; @@ -83,6 +95,13 @@ static void run_and_trace(riscv_t *rv, elf_t *elf) } } +#ifdef FUZZER +static void run(riscv_t *rv, int max_cycles) +{ + /* step instructions */ + rv_step(rv, max_cycles); +} +#else static void run(riscv_t *rv) { const uint32_t cycles_per_step = 100; @@ -91,6 +110,7 @@ static void run(riscv_t *rv) rv_step(rv, cycles_per_step); } } +#endif static void print_usage(const char *filename) { @@ -117,6 +137,40 @@ static bool parse_args(int argc, char **args) int opt; int emu_argc = 0; +#ifdef FUZZER + /* + * getopt() won't work with binary data as control characters will screw the + * string parsing + */ + int idx = 1; + while (idx + 1 < argc) { + emu_argc++; + char opt = args[idx][1]; + char *optarg = args[idx + 1]; + + switch (opt) { + case 's': // binary string + opt_elf_string = true; + elf_string = (uint8_t *) optarg; + emu_argc++; + break; + case 'l': // binary string len + opt_elf_strlen = true; + elf_strlen 
= atoi(optarg); + emu_argc++; + break; + case 'k': // max execution cycle + opt_max_execution_cycles = true; + max_execution_cycles = atoi(optarg); + emu_argc++; + break; + default: + return false; + } + + idx += 2; + } +#else while ((opt = getopt(argc, args, optstr)) != -1) { emu_argc++; @@ -151,6 +205,7 @@ static bool parse_args(int argc, char **args) return false; } } +#endif prog_argc = argc - emu_argc - 1; /* optind points to the first non-option string, so it should indicate the @@ -187,7 +242,7 @@ static void dump_test_signature(elf_t *elf) fclose(f); } -int main(int argc, char **args) +int rv_init_and_execute_elf(int argc, char **args) { if (argc == 1 || !parse_args(argc, args)) { print_usage(args[0]); @@ -196,8 +251,13 @@ int main(int argc, char **args) /* open the ELF file from the file system */ elf_t *elf = elf_new(); +#ifdef FUZZER + if (!elf_open(elf, (uint8_t *) elf_string, elf_strlen)) { +#else if (!elf_open(elf, opt_prog_name)) { +#endif fprintf(stderr, "Unable to open ELF file '%s'\n", opt_prog_name); + elf_delete(elf); return 1; } @@ -251,7 +311,11 @@ int main(int argc, char **args) } #endif else { +#ifdef FUZZER + run(rv, max_execution_cycles); +#else run(rv); +#endif } /* dump registers as JSON */ @@ -269,3 +333,10 @@ int main(int argc, char **args) return 0; } + +#ifndef FUZZER +int main(int argc, char **args) +{ + return rv_init_and_execute_elf(argc, args); +} +#endif diff --git a/src/riscv.h b/src/riscv.h index 1d5e45a3c..c8f2add54 100644 --- a/src/riscv.h +++ b/src/riscv.h @@ -197,6 +197,8 @@ bool rv_has_halted(riscv_t *rv); /* return the flag of outputting exit code */ bool rv_enables_to_output_exit_code(riscv_t *rv); +/* parse the arguments, open the ELF and run it; shared by main() and fuzzer */ +int rv_init_and_execute_elf(int argc, char **args); #ifdef __cplusplus }; #endif