Skip to content

Commit

Permalink
Allocate a single pool at startup for all the jitted code that can be…
Browse files Browse the repository at this point in the history
… created, instead of allocating multiple pools at runtime on demand

Testing with nxbx/nboxkrnl shows that this cuts the execution time in half
  • Loading branch information
ergo720 committed Jun 23, 2024
1 parent d9f9c37 commit 9d32958
Show file tree
Hide file tree
Showing 9 changed files with 50 additions and 164 deletions.
104 changes: 35 additions & 69 deletions lib86cpu/core/allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,92 +11,70 @@
#include "os_exceptions.h"


mem_manager::block_header_t *
mem_manager::create_pool()
mem_manager::mem_manager()
{
block_header_t *start = static_cast<block_header_t *>(os_alloc(POOL_SIZE));
block_header_t *addr = start;
for (unsigned i = 0; i < BLOCKS_PER_POOL - 1; i++) {
m_code_block_area = os_alloc(CODE_CACHE_MAX_SIZE * BLOCK_SIZE + BLOCK_SIZE); // 32768 code blocks + another one for aux functions
init_pool();
}

void
mem_manager::init_pool()
{
block_header_t *addr = static_cast<block_header_t *>(m_code_block_area);
m_head = addr;
for (unsigned i = 0; i < CODE_CACHE_MAX_SIZE - 1; ++i) {
addr->next = reinterpret_cast<block_header_t *>(reinterpret_cast<uint8_t *>(addr) + BLOCK_SIZE);
addr = addr->next;
}

addr->next = nullptr;
blocks.emplace_back(start);
return start;
}

void *
mem_manager::alloc()
{
if (head == nullptr) {
head = create_pool();
}

block_header_t *addr = head;
head = head->next;
assert(m_head);
block_header_t *addr = m_head;
m_head = m_head->next;
return addr;
}

void
mem_manager::free(void *ptr)
{
// this is necessary because we mark the code section memory as read-only after the code is written to it
os_protect(ptr, BLOCK_SIZE, get_mem_flags(MEM_READ | MEM_WRITE));
static_cast<block_header_t *>(ptr)->next = head;
head = static_cast<block_header_t *>(ptr);
static_cast<block_header_t *>(ptr)->next = m_head;
m_head = static_cast<block_header_t *>(ptr);
}

void
mem_manager::destroy_all_blocks()
{
#if defined(_WIN64) || defined(__linux__)
for (const auto &eh_pair : eh_frames) {
for (const auto &eh_pair : m_eh_frames) {
os_delete_exp_info(eh_pair.second);
}

eh_frames.clear();
m_eh_frames.clear();
#endif

#if defined(_WIN64)
for (auto &addr : blocks) {
os_free(addr);
}

for (auto &block : big_blocks) {
for (auto &block : m_big_blocks) {
os_free(block.first);
}
#elif defined(__linux__)
for (auto &addr : blocks) {
os_free(addr, POOL_SIZE);
}

for (auto &block : big_blocks) {
for (auto &block : m_big_blocks) {
os_free(block.first, block.second);
}
#endif

big_blocks.clear();
blocks.clear();
head = nullptr;
init_pool();
m_big_blocks.clear();
}

void
mem_manager::purge_all_blocks()
{
destroy_all_blocks();

#if defined(_WIN64)
for (auto &block : hidden_blocks) {
os_free(block.first);
}
#elif defined(__linux__)
for (auto &block : hidden_blocks) {
os_free(block.first, block.second);
}
#endif

hidden_blocks.clear();
os_free(m_code_block_area);
}

mem_block
Expand All @@ -110,29 +88,25 @@ mem_manager::allocate_sys_mem(size_t num_bytes)
size_t block_size = (num_bytes + PAGE_MASK) & ~PAGE_MASK;
void *addr = os_alloc(block_size);
mem_block block(addr, block_size);
big_blocks.emplace(addr, block_size);
m_big_blocks.emplace(addr, block_size);
return block;
}

return mem_block(alloc(), BLOCK_SIZE);
}

mem_block
mem_manager::allocate_non_pooled_sys_mem(size_t num_bytes)
mem_manager::get_non_pooled_sys_mem(size_t num_bytes)
{
if (num_bytes == 0) {
return mem_block();
}

size_t block_size = (num_bytes + PAGE_MASK) & ~PAGE_MASK;
void *addr = os_alloc(block_size);
mem_block block(addr, block_size);
hidden_blocks.emplace(addr, block_size);
return block;
return mem_block(reinterpret_cast<uint8_t *>(m_code_block_area) + CODE_CACHE_MAX_SIZE * BLOCK_SIZE, BLOCK_SIZE);
}

void
mem_manager::protect_sys_mem(const mem_block &block, unsigned flags)
mem_manager::flush_instr_cache(const mem_block &block)
{
void *addr = block.addr;
size_t size = block.size;
Expand All @@ -141,17 +115,13 @@ mem_manager::protect_sys_mem(const mem_block &block, unsigned flags)
return;
}

os_protect(addr, size, get_mem_flags(flags));

if (flags & MEM_EXEC) {
#if defined(_WIN64)
os_flush_instr_cache(addr, size);
os_flush_instr_cache(addr, size);
#elif defined(__linux__)
void *start = addr;
void *end = static_cast<char *>(addr) + size;
os_flush_instr_cache(start, end);
void *start = addr;
void *end = static_cast<char *>(addr) + size;
os_flush_instr_cache(start, end);
#endif
}
}

void
Expand All @@ -161,25 +131,21 @@ mem_manager::release_sys_mem(void *addr)
return;
}

if (auto it = hidden_blocks.find(addr); it != hidden_blocks.end()) {
return;
}

#if defined(_WIN64) || defined(__linux__)
void *main_addr = reinterpret_cast<uint8_t *>(addr) + 16;
if (auto it = eh_frames.find(main_addr); it != eh_frames.end()) {
if (auto it = m_eh_frames.find(main_addr); it != m_eh_frames.end()) {
os_delete_exp_info(it->second);
eh_frames.erase(main_addr);
m_eh_frames.erase(main_addr);
}
#endif

if (auto it = big_blocks.find(addr); it != big_blocks.end()) {
if (auto it = m_big_blocks.find(addr); it != m_big_blocks.end()) {
#if defined(_WIN64)
os_free(it->first);
#elif defined(__linux__)
os_free(it->first, it->second);
#endif
big_blocks.erase(addr);
m_big_blocks.erase(addr);
return;
}

Expand Down
17 changes: 8 additions & 9 deletions lib86cpu/core/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,26 +28,25 @@ struct mem_block {
class mem_manager {
public:
mem_block allocate_sys_mem(size_t num_bytes);
mem_block allocate_non_pooled_sys_mem(size_t num_bytes);
void protect_sys_mem(const mem_block &block, unsigned flags);
mem_block get_non_pooled_sys_mem(size_t num_bytes);
void flush_instr_cache(const mem_block &block);
void release_sys_mem(void *addr);
void destroy_all_blocks();
~mem_manager() { purge_all_blocks(); }
mem_manager();

#if defined(_WIN64) || defined(__linux__)
std::map<void *, void *> eh_frames;
std::map<void *, void *> m_eh_frames;
#endif

private:
struct block_header_t {
block_header_t *next;
};
block_header_t *head = nullptr;
std::vector<void *> blocks;
std::map<void *, size_t> big_blocks;
std::map<void *, size_t> hidden_blocks;

block_header_t *create_pool();
block_header_t *m_head;
std::map<void *, size_t> m_big_blocks;
void *m_code_block_area;
void init_pool();
void *alloc();
void free(void *ptr);
void purge_all_blocks();
Expand Down
6 changes: 3 additions & 3 deletions lib86cpu/core/emitter/x64/jit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ lc86_jit::gen_code_block()
#endif

// This code block is complete, so protect and flush the instruction cache now
m_mem.protect_sys_mem(block, MEM_READ | MEM_EXEC);
m_mem.flush_instr_cache(block);

tc->ptr_code = reinterpret_cast<entry_t>(main_offset);
tc->jmp_offset[0] = tc->jmp_offset[1] = tc->jmp_offset[2] = reinterpret_cast<entry_t>(exit_offset);
Expand Down Expand Up @@ -600,7 +600,7 @@ lc86_jit::gen_aux_funcs()
throw lc86_exp_abort("The generated code has a zero size", lc86_status::internal_error);
}

auto block = m_mem.allocate_non_pooled_sys_mem(estimated_code_size);
auto block = m_mem.get_non_pooled_sys_mem(estimated_code_size);
if (auto err = m_code.relocateToBase(reinterpret_cast<uintptr_t>(block.addr))) {
std::string err_str("Asmjit failed at relocateToBase() with the error ");
err_str += DebugUtils::errorAsString(err);
Expand All @@ -616,7 +616,7 @@ lc86_jit::gen_aux_funcs()
assert(offset + buff_size <= estimated_code_size);
std::memcpy(static_cast<uint8_t *>(block.addr) + offset, section->data(), buff_size);

m_mem.protect_sys_mem(block, MEM_READ | MEM_EXEC);
m_mem.flush_instr_cache(block);

m_cpu->read_int_fn = reinterpret_cast<read_int_t>(static_cast<uint8_t *>(block.addr) + offset);
m_cpu->raise_int_fn = reinterpret_cast<raise_int_t>(static_cast<uint8_t *>(block.addr) + offset + raise_int_off_aligned16);
Expand Down
2 changes: 1 addition & 1 deletion lib86cpu/core/linux/os_exceptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ lc86_jit::gen_exception_info(uint8_t *code_ptr, size_t code_size)
size_t aligned_code_size = (code_size + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
cie_t *cie = reinterpret_cast<cie_t *>(code_ptr + aligned_code_size);
write_eh_frame(cie, code_ptr, code_size);
m_mem.eh_frames.emplace(code_ptr, cie);
m_mem.m_eh_frames.emplace(code_ptr, cie);
}

void
Expand Down
39 changes: 1 addition & 38 deletions lib86cpu/core/linux/os_mem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,40 +10,10 @@
#include "os_mem.h"


// Translates a combination of lib86cpu MEM_* permission flags into the matching
// PROT_* protection bits; the result is what callers pass to os_protect(), which
// forwards it to mprotect().
// Only the exact combinations listed in the switch are accepted; anything else
// reaches the default case and calls LIB86CPU_ABORT().
int
get_mem_flags(unsigned flags)
{
switch (flags)
{
case MEM_READ:
return PROT_READ;

case MEM_WRITE:
return PROT_WRITE;

case MEM_READ | MEM_WRITE:
return PROT_READ | PROT_WRITE;

case MEM_READ | MEM_EXEC:
return PROT_READ | PROT_EXEC;

case MEM_READ | MEM_WRITE | MEM_EXEC:
return PROT_READ | PROT_WRITE | PROT_EXEC;

// an execute-only request is also given read access, same as MEM_READ | MEM_EXEC
case MEM_EXEC:
return PROT_READ | PROT_EXEC;

// unsupported flag combination
default:
LIB86CPU_ABORT();
}

// reached only if LIB86CPU_ABORT() returns -- presumably it does not; TODO confirm
return PROT_NONE;
}

void *
os_alloc(size_t size)
{
auto addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
auto addr = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
if (addr == MAP_FAILED) {
throw lc86_exp_abort("Failed to allocate memory for the generated code", lc86_status::no_memory);
}
Expand All @@ -57,13 +27,6 @@ os_free(void *addr, size_t size)
assert(!ret);
}

// Applies the protection bits in prot to the size bytes starting at addr by calling mprotect().
// A failing mprotect() is only caught by the assert below, so nothing is reported to the
// caller when assertions are compiled out.
void
os_protect(void *addr, size_t size, int prot)
{
// [[maybe_unused]] avoids an unused-variable warning when assert() expands to nothing
[[maybe_unused]] auto ret = mprotect(addr, size, prot);
assert(!ret);
}

void
os_flush_instr_cache(void *start, void *end)
{
Expand Down
2 changes: 0 additions & 2 deletions lib86cpu/core/linux/os_mem.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
#pragma once


int get_mem_flags(unsigned flags);
void *os_alloc(size_t size);
void os_free(void *addr, size_t size);
void os_protect(void *addr, size_t size, int prot);
void os_flush_instr_cache(void *addr, void *end);
2 changes: 1 addition & 1 deletion lib86cpu/core/windows/os_exceptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ lc86_jit::gen_exception_info(uint8_t *code_ptr, size_t code_size)
table->BeginAddress = 0;
table->EndAddress = code_size;
table->UnwindInfoAddress = aligned_code_size;
m_mem.eh_frames.emplace(code_ptr, table);
m_mem.m_eh_frames.emplace(code_ptr, table);

[[maybe_unused]] auto ret = RtlAddFunctionTable(table, 1, reinterpret_cast<DWORD64>(code_ptr));
assert(ret);
Expand Down
Loading

0 comments on commit 9d32958

Please sign in to comment.