From fb43a53db0f664dc4469e9c0298d7f8c439e1001 Mon Sep 17 00:00:00 2001 From: Ara Adkins Date: Mon, 6 Jan 2025 16:36:29 -0700 Subject: [PATCH] Comply with the new memory model Unfortunately our memory model has had to change in order to support the kinds of type punning that we see in the output of `rustc`. What this primarily means is that we have changed to a byte-addressable model, reducing overall memory usage on the Cairo VM at the cost of more computation steps. Each machine word fits 28 bytes, along with 28 bits worth of flags for future use by the memory subsystem. These flags are not treated as part of contiguous memory. The primary changes are: - A swap to calculate both `size_of` and `align_of` in bytes in the LLVM type system used by the compiler. This relies on the specified data layout for accurate computation. - Alterations to the semantics of the memory-related polyfills and opcodes to compute offsets and sizes properly using the new machinery for doing so. This commit also includes documentation of the new memory model. --- crates/compiler/src/llvm/typesystem.rs | 623 +++++++++++++++++++++++-- crates/compiler/src/messages.rs | 8 + crates/compiler/src/obj_gen/mod.rs | 73 +-- crates/compiler/src/polyfill.rs | 20 +- docs/Memory Model.md | 171 +++++++ 5 files changed, 809 insertions(+), 86 deletions(-) create mode 100644 docs/Memory Model.md diff --git a/crates/compiler/src/llvm/typesystem.rs b/crates/compiler/src/llvm/typesystem.rs index ab51711..0cc7fde 100644 --- a/crates/compiler/src/llvm/typesystem.rs +++ b/crates/compiler/src/llvm/typesystem.rs @@ -1,7 +1,10 @@ //! The compiler's internal representation of LLVM types, without being tied to //! the context as the [`BasicTypeEnum`] is. -use std::fmt::{Display, Formatter}; +use std::{ + cmp, + fmt::{Display, Formatter}, +}; use hieratika_errors::compile::{llvm, llvm::Error}; use inkwell::{ @@ -23,6 +26,12 @@ use inkwell::{ }; use itertools::Itertools; +use crate::{ + constant::BYTE_SIZE_BITS, + llvm::data_layout::DataLayout, + messages::STRUCT_TYPE_WITH_NO_MEMBERS, +}; + /// A representation of the LLVM [types](https://llvm.org/docs/LangRef.html#type-system) /// for use within the compiler. /// @@ -215,41 +224,109 @@ impl LLVMType { self.as_function().expect("`self` value was not Self::Function") } - /// Gets the size of `self` in felts. + /// Gets the size of `self` in bits under the provided data layout. #[must_use] - pub fn size_of(&self) -> usize { - use LLVMType::{ - Array, - Function, - Metadata, - Structure, - bool, - f16, - f32, - f64, - i8, - i16, - i24, - i32, - i64, - i128, - ptr, - void, - }; + #[expect(clippy::match_same_arms)] // The similarities are incidental. 
+ pub fn size_of(&self, data_layout: &DataLayout) -> usize { match self { - bool | i8 | i16 | i24 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => 1, - void | Metadata => 0, - Array(array_ty) => array_ty.size_of(), - Structure(struct_ty) => struct_ty.size_of(), - Function(func_ty) => func_ty.size_of(), + LLVMType::bool => 1, + LLVMType::i8 => 8, + LLVMType::i16 => 16, + LLVMType::i24 => 24, + LLVMType::i32 => 32, + LLVMType::i64 => 64, + LLVMType::i128 => 128, + LLVMType::f16 => 16, + LLVMType::f32 => 32, + LLVMType::f64 => 64, + LLVMType::ptr => data_layout.default_pointer_layout().size, + LLVMType::void => 0, + LLVMType::Array(array_type) => array_type.size_of(data_layout), + LLVMType::Structure(struct_type) => struct_type.size_of(data_layout), + LLVMType::Function(function_type) => function_type.size_of(data_layout), + LLVMType::Metadata => 0, } } - /// Gets the ABI alignment of `self` in felts. + /// Gets the maximum number of bits that may be overwritten by storing + /// `self`. + /// + /// This is always a multiple of eight. #[must_use] - pub fn align_of(&self) -> usize { - // At the moment we align everything to the nearest felt boundary. - 1 + pub fn store_size_of(&self, data_layout: &DataLayout) -> usize { + let min_size_bits = self.size_of(data_layout); + + ((min_size_bits + 7) / 8) * 8 + } + + /// Returns the offset in bits between successive objects of the + /// specified type, including the alignment padding. + /// + /// This is always a multiple of eight. + #[must_use] + pub fn alloc_size_of(&self, data_layout: &DataLayout) -> usize { + // https://llvm.org/doxygen/DataLayout_8h_source.html#l00457 + align_to( + self.store_size_of(data_layout), + self.align_of(AlignType::ABI, data_layout), + ) + } + + /// Gets the alignment of `self` in bits under the provided data layout. + #[must_use] + #[expect(clippy::match_same_arms)] // The similarities are incidental. 
+ pub fn align_of(&self, align_type: AlignType, data_layout: &DataLayout) -> usize { + match self { + LLVMType::bool => match align_type { + AlignType::ABI => data_layout.expect_int_spec_of(1).abi_alignment, + AlignType::Preferred => data_layout.expect_int_spec_of(1).preferred_alignment, + }, + LLVMType::i8 => match align_type { + AlignType::ABI => data_layout.expect_int_spec_of(8).abi_alignment, + AlignType::Preferred => data_layout.expect_int_spec_of(8).preferred_alignment, + }, + LLVMType::i16 => match align_type { + AlignType::ABI => data_layout.expect_int_spec_of(16).abi_alignment, + AlignType::Preferred => data_layout.expect_int_spec_of(16).preferred_alignment, + }, + LLVMType::i24 => match align_type { + AlignType::ABI => data_layout.expect_int_spec_of(24).abi_alignment, + AlignType::Preferred => data_layout.expect_int_spec_of(24).preferred_alignment, + }, + LLVMType::i32 => match align_type { + AlignType::ABI => data_layout.expect_int_spec_of(32).abi_alignment, + AlignType::Preferred => data_layout.expect_int_spec_of(32).preferred_alignment, + }, + LLVMType::i64 => match align_type { + AlignType::ABI => data_layout.expect_int_spec_of(64).abi_alignment, + AlignType::Preferred => data_layout.expect_int_spec_of(64).preferred_alignment, + }, + LLVMType::i128 => match align_type { + AlignType::ABI => data_layout.expect_int_spec_of(128).abi_alignment, + AlignType::Preferred => data_layout.expect_int_spec_of(128).preferred_alignment, + }, + LLVMType::f16 => match align_type { + AlignType::ABI => data_layout.expect_float_spec_of(16).abi_alignment, + AlignType::Preferred => data_layout.expect_float_spec_of(16).preferred_alignment, + }, + LLVMType::f32 => match align_type { + AlignType::ABI => data_layout.expect_float_spec_of(32).abi_alignment, + AlignType::Preferred => data_layout.expect_float_spec_of(32).preferred_alignment, + }, + LLVMType::f64 => match align_type { + AlignType::ABI => data_layout.expect_float_spec_of(64).abi_alignment, + AlignType::Preferred => data_layout.expect_float_spec_of(64).preferred_alignment, + }, + LLVMType::ptr => match align_type { + AlignType::ABI => data_layout.default_pointer_layout().abi_alignment, + AlignType::Preferred => data_layout.default_pointer_layout().preferred_alignment, + }, + LLVMType::void => 0, + LLVMType::Array(array_type) => array_type.align_of(align_type, data_layout), + LLVMType::Structure(struct_type) => struct_type.align_of(align_type, data_layout), + LLVMType::Function(function_type) => function_type.align_of(align_type, data_layout), + LLVMType::Metadata => 0, + } } } @@ -593,16 +670,31 @@ impl LLVMArray { Self { count, typ } } - /// Gets the size of `self` in felts. + /// Gets the size of `self` in bits under the provided data layout. #[must_use] - pub fn size_of(&self) -> usize { - self.typ.size_of() * self.count + pub fn size_of(&self, data_layout: &DataLayout) -> usize { + // The size is the allocation size of the element type multiplied by the number + // of elements. https://llvm.org/doxygen/DataLayout_8h_source.html#l00625 + self.typ.alloc_size_of(data_layout) * self.count } - /// Gets the ABI alignment of `self` in felts. + /// Gets the alignment of `self` in bits under the provided data layout. 
     #[must_use]
-    pub fn align_of(&self) -> usize {
-        1
+    pub fn align_of(&self, align_type: AlignType, data_layout: &DataLayout) -> usize {
+        // https://llvm.org/doxygen/DataLayout_8cpp_source.html#l00780
+        self.typ.align_of(align_type, data_layout)
+    }
+
+    /// Calculates the offset in bits of the element at the provided `index` in
+    /// the array `self` under the provided data layout.
+    ///
+    /// Note that this method will happily calculate element offsets that are
+    /// outside the bounds of the array, as this is not an uncommon usage for
+    /// LLVM Array types.
+    #[must_use]
+    pub fn offset_of_element_at(&self, index: usize, data_layout: &DataLayout) -> usize {
+        let element_size = self.typ.alloc_size_of(data_layout);
+        element_size * index
     }
 }
 
@@ -680,16 +772,132 @@ impl LLVMStruct {
         Self { packed, elements }
     }
 
-    /// Gets the size of `self` in felts.
+    /// Creates a new packed LLVM struct from the provided `elements` types.
+    #[must_use]
+    pub fn packed(elements: &[LLVMType]) -> Self {
+        Self::new(true, elements)
+    }
+
+    /// Creates a new unpacked LLVM struct from the provided `elements` types.
+    #[must_use]
+    pub fn unpacked(elements: &[LLVMType]) -> Self {
+        Self::new(false, elements)
+    }
+
+    /// Gets the size of `self` in bits under the provided data layout.
+    ///
+    /// # Panics
+    ///
+    /// - If `self` is a struct type without any members.
+    #[must_use]
+    pub fn size_of(&self, data_layout: &DataLayout) -> usize {
+        let mut struct_size = 0;
+
+        assert!(!self.elements.is_empty(), "{}", STRUCT_TYPE_WITH_NO_MEMBERS);
+
+        // Adapted from https://llvm.org/doxygen/DataLayout_8cpp_source.html#l00048
+        for element in &self.elements {
+            // We start by getting the alignment of the element.
+            let element_align = if self.packed {
+                8
+            } else {
+                element.align_of(AlignType::ABI, data_layout)
+            };
+
+            // To add the next element the size of the struct needs to be aligned to that
+            // element's alignment, so we pad it out if needed.
+            if !is_aligned(element_align, struct_size) {
+                struct_size = align_to(struct_size, element_align);
+            }
+
+            // We then increment the size of the struct by the size of the element's
+            // allocation.
+            struct_size += element.alloc_size_of(data_layout);
+        }
+
+        // Finally, we need to ensure that the struct is aligned properly so that it can
+        // fit into arrays contiguously.
+        let struct_alignment = self.align_of(AlignType::ABI, data_layout);
+        if !is_aligned(struct_alignment, struct_size) {
+            struct_size = align_to(struct_size, struct_alignment);
+        }
+
+        struct_size
+    }
+
+    /// Gets the alignment of `self` in bits under the provided data layout.
+    ///
+    /// # Panics
+    ///
+    /// - If `self` is a struct type without any members.
     #[must_use]
-    pub fn size_of(&self) -> usize {
-        self.elements.iter().map(LLVMType::size_of).sum()
+    pub fn align_of(&self, align_type: AlignType, data_layout: &DataLayout) -> usize {
+        if self.packed && matches!(align_type, AlignType::ABI) {
+            // If it is packed, the language reference specifies that it has 1 byte
+            // ABI alignment https://llvm.org/docs/LangRef.html#structure-type
+            8
+        } else {
+            // In this case, things are more complex, as the alignment becomes
+            // the maximum required alignment of the child elements.
+            //
+            // https://llvm.org/doxygen/DataLayout_8cpp_source.html#l00048
+            let max_child_alignment = self
+                .elements
+                .iter()
+                .map(|e| e.align_of(AlignType::ABI, data_layout))
+                .max()
+                .expect(STRUCT_TYPE_WITH_NO_MEMBERS);
+
+            let dl_align = match align_type {
+                AlignType::ABI => data_layout.aggregate_layout.abi_alignment,
+                AlignType::Preferred => data_layout.aggregate_layout.preferred_alignment,
+            };
+
+            cmp::max(max_child_alignment, dl_align)
+        }
     }
 
-    /// Gets the ABI alignment of `self` in felts.
+    /// Calculates the offset in bits of the element at the provided `index` in
+    /// the struct `self` under the provided data layout.
+    ///
+    /// # Panics
+    ///
+    /// - If the index is not within the bounds of the structure.
     #[must_use]
-    pub fn align_of(&self) -> usize {
-        1
+    pub fn offset_of_element_at(&self, index: usize, data_layout: &DataLayout) -> usize {
+        // We cannot compute the offset at all if the index exceeds the number of
+        // elements in the structure.
+        assert!(
+            index < self.elements.len(),
+            "Element index {index} was not in bounds of structure with {} elements",
+            self.elements.len()
+        );
+
+        let mut current_offset = 0;
+
+        for (ix, element) in self.elements.iter().enumerate() {
+            // We need the element alignment.
+            let element_align = if self.packed {
+                8
+            } else {
+                element.align_of(AlignType::ABI, data_layout)
+            };
+
+            // We then force the element aligned, just like for computing size.
+            if !is_aligned(element_align, current_offset) {
+                current_offset = align_to(current_offset, element_align);
+            }
+
+            // If we have reached the target index, current_offset will contain the right
+            // value as long as we have forced alignment as above.
+            if ix == index {
+                break;
+            }
+
+            current_offset += element.alloc_size_of(data_layout);
+        }
+
+        current_offset
     }
 }
 
@@ -768,21 +976,20 @@ impl LLVMFunction {
     }
 
-    /// Gets the size of `self` in felts.
-    ///
-    /// This is given by the size of the function's return type.
+    /// Gets the size of `self` in bits under the provided data layout.
     #[must_use]
-    pub fn size_of(&self) -> usize {
-        self.return_type.size_of()
+    pub fn size_of(&self, data_layout: &DataLayout) -> usize {
+        self.return_type.size_of(data_layout)
    }
 
-    /// Gets the ABI alignment of `self` in felts under the provided data
+    /// Gets the alignment of `self` in bits under the provided data
     /// layout.
     ///
-    /// This is given by the ABI alignment of the function's return type.
+    /// This is just the required alignment for the requested `align_type`, and
+    /// does not account for the case where a type may be _larger_ than its
+    /// alignment.
     #[must_use]
-    pub fn align_of(&self) -> usize {
-        self.return_type.align_of()
+    pub fn align_of(&self, align_type: AlignType, data_layout: &DataLayout) -> usize {
+        self.return_type.align_of(align_type, data_layout)
     }
 }
 
@@ -829,3 +1036,317 @@ impl<'ctx> TryFrom<&FunctionType<'ctx>> for LLVMFunction {
         Ok(Self::new(return_type, &param_types))
     }
 }
+
+/// The type of alignment to be requested.
+#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
+pub enum AlignType {
+    /// The required alignment of the type as given by the ABI.
+    ABI,
+
+    /// The preferred alignment of the type as given by the data layout.
+    Preferred,
+}
+
+/// Gets the nearest multiple of the provided `alignment` that fits `size`
+/// within it (in bits).
+///
+/// The algorithm is the same as the one used
+/// [in LLVM](https://llvm.org/doxygen/namespacellvm.html#ab102f0f12dd38aeea5906b1d80c792ff).
+#[must_use]
+pub fn align_to(size_bits: usize, alignment_bits: usize) -> usize {
+    let size_bytes = size_bits / BYTE_SIZE_BITS;
+    let alignment_bytes = alignment_bits / BYTE_SIZE_BITS;
+    let aligned_bytes = size_bytes.div_ceil(alignment_bytes) * alignment_bytes;
+    aligned_bytes * BYTE_SIZE_BITS
+}
+
+/// Checks that `size_bits` is a multiple of `align_bits`.
+#[must_use]
+pub fn is_aligned(align_bits: usize, size_bits: usize) -> bool {
+    let size_bytes = size_bits / BYTE_SIZE_BITS;
+    let align_bytes = align_bits / BYTE_SIZE_BITS;
+
+    size_bytes % align_bytes == 0
+}
+
+#[cfg(test)]
+mod test {
+    use crate::{
+        constant::TARGET_DATA_LAYOUT,
+        llvm::{
+            data_layout::DataLayout,
+            typesystem::{
+                AlignType::{ABI, Preferred},
+                LLVMArray,
+                LLVMFunction,
+                LLVMStruct,
+                LLVMType,
+            },
+        },
+    };
+
+    /// Allows quick access to a data layout for passing in as part of the
+    /// tests.
+    fn dl() -> DataLayout {
+        DataLayout::new(TARGET_DATA_LAYOUT).expect("Constant data layout should parse correctly")
+    }
+
+    #[test]
+    fn calculates_correct_size_for_bool() {
+        assert_eq!(LLVMType::bool.size_of(&dl()), 1);
+        assert_eq!(LLVMType::bool.store_size_of(&dl()), 8);
+    }
+
+    #[test]
+    fn calculates_correct_size_for_i8() {
+        assert_eq!(LLVMType::i8.size_of(&dl()), 8);
+        assert_eq!(LLVMType::i8.store_size_of(&dl()), 8);
+    }
+
+    #[test]
+    fn calculates_correct_size_for_i16() {
+        assert_eq!(LLVMType::i16.size_of(&dl()), 16);
+        assert_eq!(LLVMType::i16.store_size_of(&dl()), 16);
+    }
+
+    #[test]
+    fn calculates_correct_size_for_i24() {
+        assert_eq!(LLVMType::i24.size_of(&dl()), 24);
+        assert_eq!(LLVMType::i24.store_size_of(&dl()), 24);
+    }
+
+    #[test]
+    fn calculates_correct_size_for_i32() {
+        assert_eq!(LLVMType::i32.size_of(&dl()), 32);
+        assert_eq!(LLVMType::i32.store_size_of(&dl()), 32);
+    }
+
+    #[test]
+    fn calculates_correct_size_for_i64() {
+        assert_eq!(LLVMType::i64.size_of(&dl()), 64);
+        assert_eq!(LLVMType::i64.store_size_of(&dl()), 64);
+    }
+
+    #[test]
+    fn calculates_correct_size_for_i128() {
+        assert_eq!(LLVMType::i128.size_of(&dl()), 128);
+        assert_eq!(LLVMType::i128.store_size_of(&dl()), 128);
+    }
+
+    #[test]
+    fn calculates_correct_size_for_f16() {
+        assert_eq!(LLVMType::f16.size_of(&dl()), 16);
+        assert_eq!(LLVMType::f16.store_size_of(&dl()), 16);
+    }
+
+    #[test]
+    fn calculates_correct_size_for_f32() {
+        assert_eq!(LLVMType::f32.size_of(&dl()), 32);
+        assert_eq!(LLVMType::f32.store_size_of(&dl()), 32);
+    }
+
+    #[test]
+    fn calculates_correct_size_for_f64() {
+        assert_eq!(LLVMType::f64.size_of(&dl()), 64);
+        assert_eq!(LLVMType::f64.store_size_of(&dl()), 64);
+    }
+
+    #[test]
+    fn calculates_correct_size_for_ptr() {
+        assert_eq!(LLVMType::ptr.size_of(&dl()), 64);
+        assert_eq!(LLVMType::ptr.store_size_of(&dl()), 64);
+    }
+
+    #[test]
+    fn calculates_correct_size_for_void() {
+        assert_eq!(LLVMType::void.size_of(&dl()), 0);
+        assert_eq!(LLVMType::void.store_size_of(&dl()), 0);
+    }
+
+    #[test]
+    fn calculates_correct_size_for_array() {
+        // It should work easily for simple types
+        let simple_array = LLVMType::Array(LLVMArray::new(10, LLVMType::i8));
+        assert_eq!(simple_array.size_of(&dl()), 80);
+        assert_eq!(simple_array.store_size_of(&dl()), 80);
+
+        // But also for compound types
+        let complex_array = LLVMType::Array(LLVMArray::new(
+            8,
+            LLVMType::Structure(LLVMStruct::unpacked(&[
+                LLVMType::bool,
+                LLVMType::i8,
+                LLVMType::i32,
+            ])),
+        ));
+        assert_eq!(complex_array.size_of(&dl()), 512);
+        assert_eq!(complex_array.alloc_size_of(&dl()), 512);
+    }
+
+    #[test]
+    fn calculates_correct_size_for_struct()
{ + // Unpacked structs contain padding between elements. + let unpacked_struct_type = LLVMType::Structure(LLVMStruct::unpacked(&[ + LLVMType::bool, + LLVMType::i8, + LLVMType::i32, + ])); + assert_eq!(unpacked_struct_type.size_of(&dl()), 64); + assert_eq!(unpacked_struct_type.alloc_size_of(&dl()), 64); + + // But packed structs (even with the same elements) do not. + let packed_struct_type = LLVMType::Structure(LLVMStruct::packed(&[ + LLVMType::bool, + LLVMType::i8, + LLVMType::i32, + ])); + assert_eq!(packed_struct_type.size_of(&dl()), 48); + assert_eq!(packed_struct_type.alloc_size_of(&dl()), 48); + } + + #[test] + fn calculates_correct_size_for_function() { + let fn_type = LLVMFunction::new(LLVMType::i8, &[LLVMType::bool, LLVMType::ptr]); + assert_eq!(LLVMType::Function(fn_type).size_of(&dl()), 8); + } + + #[test] + fn calculates_correct_size_for_metadata() { + assert_eq!(LLVMType::Metadata.size_of(&dl()), 0); + assert_eq!(LLVMType::Metadata.store_size_of(&dl()), 0); + } + + #[test] + fn calculates_correct_alignment_for_bool() { + assert_eq!(LLVMType::bool.align_of(ABI, &dl()), 8); + assert_eq!(LLVMType::bool.align_of(Preferred, &dl()), 8); + } + + #[test] + fn calculates_correct_alignment_for_i8() { + assert_eq!(LLVMType::i8.align_of(ABI, &dl()), 8); + assert_eq!(LLVMType::i8.align_of(Preferred, &dl()), 8); + } + + #[test] + fn calculates_correct_alignment_for_i16() { + assert_eq!(LLVMType::i16.align_of(ABI, &dl()), 16); + assert_eq!(LLVMType::i16.align_of(Preferred, &dl()), 16); + } + + #[test] + fn calculates_correct_alignment_for_i32() { + assert_eq!(LLVMType::i32.align_of(ABI, &dl()), 32); + assert_eq!(LLVMType::i32.align_of(Preferred, &dl()), 32); + } + + #[test] + fn calculates_correct_alignment_for_i64() { + assert_eq!(LLVMType::i64.align_of(ABI, &dl()), 64); + assert_eq!(LLVMType::i64.align_of(Preferred, &dl()), 64); + } + + #[test] + fn calculates_correct_alignment_for_i128() { + assert_eq!(LLVMType::i128.align_of(ABI, &dl()), 128); + assert_eq!(LLVMType::i128.align_of(Preferred, &dl()), 128); + } + + #[test] + fn calculates_correct_alignment_for_f16() { + assert_eq!(LLVMType::f16.align_of(ABI, &dl()), 16); + assert_eq!(LLVMType::f16.align_of(Preferred, &dl()), 16); + } + + #[test] + fn calculates_correct_alignment_for_f32() { + assert_eq!(LLVMType::f32.align_of(ABI, &dl()), 32); + assert_eq!(LLVMType::f32.align_of(Preferred, &dl()), 32); + } + + #[test] + fn calculates_correct_alignment_for_f64() { + assert_eq!(LLVMType::f64.align_of(ABI, &dl()), 64); + assert_eq!(LLVMType::f64.align_of(Preferred, &dl()), 64); + } + + #[test] + fn calculates_correct_alignment_for_ptr() { + assert_eq!(LLVMType::ptr.align_of(ABI, &dl()), 64); + assert_eq!(LLVMType::ptr.align_of(Preferred, &dl()), 64); + } + + #[test] + fn calculates_correct_alignment_for_void() { + assert_eq!(LLVMType::void.align_of(ABI, &dl()), 0); + assert_eq!(LLVMType::void.align_of(Preferred, &dl()), 0); + } + + #[test] + fn calculates_correct_alignment_for_array() { + assert_eq!( + LLVMArray::new(10, LLVMType::i8).align_of(ABI, &dl()), + LLVMType::i8.align_of(ABI, &dl()) + ); + assert_eq!( + LLVMArray::new(10, LLVMType::i8).align_of(Preferred, &dl()), + LLVMType::i8.align_of(Preferred, &dl()) + ); + } + + #[test] + fn calculates_correct_alignment_for_struct() { + let unpacked_struct = LLVMStruct::unpacked(&[LLVMType::bool, LLVMType::i16, LLVMType::i64]); + assert_eq!(unpacked_struct.align_of(ABI, &dl()), 64); + assert_eq!(unpacked_struct.align_of(Preferred, &dl()), 64); + + let packed_struct = 
LLVMStruct::packed(&[LLVMType::bool, LLVMType::i16, LLVMType::i64]); + assert_eq!(packed_struct.align_of(ABI, &dl()), 8); + assert_eq!(packed_struct.align_of(Preferred, &dl()), 64); + } + + #[test] + fn calculates_correct_alignment_for_function() { + let fn_type = LLVMFunction::new(LLVMType::i8, &[LLVMType::bool, LLVMType::ptr]); + assert_eq!(fn_type.align_of(ABI, &dl()), 8); + assert_eq!(fn_type.align_of(Preferred, &dl()), 8); + } + + #[test] + fn calculates_correct_alignment_for_metadata() { + assert_eq!(LLVMType::Metadata.align_of(ABI, &dl()), 0); + assert_eq!(LLVMType::Metadata.align_of(Preferred, &dl()), 0); + } + + #[test] + fn calculates_correct_offset_for_array_element() { + let simple_array = LLVMArray::new(10, LLVMType::i8); + assert_eq!(simple_array.offset_of_element_at(6, &dl()), 48); + + let complex_array = LLVMArray::new( + 8, + LLVMType::Structure(LLVMStruct::unpacked(&[ + LLVMType::bool, + LLVMType::i8, + LLVMType::i32, + ])), + ); + assert_eq!(complex_array.offset_of_element_at(10, &dl()), 640); + } + + #[test] + fn calculates_correct_offset_for_struct_element() { + let packed_struct_type = LLVMStruct::packed(&[LLVMType::bool, LLVMType::i8, LLVMType::i64]); + assert_eq!(packed_struct_type.offset_of_element_at(0, &dl()), 0); + assert_eq!(packed_struct_type.offset_of_element_at(1, &dl()), 8); + assert_eq!(packed_struct_type.offset_of_element_at(2, &dl()), 16); + assert_eq!(packed_struct_type.size_of(&dl()), 80); + + let unpacked_struct_type = + LLVMStruct::unpacked(&[LLVMType::bool, LLVMType::i8, LLVMType::i64]); + assert_eq!(unpacked_struct_type.offset_of_element_at(0, &dl()), 0); + assert_eq!(unpacked_struct_type.offset_of_element_at(1, &dl()), 8); + assert_eq!(unpacked_struct_type.offset_of_element_at(2, &dl()), 64); + assert_eq!(unpacked_struct_type.size_of(&dl()), 128); + } +} diff --git a/crates/compiler/src/messages.rs b/crates/compiler/src/messages.rs index f3a40ec..a8a302e 100644 --- a/crates/compiler/src/messages.rs +++ b/crates/compiler/src/messages.rs @@ -10,6 +10,14 @@ use crate::llvm::typesystem::LLVMType; pub const INSTRUCTION_NAMED: &str = "Instruction was not named, but all non-terminator instructions should be"; +/// An error message for use when expecting that the module mapping pass exists. +pub const MISSING_MODULE_MAP: &str = + "No data was available for the module mapping pass, but is required"; + +/// An error message for use when expecting that a struct type has at least one +/// element. +pub const STRUCT_TYPE_WITH_NO_MEMBERS: &str = "Struct type had no members but must have at least 1"; + /// Asserts that the provided `instruction` is an instruction of the `expected` /// opcode. /// diff --git a/crates/compiler/src/obj_gen/mod.rs b/crates/compiler/src/obj_gen/mod.rs index 540b24b..f604553 100644 --- a/crates/compiler/src/obj_gen/mod.rs +++ b/crates/compiler/src/obj_gen/mod.rs @@ -50,11 +50,13 @@ use itertools::Itertools; use crate::{ context::SourceContext, llvm::{ + data_layout::DataLayout, special_intrinsics::SpecialIntrinsics, typesystem::{LLVMArray, LLVMFunction, LLVMStruct, LLVMType}, }, messages::{ INSTRUCTION_NAMED, + MISSING_MODULE_MAP, assert_correct_opcode, missing_indices_error, non_constant_constant_error, @@ -195,10 +197,7 @@ impl ObjectGenerator { // We need the module map to be able to make correct code generation decisions // here, so we start by grabbing this. If it doesn't exist, this is a programmer // error, so we crash loudly. 
-        let module_map = self.pass_data().get::<ModuleMap>().expect(
-            "The module mapping pass does not appear to have been run but is required for code \
-             generation.",
-        );
+        let module_map = self.get_module_map();
 
         // We start by generating code for globals, as they are referenced in function
         // definitions.
@@ -946,6 +945,7 @@ impl ObjectGenerator {
         bb: &mut BlockBuilder,
         func_ctx: &mut FunctionContext,
         polyfills: &PolyfillMap,
+        data_layout: &DataLayout,
     ) -> Result {
         // If the gep_index is constant, we can compute this at compile time.
         let const_value = if let BasicValueEnum::IntValue(int_val) = gep_index {
@@ -960,18 +960,18 @@ impl ObjectGenerator {
         };
 
         let actual_offset = if let Some(const_value) = const_value {
-            let offset = const_value * typ.size_of();
+            let offset_bits = const_value * typ.alloc_size_of(data_layout);
 
             // In this case, it is a constant that we can compute at compile time.
             bb.simple_assign_new_const(ConstantValue {
-                value: offset as u128,
+                value: offset_bits as u128,
                 typ: Type::Unsigned64,
             })
         } else {
            // In this case it is non-constant, so we have to defer the offset computation
            // to runtime.
            let type_size_felts_const = bb.simple_assign_new_const(ConstantValue {
-                value: typ.size_of() as u128,
+                value: typ.alloc_size_of(data_layout) as u128,
                typ: Type::Unsigned64,
            });
 
@@ -1055,6 +1055,7 @@ impl ObjectGenerator {
                     bb,
                     func_ctx,
                     &self.polyfills,
+                    self.get_data_layout(),
                 )?;
 
                 // Then we can issue the call to the first offset within the GEP instruction.
@@ -1088,14 +1089,10 @@ impl ObjectGenerator {
                     // Our offset is then the sum of the sizes of all the elements in the struct
                     // _before_ the element indicated by the GEP index.
-                    let felts_before_index: usize = struct_type
-                        .elements
-                        .iter()
-                        .take(gep_index_value)
-                        .map(LLVMType::size_of)
-                        .sum();
+                    let bits_before_index = struct_type
+                        .offset_of_element_at(gep_index_value, self.get_data_layout());
                     let const_offset = bb.simple_assign_new_const(ConstantValue {
-                        value: felts_before_index as u128,
+                        value: bits_before_index as u128,
                         typ: Type::Signed64,
                     });
 
@@ -1118,6 +1115,7 @@ impl ObjectGenerator {
                         bb,
                         func_ctx,
                         &self.polyfills,
+                        self.get_data_layout(),
                     )?;
 
                     // Then we can issue the call to the first offset within the GEP
@@ -1316,7 +1314,8 @@ impl ObjectGenerator {
         // offsets.
         let mut accumulated_offset = initial_offset;
 
-        for (elem_ty, elem_val) in struct_elements.iter().zip(element_variables.into_iter()) {
+        let elems_with_vars_and_ix = struct_elements.iter().zip(element_variables).enumerate();
+        for (ix, (elem_ty, elem_val)) in elems_with_vars_and_ix {
             match elem_ty {
                 bool | i8 | i16 | i24 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => {
                     self.store_primitive(elem_ty, elem_val, pointer, accumulated_offset, bb)?;
@@ -1333,7 +1332,12 @@ impl ObjectGenerator {
                 )))?,
             }
 
-            accumulated_offset += elem_ty.size_of();
+            // The next element, if any, lives at its own computed offset within the
+            // struct, which accounts for both the element just stored and any alignment
+            // padding required before the next one.
+            if ix + 1 < struct_elements.len() {
+                accumulated_offset = initial_offset
+                    + struct_type.offset_of_element_at(ix + 1, self.get_data_layout());
+            }
         }
 
         // There is nothing to return, so we are done.
@@ -1377,7 +1376,7 @@ impl ObjectGenerator {
         // needed.
         let mut accumulated_offset = initial_offset;
 
-        for array_element in array_elements {
+        for (ix, array_element) in array_elements.into_iter().enumerate() {
             match &array_elem_type {
                 bool | i8 | i16 | i24 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => {
                     self.store_primitive(
@@ -1400,7 +1399,10 @@ impl ObjectGenerator {
                 )))?,
             }
 
-            accumulated_offset += array_elem_type.size_of();
+            // Step to the start of the next element. Computing the offset one past the
+            // end of the array is explicitly supported.
+            accumulated_offset = initial_offset
+                + array_type.offset_of_element_at(ix + 1, self.get_data_layout());
         }
 
         // There is nothing to return, so we are done.
@@ -1569,7 +1568,8 @@ impl ObjectGenerator {
         let component_variables: Vec = struct_type
             .elements
             .iter()
-            .map(|elem_ty| {
+            .enumerate()
+            .map(|(ix, elem_ty)| {
                 // We have to start by dispatching based on the child type
                 let loaded_var = match elem_ty {
                     bool | i8 | i16 | i24 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => {
@@ -1589,8 +1589,11 @@ impl ObjectGenerator {
 
-                // We always have to finish by incrementing the offset by the size of the thing
-                // we just loaded so that the next load proceeds correctly.
-                let increment_offset_by = elem_ty.size_of();
-                accumulated_offset += increment_offset_by;
+                // We always have to finish by advancing the offset to the start of the
+                // next element, if any, so that the next load proceeds correctly. This
+                // accounts for any alignment padding between elements.
+                if ix + 1 < struct_type.elements.len() {
+                    accumulated_offset += struct_type
+                        .offset_of_element_at(ix + 1, self.get_data_layout())
+                        - struct_type.offset_of_element_at(ix, self.get_data_layout());
+                }
 
                 // Then we return the loaded variable for use in the struct constructor.
                 Ok(loaded_var)
@@ -1631,7 +1630,7 @@ impl ObjectGenerator {
         // We need a variable that is the result of loading each element type.
         let mut component_variables: Vec = Vec::new();
 
-        for _ in 0..array_elem_count {
+        for ix in 0..array_elem_count {
            component_variables.push(match array_elem_type {
                bool | i8 | i16 | i24 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => {
                    self.load_primitive(array_elem_type, pointer, accumulated_offset, bb)?
@@ -1650,8 +1649,8 @@ impl ObjectGenerator {
 
-            // We always have to finish by incrementing the offset by the size of the thing
-            // we just loaded so that the next load proceeds correctly.
-            let increment_offset_by = array_elem_type.size_of();
-            accumulated_offset += increment_offset_by;
+            // We always have to finish by advancing the offset to the start of the next
+            // element so that the next load proceeds correctly.
+            accumulated_offset += array_type.offset_of_element_at(ix + 1, self.get_data_layout())
+                - array_type.offset_of_element_at(ix, self.get_data_layout());
         }
 
        // In FLO, we do not have any first-class array type, so arrays are structures
@@ -2595,8 +2593,7 @@ impl ObjectGenerator {
     /// # Panics
     ///
     /// - If the provided instruction is _not_ an `alloca`.
-    /// - If the [`crate::pass::analysis::module_map::ModuleMap`] data is not
-    ///   available.
+    /// - If the [`ModuleMap`] data is not available.
     /// - If the `alloca` instruction does not have a type to allocate.
     /// - If the `alloca` instruction does not have a count of that type to
     ///   allocate.
@@ -2615,7 +2612,7 @@ impl ObjectGenerator {
                 "Alloca instruction encountered without a specified type to allocate",
             )
         })?)?;
-        let type_size = allocated_type.size_of();
+        let type_size = allocated_type.alloc_size_of(self.get_data_layout());
 
         // We also need to know the allocation count, which inkwell always fills in with
         // the default of 1 for us if not otherwise specified.
@@ -3174,6 +3171,28 @@ impl ObjectGenerator {
     }
 }
 
+/// Utility methods on the object generator.
+impl ObjectGenerator {
+    /// Gets a reference to the module map for the current module.
+    ///
+    /// # Panics
+    ///
+    /// - If the module map is not present in the stored pass data.
+    pub fn get_module_map(&self) -> &ModuleMap {
+        self.pass_data.get::<ModuleMap>().expect(MISSING_MODULE_MAP)
+    }
+
+    /// Gets a reference to the data layout for the current module.
+    ///
+    /// # Panics
+    ///
+    /// - If the module map containing the data layout is not present in the
+    ///   stored pass data.
+    pub fn get_data_layout(&self) -> &DataLayout {
+        &self.get_module_map().data_layout
+    }
+}
+
 #[cfg(test)]
 mod test {
     use std::path::Path;
diff --git a/crates/compiler/src/polyfill.rs b/crates/compiler/src/polyfill.rs
index 0388837..4d1f28b 100644
--- a/crates/compiler/src/polyfill.rs
+++ b/crates/compiler/src/polyfill.rs
@@ -386,23 +386,26 @@ impl PolyfillMap {
 /// The definition of the [memory access and addressing operations](https://llvm.org/docs/LangRef.html#memory-access-and-addressing-operations).
impl PolyfillMap { + /// This is the platform-level allocator. + fn alloc(&mut self) { + // The first argument is the size of the allocation in bits, while the second + // argument is the number of instances of that size to allocate. + self.mk("alloc", &[LLVMType::i64, LLVMType::i64], LLVMType::ptr); + } + fn alloca(&mut self) { - // The first argument is the size, in felts, of the allocation, while the second + // The first argument is the size of the allocation in bits, while the second // argument is the number of instances of that size to allocate. self.mk("alloca", &[LLVMType::i64, LLVMType::i64], LLVMType::ptr); } - // TODO composites via iteration. load_* for each prim type, taking an offset - // from the ptr and the ptr. Need to fix insertvalue and extractvalue. Use - // construct and destructure to deal with these things. - fn load(&mut self) { // Due to the nature of the types available in FLO, we can only load and store // PRIMITIVE types. To that end, we need a load variant for _each_ primitive // type, and we have to decompose loads and stores of aggregates into loads and // stores using primitive types. - // Our load function takes the pointer to load and an offset (in felts) from + // Our load function takes the pointer to load and an offset in bits from // that pointer, and returns the result of loading from that pointer. for typ in Self::numptr_types() { self.mk("load", &[LLVMType::ptr, LLVMType::i64], typ); @@ -416,7 +419,7 @@ impl PolyfillMap { // stores using primitive types. // Our store function takes the value to store, the pointer to store it at, and - // an offset (in felts) from that pointer at which the primitive value should be + // an offset in bits from that pointer at which the primitive value should be // stored. for typ in Self::numptr_types() { self.mk( @@ -487,6 +490,7 @@ impl PolyfillMap { } fn all_memory_ops(&mut self) { + self.alloc(); self.alloca(); self.load(); self.store(); @@ -1250,6 +1254,6 @@ mod test { fn has_correct_polyfill_count() { let polyfills = PolyfillMap::new(); let count = polyfills.iter().count(); - assert_eq!(count, 1103); + assert_eq!(count, 1104); } } diff --git a/docs/Memory Model.md b/docs/Memory Model.md new file mode 100644 index 0000000..b698d71 --- /dev/null +++ b/docs/Memory Model.md @@ -0,0 +1,171 @@ +# Hieratika Memory Model + +CairoVM provides a memory model that is best described as having "write once" semantics. Each memory +cell—implicitly the size of a 252-bit field element (felt)—can only be written to _once_ during the +execution of the program. LLVM IR, however, expects a standard model of mutable memory, with memory +regions that can be allocated and mutated separately from the static single-assignment (SSA) form +used for its non-pointer-based values. + +While Cairo provides [mechanisms](#emulating-mutable-memory-in-cairo) for the emulation of mutable +memory semantics, these still operate on felt-based addressing, and are not suitable for the +read-write semantics expected from LLVM IR. To that end, this document outlines a +[system](#the-model) for presenting memory to the compiler and LLVM IR code to preserve its expected +semantics, and for mapping those semantics onto Cairo's. + +## The Model + +Hieratika has settled on a traditional, byte-addressed memory model. In other words, it is a core +operation to be able to offset and read pointers on byte boundaries. The key tenets of this model +are as follows: + +- Memory is addressable in increments of one (8-bit) byte. 
- Each felt stores 28 bytes of data (224 bits) toward its MSB, and a region of 28 bits of metadata
  toward its LSB.
- These metadata bits are _not_ part of contiguous memory. The 28 bits of flags are excluded to
  form a contiguous, byte-addressable space that is semantically uniform, even though the
  underlying representation consists of 28-byte chunks encoded into felts.
- The memory subsystem will allow accessing memory at any byte offset of any allocated pointer.
- Reading from uninitialized memory is well-defined and will return zero bytes.
- Allocations will be handled by the [memory subsystem](#the-memory-subsystem), which will handle
  making allocations contiguously or on felt boundaries as needed.
- The memory model provides no means to perform deallocation, in keeping with Cairo's write-once
  model. While guest code will be able to call `deallocate`, this is a no-op.

## The Polyfills

[Polyfills](../crates/compiler/src/polyfill.rs) are pieces of functionality with _known names_ that
are implemented directly in Cairo to provide common runtime operations to the compiled code. They
range from simple things like `__llvm_add_i8_i8` (add two `i8`s together) to far more complex
things like `atomicrmw`. This memory model design is concerned with the core memory polyfills.
These need to be able to allocate memory, both on the heap and on the "stack", while also being
able to manipulate that memory.

Hieratika defines two polyfills and two _sets of_ polyfills for interacting with memory. The two
polyfills are as follows:

- `alloc : ( size : BitCount ) -> ( count : usize ) -> ptr`: This polyfill allocates a contiguous
  region of `size * count` bits of memory, and returns the pointer to the start of that memory
  region. This can be thought of as a heap allocation.
- `alloca : ( size : BitCount ) -> ( count : usize ) -> ptr`: This polyfill allocates a contiguous
  region of `size * count` bits of memory, and returns the pointer to the start of that memory
  region. This can be thought of as a local allocation. Said allocations go out of scope once the
  function containing the allocation returns. Due to the memory model, however, they are never
  deallocated, so this is purely an illustrative difference from `alloc` above.

Hieratika also needs to be able to `load` from and `store` to memory. Unfortunately, the tool's
strongly-typed target (in the form of `FlatLowered`) means that types simply cannot be punned. In
other words, there needs to exist a `load` and `store` for every type.

The problem with _this_, however, is that the space of types is _infinite_. To that end, the
Hieratika compiler decomposes loads and stores of aggregate types (structures, arrays, and so on)
into loads and stores of primitives at the correct offsets (see the sketch after this list).
Accordingly, the `load` and `store` polyfills are defined for each of the following primitive
types: `bool` (`i1`), `i8`, `i16`, `i32`, `i64`, `i128`, `f32` (`float`), `f64` (`double`), and
`ptr`. These families of polyfills are as follows:

- `load : forall T => (address : Pointer) -> (offset : BitCount) -> (value : T)`: This polyfill
  takes an `address`, as well as an `offset` from that address in bits, and loads a value of type
  `T` from the specified location.
- `store : forall T => (value : T) -> (address : Pointer) -> (offset : BitCount) -> ()`: This
  polyfill takes a value of type `T`, an `address`, and an `offset` from that address in bits, and
  stores the provided `value` at the specified location.
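
As an illustration of that decomposition, the following sketch computes the primitive stores that
a store of the struct `{ i1, i8, i64 }` breaks down into. The element sizes and alignments are
assumed values for illustration, and the printed pseudo-calls stand in for the `store` family of
polyfills rather than reflecting their actual names:

```rust
fn main() {
    // (type name, allocation size in bits, ABI alignment in bits) for `{ i1, i8, i64 }`,
    // assuming byte-rounded sizes and natural alignments.
    let elements = [("i1", 8, 8), ("i8", 8, 8), ("i64", 64, 64)];

    let mut offset_bits = 0usize;
    for (name, size, align) in elements {
        // Pad the running offset up to the element's alignment before storing it.
        offset_bits = offset_bits.div_ceil(align) * align;

        // A real lowering would emit a call to the `store` polyfill for this type.
        println!("store_{name}(value, ptr, {offset_bits})");

        offset_bits += size;
    }
}
```

Running this prints stores at bit offsets 0, 8, and 64, with the 48 bits between offsets 16 and 64
being the alignment padding before the `i64`.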

For now, if any of these polyfills fails to operate correctly (such as encountering a load from a
non-allocated memory region), it panics.

## The Memory Subsystem

The memory subsystem refers to the runtime functionality for managing memory. It is responsible
for allocating memory, but also for loading and storing runtime values into the memory cells. It
consists of the [allocator](#the-allocator) and an underlying
[emulation](#emulating-mutable-memory-in-cairo) of mutable memory semantics to present to the
guest code.

### The Allocator

The allocator is responsible for providing memory to the guest program when requested, as well as
for mapping `load`s and `store`s from the LLVM memory semantics onto those of the underlying
memory.

- The allocator is based on some [kind](#emulating-mutable-memory-in-cairo) of
  semantically-contiguous buffer that allows it to present an emulation of contiguous memory to
  the guest code. Note that the underlying memory _will not_ be truly contiguous, spanning across
  felts that may or may not be adjacent.
- The allocator handles mapping between this contiguous memory and the types of the data stored
  using the `load` and `store` instructions.
- The allocator may perform runtime optimization to align values on felt boundaries to potentially
  reduce load and store complexity. The LSB end of a felt is still byte-aligned, so this is
  allowed under the new model.

Due to the write-once nature of Cairo's memory, the allocator does _not_ have to handle the
freeing of memory. On this platform, freeing memory is a no-op.

### Emulating Mutable Memory in Cairo

As Cairo's memory is write-once, Hieratika needs to _emulate_ mutability on top of this. The
prototypical way to do this—as proposed in the original paper on Cairo's AIR and used today in
Cairo Lang itself—is to have a dictionary. Under such an approach, each key serves as a pointer,
while each value contains the latest value stored at that pointer.

The fundamental issue with this is that looking up the current value at a given pointer requires
time linear in the number of times that memory cell has been written to. While Cairo Lang
encourages a style that avoids requiring many writes, the fundamental nature of LLVM IR is that it
will write to and read from a given pointer many times in succession, making these lookups a
significant performance bottleneck.

To that end, Hieratika's memory model will use a multi-level lookup mechanism that works as
follows:

- It defines a factor $l$, which is the maximum number of linear steps that may be taken to find
  the current value of a cell, and a threshold $n$ on the number of expensive pointers tolerated.
- Pointer lookup operates through a lookup buffer that aims to maintain low numbers of steps to
  look up memory values.
- This buffer is swapped for a new buffer beginning with zero linear steps if more than $n$
  pointers in the previous buffer have a lookup cost $> \frac{l}{n}$, or if any linear traversal
  reaches $l$ steps.

While this does increase overall memory usage for the bookkeeping within the memory subsystem, it
should dramatically reduce the number of CairoVM steps it takes to read the current value from a
given pointer and offset.

## Global Value Pointers

Integrated with the system of simply-allocated pointers are the pointers that allow referencing
two different kinds of global value.
These are the [constants](#constant-pointers), which are
initialized and allocated at program startup, and the [functions](#function-pointers), which allow
referencing (and calling) functions dynamically at runtime. The below sections deal with the
implementation of these features.

### Constant Pointers

This section is TBD, and will be filled in as part of the work on constant pointers.

### Function Pointers

This section is TBD, and will be filled in as part of the work on function pointers.

## Felt-Aligned Addressing - An Alternative Model

Hieratika initially used a design for a memory model that operated on _felt_-aligned addressing
instead of our current byte-aligned [model](#the-model). This would have significantly reduced the
complexity of `load` and `store` operations in exchange for significantly higher memory usage.

Take the type `{ i1, i8, i64 }`, for example.

- In the byte-addressed model, this is all stored in a single felt: 8 bits for the `i1`, 8 bits
  for the `i8`, 48 bits of padding, and then 64 bits for the `i64`. Loading the `i8`, for example,
  would require extracting that second byte from the 252-bit felt value.
- In the felt-addressed model, however, this would be stored in _three_ felts: 1 bit in the first
  felt, 8 bits in the second, and 64 bits in the third. Loading the `i8` would be as simple as
  loading from that memory cell.

For simplicity's sake, Hieratika originally operated on the assumption of the second model, in
order to gain experience and determine the features necessary for a more complex model.
Unfortunately, real-world LLVM IR inputs quickly made it clear that type punning—interpreting a
value of one type as a value of another type under byte-addressing and alignment rules—was
rampant.

As an example, it proved common to see IR that allocated `[4 x i8]` and then wrote an `i16` to the
first two bytes and read an `i16` from the other two. As, in the felt-aligned model, the first two
`i8`s would be written to individual felts, reading them back as an `i16` is significantly
complex.

To that end, the project was forced to abandon this model in favor of a more traditional
byte-aligned addressing model.
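
The following sketch mirrors that punning pattern in plain Rust, using host memory to stand in for
the emulated byte-addressable space (the buffer and offsets here are illustrative, not the memory
subsystem's actual API):

```rust
fn main() {
    // An allocation equivalent to `[4 x i8]`: four contiguous, byte-addressable cells.
    let mut buf = [0u8; 4];

    // Write an `i16` over the first two bytes of the `i8` array.
    buf[0..2].copy_from_slice(&0x1234u16.to_le_bytes());

    // Read an `i16` back from the last two bytes. Under byte-addressing this is a
    // trivial contiguous read; under felt-aligned addressing the two bytes would live
    // in separate felts and would have to be extracted and recombined.
    let punned = u16::from_le_bytes([buf[2], buf[3]]);

    // The last two bytes were never written, and reading uninitialized memory is
    // defined to return zero bytes in this model.
    assert_eq!(punned, 0);
    println!("i16 read from bytes 2..4: {punned:#06x}");
}
```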