diff --git a/crates/compiler/src/llvm/typesystem.rs b/crates/compiler/src/llvm/typesystem.rs index ab517116..0cc7fde0 100644 --- a/crates/compiler/src/llvm/typesystem.rs +++ b/crates/compiler/src/llvm/typesystem.rs @@ -1,7 +1,10 @@ //! The compiler's internal representation of LLVM types, without being tied to //! the context as the [`BasicTypeEnum`] is. -use std::fmt::{Display, Formatter}; +use std::{ + cmp, + fmt::{Display, Formatter}, +}; use hieratika_errors::compile::{llvm, llvm::Error}; use inkwell::{ @@ -23,6 +26,12 @@ use inkwell::{ }; use itertools::Itertools; +use crate::{ + constant::BYTE_SIZE_BITS, + llvm::data_layout::DataLayout, + messages::STRUCT_TYPE_WITH_NO_MEMBERS, +}; + /// A representation of the LLVM [types](https://llvm.org/docs/LangRef.html#type-system) /// for use within the compiler. /// @@ -215,41 +224,109 @@ impl LLVMType { self.as_function().expect("`self` value was not Self::Function") } - /// Gets the size of `self` in felts. + /// Gets the size of `self` in bits under the provided data layout. #[must_use] - pub fn size_of(&self) -> usize { - use LLVMType::{ - Array, - Function, - Metadata, - Structure, - bool, - f16, - f32, - f64, - i8, - i16, - i24, - i32, - i64, - i128, - ptr, - void, - }; + #[expect(clippy::match_same_arms)] // The similarities are incidental. + pub fn size_of(&self, data_layout: &DataLayout) -> usize { match self { - bool | i8 | i16 | i24 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => 1, - void | Metadata => 0, - Array(array_ty) => array_ty.size_of(), - Structure(struct_ty) => struct_ty.size_of(), - Function(func_ty) => func_ty.size_of(), + LLVMType::bool => 1, + LLVMType::i8 => 8, + LLVMType::i16 => 16, + LLVMType::i24 => 24, + LLVMType::i32 => 32, + LLVMType::i64 => 64, + LLVMType::i128 => 128, + LLVMType::f16 => 16, + LLVMType::f32 => 32, + LLVMType::f64 => 64, + LLVMType::ptr => data_layout.default_pointer_layout().size, + LLVMType::void => 0, + LLVMType::Array(array_type) => array_type.size_of(data_layout), + LLVMType::Structure(struct_type) => struct_type.size_of(data_layout), + LLVMType::Function(function_type) => function_type.size_of(data_layout), + LLVMType::Metadata => 0, } } - /// Gets the ABI alignment of `self` in felts. + /// Gets the maximum number of bits that may be overwritten by storing + /// `self`. + /// + /// This is always a multiple of eight. #[must_use] - pub fn align_of(&self) -> usize { - // At the moment we align everything to the nearest felt boundary. - 1 + pub fn store_size_of(&self, data_layout: &DataLayout) -> usize { + let min_size_bits = self.size_of(data_layout); + + ((min_size_bits + 7) / 8) * 8 + } + + /// Returns the offset in bits between successive objects of the + /// specified type, including the alignment padding. + /// + /// This is always a multiple of eight. + #[must_use] + pub fn alloc_size_of(&self, data_layout: &DataLayout) -> usize { + // https://llvm.org/doxygen/DataLayout_8h_source.html#l00457 + align_to( + self.store_size_of(data_layout), + self.align_of(AlignType::ABI, data_layout), + ) + } + + /// Gets the alignment of `self` in bits under the provided data layout. + #[must_use] + #[expect(clippy::match_same_arms)] // The similarities are incidental. 
+ pub fn align_of(&self, align_type: AlignType, data_layout: &DataLayout) -> usize { + match self { + LLVMType::bool => match align_type { + AlignType::ABI => data_layout.expect_int_spec_of(1).abi_alignment, + AlignType::Preferred => data_layout.expect_int_spec_of(1).preferred_alignment, + }, + LLVMType::i8 => match align_type { + AlignType::ABI => data_layout.expect_int_spec_of(8).abi_alignment, + AlignType::Preferred => data_layout.expect_int_spec_of(8).preferred_alignment, + }, + LLVMType::i16 => match align_type { + AlignType::ABI => data_layout.expect_int_spec_of(16).abi_alignment, + AlignType::Preferred => data_layout.expect_int_spec_of(16).preferred_alignment, + }, + LLVMType::i24 => match align_type { + AlignType::ABI => data_layout.expect_int_spec_of(24).abi_alignment, + AlignType::Preferred => data_layout.expect_int_spec_of(24).preferred_alignment, + }, + LLVMType::i32 => match align_type { + AlignType::ABI => data_layout.expect_int_spec_of(32).abi_alignment, + AlignType::Preferred => data_layout.expect_int_spec_of(32).preferred_alignment, + }, + LLVMType::i64 => match align_type { + AlignType::ABI => data_layout.expect_int_spec_of(64).abi_alignment, + AlignType::Preferred => data_layout.expect_int_spec_of(64).preferred_alignment, + }, + LLVMType::i128 => match align_type { + AlignType::ABI => data_layout.expect_int_spec_of(128).abi_alignment, + AlignType::Preferred => data_layout.expect_int_spec_of(128).preferred_alignment, + }, + LLVMType::f16 => match align_type { + AlignType::ABI => data_layout.expect_float_spec_of(16).abi_alignment, + AlignType::Preferred => data_layout.expect_float_spec_of(16).preferred_alignment, + }, + LLVMType::f32 => match align_type { + AlignType::ABI => data_layout.expect_float_spec_of(32).abi_alignment, + AlignType::Preferred => data_layout.expect_float_spec_of(32).preferred_alignment, + }, + LLVMType::f64 => match align_type { + AlignType::ABI => data_layout.expect_float_spec_of(64).abi_alignment, + AlignType::Preferred => data_layout.expect_float_spec_of(64).preferred_alignment, + }, + LLVMType::ptr => match align_type { + AlignType::ABI => data_layout.default_pointer_layout().abi_alignment, + AlignType::Preferred => data_layout.default_pointer_layout().preferred_alignment, + }, + LLVMType::void => 0, + LLVMType::Array(array_type) => array_type.align_of(align_type, data_layout), + LLVMType::Structure(struct_type) => struct_type.align_of(align_type, data_layout), + LLVMType::Function(function_type) => function_type.align_of(align_type, data_layout), + LLVMType::Metadata => 0, + } } } @@ -593,16 +670,31 @@ impl LLVMArray { Self { count, typ } } - /// Gets the size of `self` in felts. + /// Gets the size of `self` in bits under the provided data layout. #[must_use] - pub fn size_of(&self) -> usize { - self.typ.size_of() * self.count + pub fn size_of(&self, data_layout: &DataLayout) -> usize { + // The size is the allocation size of the element type multiplied by the number + // of elements. https://llvm.org/doxygen/DataLayout_8h_source.html#l00625 + self.typ.alloc_size_of(data_layout) * self.count } - /// Gets the ABI alignment of `self` in felts. + /// Gets the alignment of `self` in bits under the provided data layout. 
#[must_use] - pub fn align_of(&self) -> usize { - 1 + pub fn align_of(&self, align_type: AlignType, data_layout: &DataLayout) -> usize { + // https://llvm.org/doxygen/DataLayout_8cpp_source.html#l00780 + self.typ.align_of(align_type, data_layout) + } + + /// Calculates the offset in bits of the element at the provided `index` in + /// the array `self` under the provided data layout. + /// + /// Note that this method will happily calculate element offsets that are + /// outside the bounds of the array, as out-of-bounds offset computation is a + /// common usage pattern for LLVM array types. + #[must_use] + pub fn offset_of_element_at(&self, index: usize, data_layout: &DataLayout) -> usize { + // Successive elements are laid out at multiples of the element type's + // allocation size (its stride), matching the array size calculation above. + let element_stride = self.typ.alloc_size_of(data_layout); + element_stride * index + } } @@ -680,16 +772,132 @@ impl LLVMStruct { Self { packed, elements } } - /// Gets the size of `self` in felts. + /// Creates a new packed LLVM struct from the provided `elements` types. + #[must_use] + pub fn packed(elements: &[LLVMType]) -> Self { + Self::new(true, elements) + } + + /// Creates a new unpacked LLVM struct from the provided `elements` types. + #[must_use] + pub fn unpacked(elements: &[LLVMType]) -> Self { + Self::new(false, elements) + } + + /// Gets the size of `self` in bits under the provided data layout. + /// + /// # Panics + /// + /// - If `self` is a struct type without any members. + #[must_use] + pub fn size_of(&self, data_layout: &DataLayout) -> usize { + let mut struct_size = 0; + + assert!(!self.elements.is_empty(), "{}", STRUCT_TYPE_WITH_NO_MEMBERS); + + // Adapted from https://llvm.org/doxygen/DataLayout_8cpp_source.html#l00048 + for element in &self.elements { + // We start by getting the alignment of the element. + let element_align = if self.packed { + 8 + } else { + element.align_of(AlignType::ABI, data_layout) + }; + + // To add the next element, the size of the struct needs to be aligned to that + // element's alignment, so we pad it out if needed. + if !is_aligned(element_align, struct_size) { + struct_size = align_to(struct_size, element_align); + } + + // We then increment the size of the struct by the size of the element's + // allocation. + struct_size += element.alloc_size_of(data_layout); + } + + // Finally, we need to ensure that the struct is aligned properly so that it can + // fit into arrays contiguously. + let struct_alignment = self.align_of(AlignType::ABI, data_layout); + if !is_aligned(struct_alignment, struct_size) { + struct_size = align_to(struct_size, struct_alignment); + } + + struct_size + } + + /// Gets the alignment of `self` in bits under the provided data layout. + /// + /// # Panics + /// + /// - If `self` is a struct type without any members. #[must_use] - pub fn size_of(&self) -> usize { - self.elements.iter().map(LLVMType::size_of).sum() + pub fn align_of(&self, align_type: AlignType, data_layout: &DataLayout) -> usize { + if self.packed && matches!(align_type, AlignType::ABI) { + // If it is packed, the language reference specifies that it has 1 byte + // ABI alignment https://llvm.org/docs/LangRef.html#structure-type + 8 + } else { + // In this case, things are more complex, as the alignment becomes + // the maximum required alignment of the child elements.
+ // + // https://llvm.org/doxygen/DataLayout_8cpp_source.html#l00048 + let max_child_alignment = self + .elements + .iter() + .map(|e| e.align_of(AlignType::ABI, data_layout)) + .max() + .expect(STRUCT_TYPE_WITH_NO_MEMBERS); + + let dl_align = match align_type { + AlignType::ABI => data_layout.aggregate_layout.abi_alignment, + AlignType::Preferred => data_layout.aggregate_layout.preferred_alignment, + }; + + cmp::max(max_child_alignment, dl_align) + } } - /// Gets the ABI alignment of `self` in felts. + /// Calculates the offset in bits of the element at the provided `index` in + /// the struct `self` under the provided data layout. + /// + /// # Panics + /// + /// - If the index is not within the bounds of the structure. #[must_use] - pub fn align_of(&self) -> usize { - 1 + pub fn offset_of_element_at(&self, index: usize, data_layout: &DataLayout) -> usize { + // We cannot compute the offset at all if the index exceeds the number of + // elements in the structure. + assert!( + index < self.elements.len(), + "Element index {index} was not in bounds of structure with {} elements", + self.elements.len() + ); + + let mut current_offset = 0; + + for (ix, element) in self.elements.iter().enumerate() { + // We need the element alignment. + let element_align = if self.packed { + 8 + } else { + element.align_of(AlignType::ABI, data_layout) + }; + + // We then force the element aligned, just like for computing size. + if !is_aligned(element_align, current_offset) { + current_offset = align_to(current_offset, element_align); + } + + // If we have reached the target index, current_offset will contain the right + // value as long as we have forced alignment as above. + if ix == index { + break; + } + + current_offset += element.alloc_size_of(data_layout); + } + + current_offset + } } @@ -768,21 +976,20 @@ impl LLVMFunction { } } - /// Gets the size of `self` in felts. - /// - /// This is given by the size of the function's return type. + /// Gets the size of `self` in bits under the provided data layout. #[must_use] - pub fn size_of(&self) -> usize { - self.return_type.size_of() + pub fn size_of(&self, data_layout: &DataLayout) -> usize { + self.return_type.size_of(data_layout) } - /// Gets the ABI alignment of `self` in felts under the provided data + /// Gets the alignment of `self` in bits under the provided data /// layout. /// - /// This is given by the ABI alignment of the function's return type. + /// This is just the requested alignment, and does not account for the + /// case where a type may be _larger_ than its alignment. #[must_use] - pub fn align_of(&self) -> usize { - self.return_type.align_of() + pub fn align_of(&self, align_type: AlignType, data_layout: &DataLayout) -> usize { + self.return_type.align_of(align_type, data_layout) } } @@ -829,3 +1036,317 @@ impl<'ctx> TryFrom<&FunctionType<'ctx>> for LLVMFunction { Ok(Self::new(return_type, &param_types)) } } + +/// The type of alignment to be requested. +#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub enum AlignType { + /// The required alignment of the type as given by the ABI. + ABI, + + /// The preferred alignment of the type as given by the data layout. + Preferred, +} + +/// Rounds `size_bits` up to the nearest multiple of `alignment_bits`, with +/// both quantities given in bits. +/// +/// The algorithm is the same as the one used +/// [in LLVM](https://llvm.org/doxygen/namespacellvm.html#ab102f0f12dd38aeea5906b1d80c792ff).
+#[must_use] +pub fn align_to(size_bits: usize, alignment_bits: usize) -> usize { + let size_bytes = size_bits / BYTE_SIZE_BITS; + let alignment_bytes = alignment_bits / BYTE_SIZE_BITS; + let aligned_bytes = size_bytes.div_ceil(alignment_bytes) * alignment_bytes; + aligned_bytes * BYTE_SIZE_BITS +} + +/// Checks that `size_bits` is a multiple of `align_bits`. +#[must_use] +pub fn is_aligned(align_bits: usize, size_bits: usize) -> bool { + let size_bytes = size_bits / BYTE_SIZE_BITS; + let align_bytes = align_bits / BYTE_SIZE_BITS; + + size_bytes % align_bytes == 0 +} + +#[cfg(test)] +mod test { + use crate::{ + constant::TARGET_DATA_LAYOUT, + llvm::{ + data_layout::DataLayout, + typesystem::{ + AlignType::{ABI, Preferred}, + LLVMArray, + LLVMFunction, + LLVMStruct, + LLVMType, + }, + }, + }; + + /// Allows quick access to a data layout for passing in as part of the + /// tests. + fn dl() -> DataLayout { + DataLayout::new(TARGET_DATA_LAYOUT).expect("Constant data layout should parse correctly") + } + + #[test] + fn calculates_correct_size_for_bool() { + assert_eq!(LLVMType::bool.size_of(&dl()), 1); + assert_eq!(LLVMType::bool.store_size_of(&dl()), 8); + } + + #[test] + fn calculates_correct_size_for_i8() { + assert_eq!(LLVMType::i8.size_of(&dl()), 8); + assert_eq!(LLVMType::i8.store_size_of(&dl()), 8); + } + + #[test] + fn calculates_correct_size_for_i16() { + assert_eq!(LLVMType::i16.size_of(&dl()), 16); + assert_eq!(LLVMType::i16.store_size_of(&dl()), 16); + } + + #[test] + fn calculates_correct_size_for_i24() { + assert_eq!(LLVMType::i24.size_of(&dl()), 24); + assert_eq!(LLVMType::i24.store_size_of(&dl()), 24); + } + + #[test] + fn calculates_correct_size_for_i32() { + assert_eq!(LLVMType::i32.size_of(&dl()), 32); + assert_eq!(LLVMType::i32.store_size_of(&dl()), 32); + } + + #[test] + fn calculates_correct_size_for_i64() { + assert_eq!(LLVMType::i64.size_of(&dl()), 64); + assert_eq!(LLVMType::i64.store_size_of(&dl()), 64); + } + + #[test] + fn calculates_correct_size_for_i128() { + assert_eq!(LLVMType::i128.size_of(&dl()), 128); + assert_eq!(LLVMType::i128.store_size_of(&dl()), 128); + } + + #[test] + fn calculates_correct_size_for_f16() { + assert_eq!(LLVMType::f16.size_of(&dl()), 16); + assert_eq!(LLVMType::f16.store_size_of(&dl()), 16); + } + + #[test] + fn calculates_correct_size_for_f32() { + assert_eq!(LLVMType::f32.size_of(&dl()), 32); + assert_eq!(LLVMType::f32.store_size_of(&dl()), 32); + } + + #[test] + fn calculates_correct_size_for_f64() { + assert_eq!(LLVMType::f64.size_of(&dl()), 64); + assert_eq!(LLVMType::f64.store_size_of(&dl()), 64); + } + + #[test] + fn calculates_correct_size_for_ptr() { + assert_eq!(LLVMType::ptr.size_of(&dl()), 64); + assert_eq!(LLVMType::ptr.store_size_of(&dl()), 64); + } + + #[test] + fn calculates_correct_size_for_void() { + assert_eq!(LLVMType::void.size_of(&dl()), 0); + assert_eq!(LLVMType::void.store_size_of(&dl()), 0); + } + + #[test] + fn calculates_correct_size_for_array() { + // It should work easily for simple types + let simple_array = LLVMType::Array(LLVMArray::new(10, LLVMType::i8)); + assert_eq!(simple_array.size_of(&dl()), 80); + assert_eq!(simple_array.store_size_of(&dl()), 80); + + // But also for compound types + let complex_array = LLVMType::Array(LLVMArray::new( + 8, + LLVMType::Structure(LLVMStruct::unpacked(&[ + LLVMType::bool, + LLVMType::i8, + LLVMType::i32, + ])), + )); + assert_eq!(complex_array.size_of(&dl()), 512); + assert_eq!(complex_array.alloc_size_of(&dl()), 512); + } + + #[test] + fn calculates_correct_size_for_struct()
{ + // Unpacked structs contain padding between elements. + let unpacked_struct_type = LLVMType::Structure(LLVMStruct::unpacked(&[ + LLVMType::bool, + LLVMType::i8, + LLVMType::i32, + ])); + assert_eq!(unpacked_struct_type.size_of(&dl()), 64); + assert_eq!(unpacked_struct_type.alloc_size_of(&dl()), 64); + + // But packed structs (even with the same elements) do not. + let packed_struct_type = LLVMType::Structure(LLVMStruct::packed(&[ + LLVMType::bool, + LLVMType::i8, + LLVMType::i32, + ])); + assert_eq!(packed_struct_type.size_of(&dl()), 48); + assert_eq!(packed_struct_type.alloc_size_of(&dl()), 48); + } + + #[test] + fn calculates_correct_size_for_function() { + let fn_type = LLVMFunction::new(LLVMType::i8, &[LLVMType::bool, LLVMType::ptr]); + assert_eq!(LLVMType::Function(fn_type).size_of(&dl()), 8); + } + + #[test] + fn calculates_correct_size_for_metadata() { + assert_eq!(LLVMType::Metadata.size_of(&dl()), 0); + assert_eq!(LLVMType::Metadata.store_size_of(&dl()), 0); + } + + #[test] + fn calculates_correct_alignment_for_bool() { + assert_eq!(LLVMType::bool.align_of(ABI, &dl()), 8); + assert_eq!(LLVMType::bool.align_of(Preferred, &dl()), 8); + } + + #[test] + fn calculates_correct_alignment_for_i8() { + assert_eq!(LLVMType::i8.align_of(ABI, &dl()), 8); + assert_eq!(LLVMType::i8.align_of(Preferred, &dl()), 8); + } + + #[test] + fn calculates_correct_alignment_for_i16() { + assert_eq!(LLVMType::i16.align_of(ABI, &dl()), 16); + assert_eq!(LLVMType::i16.align_of(Preferred, &dl()), 16); + } + + #[test] + fn calculates_correct_alignment_for_i32() { + assert_eq!(LLVMType::i32.align_of(ABI, &dl()), 32); + assert_eq!(LLVMType::i32.align_of(Preferred, &dl()), 32); + } + + #[test] + fn calculates_correct_alignment_for_i64() { + assert_eq!(LLVMType::i64.align_of(ABI, &dl()), 64); + assert_eq!(LLVMType::i64.align_of(Preferred, &dl()), 64); + } + + #[test] + fn calculates_correct_alignment_for_i128() { + assert_eq!(LLVMType::i128.align_of(ABI, &dl()), 128); + assert_eq!(LLVMType::i128.align_of(Preferred, &dl()), 128); + } + + #[test] + fn calculates_correct_alignment_for_f16() { + assert_eq!(LLVMType::f16.align_of(ABI, &dl()), 16); + assert_eq!(LLVMType::f16.align_of(Preferred, &dl()), 16); + } + + #[test] + fn calculates_correct_alignment_for_f32() { + assert_eq!(LLVMType::f32.align_of(ABI, &dl()), 32); + assert_eq!(LLVMType::f32.align_of(Preferred, &dl()), 32); + } + + #[test] + fn calculates_correct_alignment_for_f64() { + assert_eq!(LLVMType::f64.align_of(ABI, &dl()), 64); + assert_eq!(LLVMType::f64.align_of(Preferred, &dl()), 64); + } + + #[test] + fn calculates_correct_alignment_for_ptr() { + assert_eq!(LLVMType::ptr.align_of(ABI, &dl()), 64); + assert_eq!(LLVMType::ptr.align_of(Preferred, &dl()), 64); + } + + #[test] + fn calculates_correct_alignment_for_void() { + assert_eq!(LLVMType::void.align_of(ABI, &dl()), 0); + assert_eq!(LLVMType::void.align_of(Preferred, &dl()), 0); + } + + #[test] + fn calculates_correct_alignment_for_array() { + assert_eq!( + LLVMArray::new(10, LLVMType::i8).align_of(ABI, &dl()), + LLVMType::i8.align_of(ABI, &dl()) + ); + assert_eq!( + LLVMArray::new(10, LLVMType::i8).align_of(Preferred, &dl()), + LLVMType::i8.align_of(Preferred, &dl()) + ); + } + + #[test] + fn calculates_correct_alignment_for_struct() { + let unpacked_struct = LLVMStruct::unpacked(&[LLVMType::bool, LLVMType::i16, LLVMType::i64]); + assert_eq!(unpacked_struct.align_of(ABI, &dl()), 64); + assert_eq!(unpacked_struct.align_of(Preferred, &dl()), 64); + + let packed_struct = 
LLVMStruct::packed(&[LLVMType::bool, LLVMType::i16, LLVMType::i64]); + assert_eq!(packed_struct.align_of(ABI, &dl()), 8); + assert_eq!(packed_struct.align_of(Preferred, &dl()), 64); + } + + #[test] + fn calculates_correct_alignment_for_function() { + let fn_type = LLVMFunction::new(LLVMType::i8, &[LLVMType::bool, LLVMType::ptr]); + assert_eq!(fn_type.align_of(ABI, &dl()), 8); + assert_eq!(fn_type.align_of(Preferred, &dl()), 8); + } + + #[test] + fn calculates_correct_alignment_for_metadata() { + assert_eq!(LLVMType::Metadata.align_of(ABI, &dl()), 0); + assert_eq!(LLVMType::Metadata.align_of(Preferred, &dl()), 0); + } + + #[test] + fn calculates_correct_offset_for_array_element() { + let simple_array = LLVMArray::new(10, LLVMType::i8); + assert_eq!(simple_array.offset_of_element_at(6, &dl()), 48); + + let complex_array = LLVMArray::new( + 8, + LLVMType::Structure(LLVMStruct::unpacked(&[ + LLVMType::bool, + LLVMType::i8, + LLVMType::i32, + ])), + ); + assert_eq!(complex_array.offset_of_element_at(10, &dl()), 640); + } + + #[test] + fn calculates_correct_offset_for_struct_element() { + let packed_struct_type = LLVMStruct::packed(&[LLVMType::bool, LLVMType::i8, LLVMType::i64]); + assert_eq!(packed_struct_type.offset_of_element_at(0, &dl()), 0); + assert_eq!(packed_struct_type.offset_of_element_at(1, &dl()), 8); + assert_eq!(packed_struct_type.offset_of_element_at(2, &dl()), 16); + assert_eq!(packed_struct_type.size_of(&dl()), 80); + + let unpacked_struct_type = + LLVMStruct::unpacked(&[LLVMType::bool, LLVMType::i8, LLVMType::i64]); + assert_eq!(unpacked_struct_type.offset_of_element_at(0, &dl()), 0); + assert_eq!(unpacked_struct_type.offset_of_element_at(1, &dl()), 8); + assert_eq!(unpacked_struct_type.offset_of_element_at(2, &dl()), 64); + assert_eq!(unpacked_struct_type.size_of(&dl()), 128); + } +} diff --git a/crates/compiler/src/messages.rs b/crates/compiler/src/messages.rs index f3a40ec4..a8a302e7 100644 --- a/crates/compiler/src/messages.rs +++ b/crates/compiler/src/messages.rs @@ -10,6 +10,14 @@ use crate::llvm::typesystem::LLVMType; pub const INSTRUCTION_NAMED: &str = "Instruction was not named, but all non-terminator instructions should be"; +/// An error message for use when expecting that the module mapping pass exists. +pub const MISSING_MODULE_MAP: &str = + "No data was available for the module mapping pass, but it is required"; + +/// An error message for use when expecting that a struct type has at least one +/// element. +pub const STRUCT_TYPE_WITH_NO_MEMBERS: &str = "Struct type had no members but must have at least 1"; + /// Asserts that the provided `instruction` is an instruction of the `expected` /// opcode. /// diff --git a/crates/compiler/src/obj_gen/mod.rs b/crates/compiler/src/obj_gen/mod.rs index 540b24bd..f6045537 100644 --- a/crates/compiler/src/obj_gen/mod.rs +++ b/crates/compiler/src/obj_gen/mod.rs @@ -50,11 +50,13 @@ use itertools::Itertools; use crate::{ context::SourceContext, llvm::{ + data_layout::DataLayout, special_intrinsics::SpecialIntrinsics, typesystem::{LLVMArray, LLVMFunction, LLVMStruct, LLVMType}, }, messages::{ INSTRUCTION_NAMED, + MISSING_MODULE_MAP, assert_correct_opcode, missing_indices_error, non_constant_constant_error, @@ -195,10 +197,7 @@ impl ObjectGenerator { // We need the module map to be able to make correct code generation decisions // here, so we start by grabbing this. If it doesn't exist, this is a programmer // error, so we crash loudly.
- let module_map = self.pass_data().get::<ModuleMap>().expect( - "The module mapping pass does not appear to have been run but is required for code \ - generation.", - ); + let module_map = self.get_module_map(); // We start by generating code for globals, as they are referenced in function // definitions. @@ -946,6 +945,7 @@ impl ObjectGenerator { bb: &mut BlockBuilder, func_ctx: &mut FunctionContext, polyfills: &PolyfillMap, + data_layout: &DataLayout, ) -> Result { // If the gep_index is constant, we can compute this at compile time. let const_value = if let BasicValueEnum::IntValue(int_val) = gep_index { @@ -960,18 +960,18 @@ impl ObjectGenerator { }; let actual_offset = if let Some(const_value) = const_value { - let offset = const_value * typ.size_of(); + let offset_bits = const_value * typ.alloc_size_of(data_layout); // In this case, it is a constant that we can compute at compile time. bb.simple_assign_new_const(ConstantValue { - value: offset as u128, + value: offset_bits as u128, typ: Type::Unsigned64, }) } else { // In this case it is non-constant, so we have to defer the offset computation // to runtime. let type_size_felts_const = bb.simple_assign_new_const(ConstantValue { - value: typ.size_of() as u128, + value: typ.alloc_size_of(data_layout) as u128, typ: Type::Unsigned64, }); @@ -1055,6 +1055,7 @@ impl ObjectGenerator { bb, func_ctx, &self.polyfills, + self.get_data_layout(), )?; // Then we can issue the call to the first offset within the GEP instruction. @@ -1088,14 +1089,10 @@ impl ObjectGenerator { // Our offset is then the sum of the sizes of all the elements in the struct // _before_ the element indicated by the GEP index. - let felts_before_index: usize = struct_type - .elements - .iter() - .take(gep_index_value) - .map(LLVMType::size_of) - .sum(); + let bits_before_index = struct_type + .offset_of_element_at(gep_index_value, self.get_data_layout()); let const_offset = bb.simple_assign_new_const(ConstantValue { - value: felts_before_index as u128, + value: bits_before_index as u128, typ: Type::Signed64, }); @@ -1118,6 +1115,7 @@ impl ObjectGenerator { bb, func_ctx, &self.polyfills, + self.get_data_layout(), )?; // Then we can issue the call to the first offset within the GEP @@ -1316,7 +1314,8 @@ impl ObjectGenerator { // offsets. let mut accumulated_offset = initial_offset; - for (elem_ty, elem_val) in struct_elements.iter().zip(element_variables.into_iter()) { + let elems_with_vars_and_ix = struct_elements.iter().zip(element_variables).enumerate(); + for (ix, (elem_ty, elem_val)) in elems_with_vars_and_ix { + // Each element must be written at its own padded offset from the base of + // the aggregate, rather than at the running sum of element sizes. + accumulated_offset = initial_offset + struct_type.offset_of_element_at(ix, self.get_data_layout()); match elem_ty { bool | i8 | i16 | i24 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => { self.store_primitive(elem_ty, elem_val, pointer, accumulated_offset, bb)?; @@ -1333,7 +1332,7 @@ impl ObjectGenerator { )))?, } - accumulated_offset += elem_ty.size_of(); } // There is nothing to return, so we are done. @@ -1377,7 +1376,7 @@ impl ObjectGenerator { // needed. let mut accumulated_offset = initial_offset; - for array_element in array_elements { + for (ix, array_element) in array_elements.into_iter().enumerate() { + // As with structs, each element lives at its own stride-multiple offset + // from the base of the aggregate. + accumulated_offset = initial_offset + array_type.offset_of_element_at(ix, self.get_data_layout()); match &array_elem_type { bool | i8 | i16 | i24 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => { self.store_primitive( @@ -1400,7 +1399,7 @@ impl ObjectGenerator { )))?, } - accumulated_offset += array_elem_type.size_of(); } // There is nothing to return, so we are done.
@@ -1569,7 +1568,8 @@ impl ObjectGenerator { let component_variables: Vec = struct_type .elements .iter() - .map(|elem_ty| { + .enumerate() + .map(|(ix, elem_ty)| { // We have to start by dispatching based on the child type let loaded_var = match elem_ty { bool | i8 | i16 | i24 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => { @@ -1589,8 +1589,7 @@ impl ObjectGenerator { // We always have to finish by incrementing the offset by the size of the thing // we just loaded so that the next load proceeds correctly. - let increment_offset_by = elem_ty.size_of(); - accumulated_offset += increment_offset_by; + // The step to the next element's padded offset is computed as a delta so + // that any base offset in `accumulated_offset` is preserved. + if ix + 1 < struct_type.elements.len() { + accumulated_offset += struct_type.offset_of_element_at(ix + 1, self.get_data_layout()) + - struct_type.offset_of_element_at(ix, self.get_data_layout()); + } // Then we return the loaded variable for use in the struct constructor. Ok(loaded_var) }) @@ -1631,7 +1630,7 @@ impl ObjectGenerator { // We need a variable that is the result of loading each element type. let mut component_variables: Vec = Vec::new(); for _ in 0..array_elem_count { component_variables.push(match array_elem_type { bool | i8 | i16 | i24 | i32 | i64 | i128 | f16 | f32 | f64 | ptr => { self.load_primitive(array_elem_type, pointer, accumulated_offset, bb)? @@ -1650,8 +1649,7 @@ impl ObjectGenerator { // We always have to finish by incrementing the offset by the size of the thing // we just loaded so that the next load proceeds correctly. - let increment_offset_by = array_elem_type.size_of(); - accumulated_offset += increment_offset_by; + // The element stride is its allocation size, which includes any alignment + // padding between elements. + accumulated_offset += array_elem_type.alloc_size_of(self.get_data_layout()); } // In FLO, we do not have any first-class array type, so arrays are structures @@ -2595,8 +2593,7 @@ impl ObjectGenerator { /// # Panics /// /// - If the provided instruction is _not_ an `alloca`. - /// - If the [`crate::pass::analysis::module_map::ModuleMap`] data is not - /// available. + /// - If the [`ModuleMap`] data is not available. /// - If the `alloca` instruction does not have a type to allocate. /// - If the `alloca` instruction does not have a count of that type to /// allocate. @@ -2615,7 +2612,7 @@ impl ObjectGenerator { "Alloca instruction encountered without a specified type to allocate", ) })?)?; - let type_size = allocated_type.size_of(); + let type_size = allocated_type.alloc_size_of(self.get_data_layout()); // We also need to know the allocation count, which inkwell always fills in with // the default of 1 for us if not otherwise specified. @@ -3174,6 +3171,28 @@ impl ObjectGenerator { } } +/// Utility methods on the object generator. +impl ObjectGenerator { + /// Gets a reference to the module map for the current module. + /// + /// # Panics + /// + /// - If the module map is not present in the stored pass data. + pub fn get_module_map(&self) -> &ModuleMap { + self.pass_data.get::<ModuleMap>().expect(MISSING_MODULE_MAP) + } + + /// Gets a reference to the data layout for the current module. + /// + /// # Panics + /// + /// - If the module map containing the data layout is not present in the + /// stored pass data. + pub fn get_data_layout(&self) -> &DataLayout { + &self.get_module_map().data_layout + } +} + #[cfg(test)] mod test { use std::path::Path; diff --git a/crates/compiler/src/polyfill.rs b/crates/compiler/src/polyfill.rs index 03888370..4d1f28b5 100644 --- a/crates/compiler/src/polyfill.rs +++ b/crates/compiler/src/polyfill.rs @@ -386,23 +386,26 @@ impl PolyfillMap { /// The definition of the [memory access and addressing operations](https://llvm.org/docs/LangRef.html#memory-access-and-addressing-operations).
impl PolyfillMap { + /// This is the platform-level allocator. + fn alloc(&mut self) { + // The first argument is the size of the allocation in bits, while the second + // argument is the number of instances of that size to allocate. + self.mk("alloc", &[LLVMType::i64, LLVMType::i64], LLVMType::ptr); + } + fn alloca(&mut self) { - // The first argument is the size, in felts, of the allocation, while the second + // The first argument is the size of the allocation in bits, while the second // argument is the number of instances of that size to allocate. self.mk("alloca", &[LLVMType::i64, LLVMType::i64], LLVMType::ptr); } - // TODO composites via iteration. load_* for each prim type, taking an offset - // from the ptr and the ptr. Need to fix insertvalue and extractvalue. Use - // construct and destructure to deal with these things. - fn load(&mut self) { // Due to the nature of the types available in FLO, we can only load and store // PRIMITIVE types. To that end, we need a load variant for _each_ primitive // type, and we have to decompose loads and stores of aggregates into loads and // stores using primitive types. - // Our load function takes the pointer to load and an offset (in felts) from + // Our load function takes the pointer to load and an offset in bits from // that pointer, and returns the result of loading from that pointer. for typ in Self::numptr_types() { self.mk("load", &[LLVMType::ptr, LLVMType::i64], typ); @@ -416,7 +419,7 @@ impl PolyfillMap { // stores using primitive types. // Our store function takes the value to store, the pointer to store it at, and - // an offset (in felts) from that pointer at which the primitive value should be + // an offset in bits from that pointer at which the primitive value should be // stored. for typ in Self::numptr_types() { self.mk( @@ -487,6 +490,7 @@ impl PolyfillMap { } fn all_memory_ops(&mut self) { + self.alloc(); self.alloca(); self.load(); self.store(); @@ -1250,6 +1254,6 @@ mod test { fn has_correct_polyfill_count() { let polyfills = PolyfillMap::new(); let count = polyfills.iter().count(); - assert_eq!(count, 1103); + assert_eq!(count, 1104); } } diff --git a/docs/Memory Model.md b/docs/Memory Model.md new file mode 100644 index 00000000..b698d710 --- /dev/null +++ b/docs/Memory Model.md @@ -0,0 +1,171 @@ +# Hieratika Memory Model + +CairoVM provides a memory model that is best described as having "write once" semantics. Each memory +cell—implicitly the size of a 252-bit field element (felt)—can only be written to _once_ during the +execution of the program. LLVM IR, however, expects a standard model of mutable memory, with memory +regions that can be allocated and mutated separately from the static single-assignment (SSA) form +used for its non-pointer-based values. + +While Cairo provides [mechanisms](#emulating-mutable-memory-in-cairo) for the emulation of mutable +memory semantics, these still operate on felt-based addressing, and are not suitable for the +read-write semantics expected from LLVM IR. To that end, this document outlines a +[system](#the-model) for presenting memory to the compiler and LLVM IR code to preserve its expected +semantics, and for mapping those semantics onto Cairo's. + +## The Model + +Hieratika has settled on a traditional, byte-addressed memory model. In other words, it is a core +operation to be able to offset and read pointers on byte boundaries. The key tenets of this model +are as follows: + +- Memory is addressable in increments of one (8-bit) byte. 
- Each felt stores 28 bytes of data (224 bits) toward its MSB, and a region of 28 bits of metadata + toward its LSB. +- These metadata bits are _not_ part of contiguous memory. The 28 bits of flags are excluded to form + a contiguous, byte-addressable space that is semantically uniform while the underlying + representation consists of 28-byte chunks encoded into felts. +- The memory subsystem will allow accessing memory at any byte offset of any allocated pointer. +- Reading from uninitialized memory is well-defined and will return zero bytes. +- Allocations will be handled by the [memory subsystem](#the-memory-subsystem), which will handle + making allocations contiguously or on felt boundaries as needed. +- The memory model provides no means to perform deallocation, in keeping with Cairo's write-once + model. While guest code will be able to call `deallocate`, this is a no-op. + +## The Polyfills + +[Polyfills](../crates/compiler/src/polyfill.rs) are pieces of functionality with _known names_ that +are implemented directly in Cairo to provide common runtime operations to the compiled code. They +range from simple things like `__llvm_add_i8_i8` (add two `i8`s together) to far more complex things +like `atomicrmw`. This memory model design is concerned with the core memory polyfills. These need +to be able to allocate memory, both on the heap and on the "stack", while also being able to +manipulate that memory. + +Hieratika defines two polyfills and two _sets of_ polyfills for interacting with memory. The two +polyfills are as follows: + +- `alloc : ( size : BitCount ) -> ( count : usize ) -> ptr`: This polyfill allocates a contiguous + region of `size * count` bits of memory, and returns the pointer to the start of that memory + region. This can be thought of as a heap allocation. +- `alloca : ( size : BitCount ) -> ( count : usize ) -> ptr`: This polyfill allocates a contiguous + region of `size * count` bits of memory, and returns the pointer to the start of that memory + region. This can be thought of as a local allocation. Said allocations go out of scope once the + function containing the allocation returns. Due to the memory model, however, they are never + deallocated, and this is purely an illustrative difference from `alloc` above. + +Hieratika also needs to be able to `load` from and `store` to memory. Unfortunately, the tool's +strongly-typed target (in the form of `FlatLowered`) means that types simply cannot be punned. In +other words, there needs to exist a `load` and `store` for every type. + +The problem with _this_, however, is that the space of types is _infinite_. To that end, the +Hieratika compiler decomposes loads and stores of aggregate types (structures, arrays, and so on) +into loads and stores to primitives at the correct offsets. Accordingly, the `load` and `store` +polyfills are defined for each of the following primitive types: `bool` (`i1`), `i8`, `i16`, `i32`, +`i64`, `i128`, `f32` (`float`), `f64` (`double`), and `ptr`. These families of polyfills are as +follows, with a sketch of the decomposition after the list: + +- `load : forall T => (address : Pointer) -> (offset : BitCount) -> (value : T)`: This polyfill + takes an `address`, as well as an `offset` from that address in bits, and loads a value of type `T` + from the specified location. +- `store : forall T => (value : T) -> (address : Pointer) -> (offset : BitCount) -> ()`: This + polyfill takes a value of type `T`, an `address` and an `offset` from that address in bits, and + stores the provided `value` at the specified location.
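+
+As an illustration of that decomposition, consider storing a value of type `{ i1, i8, i64 }`.
+Under the data layout used by this change's tests, its fields sit at bit offsets 0, 8, and 64
+respectively, so the aggregate store becomes three primitive stores. The following is a minimal
+sketch only: the `store_*` functions are hypothetical stand-ins for the per-type `store`
+polyfills (whose real mangled names are an implementation detail) and simply log their arguments:
+
+```rust
+type Ptr = usize;
+
+// Hypothetical stand-ins for the per-type `store` polyfills.
+fn store_i1(value: bool, addr: Ptr, offset_bits: usize) {
+    println!("store i1  {value} at {addr:#x} + {offset_bits} bits");
+}
+fn store_i8(value: u8, addr: Ptr, offset_bits: usize) {
+    println!("store i8  {value} at {addr:#x} + {offset_bits} bits");
+}
+fn store_i64(value: u64, addr: Ptr, offset_bits: usize) {
+    println!("store i64 {value} at {addr:#x} + {offset_bits} bits");
+}
+
+// Storing `{ i1, i8, i64 }` decomposes into one primitive store per field,
+// each at that field's padded bit offset from the aggregate's base pointer.
+fn store_aggregate(base: Ptr, value: (bool, u8, u64)) {
+    store_i1(value.0, base, 0);
+    store_i8(value.1, base, 8);
+    // Bits 16..64 are alignment padding and are never written.
+    store_i64(value.2, base, 64);
+}
+
+fn main() {
+    store_aggregate(0x1000, (true, 42, 7));
+}
+```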
+ +For now, if any of these polyfills fails to operate correctly (such as encountering a load from a +non-allocated memory region), it panics. + +## The Memory Subsystem + +The memory subsystem refers to the runtime functionality for managing memory. It is responsible for +allocating memory, and for loading and storing runtime values in the memory cells. It consists of +the [allocator](#the-allocator) and an underlying +[emulation](#emulating-mutable-memory-in-cairo) of mutable memory semantics to present to the guest +code. + +### The Allocator + +The allocator is responsible for providing memory to the guest program when requested, as well as +for mapping `load`s and `store`s from the LLVM memory semantics onto those of the underlying +memory. + +- The allocator is based on some [kind](#emulating-mutable-memory-in-cairo) of + semantically contiguous buffer that allows it to present an emulation of contiguous memory to the + guest code. Note that the underlying memory _will not_ be truly contiguous, spanning across felts + that may or may not be adjacent. +- The allocator handles mapping between this contiguous memory and the types of the data stored + using the `load` and `store` instructions. +- The allocator may perform runtime optimization to align values on felt boundaries to potentially + reduce load and store complexity. The LSB end of a felt is still byte-aligned, so this is allowed + under this model. + +Due to the write-once nature of Cairo's memory, the allocator does _not_ have to handle the freeing +of memory. On this platform, freeing memory is a no-op. + +### Emulating Mutable Memory in Cairo + +As Cairo's memory is write-once, Hieratika needs to _emulate_ mutability on top of this. The +prototypical way to do this—as proposed in the original paper on Cairo's AIR and used today in Cairo +Lang itself—is to have a dictionary. Under such an approach, each key serves as a pointer, while +each value contains the latest value stored at that pointer. + +The fundamental issue with this is that looking up the current value at a given pointer requires +time linear in the number of times that memory cell has been written to. While Cairo Lang encourages +a style that avoids requiring many writes, the fundamental nature of LLVM IR is that it will write +to and read from a given pointer many times in succession, making these lookups a significant +performance bottleneck. + +To that end, Hieratika's memory model will use a multi-level lookup mechanism that works as +follows: + +- It defines a factor $l$, which is the maximum number of linear steps that can be taken to find the + current value of a cell. +- Pointer lookup operates through a lookup buffer that aims to keep the number of steps needed to + look up memory values low. +- This buffer is swapped for a new buffer beginning with zero linear steps if more than $n$ pointers + in the previous buffer have a lookup cost $> \frac{l}{n}$, or if any linear traversal reaches $l$ + steps. + +While this does increase overall memory usage for the bookkeeping within the memory subsystem, it +should dramatically reduce the number of CairoVM steps it takes to read the current value from a +given pointer and offset. + +## Global Value Pointers + +Integrated with the system of simply-allocated pointers are the pointers that allow referencing two +different kinds of global value.
These are the [constants](#constant-pointers), which are +initialized and allocated at program startup, and the [functions](#function-pointers), which allow +referencing (and calling) functions dynamically at runtime. The below sections deal with the +implementation of these features. + +### Constant Pointers + +This section is TBD, and will be filled in as part of the work on constant pointers. + +### Function Pointers + +This section is TBD, and will be filled in as part of the work on function pointers. + +## Felt-Aligned Addressing - An Alternative Model + +Hieratika initially used a design for a memory model that operated on _felt_-aligned addressing +instead of our current byte-aligned [model](#the-model). This would have significantly reduced the +complexity of `load` and `store` operations in exchange for much higher memory usage. + +Take the type `{ i1, i8, i64 }`, for example. + +- In the byte-addressed model, this is all stored in a single felt: 8 bits for the `i1`, 8 bits + for the `i8`, 48 bits of padding, and then 64 bits for the `i64`. Loading the `i8`, for example, + would require extracting that second byte from the 252-bit felt value. +- In the felt-addressed model, however, this would be stored in _three_ felts: 1 bit in the first + felt, 8 bits in the second, and 64 bits in the third. Loading the `i8` would be as simple as + loading from that memory cell. + +For simplicity's sake, Hieratika originally adopted the second model, in order to gain experience +and determine the features necessary for a more complex model. Unfortunately, real-world LLVM IR +inputs quickly made it clear that type punning—interpreting a value of one type as a value of +another type under byte-addressing and alignment rules—was rampant. + +As an example, it proved common to see IR that allocated `[4 x i8]` and then wrote an `i16` to the +first two bytes and read an `i16` from the other two. Because, in the felt-aligned model, the first +two `i8`s would be written to individual felts, reading them back as a single `i16` becomes +significantly more complex. + +To that end, the project was forced to abandon this model in favor of a more traditional +byte-aligned addressing model.
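+
+As a concrete check of the byte-addressed layout described above, the same numbers can be
+reproduced with the typesystem helpers added in this change. The snippet below is a sketch
+rather than shipped code: it assumes the compiler crate is importable as `hieratika_compiler`
+and that `constant::TARGET_DATA_LAYOUT` and the `typesystem` items are publicly visible.
+
+```rust
+use hieratika_compiler::{
+    constant::TARGET_DATA_LAYOUT,
+    llvm::{
+        data_layout::DataLayout,
+        typesystem::{LLVMStruct, LLVMType},
+    },
+};
+
+fn main() {
+    let dl = DataLayout::new(TARGET_DATA_LAYOUT).expect("constant layout should parse");
+    let ty = LLVMStruct::unpacked(&[LLVMType::bool, LLVMType::i8, LLVMType::i64]);
+
+    // Field offsets in bits: the i1 at 0, the i8 at 8, and the i64 at 64,
+    // after 48 bits of alignment padding.
+    assert_eq!(ty.offset_of_element_at(0, &dl), 0);
+    assert_eq!(ty.offset_of_element_at(1, &dl), 8);
+    assert_eq!(ty.offset_of_element_at(2, &dl), 64);
+
+    // The total size is 128 bits, which fits within a single felt's 224 data
+    // bits.
+    assert_eq!(ty.size_of(&dl), 128);
+}
+```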