From 5e29b6f480fcf52dc0a6b14f409d75505b520fbb Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 20 Aug 2024 17:16:28 -0700 Subject: [PATCH 1/2] Update handling of unreachable code and heap types This commit updates validation of wasm modules with unreachable code using gc/heap types. It notably fixes cases with the shared-everything-threads proposal where existing instructions are polymorphic over `shared` and not and wasn't supported before. Specifically code was refactored to use `MaybeType` a bit more to propagate the bottom-ness and the `MaybeType::HeapBot` variant has grown a new `AbstractHeapType` payload for various instructions to use such as `any.convert_extern`. --- crates/wasmparser/src/readers/core/types.rs | 24 ++- crates/wasmparser/src/validator/operators.rs | 197 ++++++++++++------ crates/wasmparser/src/validator/types.rs | 20 +- .../gc-unreachable.wast | 152 ++++++++++++++ .../gc-unreachable.wast.json | 52 +++++ .../gc-unreachable.wast/0.print | 88 ++++++++ 6 files changed, 452 insertions(+), 81 deletions(-) create mode 100644 tests/local/shared-everything-threads/gc-unreachable.wast create mode 100644 tests/snapshots/local/shared-everything-threads/gc-unreachable.wast.json create mode 100644 tests/snapshots/local/shared-everything-threads/gc-unreachable.wast/0.print diff --git a/crates/wasmparser/src/readers/core/types.rs b/crates/wasmparser/src/readers/core/types.rs index 7b78c579b5..33cdee4230 100644 --- a/crates/wasmparser/src/readers/core/types.rs +++ b/crates/wasmparser/src/readers/core/types.rs @@ -1464,7 +1464,7 @@ pub enum AbstractHeapType { } impl AbstractHeapType { - const fn as_str(&self, nullable: bool) -> &str { + pub(crate) const fn as_str(&self, nullable: bool) -> &str { use AbstractHeapType::*; match (nullable, self) { (_, Any) => "any", @@ -1485,6 +1485,28 @@ impl AbstractHeapType { (false, NoExn) => "noexn", } } + + pub(crate) fn is_subtype_of(&self, other: AbstractHeapType) -> bool { + use AbstractHeapType::*; + + match (self, other) { + (a, b) if *a == b => true, + (Eq | I31 | Struct | Array | None, Any) => true, + (I31 | Struct | Array | None, Eq) => true, + (NoExtern, Extern) => true, + (NoFunc, Func) => true, + (None, I31 | Array | Struct) => true, + (NoExn, Exn) => true, + // Nothing else matches. (Avoid full wildcard matches so + // that adding/modifying variants is easier in the + // future.) + ( + Func | Extern | Exn | Any | Eq | Array | I31 | Struct | None | NoFunc | NoExtern + | NoExn, + _, + ) => false, + } + } } impl<'a> FromReader<'a> for StorageType { diff --git a/crates/wasmparser/src/validator/operators.rs b/crates/wasmparser/src/validator/operators.rs index b04e457cfb..ac523bde32 100644 --- a/crates/wasmparser/src/validator/operators.rs +++ b/crates/wasmparser/src/validator/operators.rs @@ -160,13 +160,23 @@ pub struct OperatorValidatorAllocations { /// Type storage within the validator. /// -/// This is used to manage the operand stack and notably isn't just `ValType` to -/// handle unreachable code and the "bottom" type. +/// This is used to manage the operand stack and notably isn't just `ValType` +/// to handle unreachable code and the "bottom" type. #[derive(Debug, Copy, Clone)] -enum MaybeType { +enum MaybeType { + /// A "bottom" type which represents unconstrained unreachable code. There + /// are no constraints on what this type may be. Bot, - HeapBot, - Type(ValType), + /// A "bottom" type for specifically heap types. + /// + /// This type is known to be a reference type, optionally the specific + /// abstract heap type listed. This type can be interpeted as either + /// `shared` or not-`shared`. Additionally it can be either nullable or + /// not. Currently no further refinements are required for wasm + /// instructions today, but this may grow in the future. + HeapBot(Option), + /// A known type with the type `T`. + Type(T), } // The validator is pretty performance-sensitive and `MaybeType` is the main @@ -180,7 +190,14 @@ impl core::fmt::Display for MaybeType { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { MaybeType::Bot => write!(f, "bot"), - MaybeType::HeapBot => write!(f, "heap-bot"), + MaybeType::HeapBot(ty) => { + write!(f, "(ref shared? ")?; + match ty { + Some(ty) => write!(f, "{}bot", ty.as_str(true))?, + None => write!(f, "bot")?, + } + write!(f, ")") + } MaybeType::Type(ty) => core::fmt::Display::fmt(ty, f), } } @@ -198,6 +215,33 @@ impl From for MaybeType { ty.into() } } +impl From> for MaybeType { + fn from(ty: MaybeType) -> MaybeType { + match ty { + MaybeType::Bot => MaybeType::Bot, + MaybeType::HeapBot(ty) => MaybeType::HeapBot(ty), + MaybeType::Type(t) => MaybeType::Type(t.into()), + } + } +} + +impl MaybeType { + fn as_non_null(&self) -> MaybeType { + match self { + MaybeType::Bot => MaybeType::Bot, + MaybeType::HeapBot(ty) => MaybeType::HeapBot(*ty), + MaybeType::Type(ty) => MaybeType::Type(ty.as_non_null()), + } + } + + fn is_maybe_shared(&self, resources: &impl WasmModuleResources) -> Option { + match self { + MaybeType::Bot => None, + MaybeType::HeapBot(_) => None, + MaybeType::Type(ty) => Some(resources.is_shared(*ty)), + } + } +} impl OperatorValidator { fn new(features: &WasmFeatures, allocs: OperatorValidatorAllocations) -> Self { @@ -330,7 +374,7 @@ impl OperatorValidator { pub fn peek_operand_at(&self, depth: usize) -> Option> { Some(match self.operands.iter().rev().nth(depth)? { MaybeType::Type(t) => Some(*t), - MaybeType::Bot | MaybeType::HeapBot => None, + MaybeType::Bot | MaybeType::HeapBot(..) => None, }) } @@ -570,10 +614,33 @@ where if let Some(expected) = expected { match (actual, expected) { // The bottom type matches all expectations - (MaybeType::Bot, _) + (MaybeType::Bot, _) => {} + // The "heap bottom" type only matches other references types, - // but not any integer types. - | (MaybeType::HeapBot, ValType::Ref(_)) => {} + // but not any integer types. Note that if the heap bottom is + // known to have a specific abstract heap type then a subtype + // check is performed against hte expected type. + (MaybeType::HeapBot(actual_ty), ValType::Ref(expected)) => { + if let Some(actual) = actual_ty { + let expected_shared = self.resources.is_shared(expected); + let actual = RefType::new( + false, + HeapType::Abstract { + shared: expected_shared, + ty: actual, + }, + ) + .unwrap(); + if !self.resources.is_subtype(actual.into(), expected.into()) { + bail!( + self.offset, + "type mismatch: expected {}, found {}", + ty_to_str(expected.into()), + ty_to_str(actual.into()) + ); + } + } + } // Use the `is_subtype` predicate to test if a found type matches // the expectation. @@ -590,7 +657,7 @@ where // A "heap bottom" type cannot match any numeric types. ( - MaybeType::HeapBot, + MaybeType::HeapBot(..), ValType::I32 | ValType::I64 | ValType::F32 | ValType::F64 | ValType::V128, ) => { bail!( @@ -605,10 +672,11 @@ where } /// Pop a reference type from the operand stack. - fn pop_ref(&mut self, expected: Option) -> Result> { + fn pop_ref(&mut self, expected: Option) -> Result> { match self.pop_operand(expected.map(|t| t.into()))? { - MaybeType::Bot | MaybeType::HeapBot => Ok(None), - MaybeType::Type(ValType::Ref(rt)) => Ok(Some(rt)), + MaybeType::Bot => Ok(MaybeType::HeapBot(None)), + MaybeType::HeapBot(ty) => Ok(MaybeType::HeapBot(ty)), + MaybeType::Type(ValType::Ref(rt)) => Ok(MaybeType::Type(rt)), MaybeType::Type(ty) => bail!( self.offset, "type mismatch: expected ref but found {}", @@ -622,13 +690,22 @@ where /// /// This function returns the popped reference type and its `shared`-ness, /// saving extra lookups for concrete types. - fn pop_maybe_shared_ref( - &mut self, - expected: AbstractHeapType, - ) -> Result> { + fn pop_maybe_shared_ref(&mut self, expected: AbstractHeapType) -> Result> { let actual = match self.pop_ref(None)? { - Some(rt) => rt, - None => return Ok(None), + MaybeType::Bot => return Ok(MaybeType::Bot), + MaybeType::HeapBot(None) => return Ok(MaybeType::HeapBot(None)), + MaybeType::HeapBot(Some(actual)) => { + if !actual.is_subtype_of(expected) { + bail!( + self.offset, + "type mismatch: expected subtype of {}, found {}", + expected.as_str(false), + actual.as_str(false), + ) + } + return Ok(MaybeType::HeapBot(Some(actual))); + } + MaybeType::Type(ty) => ty, }; // Change our expectation based on whether we're dealing with an actual // shared or unshared type. @@ -651,7 +728,7 @@ where "type mismatch: expected subtype of {expected}, found {actual}", ) } - Ok(Some((actual, is_actual_shared))) + Ok(MaybeType::Type(actual)) } /// Fetches the type for the local at `idx`, returning an error if it's out @@ -1695,8 +1772,8 @@ where let ty = match (ty1, ty2) { // All heap-related types aren't allowed with the `select` // instruction - (MaybeType::HeapBot, _) - | (_, MaybeType::HeapBot) + (MaybeType::HeapBot(..), _) + | (_, MaybeType::HeapBot(..)) | (MaybeType::Type(ValType::Ref(_)), _) | (_, MaybeType::Type(ValType::Ref(_))) => { bail!( @@ -2666,18 +2743,12 @@ where } fn visit_ref_as_non_null(&mut self) -> Self::Output { - let ty = match self.pop_ref(None)? { - Some(ty) => MaybeType::Type(ValType::Ref(ty.as_non_null())), - None => MaybeType::HeapBot, - }; + let ty = self.pop_ref(None)?.as_non_null(); self.push_operand(ty)?; Ok(()) } fn visit_br_on_null(&mut self, relative_depth: u32) -> Self::Output { - let ref_ty = match self.pop_ref(None)? { - None => MaybeType::HeapBot, - Some(ty) => MaybeType::Type(ValType::Ref(ty.as_non_null())), - }; + let ref_ty = self.pop_ref(None)?.as_non_null(); let (ft, kind) = self.jump(relative_depth)?; let label_types = self.label_types(ft, kind)?; self.pop_push_label_types(label_types)?; @@ -2734,8 +2805,14 @@ where fn visit_ref_eq(&mut self) -> Self::Output { let a = self.pop_maybe_shared_ref(AbstractHeapType::Eq)?; let b = self.pop_maybe_shared_ref(AbstractHeapType::Eq)?; - match (a, b) { - (Some((_, is_a_shared)), Some((_, is_b_shared))) => { + let a_is_shared = a.is_maybe_shared(&self.resources); + let b_is_shared = b.is_maybe_shared(&self.resources); + match (a_is_shared, b_is_shared) { + // One or both of the types are from unreachable code; assume + // the shared-ness matches. + (None, Some(_)) | (Some(_), None) | (None, None) => {} + + (Some(is_a_shared), Some(is_b_shared)) => { if is_a_shared != is_b_shared { bail!( self.offset, @@ -2743,10 +2820,6 @@ where ); } } - _ => { - // One or both of the types are from unreachable code; assume - // the shared-ness matches. - } } self.push_operand(ValType::I32) } @@ -4404,35 +4477,37 @@ where Ok(()) } fn visit_any_convert_extern(&mut self) -> Self::Output { - let extern_ref = self.pop_maybe_shared_ref(AbstractHeapType::Extern)?; - let (is_nullable, shared) = if let Some((extern_ref, shared)) = extern_ref { - (extern_ref.is_nullable(), shared) - } else { - // TODO: propagating unshared may be incorrect here - // (https://github.com/WebAssembly/shared-everything-threads/issues/80) - (false, false) - }; - let heap_type = HeapType::Abstract { - shared, - ty: AbstractHeapType::Any, + let any_ref = match self.pop_maybe_shared_ref(AbstractHeapType::Extern)? { + MaybeType::Bot | MaybeType::HeapBot(_) => { + MaybeType::HeapBot(Some(AbstractHeapType::Any)) + } + MaybeType::Type(ty) => { + let shared = self.resources.is_shared(ty); + let heap_type = HeapType::Abstract { + shared, + ty: AbstractHeapType::Any, + }; + let any_ref = RefType::new(ty.is_nullable(), heap_type).unwrap(); + MaybeType::Type(any_ref) + } }; - let any_ref = RefType::new(is_nullable, heap_type).unwrap(); self.push_operand(any_ref) } fn visit_extern_convert_any(&mut self) -> Self::Output { - let any_ref = self.pop_maybe_shared_ref(AbstractHeapType::Any)?; - let (is_nullable, shared) = if let Some((any_ref, shared)) = any_ref { - (any_ref.is_nullable(), shared) - } else { - // TODO: propagating unshared may be incorrect here - // (https://github.com/WebAssembly/shared-everything-threads/issues/80) - (false, false) - }; - let heap_type = HeapType::Abstract { - shared, - ty: AbstractHeapType::Extern, + let extern_ref = match self.pop_maybe_shared_ref(AbstractHeapType::Any)? { + MaybeType::Bot | MaybeType::HeapBot(_) => { + MaybeType::HeapBot(Some(AbstractHeapType::Extern)) + } + MaybeType::Type(ty) => { + let shared = self.resources.is_shared(ty); + let heap_type = HeapType::Abstract { + shared, + ty: AbstractHeapType::Extern, + }; + let extern_ref = RefType::new(ty.is_nullable(), heap_type).unwrap(); + MaybeType::Type(extern_ref) + } }; - let extern_ref = RefType::new(is_nullable, heap_type).unwrap(); self.push_operand(extern_ref) } fn visit_ref_test_non_null(&mut self, heap_type: HeapType) -> Self::Output { diff --git a/crates/wasmparser/src/validator/types.rs b/crates/wasmparser/src/validator/types.rs index 1843a46214..c131d63169 100644 --- a/crates/wasmparser/src/validator/types.rs +++ b/crates/wasmparser/src/validator/types.rs @@ -2798,25 +2798,7 @@ impl TypeList { shared: b_shared, ty: b_ty, }, - ) => { - a_shared == b_shared - && match (a_ty, b_ty) { - (Eq | I31 | Struct | Array | None, Any) => true, - (I31 | Struct | Array | None, Eq) => true, - (NoExtern, Extern) => true, - (NoFunc, Func) => true, - (None, I31 | Array | Struct) => true, - (NoExn, Exn) => true, - // Nothing else matches. (Avoid full wildcard matches so - // that adding/modifying variants is easier in the - // future.) - ( - Func | Extern | Exn | Any | Eq | Array | I31 | Struct | None | NoFunc - | NoExtern | NoExn, - _, - ) => false, - } - } + ) => a_shared == b_shared && a_ty.is_subtype_of(b_ty), (HT::Concrete(a), HT::Abstract { shared, ty }) => { let a_ty = &subtype(a_group, a).composite_type; diff --git a/tests/local/shared-everything-threads/gc-unreachable.wast b/tests/local/shared-everything-threads/gc-unreachable.wast new file mode 100644 index 0000000000..15a730076a --- /dev/null +++ b/tests/local/shared-everything-threads/gc-unreachable.wast @@ -0,0 +1,152 @@ +(module + (func + unreachable + ref.null (shared eq) + ref.eq + drop + + ref.null eq + ref.eq + drop + ) + (func + unreachable + any.convert_extern + ref.cast (ref i31) + drop + ) + (func + unreachable + any.convert_extern + ref.cast (ref (shared i31)) + drop + ) + (func + unreachable + ref.eq + drop + ) + (func + (local $a (ref null (shared eq))) + unreachable + local.get $a + ref.eq + drop + ) + (func + (local $a (ref null eq)) + unreachable + local.get $a + ref.eq + drop + ) + (func + (local $a (ref null eq)) + unreachable + ref.cast (ref eq) + local.get $a + ref.eq + drop + ) + (func + (local $a (ref null (shared eq))) + unreachable + ref.cast (ref (shared eq)) + local.get $a + ref.eq + drop + ) + (func + (local $a (ref null (shared eq))) + unreachable + any.convert_extern + extern.convert_any + any.convert_extern + ref.cast (ref (shared eq)) + local.get $a + ref.eq + drop + ) + (func + (local $a (ref null eq)) + unreachable + any.convert_extern + extern.convert_any + any.convert_extern + ref.cast (ref eq) + local.get $a + ref.eq + drop + ) + (func + unreachable + any.convert_extern + ref.cast (ref (shared array)) + array.len + drop + ) +) + +(assert_invalid + (module + (func + unreachable + any.convert_extern + ref.null eq + ref.eq + drop + ) + ) + "expected subtype of eq, found any") + +(assert_invalid + (module + (func + unreachable + extern.convert_any + ref.cast (ref (shared i31)) + drop + ) + ) + "expected (shared anyref), found (ref (shared extern))") + +(assert_invalid + (module + (func + unreachable + any.convert_extern + ref.cast (ref extern) + drop + ) + ) + "expected externref, found (ref any)") +(assert_invalid + (module + (func + unreachable + any.convert_extern + array.len + drop + ) + ) + "expected subtype of array, found any") +(assert_invalid + (module + (func + unreachable + ref.as_non_null + i32.eq + drop + ) + ) + "expected i32, found heap type") +(assert_invalid + (module + (func + block $l + unreachable + br_on_cast $l (ref any) (ref any) + end + ) + ) + "br_on_cast to label with empty types") diff --git a/tests/snapshots/local/shared-everything-threads/gc-unreachable.wast.json b/tests/snapshots/local/shared-everything-threads/gc-unreachable.wast.json new file mode 100644 index 0000000000..559fbd9f9f --- /dev/null +++ b/tests/snapshots/local/shared-everything-threads/gc-unreachable.wast.json @@ -0,0 +1,52 @@ +{ + "source_filename": "tests/local/shared-everything-threads/gc-unreachable.wast", + "commands": [ + { + "type": "module", + "line": 1, + "filename": "gc-unreachable.0.wasm" + }, + { + "type": "assert_invalid", + "line": 91, + "filename": "gc-unreachable.1.wasm", + "text": "expected subtype of eq, found any", + "module_type": "binary" + }, + { + "type": "assert_invalid", + "line": 103, + "filename": "gc-unreachable.2.wasm", + "text": "expected (shared anyref), found (ref (shared extern))", + "module_type": "binary" + }, + { + "type": "assert_invalid", + "line": 114, + "filename": "gc-unreachable.3.wasm", + "text": "expected externref, found (ref any)", + "module_type": "binary" + }, + { + "type": "assert_invalid", + "line": 124, + "filename": "gc-unreachable.4.wasm", + "text": "expected subtype of array, found any", + "module_type": "binary" + }, + { + "type": "assert_invalid", + "line": 134, + "filename": "gc-unreachable.5.wasm", + "text": "expected i32, found heap type", + "module_type": "binary" + }, + { + "type": "assert_invalid", + "line": 144, + "filename": "gc-unreachable.6.wasm", + "text": "br_on_cast to label with empty types", + "module_type": "binary" + } + ] +} \ No newline at end of file diff --git a/tests/snapshots/local/shared-everything-threads/gc-unreachable.wast/0.print b/tests/snapshots/local/shared-everything-threads/gc-unreachable.wast/0.print new file mode 100644 index 0000000000..5216e830c2 --- /dev/null +++ b/tests/snapshots/local/shared-everything-threads/gc-unreachable.wast/0.print @@ -0,0 +1,88 @@ +(module + (type (;0;) (func)) + (func (;0;) (type 0) + unreachable + ref.null (shared eq) + ref.eq + drop + ref.null eq + ref.eq + drop + ) + (func (;1;) (type 0) + unreachable + any.convert_extern + ref.cast (ref i31) + drop + ) + (func (;2;) (type 0) + unreachable + any.convert_extern + ref.cast (ref (shared i31)) + drop + ) + (func (;3;) (type 0) + unreachable + ref.eq + drop + ) + (func (;4;) (type 0) + (local $a (ref null (shared eq))) + unreachable + local.get $a + ref.eq + drop + ) + (func (;5;) (type 0) + (local $a eqref) + unreachable + local.get $a + ref.eq + drop + ) + (func (;6;) (type 0) + (local $a eqref) + unreachable + ref.cast (ref eq) + local.get $a + ref.eq + drop + ) + (func (;7;) (type 0) + (local $a (ref null (shared eq))) + unreachable + ref.cast (ref (shared eq)) + local.get $a + ref.eq + drop + ) + (func (;8;) (type 0) + (local $a (ref null (shared eq))) + unreachable + any.convert_extern + extern.convert_any + any.convert_extern + ref.cast (ref (shared eq)) + local.get $a + ref.eq + drop + ) + (func (;9;) (type 0) + (local $a eqref) + unreachable + any.convert_extern + extern.convert_any + any.convert_extern + ref.cast (ref eq) + local.get $a + ref.eq + drop + ) + (func (;10;) (type 0) + unreachable + any.convert_extern + ref.cast (ref (shared array)) + array.len + drop + ) +) From 72a8286182fe4a3b8ea15a9d0cfbb2904410f93d Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 21 Aug 2024 14:22:21 -0700 Subject: [PATCH 2/2] Fix dead code warning --- crates/wasmparser/src/readers/core/types.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/wasmparser/src/readers/core/types.rs b/crates/wasmparser/src/readers/core/types.rs index 33cdee4230..2a60095799 100644 --- a/crates/wasmparser/src/readers/core/types.rs +++ b/crates/wasmparser/src/readers/core/types.rs @@ -1486,6 +1486,7 @@ impl AbstractHeapType { } } + #[cfg(feature = "validate")] pub(crate) fn is_subtype_of(&self, other: AbstractHeapType) -> bool { use AbstractHeapType::*;