From eaebb8714b3a9c7558574b3bcdb04b003a5c1d15 Mon Sep 17 00:00:00 2001 From: Predrag Gruevski <2348618+obi1kenobi@users.noreply.github.com> Date: Sun, 18 Aug 2024 14:41:14 -0400 Subject: [PATCH] Implement sealed trait analysis and expose it in the adapter. (#343) (#346) --- src/adapter/mod.rs | 7 +- src/adapter/properties.rs | 17 ++- src/adapter/tests.rs | 122 +++++++++++++++++ src/indexed_crate.rs | 66 +++++++++- src/lib.rs | 4 +- src/rustdoc_schema.graphql | 15 +++ src/sealed_trait.rs | 187 +++++++++++++++++++++++++++ test_crates/sealed_traits/Cargo.toml | 6 + test_crates/sealed_traits/src/lib.rs | 85 ++++++++++++ 9 files changed, 503 insertions(+), 6 deletions(-) create mode 100644 src/sealed_trait.rs create mode 100644 test_crates/sealed_traits/Cargo.toml create mode 100644 test_crates/sealed_traits/src/lib.rs diff --git a/src/adapter/mod.rs b/src/adapter/mod.rs index 29569e0..8a50fe7 100644 --- a/src/adapter/mod.rs +++ b/src/adapter/mod.rs @@ -134,7 +134,12 @@ impl<'a> Adapter<'a> for RustdocAdapter<'a> { "AttributeMetaItem" => { properties::resolve_attribute_meta_item_property(contexts, property_name) } - "Trait" => properties::resolve_trait_property(contexts, property_name), + "Trait" => properties::resolve_trait_property( + contexts, + property_name, + self.current_crate, + self.previous_crate, + ), "ImplementedTrait" => { properties::resolve_implemented_trait_property(contexts, property_name) } diff --git a/src/adapter/properties.rs b/src/adapter/properties.rs index 7873c5c..9c386c3 100644 --- a/src/adapter/properties.rs +++ b/src/adapter/properties.rs @@ -7,9 +7,9 @@ use trustfall::{ FieldValue, }; -use crate::attributes::Attribute; +use crate::{attributes::Attribute, IndexedCrate}; -use super::vertex::Vertex; +use super::{origin::Origin, vertex::Vertex}; pub(super) fn resolve_crate_property<'a, V: AsVertex> + 'a>( contexts: ContextIterator<'a, V>, @@ -456,10 +456,23 @@ pub(super) fn resolve_raw_type_property<'a, V: AsVertex> + 'a>( pub(super) fn resolve_trait_property<'a, V: AsVertex> + 'a>( contexts: ContextIterator<'a, V>, property_name: &str, + current_crate: &'a IndexedCrate<'a>, + previous_crate: Option<&'a IndexedCrate<'a>>, ) -> ContextOutcomeIterator<'a, V, FieldValue> { match property_name { "unsafe" => resolve_property_with(contexts, field_property!(as_trait, is_unsafe)), "object_safe" => resolve_property_with(contexts, field_property!(as_trait, is_object_safe)), + "sealed" => resolve_property_with(contexts, move |vertex| { + let trait_item = vertex.as_item().expect("not an Item"); + let origin = vertex.origin; + + let indexed_crate = match origin { + Origin::CurrentCrate => current_crate, + Origin::PreviousCrate => previous_crate.expect("no previous crate provided"), + }; + + indexed_crate.is_trait_sealed(&trait_item.id).into() + }), _ => unreachable!("Trait property {property_name}"), } } diff --git a/src/adapter/tests.rs b/src/adapter/tests.rs index 8dff6e9..1d00db0 100644 --- a/src/adapter/tests.rs +++ b/src/adapter/tests.rs @@ -1,3 +1,8 @@ +// The Trustfall API requires the adapter to be passed in as an Arc. +// Our adapter is not Send/Sync (it doesn't need it), +// but there's currently nothing we can do about this lint. +#![allow(clippy::arc_with_non_send_sync)] + use std::collections::BTreeMap; use std::sync::Arc; @@ -154,6 +159,123 @@ fn rustdoc_finds_supertrait() { ); } +#[test] +fn rustdoc_sealed_traits() { + let path = "./localdata/test_data/sealed_traits/rustdoc.json"; + let content = std::fs::read_to_string(path) + .with_context(|| format!("Could not load {path} file, did you forget to run ./scripts/regenerate_test_rustdocs.sh ?")) + .expect("failed to load rustdoc"); + + let crate_ = serde_json::from_str(&content).expect("failed to parse rustdoc"); + let indexed_crate = IndexedCrate::new(&crate_); + let adapter = RustdocAdapter::new(&indexed_crate, None); + + let query = r#" +{ + Crate { + item { + ... on Trait { + name @output + sealed @output + } + } + } +} +"#; + + let variables: BTreeMap<&str, &str> = BTreeMap::default(); + + let schema = + Schema::parse(include_str!("../rustdoc_schema.graphql")).expect("schema failed to parse"); + + #[derive(Debug, PartialOrd, Ord, PartialEq, Eq, serde::Deserialize)] + struct Output { + name: String, + sealed: bool, + } + + let mut results: Vec<_> = + trustfall::execute_query(&schema, adapter.into(), query, variables.clone()) + .expect("failed to run query") + .map(|row| row.try_into_struct().expect("shape mismatch")) + .collect(); + results.sort_unstable(); + + let mut expected_results = vec![ + Output { + name: "Sealed".into(), + sealed: true, + }, + Output { + name: "InternalMarker".into(), + sealed: true, + }, + Output { + name: "DirectlyTraitSealed".into(), + sealed: true, + }, + Output { + name: "TransitivelyTraitSealed".into(), + sealed: true, + }, + Output { + name: "Unsealed".into(), + sealed: false, + }, + Output { + name: "MethodSealed".into(), + sealed: true, + }, + Output { + name: "TransitivelyMethodSealed".into(), + sealed: true, + }, + Output { + name: "NotMethodSealedBecauseOfDefaultImpl".into(), + sealed: false, + }, + Output { + name: "NotTransitivelySealed".into(), + sealed: false, + }, + Output { + name: "GenericSealed".into(), + sealed: true, + }, + Output { + name: "NotGenericSealedBecauseOfDefaultImpl".into(), + sealed: false, + }, + Output { + name: "IteratorExt".into(), + sealed: false, + }, + Output { + name: "Iterator".into(), + sealed: true, + }, + Output { + name: "ShadowedSubIterator".into(), + sealed: true, + }, + Output { + name: "Super".into(), + sealed: false, + }, + Output { + name: "Marker".into(), + sealed: true, + }, + Output { + name: "NotGenericSealedBecauseOfPubSupertrait".into(), + sealed: false, + }, + ]; + expected_results.sort_unstable(); + + similar_asserts::assert_eq!(expected_results, results,); +} + #[test] fn rustdoc_finds_consts() { let path = "./localdata/test_data/consts/rustdoc.json"; diff --git a/src/indexed_crate.rs b/src/indexed_crate.rs index 355194a..0bfce7e 100644 --- a/src/indexed_crate.rs +++ b/src/indexed_crate.rs @@ -1,11 +1,12 @@ use std::{ borrow::Borrow, + cell::RefCell, collections::{BTreeSet, HashMap}, }; use rustdoc_types::{Crate, Id, Item}; -use crate::{adapter::supported_item_kind, visibility_tracker::VisibilityTracker}; +use crate::{adapter::supported_item_kind, sealed_trait, visibility_tracker::VisibilityTracker}; /// The rustdoc for a crate, together with associated indexed data to speed up common operations. /// @@ -25,6 +26,11 @@ pub struct IndexedCrate<'a> { /// index: impl owner + impl'd item name -> list of (impl itself, the named item)) pub(crate) impl_index: Option, Vec<(&'a Item, &'a Item)>>>, + /// Caching whether a trait by the given `Id`` is known to be sealed, or known to be unsealed. + /// + /// If the `Id` isn't present in the cache, the trait's sealed status has not yet been computed. + sealed_trait_cache: RefCell>, + /// Trait items defined in external crates are not present in the `inner: &Crate` field, /// even if they are implemented by a type in that crate. This also includes /// Rust's built-in traits like `Debug, Send, Eq` etc. @@ -49,6 +55,7 @@ impl<'a> IndexedCrate<'a> { manually_inlined_builtin_traits: create_manually_inlined_builtin_traits(crate_), imports_index: None, impl_index: None, + sealed_trait_cache: Default::default(), }; let mut imports_index: HashMap> = @@ -153,6 +160,63 @@ impl<'a> IndexedCrate<'a> { Default::default() } } + + /// Return `true` if our analysis indicates the trait is sealed, and `false` otherwise. + /// + /// Our analysis is conservative: it has false-negatives but no false-positives. + /// If this method returns `true`, the trait is *definitely* sealed or else you've found a bug. + /// It may be possible to construct traits that *technically* are sealed for which our analysis + /// returns `false`. + /// + /// The goal of this method is to reflect author intent, not technicalities. + /// When Rustaceans seal traits on purpose, they do so with a limited number of techniques + /// that are well-defined and immediately recognizable to readers in the community: + /// - + /// - + /// + /// The analysis here looks for such techniques, which are always applied at the type signature + /// level. It does not inspect function bodies or do interprocedural analysis. + /// + /// ## Panics + /// + /// This method will panic if the provided `id` is not an item in this crate, + /// or does not correspond to a trait in this crate. + /// + /// ## Re-entrancy + /// + /// This method is re-entrant: calling it may cause additional calls to itself, inquiring about + /// the sealed-ness of a trait's supertraits. + /// + /// We rely on rustc to reject supertrait cycles in order to prevent infinite loops. + /// Here's a supertrait cycle that must be rejected by rustc: + /// ```compile_fail + /// pub trait First: Third {} + /// + /// pub trait Second: First {} + /// + /// pub trait Third: Second {} + /// ``` + pub fn is_trait_sealed(&self, id: &'a Id) -> bool { + let cache_ref = self.sealed_trait_cache.borrow(); + if let Some(cached) = cache_ref.get(id) { + return *cached; + } + + // Explicitly drop the ref, because this method is re-entrant + // and may need to write in a subsequent re-entrant call. + drop(cache_ref); + + let trait_item = &self.inner.index[id]; + let decision = sealed_trait::is_trait_sealed(self, trait_item); + + // Unfortunately there's no easy way to avoid the double-hashing here. + // This method is re-entrant (we may need to decide if a supertrait is sealed *first*), + // and the borrow-checker doesn't like holding the `entry()` API's mut borrow + // while resolving the answer in `.or_insert_with()`. + self.sealed_trait_cache.borrow_mut().insert(id, decision); + + decision + } } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] diff --git a/src/lib.rs b/src/lib.rs index 59f47ec..e23ddb8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,12 +1,12 @@ mod adapter; mod attributes; mod indexed_crate; +mod sealed_trait; +mod visibility_tracker; #[cfg(test)] pub(crate) mod test_util; -mod visibility_tracker; - // Re-export the Crate type so we can deserialize it. pub use rustdoc_types::Crate; diff --git a/src/rustdoc_schema.graphql b/src/rustdoc_schema.graphql index 9a6b07a..a1394f2 100644 --- a/src/rustdoc_schema.graphql +++ b/src/rustdoc_schema.graphql @@ -902,9 +902,24 @@ type Trait implements Item & Importable { visibility_limit: String! # own properties + """ + Whether this trait is defined as `unsafe` and requires the `unsafe` keyword when implementing. + """ unsafe: Boolean! + + """ + Whether this trait can be used as `dyn Trait`. + """ object_safe: Boolean! + """ + Whether downstream crates are prevented from implementing this trait themselves. + + Required implementation items can be added to sealed traits without causing a breaking change, + since no implementations of that trait may exist in other crates. + """ + sealed: Boolean! + # edges from Item span: Span attribute: [Attribute!] diff --git a/src/sealed_trait.rs b/src/sealed_trait.rs new file mode 100644 index 0000000..6ed9978 --- /dev/null +++ b/src/sealed_trait.rs @@ -0,0 +1,187 @@ +use rustdoc_types::{Item, Trait}; + +use crate::IndexedCrate; + +pub(crate) fn is_trait_sealed<'a>(indexed_crate: &IndexedCrate<'a>, trait_item: &'a Item) -> bool { + let trait_inner = unwrap_trait(trait_item); + + // If the trait is pub-in-priv, trivially sealed. + if is_pub_in_priv_item(indexed_crate, &trait_item.id) { + return true; + } + + // Does the trait have a supertrait that is: + // - defined in this crate + // - pub-in-priv, or otherwise sealed + if has_sealed_supertrait(indexed_crate, trait_inner) { + return true; + } + + // Does the trait have a method that: + // - does not have a default impl, and either: + // a. takes at least one non-`self` argument that is pub-in-priv, or + // b. has a generic parameter that causes it to be generic-sealed + // If so, the trait is method-sealed or generic-sealed, per: + // https://predr.ag/blog/definitive-guide-to-sealed-traits-in-rust/#sealing-traits-via-method-signatures + // https://jack.wrenn.fyi/blog/private-trait-methods/ + // https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=3393c3ae143cb75c9da7bfe6e8ff8084 + if is_method_sealed(indexed_crate, trait_inner) { + return true; + } + + false +} + +fn is_pub_in_priv_item<'a>(indexed_crate: &IndexedCrate<'a>, id: &'a rustdoc_types::Id) -> bool { + // TODO: We don't need all names here, one is plenty. See if this is worth optimizing. + indexed_crate.publicly_importable_names(id).is_empty() +} + +fn unwrap_trait(item: &Item) -> &'_ Trait { + match &item.inner { + rustdoc_types::ItemEnum::Trait(t) => t, + _ => unreachable!(), + } +} + +fn has_sealed_supertrait<'a>(indexed_crate: &IndexedCrate<'a>, inner: &'a Trait) -> bool { + for bound in &inner.bounds { + let supertrait = match bound { + rustdoc_types::GenericBound::TraitBound { + trait_: trait_path, .. + } => { + match indexed_crate.inner.index.get(&trait_path.id) { + Some(item) => item, + None => { + // Item from another crate, so clearly not pub-in-priv. + // + // TODO: Once we have the ability to do cross-crate analysis, consider + // whether this external trait is sealed. That can have + // some interesting SemVer implications as well. + return false; + } + } + } + rustdoc_types::GenericBound::Outlives(_) => { + continue; + } + }; + + // Otherwise, check if the supertrait is itself sealed. + // This catches cases like: + // ```rust + // mod priv { + // pub trait Sealed {} + // } + // + // pub trait First: Sealed {} + // + // pub trait Second: First {} + // ``` + // + // Here, both `First` and `Second` are sealed. + if is_trait_sealed(indexed_crate, supertrait) { + // N.B.: This cannot infinite-loop, since rustc denies cyclic trait bounds. + return true; + } + } + + false +} + +fn is_method_sealed<'a>(indexed_crate: &IndexedCrate<'a>, trait_inner: &'a Trait) -> bool { + for inner_item_id in &trait_inner.items { + let inner_item = &indexed_crate.inner.index[inner_item_id]; + if let rustdoc_types::ItemEnum::Function(func) = &inner_item.inner { + if func.has_body { + // This trait function has a default implementation. + // An implementation is not required in order to implement this trait on a type. + // Therefore, it cannot on its own cause the trait to be sealed. + continue; + } + + // Check for pub-in-priv function parameters. + for (_, param) in &func.decl.inputs { + if let rustdoc_types::Type::ResolvedPath(path) = param { + if is_local_pub_in_priv_path(indexed_crate, path) { + return true; + } + } + } + + // Check for generics-sealed methods, as described in: + // https://jack.wrenn.fyi/blog/private-trait-methods/ + // https://www.reddit.com/r/rust/comments/12cj6as/comment/jf21zsm/ + for generic_param in &func.generics.params { + match &generic_param.kind { + rustdoc_types::GenericParamDefKind::Type { + bounds, default, .. + } => { + // If the generic parameter has a default, it can't be used to seal. + if default.is_none() { + for bound in bounds { + if is_generic_type_bound_sealed(indexed_crate, bound) { + return true; + } + } + } + } + _ => continue, + } + } + } + } + + false +} + +fn is_generic_type_bound_sealed<'a>( + indexed_crate: &IndexedCrate<'a>, + bound: &'a rustdoc_types::GenericBound, +) -> bool { + match bound { + rustdoc_types::GenericBound::TraitBound { + trait_: trait_path, .. + } => { + // For the bound to be sealing, it needs to: + // - point to a pub-in-priv trait in this crate. + // - all supertraits of the pub-in-priv trait must also be pub-in-priv + let Some(item) = indexed_crate.inner.index.get(&trait_path.id) else { + // Not an item in this crate. + return false; + }; + if !is_pub_in_priv_item(indexed_crate, &item.id) { + return false; + } + + let trait_item = unwrap_trait(item); + + // Check all supertraits to ensure they are pub-in-priv as well. + for trait_bounds in &trait_item.bounds { + if let rustdoc_types::GenericBound::TraitBound { + trait_: inner_trait_path, + .. + } = trait_bounds + { + if !is_local_pub_in_priv_path(indexed_crate, inner_trait_path) { + return false; + } + } + } + + true + } + _ => false, + } +} + +fn is_local_pub_in_priv_path<'a>( + indexed_crate: &IndexedCrate<'a>, + path: &'a rustdoc_types::Path, +) -> bool { + let Some(item) = indexed_crate.inner.index.get(&path.id) else { + // Not an item in this crate. + return false; + }; + is_pub_in_priv_item(indexed_crate, &item.id) +} diff --git a/test_crates/sealed_traits/Cargo.toml b/test_crates/sealed_traits/Cargo.toml new file mode 100644 index 0000000..5db620d --- /dev/null +++ b/test_crates/sealed_traits/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "sealed_traits" +version = "0.1.0" +edition = "2021" + +[dependencies] diff --git a/test_crates/sealed_traits/src/lib.rs b/test_crates/sealed_traits/src/lib.rs new file mode 100644 index 0000000..5eba563 --- /dev/null +++ b/test_crates/sealed_traits/src/lib.rs @@ -0,0 +1,85 @@ +mod private { + pub trait Sealed {} + + pub struct Token; + + pub trait InternalMarker {} +} + +/// This trait is sealed since nobody can implement its pub-in-priv supertrait. +pub trait DirectlyTraitSealed: private::Sealed {} + +/// This trait is sealed since nobody can implement its supertrait. +pub trait TransitivelyTraitSealed: DirectlyTraitSealed {} + +pub trait Unsealed {} + +/// This trait is sealed because its argument type is pub-in-priv, +/// so external implementers cannot name it. +pub trait MethodSealed { + fn method(&self, token: private::Token) -> i64; +} + +/// This trait is sealed since nobody can implement its supertrait. +pub trait TransitivelyMethodSealed: MethodSealed {} + +/// This trait is *not* sealed. Its method cannot be overridden, +/// but implementing it is not required since the trait offers a default impl. +pub trait NotMethodSealedBecauseOfDefaultImpl { + fn method(&self, _token: private::Token) -> i64 { + 0 + } +} + +/// This trait is *not* sealed. Its supertrait is also not sealed. +pub trait NotTransitivelySealed: NotMethodSealedBecauseOfDefaultImpl {} + +/// This trait's method is generic-sealed: downstream implementors are not able +/// to write the trait bound since the internal marker trait is pub-in-priv. +pub trait GenericSealed { + fn method(&self); +} + +/// This trait is *not* sealed. Its method cannot be overridden, +/// but implementing it is not required since the trait offers a default impl. +pub trait NotGenericSealedBecauseOfDefaultImpl { + fn private_method(&self) {} +} + +/// This trait is *not* sealed. It merely depends on a trait from another crate. +pub trait IteratorExt: Iterator {} + +pub mod shadow_builtins { + /// This trait shadows the built-in (prelude) `Iterator` trait. + /// However, it is supertrait-sealed. + pub trait Iterator: super::private::Sealed {} + + /// This trait is also supertrait-sealed. + pub trait ShadowedSubIterator: Iterator {} +} + +pub mod generic_seal { + pub trait Super {} + + mod private { + pub trait Marker: super::Super {} + } + + /// This trait is *not* generic-sealed. + /// + /// While the `Marker` trait bound is pub-in-priv, it has a public supertrait `Super`. + /// The `Super` bound can be used downstream to implement this trait: + /// ```rust + /// use sealed_traits::generic_seal::Super; + /// use sealed_traits::generic_seal::NotGenericSealedBecauseOfPubSupertrait; + /// + /// struct Example; + /// + /// impl NotGenericSealedBecauseOfPubSupertrait for Example { + /// fn method(&self) {} + /// } + /// ``` + pub trait NotGenericSealedBecauseOfPubSupertrait { + fn method(&self); + } +}