From b35b7cd01baa9075b3dee192e80cda0b6bca0574 Mon Sep 17 00:00:00 2001 From: Taeer Bar-Yam Date: Fri, 13 Oct 2023 14:05:34 -0400 Subject: [PATCH 1/5] explicitly annotate lifetimes when unsafe is involved --- core/src/environment.rs | 4 ++-- core/src/identifier.rs | 11 +++++++---- core/src/term/array.rs | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/core/src/environment.rs b/core/src/environment.rs index e7aebec1e..702d84225 100644 --- a/core/src/environment.rs +++ b/core/src/environment.rs @@ -100,7 +100,7 @@ impl Environment { /// Creates an iterator that visits all layers from the most recent one to the oldest. /// The element iterator type is `Rc>`. - pub fn iter_layers(&self) -> EnvLayerIter<'_, K, V> { + pub fn iter_layers<'slf>(&'slf self) -> EnvLayerIter<'slf, K, V> { EnvLayerIter { env: if !self.was_cloned() { Some(NonNull::from(self)) @@ -121,7 +121,7 @@ impl Environment { /// the most recent one. It uses this order, so calling `collect` on this iterator to create a /// hashmap would have the same values as the Environment. The element iterator type is `(&'env /// K, &'env V)`, with `'env` being the lifetime of the Environment. - pub fn iter_elems(&self) -> EnvElemIter<'_, K, V> { + pub fn iter_elems<'slf>(&'slf self) -> EnvElemIter<'slf, K, V> { let mut env: Vec>> = self .iter_layers() // SAFETY: Rc::as_ptr never returnes null diff --git a/core/src/identifier.rs b/core/src/identifier.rs index d3d4caae3..b999905c9 100644 --- a/core/src/identifier.rs +++ b/core/src/identifier.rs @@ -258,12 +258,12 @@ mod interner { /// /// This operation cannot fails since the only way to have a [Symbol] is to have /// [interned](Interner::intern) the corresponding string first. - pub(crate) fn lookup(&self, sym: Symbol) -> &str { + pub(crate) fn lookup<'slf>(&'slf self, sym: Symbol) -> &'slf str { // SAFETY: We are making the returned &str lifetime the same as our struct, // which is okay here since the InnerInterner uses a typed_arena which prevents // deallocations, so the reference will be valid while the InnerInterner exists, // hence while the struct exists. - unsafe { std::mem::transmute(self.0.read().unwrap().lookup(sym)) } + unsafe { std::mem::transmute::<&'_ str, &'slf str>(self.0.read().unwrap().lookup(sym)) } } } @@ -300,8 +300,11 @@ mod interner { // It is also okay to use it from inside the mutex, since typed_arena does not allow // deallocation, so references are valid until the arena drop, which is tied to the // struct drop. + // XXX: we have to use &'a str here, not &'self str like the comment indicates. what's going on? let in_string = unsafe { - std::mem::transmute(self.arena.lock().unwrap().alloc_str(string.as_ref())) + std::mem::transmute::<&'_ str, &'a str>( + self.arena.lock().unwrap().alloc_str(string.as_ref()), + ) }; let sym = Symbol(self.vec.len() as u32); self.vec.push(in_string); @@ -312,7 +315,7 @@ mod interner { /// /// This operation cannot fails since the only way to have a [Symbol] /// is to have [interned](InnerInterner::intern) the corresponding string first. - fn lookup(&self, sym: Symbol) -> &str { + fn lookup<'slf>(&'slf self, sym: Symbol) -> &'slf str { self.vec[sym.0 as usize] } } diff --git a/core/src/term/array.rs b/core/src/term/array.rs index 88bcdc0d7..bcc9d99d1 100644 --- a/core/src/term/array.rs +++ b/core/src/term/array.rs @@ -160,7 +160,7 @@ impl IntoIterator for Array { // Otherwise, we clone everything. unsafe { - let mut inner: Rc<[ManuallyDrop]> = transmute(self.inner); + let mut inner = transmute::, Rc<[ManuallyDrop]>>(self.inner); let idx = self.start; let end = self.end; From d5970b0adfc4b54cb50f80c9e4b480335d549586 Mon Sep 17 00:00:00 2001 From: Taeer Bar-Yam Date: Mon, 16 Oct 2023 15:36:58 -0400 Subject: [PATCH 2/5] use &'a lifetime rather than &'self this simplifies things and eliminates a usage of `unsafe`. --- core/src/identifier.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/core/src/identifier.rs b/core/src/identifier.rs index b999905c9..e4668bc55 100644 --- a/core/src/identifier.rs +++ b/core/src/identifier.rs @@ -258,12 +258,8 @@ mod interner { /// /// This operation cannot fails since the only way to have a [Symbol] is to have /// [interned](Interner::intern) the corresponding string first. - pub(crate) fn lookup<'slf>(&'slf self, sym: Symbol) -> &'slf str { - // SAFETY: We are making the returned &str lifetime the same as our struct, - // which is okay here since the InnerInterner uses a typed_arena which prevents - // deallocations, so the reference will be valid while the InnerInterner exists, - // hence while the struct exists. - unsafe { std::mem::transmute::<&'_ str, &'slf str>(self.0.read().unwrap().lookup(sym)) } + pub(crate) fn lookup(&self, sym: Symbol) -> &'a str { + self.0.read().unwrap().lookup(sym) } } @@ -315,7 +311,7 @@ mod interner { /// /// This operation cannot fails since the only way to have a [Symbol] /// is to have [interned](InnerInterner::intern) the corresponding string first. - fn lookup<'slf>(&'slf self, sym: Symbol) -> &'slf str { + fn lookup(&self, sym: Symbol) -> &'a str { self.vec[sym.0 as usize] } } From b652cbe230f05c45dd825c1ba43b737939dd7199 Mon Sep 17 00:00:00 2001 From: Taeer Bar-Yam Date: Mon, 6 Nov 2023 14:28:10 -0500 Subject: [PATCH 3/5] correctly drop Array::IntoIter --- core/src/term/array.rs | 73 ++++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 20 deletions(-) diff --git a/core/src/term/array.rs b/core/src/term/array.rs index bcc9d99d1..24b203a67 100644 --- a/core/src/term/array.rs +++ b/core/src/term/array.rs @@ -157,29 +157,50 @@ impl IntoIterator for Array { // to the inner slice, then we can: // - Drop the elements outside our inner view, // - Move out the elements inside out inner view. + // - Drop the rest of the elements when we're dropped // Otherwise, we clone everything. - - unsafe { - let mut inner = transmute::, Rc<[ManuallyDrop]>>(self.inner); - let idx = self.start; - let end = self.end; - - if let Some(slice) = Rc::get_mut(&mut inner) { - for term in &mut slice[..idx] { + // + // If we start as a shared reference, we could become the only reference + // later, but we clone everything anyways, so we don't end up in an + // invalid in-between state where some terms have been freed manually + // and others haven't. + + let inner = if Rc::strong_count(&self.inner) != 1 || Rc::weak_count(&self.inner) != 0 { + IntoIterInner::Shared(self.inner) + } else { + unsafe { + let mut inner = + transmute::, Rc<[ManuallyDrop]>>(self.inner); + let slice = + Rc::get_mut(&mut inner).expect("non-unique Rc after checking for uniqueness"); + for term in &mut slice[..self.start] { ManuallyDrop::drop(term) } - for term in &mut slice[end..] { + for term in &mut slice[self.end..] { ManuallyDrop::drop(term) } - } - IntoIter { inner, idx, end } + IntoIterInner::Owned(inner) + } + }; + IntoIter { + inner, + idx: self.start, + end: self.end, } } } +enum IntoIterInner { + Shared(Rc<[RichTerm]>), + // This shouldn't really be an Rc. It should only ever have one reference. + // There may be a way to rewrite this once unsized locals are stabilized. + // But for now, there is no good way to repackage the inner array, so we + // keep it in an Rc. + Owned(Rc<[ManuallyDrop]>), +} pub struct IntoIter { - inner: Rc<[ManuallyDrop]>, + inner: IntoIterInner, idx: usize, end: usize, } @@ -191,14 +212,14 @@ impl Iterator for IntoIter { if self.idx == self.end { None } else { - let term = match Rc::get_mut(&mut self.inner) { - None => self.inner[..self.end] - .get(self.idx) - .cloned() - .map(ManuallyDrop::into_inner), - Some(slice) => slice[..self.end] - .get_mut(self.idx) - .map(|t| unsafe { ManuallyDrop::take(t) }), + let term = match &mut self.inner { + IntoIterInner::Shared(inner) => inner.get(self.idx).cloned(), + IntoIterInner::Owned(inner) => unsafe { + Rc::get_mut(inner) + .expect("non-unique Rc after checking for uniqueness") + .get_mut(self.idx) + .map(|t| ManuallyDrop::take(t)) + }, }; self.idx += 1; @@ -207,6 +228,18 @@ impl Iterator for IntoIter { } } +impl Drop for IntoIter { + fn drop(&mut self) { + // drop the rest of the items we haven't iterated through + if let IntoIterInner::Owned(inner) = &mut self.inner { + let inner = Rc::get_mut(inner).expect("non-unique Rc after checking for uniqueness"); + for term in &mut inner[self.idx..self.end] { + unsafe { ManuallyDrop::drop(term) } + } + } + } +} + impl ExactSizeIterator for IntoIter { fn len(&self) -> usize { self.end - self.idx From 60afe785361ede507e794fbb98f8fdc891b0d640 Mon Sep 17 00:00:00 2001 From: Taeer Bar-Yam Date: Tue, 7 Nov 2023 12:00:57 -0500 Subject: [PATCH 4/5] revert earlier lifetime changes, but clarify comments --- core/src/environment.rs | 1 + core/src/identifier.rs | 37 +++++++++++++++++++++++++------------ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/core/src/environment.rs b/core/src/environment.rs index 702d84225..efc4d5ee5 100644 --- a/core/src/environment.rs +++ b/core/src/environment.rs @@ -121,6 +121,7 @@ impl Environment { /// the most recent one. It uses this order, so calling `collect` on this iterator to create a /// hashmap would have the same values as the Environment. The element iterator type is `(&'env /// K, &'env V)`, with `'env` being the lifetime of the Environment. + #[allow(clippy::needless_lifetimes)] pub fn iter_elems<'slf>(&'slf self) -> EnvElemIter<'slf, K, V> { let mut env: Vec>> = self .iter_layers() diff --git a/core/src/identifier.rs b/core/src/identifier.rs index e4668bc55..9f6f2a3c2 100644 --- a/core/src/identifier.rs +++ b/core/src/identifier.rs @@ -240,9 +240,11 @@ mod interner { /// The interner, which serves a double purpose: it pre-allocates space /// so that [Ident](super::Ident) labels are created faster /// and it makes it so that labels are stored only once, saving space. - pub(crate) struct Interner<'a>(RwLock>); + // NOTE: We set the lifetime parameter of InnerInterner to 'static since + // it's arbitrary, and there's no reason to expose it to users of Interner + pub(crate) struct Interner(RwLock>); - impl<'a> Interner<'a> { + impl Interner { /// Creates an empty [Interner]. pub(crate) fn new() -> Self { Self(RwLock::new(InnerInterner::new())) @@ -258,24 +260,32 @@ mod interner { /// /// This operation cannot fails since the only way to have a [Symbol] is to have /// [interned](Interner::intern) the corresponding string first. - pub(crate) fn lookup(&self, sym: Symbol) -> &'a str { - self.0.read().unwrap().lookup(sym) + pub(crate) fn lookup<'slf>(&'slf self, sym: Symbol) -> &'slf str { + // SAFETY: We are making the returned &str lifetime the same as our struct, + // which is okay here since the InnerInterner uses a typed_arena which prevents + // deallocations, so the reference will be valid while the InnerInterner exists, + // hence while the struct exists. + unsafe { std::mem::transmute::<&'_ str, &'slf str>(self.0.read().unwrap().lookup(sym)) } } } /// The main part of the Interner. - struct InnerInterner<'a> { + /// 'internal should never be exposed to the outside, as it is an + /// implementation detail and could be set to anything. It should be treated + /// within the implementation of InnerInterner as the lifetime of the object + /// itself, since that's what it is turned into in the `lookup()` method. + struct InnerInterner<'internal> { /// Preallocates space where strings are stored. arena: Mutex>, /// Prevents the arena from creating different [Symbols](Symbol) for the same string. - map: HashMap<&'a str, Symbol>, + map: HashMap<&'internal str, Symbol>, /// Allows retrieving a string from a [Symbol]. - vec: Vec<&'a str>, + vec: Vec<&'internal str>, } - impl<'a> InnerInterner<'a> { + impl<'internal> InnerInterner<'internal> { /// Creates an empty [InnerInterner]. fn new() -> Self { Self { @@ -295,10 +305,10 @@ mod interner { // This is okay since the lifetime of the arena is identical to the one of the struct. // It is also okay to use it from inside the mutex, since typed_arena does not allow // deallocation, so references are valid until the arena drop, which is tied to the - // struct drop. - // XXX: we have to use &'a str here, not &'self str like the comment indicates. what's going on? + // struct drop. Since there is no 'self lifetime, we use 'internal instead, which + // at least has 'internal: 'self. let in_string = unsafe { - std::mem::transmute::<&'_ str, &'a str>( + std::mem::transmute::<&'_ str, &'internal str>( self.arena.lock().unwrap().alloc_str(string.as_ref()), ) }; @@ -311,7 +321,10 @@ mod interner { /// /// This operation cannot fails since the only way to have a [Symbol] /// is to have [interned](InnerInterner::intern) the corresponding string first. - fn lookup(&self, sym: Symbol) -> &'a str { + #[allow(clippy::needless_lifetimes)] + fn lookup<'slf>(&'slf self, sym: Symbol) -> &'slf str { + // The lifetime implicitly shrinks from 'internal 'slf, which + // prevents 'internal from leaking out. self.vec[sym.0 as usize] } } From 2d2061ac95b2a5990f19c099369aa9a265480e0c Mon Sep 17 00:00:00 2001 From: Taeer Bar-Yam Date: Tue, 7 Nov 2023 15:55:44 -0500 Subject: [PATCH 5/5] revert needless lifetimes --- core/src/environment.rs | 3 +-- core/src/identifier.rs | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/core/src/environment.rs b/core/src/environment.rs index efc4d5ee5..d19fba41e 100644 --- a/core/src/environment.rs +++ b/core/src/environment.rs @@ -121,8 +121,7 @@ impl Environment { /// the most recent one. It uses this order, so calling `collect` on this iterator to create a /// hashmap would have the same values as the Environment. The element iterator type is `(&'env /// K, &'env V)`, with `'env` being the lifetime of the Environment. - #[allow(clippy::needless_lifetimes)] - pub fn iter_elems<'slf>(&'slf self) -> EnvElemIter<'slf, K, V> { + pub fn iter_elems(&self) -> EnvElemIter<'_, K, V> { let mut env: Vec>> = self .iter_layers() // SAFETY: Rc::as_ptr never returnes null diff --git a/core/src/identifier.rs b/core/src/identifier.rs index 9f6f2a3c2..695f5fdb2 100644 --- a/core/src/identifier.rs +++ b/core/src/identifier.rs @@ -321,9 +321,8 @@ mod interner { /// /// This operation cannot fails since the only way to have a [Symbol] /// is to have [interned](InnerInterner::intern) the corresponding string first. - #[allow(clippy::needless_lifetimes)] - fn lookup<'slf>(&'slf self, sym: Symbol) -> &'slf str { - // The lifetime implicitly shrinks from 'internal 'slf, which + fn lookup(&self, sym: Symbol) -> &str { + // The lifetime implicitly shrinks from 'internal to 'self, which // prevents 'internal from leaking out. self.vec[sym.0 as usize] }