From c404a0a02915e4f6d329d7667ed30b8519b8a964 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Tue, 5 Dec 2023 13:49:56 -0500 Subject: Move soft deletion into the forest implementation This allows us to filter out deleted items whenever we rebuild a tree. --- src/forest.rs | 280 +++++++++++++++++++++++++++++++++++------------- src/frontier.rs | 2 +- src/frontier/image.rs | 4 +- src/frontier/mean.rs | 10 +- src/frontier/min.rs | 12 +-- src/main.rs | 1 - src/soft.rs | 288 -------------------------------------------------- 7 files changed, 215 insertions(+), 382 deletions(-) delete mode 100644 src/soft.rs diff --git a/src/forest.rs b/src/forest.rs index 28ccb24..4feffcb 100644 --- a/src/forest.rs +++ b/src/forest.rs @@ -7,6 +7,19 @@ use acap::vp::FlatVpTree; use std::iter; +/// A trait for objects that can be soft-deleted. +pub trait SoftDelete { + /// Check whether this item is deleted. + fn is_deleted(&self) -> bool; +} + +/// Blanket [SoftDelete] implementation for references. +impl<'a, T: SoftDelete> SoftDelete for &'a T { + fn is_deleted(&self) -> bool { + (*self).is_deleted() + } +} + /// The number of bits dedicated to the flat buffer. const BUFFER_BITS: usize = 6; /// The maximum size of the buffer. @@ -15,7 +28,8 @@ const BUFFER_SIZE: usize = 1 << BUFFER_BITS; /// A dynamic wrapper for a static nearest neighbor search data structure. /// /// This type applies [dynamization](https://en.wikipedia.org/wiki/Dynamization) to an arbitrary -/// nearest neighbor search structure `T`, allowing new items to be added dynamically. +/// nearest neighbor search structure `T`, allowing new items to be added dynamically. It also +/// implements [soft deletion](https://en.wiktionary.org/wiki/soft_deletion) for dynamic removal. #[derive(Debug)] pub struct Forest { /// A flat buffer used for the first few items, to avoid repeatedly rebuilding small trees. @@ -26,6 +40,7 @@ pub struct Forest { impl Forest where + T: SoftDelete, U: FromIterator + IntoIterator, { /// Create a new empty forest. @@ -41,29 +56,65 @@ where self.extend(iter::once(item)); } - /// Get the number of items in the forest. - pub fn len(&self) -> usize { + /// Remove deleted items from the buffer. + fn filter_buffer(&mut self) { + self.buffer.retain(|e| !e.is_deleted()); + } + + /// Drain all items out of the trees and into the buffer. + fn deforest(&mut self) { + self.buffer.extend( + self.trees + .drain(..) + .flatten() + .flatten() + .filter(|e| !e.is_deleted()) + ); + } + + /// Move excess items from the buffer to the trees. + fn reforest(&mut self) { let mut len = self.buffer.len(); - for (i, slot) in self.trees.iter().enumerate() { - if slot.is_some() { - len += 1 << (i + BUFFER_BITS); + + for i in 0.. { + let bit = 1 << (i + BUFFER_BITS); + if bit > len { + break; } - } - len - } - /// Check if this forest is empty. - pub fn is_empty(&self) -> bool { - if !self.buffer.is_empty() { - return false; + if i >= self.trees.len() { + self.trees.push(None); + } + + let tree = self.trees[i].take(); + self.trees[i] = match (tree, len & bit > 0) { + (Some(tree), true) => { + len += bit; + self.buffer.extend(tree.into_iter().filter(|e| !e.is_deleted())); + None + } + (None, true) => { + let offset = self.buffer.len().saturating_sub(bit); + Some(self.buffer.drain(offset..).collect()) + } + (tree, _) => tree, + } } - self.trees.iter().flatten().next().is_none() + debug_assert!(self.buffer.len() < BUFFER_SIZE); + } + + /// Rebuild this index, discarding deleted items. + pub fn rebuild(&mut self) { + self.filter_buffer(); + self.deforest(); + self.reforest(); } } impl Default for Forest where + T: SoftDelete, U: FromIterator + IntoIterator, { fn default() -> Self { @@ -73,44 +124,22 @@ where impl Extend for Forest where + T: SoftDelete, U: FromIterator + IntoIterator, { fn extend>(&mut self, items: I) { self.buffer.extend(items); - if self.buffer.len() < BUFFER_SIZE { - return; - } - - let len = self.len(); - for i in 0.. { - let bit = 1 << (i + BUFFER_BITS); - - if bit > len { - break; - } - - if i >= self.trees.len() { - self.trees.push(None); - } - - if len & bit == 0 { - if let Some(tree) = self.trees[i].take() { - self.buffer.extend(tree); - } - } else if self.trees[i].is_none() { - let offset = self.buffer.len() - bit; - self.trees[i] = Some(self.buffer.drain(offset..).collect()); - } + if self.buffer.len() >= BUFFER_SIZE { + self.filter_buffer(); + self.reforest(); } - - debug_assert!(self.buffer.len() < BUFFER_SIZE); - debug_assert!(self.len() == len); } } impl FromIterator for Forest where + T: SoftDelete, U: FromIterator + IntoIterator, { fn from_iter>(items: I) -> Self { @@ -120,19 +149,55 @@ where } } -impl IntoIterator for Forest { - type Item = T::Item; - type IntoIter = std::vec::IntoIter; +impl IntoIterator for Forest +where + T: SoftDelete, + U: FromIterator + IntoIterator, +{ + type Item = T; + type IntoIter = std::vec::IntoIter; fn into_iter(mut self) -> Self::IntoIter { - self.buffer.extend(self.trees.into_iter().flatten().flatten()); + self.filter_buffer(); + self.deforest(); self.buffer.into_iter() } } +/// [Neighborhood] wrapper that ignores soft-deleted items. +#[derive(Debug)] +struct SoftNeighborhood(N); + +impl Neighborhood for SoftNeighborhood +where + V: SoftDelete, + K: Proximity, + N: Neighborhood, +{ + fn target(&self) -> K { + self.0.target() + } + + fn contains(&self, distance: D) -> bool + where + D: PartialOrd + { + self.0.contains(distance) + } + + fn consider(&mut self, item: V) -> K::Distance { + if item.is_deleted() { + self.target().distance(&item) + } else { + self.0.consider(item) + } + } +} + impl NearestNeighbors for Forest where K: Proximity, + V: SoftDelete, T: NearestNeighbors, T: IntoIterator, { @@ -143,13 +208,18 @@ where N: Neighborhood<&'k K, &'v V> { for item in &self.buffer { - neighborhood.consider(item); + if !item.is_deleted() { + neighborhood.consider(item); + } } + let neighborhood = SoftNeighborhood(neighborhood); + self.trees .iter() .flatten() .fold(neighborhood, |n, t| t.search(n)) + .0 } } @@ -163,7 +233,8 @@ pub type VpForest = Forest>; mod tests { use super::*; - use acap::euclid::Euclidean; + use acap::coords::Coordinates; + use acap::euclid::{euclidean_distance, Euclidean, EuclideanDistance}; use acap::exhaustive::ExhaustiveSearch; use acap::knn::{NearestNeighbors, Neighbor}; @@ -171,10 +242,66 @@ mod tests { type Point = Euclidean<[f32; 3]>; + #[derive(Clone, Debug, PartialEq)] + struct SoftPoint { + point: [f32; 3], + deleted: bool, + } + + impl SoftPoint { + fn new(x: f32, y: f32, z: f32) -> Self { + Self { + point: [x, y, z], + deleted: false, + } + } + + fn deleted(x: f32, y: f32, z: f32) -> Self { + Self { + point: [x, y, z], + deleted: true, + } + } + } + + impl SoftDelete for SoftPoint { + fn is_deleted(&self) -> bool { + self.deleted + } + } + + impl Proximity for SoftPoint { + type Distance = EuclideanDistance; + + fn distance(&self, other: &Self) -> Self::Distance { + euclidean_distance(&self.point, &other.point) + } + } + + impl Coordinates for SoftPoint { + type Value = ::Value; + + fn dims(&self) -> usize { + self.point.dims() + } + + fn coord(&self, i: usize) -> Self::Value { + self.point.coord(i) + } + } + + impl Proximity for Point { + type Distance = EuclideanDistance; + + fn distance(&self, other: &SoftPoint) -> Self::Distance { + euclidean_distance(&self, &other.point) + } + } + fn test_empty(from_iter: &F) where - T: NearestNeighbors, - F: Fn(Vec) -> T, + T: NearestNeighbors, + F: Fn(Vec) -> T, { let points = Vec::new(); let index = from_iter(points); @@ -189,38 +316,39 @@ mod tests { fn test_pythagorean(from_iter: &F) where - T: NearestNeighbors, - F: Fn(Vec) -> T, + T: NearestNeighbors, + F: Fn(Vec) -> T, { let points = vec![ - Euclidean([3.0, 4.0, 0.0]), - Euclidean([5.0, 0.0, 12.0]), - Euclidean([0.0, 8.0, 15.0]), - Euclidean([1.0, 2.0, 2.0]), - Euclidean([2.0, 3.0, 6.0]), - Euclidean([4.0, 4.0, 7.0]), + SoftPoint::deleted(0.0, 0.0, 0.0), + SoftPoint::new(3.0, 4.0, 0.0), + SoftPoint::new(5.0, 0.0, 12.0), + SoftPoint::new(0.0, 8.0, 15.0), + SoftPoint::new(1.0, 2.0, 2.0), + SoftPoint::new(2.0, 3.0, 6.0), + SoftPoint::new(4.0, 4.0, 7.0), ]; let index = from_iter(points); let target = Euclidean([0.0, 0.0, 0.0]); assert_eq!( index.nearest(&target).expect("No nearest neighbor found"), - Neighbor::new(&Euclidean([1.0, 2.0, 2.0]), 3.0) + Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0) ); assert_eq!(index.nearest_within(&target, 2.0), None); assert_eq!( index.nearest_within(&target, 4.0).expect("No nearest neighbor found within 4.0"), - Neighbor::new(&Euclidean([1.0, 2.0, 2.0]), 3.0) + Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0) ); assert!(index.k_nearest(&target, 0).is_empty()); assert_eq!( index.k_nearest(&target, 3), vec![ - Neighbor::new(&Euclidean([1.0, 2.0, 2.0]), 3.0), - Neighbor::new(&Euclidean([3.0, 4.0, 0.0]), 5.0), - Neighbor::new(&Euclidean([2.0, 3.0, 6.0]), 7.0), + Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0), + Neighbor::new(&SoftPoint::new(3.0, 4.0, 0.0), 5.0), + Neighbor::new(&SoftPoint::new(2.0, 3.0, 6.0), 7.0), ] ); @@ -228,32 +356,38 @@ mod tests { assert_eq!( index.k_nearest_within(&target, 3, 6.0), vec![ - Neighbor::new(&Euclidean([1.0, 2.0, 2.0]), 3.0), - Neighbor::new(&Euclidean([3.0, 4.0, 0.0]), 5.0), + Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0), + Neighbor::new(&SoftPoint::new(3.0, 4.0, 0.0), 5.0), ] ); assert_eq!( index.k_nearest_within(&target, 3, 8.0), vec![ - Neighbor::new(&Euclidean([1.0, 2.0, 2.0]), 3.0), - Neighbor::new(&Euclidean([3.0, 4.0, 0.0]), 5.0), - Neighbor::new(&Euclidean([2.0, 3.0, 6.0]), 7.0), + Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0), + Neighbor::new(&SoftPoint::new(3.0, 4.0, 0.0), 5.0), + Neighbor::new(&SoftPoint::new(2.0, 3.0, 6.0), 7.0), ] ); } fn test_random_points(from_iter: &F) where - T: NearestNeighbors, - F: Fn(Vec) -> T, + T: NearestNeighbors, + F: Fn(Vec) -> T, { let mut points = Vec::new(); for _ in 0..255 { - points.push(Euclidean([random(), random(), random()])); + points.push(SoftPoint::new(random(), random(), random())); + points.push(SoftPoint::deleted(random(), random(), random())); } let target = Euclidean([random(), random(), random()]); - let eindex = ExhaustiveSearch::from_iter(points.clone()); + let eindex: ExhaustiveSearch<_> = points + .iter() + .filter(|p| !p.is_deleted()) + .cloned() + .collect(); + let index = from_iter(points); assert_eq!(index.k_nearest(&target, 3), eindex.k_nearest(&target, 3)); @@ -262,8 +396,8 @@ mod tests { /// Test a [NearestNeighbors] impl. fn test_nearest_neighbors(from_iter: F) where - T: NearestNeighbors, - F: Fn(Vec) -> T, + T: NearestNeighbors, + F: Fn(Vec) -> T, { test_empty(&from_iter); test_pythagorean(&from_iter); diff --git a/src/frontier.rs b/src/frontier.rs index 40b1d35..3ca6707 100644 --- a/src/frontier.rs +++ b/src/frontier.rs @@ -5,7 +5,7 @@ pub mod mean; pub mod min; use crate::color::Rgb8; -use crate::soft::SoftDelete; +use crate::forest::SoftDelete; use acap::coords::Coordinates; use acap::distance::{Proximity, Metric}; diff --git a/src/frontier/image.rs b/src/frontier/image.rs index ded0cbe..149a1b0 100644 --- a/src/frontier/image.rs +++ b/src/frontier/image.rs @@ -3,7 +3,7 @@ use super::{Frontier, Pixel, Target}; use crate::color::{ColorSpace, Rgb8}; -use crate::soft::SoftKdTree; +use crate::forest::KdForest; use acap::knn::NearestNeighbors; @@ -12,7 +12,7 @@ use image::RgbImage; /// A [Frontier] that places colors on the closest pixel of a target image. #[derive(Debug)] pub struct ImageFrontier { - nodes: SoftKdTree>, + nodes: KdForest>, width: u32, height: u32, len: usize, diff --git a/src/frontier/mean.rs b/src/frontier/mean.rs index 30443c9..9ccfe7e 100644 --- a/src/frontier/mean.rs +++ b/src/frontier/mean.rs @@ -3,7 +3,7 @@ use super::{neighbors, Frontier, RcPixel, Target}; use crate::color::{ColorSpace, Rgb8}; -use crate::soft::SoftKdForest; +use crate::forest::KdForest; use acap::knn::NearestNeighbors; @@ -33,7 +33,7 @@ where #[derive(Debug)] pub struct MeanFrontier { pixels: Vec>, - forest: SoftKdForest>, + forest: KdForest>, width: u32, height: u32, len: usize, @@ -114,12 +114,6 @@ where self.len += pixels.len(); self.forest.extend(pixels); - - if 2 * self.deleted >= self.len { - self.forest.rebuild(); - self.len -= self.deleted; - self.deleted = 0; - } } } diff --git a/src/frontier/min.rs b/src/frontier/min.rs index da08746..2a332e6 100644 --- a/src/frontier/min.rs +++ b/src/frontier/min.rs @@ -3,7 +3,7 @@ use super::{neighbors, Frontier, RcPixel, Target}; use crate::color::{ColorSpace, Rgb8}; -use crate::soft::SoftKdForest; +use crate::forest::KdForest; use acap::knn::NearestNeighbors; @@ -33,7 +33,7 @@ where pub struct MinFrontier { rng: R, pixels: Vec>, - forest: SoftKdForest>, + forest: KdForest>, width: u32, height: u32, x0: u32, @@ -57,7 +57,7 @@ where Self { rng, pixels, - forest: SoftKdForest::new(), + forest: KdForest::new(), width, height, x0, @@ -118,12 +118,6 @@ where } } - if 2 * self.deleted >= self.len { - self.forest.rebuild(); - self.len -= self.deleted; - self.deleted = 0; - } - Some((x, y)) } } diff --git a/src/main.rs b/src/main.rs index 204a582..c139ee8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,6 @@ pub mod color; pub mod forest; pub mod frontier; pub mod hilbert; -pub mod soft; use crate::color::source::{AllColors, ColorSource, ImageColors}; use crate::color::{order, ColorSpace, LabSpace, LuvSpace, OklabSpace, Rgb8, RgbSpace}; diff --git a/src/soft.rs b/src/soft.rs deleted file mode 100644 index 4ce4204..0000000 --- a/src/soft.rs +++ /dev/null @@ -1,288 +0,0 @@ -//! [Soft deletion](https://en.wiktionary.org/wiki/soft_deletion) for nearest neighbor search. - -use super::forest::{KdForest, VpForest}; - -use acap::distance::Proximity; -use acap::kd::FlatKdTree; -use acap::knn::{NearestNeighbors, Neighborhood}; -use acap::vp::FlatVpTree; - -use std::iter; -use std::mem; - -/// A trait for objects that can be soft-deleted. -pub trait SoftDelete { - /// Check whether this item is deleted. - fn is_deleted(&self) -> bool; -} - -/// Blanket [SoftDelete] implementation for references. -impl<'a, T: SoftDelete> SoftDelete for &'a T { - fn is_deleted(&self) -> bool { - (*self).is_deleted() - } -} - -/// [Neighborhood] wrapper that ignores soft-deleted items. -#[derive(Debug)] -struct SoftNeighborhood(N); - -impl Neighborhood for SoftNeighborhood -where - V: SoftDelete, - K: Proximity, - N: Neighborhood, -{ - fn target(&self) -> K { - self.0.target() - } - - fn contains(&self, distance: D) -> bool - where - D: PartialOrd - { - self.0.contains(distance) - } - - fn consider(&mut self, item: V) -> K::Distance { - if item.is_deleted() { - self.target().distance(&item) - } else { - self.0.consider(item) - } - } -} - -/// A [NearestNeighbors] implementation that supports [soft deletes](https://en.wiktionary.org/wiki/soft_deletion). -#[derive(Debug)] -pub struct SoftSearch(T); - -impl SoftSearch -where - T: SoftDelete, - U: FromIterator + IntoIterator, -{ - /// Create a new empty soft index. - pub fn new() -> Self { - Self(iter::empty().collect()) - } - - /// Push a new item into this index. - pub fn push(&mut self, item: T) - where - U: Extend, - { - self.0.extend(iter::once(item)); - } - - /// Rebuild this index, discarding deleted items. - pub fn rebuild(&mut self) { - let items = mem::replace(&mut self.0, iter::empty().collect()); - self.0 = items.into_iter().filter(|e| !e.is_deleted()).collect(); - } -} - -impl Default for SoftSearch -where - T: SoftDelete, - U: FromIterator + IntoIterator, -{ - fn default() -> Self { - Self::new() - } -} - -impl> Extend for SoftSearch { - fn extend>(&mut self, iter: I) { - self.0.extend(iter); - } -} - -impl> FromIterator for SoftSearch { - fn from_iter>(iter: I) -> Self { - Self(U::from_iter(iter)) - } -} - -impl IntoIterator for SoftSearch { - type Item = T::Item; - type IntoIter = T::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() - } -} - -impl NearestNeighbors for SoftSearch -where - K: Proximity, - V: SoftDelete, - T: NearestNeighbors, -{ - fn search<'k, 'v, N>(&'v self, neighborhood: N) -> N - where - K: 'k, - V: 'v, - N: Neighborhood<&'k K, &'v V> - { - self.0.search(SoftNeighborhood(neighborhood)).0 - } -} - -/// A k-d forest that supports soft deletes. -pub type SoftKdForest = SoftSearch>; - -/// A k-d tree that supports soft deletes. -pub type SoftKdTree = SoftSearch>; - -/// A VP forest that supports soft deletes. -pub type SoftVpForest = SoftSearch>; - -/// A VP tree that supports soft deletes. -pub type SoftVpTree = SoftSearch>; - -#[cfg(test)] -mod tests { - use super::*; - - use acap::coords::Coordinates; - use acap::euclid::{euclidean_distance, Euclidean, EuclideanDistance}; - use acap::knn::Neighbor; - - type Point = Euclidean<[f32; 3]>; - - #[derive(Debug, PartialEq)] - struct SoftPoint { - point: [f32; 3], - deleted: bool, - } - - impl SoftPoint { - fn new(x: f32, y: f32, z: f32) -> Self { - Self { - point: [x, y, z], - deleted: false, - } - } - - fn deleted(x: f32, y: f32, z: f32) -> Self { - Self { - point: [x, y, z], - deleted: true, - } - } - } - - impl SoftDelete for SoftPoint { - fn is_deleted(&self) -> bool { - self.deleted - } - } - - impl Proximity for SoftPoint { - type Distance = EuclideanDistance; - - fn distance(&self, other: &Self) -> Self::Distance { - euclidean_distance(&self.point, &other.point) - } - } - - impl Coordinates for SoftPoint { - type Value = ::Value; - - fn dims(&self) -> usize { - self.point.dims() - } - - fn coord(&self, i: usize) -> Self::Value { - self.point.coord(i) - } - } - - impl Proximity for Point { - type Distance = EuclideanDistance; - - fn distance(&self, other: &SoftPoint) -> Self::Distance { - euclidean_distance(&self, &other.point) - } - } - - fn test_index(index: &T) - where - T: NearestNeighbors, - { - let target = Euclidean([0.0, 0.0, 0.0]); - - assert_eq!( - index.nearest(&target).expect("No nearest neighbor found"), - Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0) - ); - - assert_eq!(index.nearest_within(&target, 2.0), None); - assert_eq!( - index.nearest_within(&target, 4.0).expect("No nearest neighbor found within 4.0"), - Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0) - ); - - assert_eq!( - index.k_nearest(&target, 3), - vec![ - Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0), - Neighbor::new(&SoftPoint::new(3.0, 4.0, 0.0), 5.0), - Neighbor::new(&SoftPoint::new(2.0, 3.0, 6.0), 7.0), - ] - ); - - assert_eq!( - index.k_nearest_within(&target, 3, 6.0), - vec![ - Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0), - Neighbor::new(&SoftPoint::new(3.0, 4.0, 0.0), 5.0), - ] - ); - assert_eq!( - index.k_nearest_within(&target, 3, 8.0), - vec![ - Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0), - Neighbor::new(&SoftPoint::new(3.0, 4.0, 0.0), 5.0), - Neighbor::new(&SoftPoint::new(2.0, 3.0, 6.0), 7.0), - ] - ); - } - - fn test_soft_index(index: &mut SoftSearch) - where - T: Extend, - T: FromIterator, - T: IntoIterator, - T: NearestNeighbors, - { - let points = vec![ - SoftPoint::deleted(0.0, 0.0, 0.0), - SoftPoint::new(3.0, 4.0, 0.0), - SoftPoint::new(5.0, 0.0, 12.0), - SoftPoint::new(0.0, 8.0, 15.0), - SoftPoint::new(1.0, 2.0, 2.0), - SoftPoint::new(2.0, 3.0, 6.0), - SoftPoint::new(4.0, 4.0, 7.0), - ]; - - for point in points { - index.push(point); - } - test_index(index); - - index.rebuild(); - test_index(index); - } - - #[test] - fn test_soft_kd_forest() { - test_soft_index(&mut SoftKdForest::new()); - } - - #[test] - fn test_soft_vp_forest() { - test_soft_index(&mut SoftVpForest::new()); - } -} -- cgit v1.2.3