From 39c0348c9f98b4dd29bd112a0a2a42faa67c92d4 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Wed, 24 Jun 2020 15:20:02 -0400 Subject: Use the acap nearest neighbors implementation --- src/color.rs | 107 ++++++---- src/forest.rs | 292 +++++++++++++++++++++++++++ src/frontier.rs | 104 +++++++--- src/frontier/image.rs | 11 +- src/frontier/mean.rs | 19 +- src/frontier/min.rs | 17 +- src/main.rs | 3 +- src/metric.rs | 546 -------------------------------------------------- src/metric/approx.rs | 131 ------------ src/metric/forest.rs | 187 ----------------- src/metric/kd.rs | 224 --------------------- src/metric/soft.rs | 292 --------------------------- src/metric/vp.rs | 137 ------------- src/soft.rs | 289 ++++++++++++++++++++++++++ 14 files changed, 745 insertions(+), 1614 deletions(-) create mode 100644 src/forest.rs delete mode 100644 src/metric.rs delete mode 100644 src/metric/approx.rs delete mode 100644 src/metric/forest.rs delete mode 100644 src/metric/kd.rs delete mode 100644 src/metric/soft.rs delete mode 100644 src/metric/vp.rs create mode 100644 src/soft.rs (limited to 'src') diff --git a/src/color.rs b/src/color.rs index 5d50aa1..ff113a7 100644 --- a/src/color.rs +++ b/src/color.rs @@ -3,8 +3,9 @@ pub mod order; pub mod source; -use crate::metric::kd::{Cartesian, CartesianMetric}; -use crate::metric::{Metric, SquaredDistance}; +use acap::coords::{Coordinates, CoordinateMetric, CoordinateProximity}; +use acap::distance::{Metric, Proximity}; +use acap::euclid::{EuclideanDistance, euclidean_distance}; use image::Rgb; @@ -14,7 +15,11 @@ use std::ops::Index; pub type Rgb8 = Rgb; /// A [color space](https://en.wikipedia.org/wiki/Color_space). -pub trait ColorSpace: Copy + From + CartesianMetric { +pub trait ColorSpace: Copy + From + + Coordinates + + Metric + + CoordinateMetric<::Value, Distance = ::Distance> +{ /// Compute the average of the given colors. fn average>(colors: I) -> Self; } @@ -41,32 +46,38 @@ impl From for RgbSpace { } } -impl Metric<[f64]> for RgbSpace { - type Distance = SquaredDistance; +impl Coordinates for RgbSpace { + type Value = f64; - fn distance(&self, other: &[f64]) -> Self::Distance { - self.0.distance(other) + fn dims(&self) -> usize { + self.0.dims() + } + + fn coord(&self, i: usize) -> f64 { + self.0.coord(i) } } -impl Metric for RgbSpace { - type Distance = SquaredDistance; +impl Proximity for RgbSpace { + type Distance = EuclideanDistance; fn distance(&self, other: &Self) -> Self::Distance { - self.0.distance(&other.0) + euclidean_distance(&self.0, &other.0) } } -impl Cartesian for RgbSpace { - fn dimensions(&self) -> usize { - self.0.dimensions() - } +impl Metric for RgbSpace {} + +impl CoordinateProximity for RgbSpace { + type Distance = EuclideanDistance; - fn coordinate(&self, i: usize) -> f64 { - self.0.coordinate(i) + fn distance_to_coords(&self, other: &[f64]) -> Self::Distance { + euclidean_distance(&self.0, other) } } +impl CoordinateMetric for RgbSpace {} + impl ColorSpace for RgbSpace { fn average>(colors: I) -> Self { let mut sum = [0.0, 0.0, 0.0]; @@ -161,32 +172,38 @@ impl From for LabSpace { } } -impl Metric<[f64]> for LabSpace { - type Distance = SquaredDistance; +impl Coordinates for LabSpace { + type Value = f64; + + fn dims(&self) -> usize { + self.0.dims() + } - fn distance(&self, other: &[f64]) -> Self::Distance { - self.0.distance(other) + fn coord(&self, i: usize) -> f64 { + self.0.coord(i) } } -impl Metric for LabSpace { - type Distance = SquaredDistance; +impl Proximity for LabSpace { + type Distance = EuclideanDistance; fn distance(&self, other: &Self) -> Self::Distance { - self.0.distance(&other.0) + euclidean_distance(self.0, other.0) } } -impl Cartesian for LabSpace { - fn dimensions(&self) -> usize { - self.0.dimensions() - } +impl Metric for LabSpace {} - fn coordinate(&self, i: usize) -> f64 { - self.0.coordinate(i) +impl CoordinateProximity for LabSpace { + type Distance = EuclideanDistance; + + fn distance_to_coords(&self, other: &[f64]) -> Self::Distance { + euclidean_distance(&self.0, other) } } +impl CoordinateMetric for LabSpace {} + impl ColorSpace for LabSpace { fn average>(colors: I) -> Self { let mut sum = [0.0, 0.0, 0.0]; @@ -241,32 +258,38 @@ impl From for LuvSpace { } } -impl Metric<[f64]> for LuvSpace { - type Distance = SquaredDistance; +impl Coordinates for LuvSpace { + type Value = f64; + + fn dims(&self) -> usize { + self.0.dims() + } - fn distance(&self, other: &[f64]) -> Self::Distance { - self.0.distance(other) + fn coord(&self, i: usize) -> f64 { + self.0.coord(i) } } -impl Metric for LuvSpace { - type Distance = SquaredDistance; +impl Proximity for LuvSpace { + type Distance = EuclideanDistance; fn distance(&self, other: &Self) -> Self::Distance { - self.0.distance(&other.0) + euclidean_distance(&self.0, &other.0) } } -impl Cartesian for LuvSpace { - fn dimensions(&self) -> usize { - self.0.dimensions() - } +impl Metric for LuvSpace {} + +impl CoordinateProximity for LuvSpace { + type Distance = EuclideanDistance; - fn coordinate(&self, i: usize) -> f64 { - self.0.coordinate(i) + fn distance_to_coords(&self, other: &[f64]) -> Self::Distance { + euclidean_distance(&self.0, other) } } +impl CoordinateMetric for LuvSpace {} + impl ColorSpace for LuvSpace { fn average>(colors: I) -> Self { let mut sum = [0.0, 0.0, 0.0]; diff --git a/src/forest.rs b/src/forest.rs new file mode 100644 index 0000000..56dff7e --- /dev/null +++ b/src/forest.rs @@ -0,0 +1,292 @@ +//! [Dynamization](https://en.wikipedia.org/wiki/Dynamization) for nearest neighbor search. + +use acap::distance::Proximity; +use acap::kd::FlatKdTree; +use acap::vp::FlatVpTree; +use acap::{NearestNeighbors, Neighborhood}; + +use std::iter::{self, Extend, FromIterator}; + +/// The number of bits dedicated to the flat buffer. +const BUFFER_BITS: usize = 6; +/// The maximum size of the buffer. +const BUFFER_SIZE: usize = 1 << BUFFER_BITS; + +/// A dynamic wrapper for a static nearest neighbor search data structure. +/// +/// This type applies [dynamization](https://en.wikipedia.org/wiki/Dynamization) to an arbitrary +/// nearest neighbor search structure `T`, allowing new items to be added dynamically. +#[derive(Debug)] +pub struct Forest { + /// A flat buffer used for the first few items, to avoid repeatedly rebuilding small trees. + buffer: Vec, + /// The trees of the forest, with sizes in geometric progression. + trees: Vec>, +} + +impl Forest +where + U: FromIterator + IntoIterator, +{ + /// Create a new empty forest. + pub fn new() -> Self { + Self { + buffer: Vec::new(), + trees: Vec::new(), + } + } + + /// Add a new item to the forest. + pub fn push(&mut self, item: T) { + self.extend(iter::once(item)); + } + + /// Get the number of items in the forest. + pub fn len(&self) -> usize { + let mut len = self.buffer.len(); + for (i, slot) in self.trees.iter().enumerate() { + if slot.is_some() { + len += 1 << (i + BUFFER_BITS); + } + } + len + } + + /// Check if this forest is empty. + pub fn is_empty(&self) -> bool { + if !self.buffer.is_empty() { + return false; + } + + self.trees.iter().flatten().next().is_none() + } +} + +impl Default for Forest +where + U: FromIterator + IntoIterator, +{ + fn default() -> Self { + Self::new() + } +} + +impl Extend for Forest +where + U: FromIterator + IntoIterator, +{ + fn extend>(&mut self, items: I) { + self.buffer.extend(items); + if self.buffer.len() < BUFFER_SIZE { + return; + } + + let len = self.len(); + + for i in 0.. { + let bit = 1 << (i + BUFFER_BITS); + + if bit > len { + break; + } + + if i >= self.trees.len() { + self.trees.push(None); + } + + if len & bit == 0 { + if let Some(tree) = self.trees[i].take() { + self.buffer.extend(tree); + } + } else if self.trees[i].is_none() { + let offset = self.buffer.len() - bit; + self.trees[i] = Some(self.buffer.drain(offset..).collect()); + } + } + + debug_assert!(self.buffer.len() < BUFFER_SIZE); + debug_assert!(self.len() == len); + } +} + +impl FromIterator for Forest +where + U: FromIterator + IntoIterator, +{ + fn from_iter>(items: I) -> Self { + let mut forest = Self::new(); + forest.extend(items); + forest + } +} + +impl IntoIterator for Forest { + type Item = T::Item; + type IntoIter = std::vec::IntoIter; + + fn into_iter(mut self) -> Self::IntoIter { + self.buffer.extend(self.trees.into_iter().flatten().flatten()); + self.buffer.into_iter() + } +} + +impl NearestNeighbors for Forest +where + K: Proximity, + T: NearestNeighbors, + T: IntoIterator, +{ + fn search<'k, 'v, N>(&'v self, mut neighborhood: N) -> N + where + K: 'k, + V: 'v, + N: Neighborhood<&'k K, &'v V> + { + for item in &self.buffer { + neighborhood.consider(item); + } + + self.trees + .iter() + .flatten() + .fold(neighborhood, |n, t| t.search(n)) + } +} + +/// A forest of k-d trees. +pub type KdForest = Forest>; + +/// A forest of vantage-point trees. +pub type VpForest = Forest>; + +#[cfg(test)] +mod tests { + use super::*; + + use acap::euclid::Euclidean; + use acap::exhaustive::ExhaustiveSearch; + use acap::{NearestNeighbors, Neighbor}; + + use rand::prelude::*; + + type Point = Euclidean<[f32; 3]>; + + fn test_empty(from_iter: &F) + where + T: NearestNeighbors, + F: Fn(Vec) -> T, + { + let points = Vec::new(); + let index = from_iter(points); + let target = Euclidean([0.0, 0.0, 0.0]); + assert_eq!(index.nearest(&target), None); + assert_eq!(index.nearest_within(&target, 1.0), None); + assert!(index.k_nearest(&target, 0).is_empty()); + assert!(index.k_nearest(&target, 3).is_empty()); + assert!(index.k_nearest_within(&target, 0, 1.0).is_empty()); + assert!(index.k_nearest_within(&target, 3, 1.0).is_empty()); + } + + fn test_pythagorean(from_iter: &F) + where + T: NearestNeighbors, + F: Fn(Vec) -> T, + { + let points = vec![ + Euclidean([3.0, 4.0, 0.0]), + Euclidean([5.0, 0.0, 12.0]), + Euclidean([0.0, 8.0, 15.0]), + Euclidean([1.0, 2.0, 2.0]), + Euclidean([2.0, 3.0, 6.0]), + Euclidean([4.0, 4.0, 7.0]), + ]; + let index = from_iter(points); + let target = Euclidean([0.0, 0.0, 0.0]); + + assert_eq!( + index.nearest(&target).expect("No nearest neighbor found"), + Neighbor::new(&Euclidean([1.0, 2.0, 2.0]), 3.0) + ); + + assert_eq!(index.nearest_within(&target, 2.0), None); + assert_eq!( + index.nearest_within(&target, 4.0).expect("No nearest neighbor found within 4.0"), + Neighbor::new(&Euclidean([1.0, 2.0, 2.0]), 3.0) + ); + + assert!(index.k_nearest(&target, 0).is_empty()); + assert_eq!( + index.k_nearest(&target, 3), + vec![ + Neighbor::new(&Euclidean([1.0, 2.0, 2.0]), 3.0), + Neighbor::new(&Euclidean([3.0, 4.0, 0.0]), 5.0), + Neighbor::new(&Euclidean([2.0, 3.0, 6.0]), 7.0), + ] + ); + + assert!(index.k_nearest(&target, 0).is_empty()); + assert_eq!( + index.k_nearest_within(&target, 3, 6.0), + vec![ + Neighbor::new(&Euclidean([1.0, 2.0, 2.0]), 3.0), + Neighbor::new(&Euclidean([3.0, 4.0, 0.0]), 5.0), + ] + ); + assert_eq!( + index.k_nearest_within(&target, 3, 8.0), + vec![ + Neighbor::new(&Euclidean([1.0, 2.0, 2.0]), 3.0), + Neighbor::new(&Euclidean([3.0, 4.0, 0.0]), 5.0), + Neighbor::new(&Euclidean([2.0, 3.0, 6.0]), 7.0), + ] + ); + } + + fn test_random_points(from_iter: &F) + where + T: NearestNeighbors, + F: Fn(Vec) -> T, + { + let mut points = Vec::new(); + for _ in 0..255 { + points.push(Euclidean([random(), random(), random()])); + } + let target = Euclidean([random(), random(), random()]); + + let eindex = ExhaustiveSearch::from_iter(points.clone()); + let index = from_iter(points); + + assert_eq!(index.k_nearest(&target, 3), eindex.k_nearest(&target, 3)); + } + + /// Test a [NearestNeighbors] impl. + fn test_nearest_neighbors(from_iter: F) + where + T: NearestNeighbors, + F: Fn(Vec) -> T, + { + test_empty(&from_iter); + test_pythagorean(&from_iter); + test_random_points(&from_iter); + } + + #[test] + fn test_exhaustive_forest() { + test_nearest_neighbors(Forest::>::from_iter); + } + + #[test] + fn test_forest_forest() { + test_nearest_neighbors(Forest::>>::from_iter); + } + + #[test] + fn test_kd_forest() { + test_nearest_neighbors(KdForest::from_iter); + } + + #[test] + fn test_vp_forest() { + test_nearest_neighbors(VpForest::from_iter); + } +} diff --git a/src/frontier.rs b/src/frontier.rs index c7b7ca5..74c7398 100644 --- a/src/frontier.rs +++ b/src/frontier.rs @@ -5,11 +5,13 @@ pub mod mean; pub mod min; use crate::color::{ColorSpace, Rgb8}; -use crate::metric::kd::Cartesian; -use crate::metric::soft::SoftDelete; -use crate::metric::Metric; +use crate::soft::SoftDelete; + +use acap::coords::{Coordinates, CoordinateMetric, CoordinateProximity}; +use acap::distance::{Proximity, Metric}; use std::cell::Cell; +use std::ops::Deref; use std::rc::Rc; /// A frontier of pixels. @@ -52,23 +54,39 @@ impl Pixel { } } -impl Metric> for C { - type Distance = C::Distance; +/// A reference-counted pixel, to work around the coherence rules. +#[derive(Clone, Debug)] +struct RcPixel(Rc>); - fn distance(&self, other: &Pixel) -> Self::Distance { - self.distance(&other.color) +impl RcPixel { + fn new(x: u32, y: u32, color: C) -> Self { + Self(Rc::new(Pixel::new(x, y, color))) + } +} + +impl Deref for RcPixel { + type Target = Pixel; + + fn deref(&self) -> &Self::Target { + self.0.deref() } } -impl> Metric<[f64]> for Pixel { +/// A search target, to work around the coherence rules. +#[derive(Debug)] +struct Target(C); + +impl Proximity> for Target { type Distance = C::Distance; - fn distance(&self, other: &[f64]) -> Self::Distance { - self.color.distance(other) + fn distance(&self, other: &Pixel) -> Self::Distance { + self.0.distance(&other.color) } } -impl Metric for Pixel { +impl Metric> for Target {} + +impl Proximity for Pixel { type Distance = C::Distance; fn distance(&self, other: &Pixel) -> Self::Distance { @@ -76,13 +94,17 @@ impl Metric for Pixel { } } -impl Cartesian for Pixel { - fn dimensions(&self) -> usize { - self.color.dimensions() +impl Metric for Pixel {} + +impl Coordinates for Pixel { + type Value = C::Value; + + fn dims(&self) -> usize { + self.color.dims() } - fn coordinate(&self, i: usize) -> f64 { - self.color.coordinate(i) + fn coord(&self, i: usize) -> Self::Value { + self.color.coord(i) } } @@ -92,43 +114,63 @@ impl SoftDelete for Pixel { } } -impl> Metric<[f64]> for Rc> { +impl Proximity> for Target { type Distance = C::Distance; - fn distance(&self, other: &[f64]) -> Self::Distance { - (**self).distance(other) + fn distance(&self, other: &RcPixel) -> Self::Distance { + self.0.distance(&other.0.color) + } +} + +impl Metric> for Target {} + +impl Coordinates for Target { + type Value = C::Value; + + fn dims(&self) -> usize { + self.0.dims() + } + + fn coord(&self, i: usize) -> Self::Value { + self.0.coord(i) } } -impl Metric>> for C { +impl> CoordinateProximity for Target { type Distance = C::Distance; - fn distance(&self, other: &Rc>) -> Self::Distance { - self.distance(&other.color) + fn distance_to_coords(&self, coords: &[T]) -> Self::Distance { + self.0.distance_to_coords(coords) } } -impl Metric for Rc> { +impl> CoordinateMetric for Target {} + +impl Proximity for RcPixel { type Distance = C::Distance; fn distance(&self, other: &Self) -> Self::Distance { - (**self).distance(&**other) + (*self.0).distance(&*other.0) } } -impl Cartesian for Rc> { - fn dimensions(&self) -> usize { - (**self).dimensions() +impl Metric for RcPixel {} + +impl Coordinates for RcPixel { + type Value = C::Value; + + fn dims(&self) -> usize { + (*self.0).dims() } - fn coordinate(&self, i: usize) -> f64 { - (**self).coordinate(i) + fn coord(&self, i: usize) -> Self::Value { + (*self.0).coord(i) } } -impl SoftDelete for Rc> { +impl SoftDelete for RcPixel { fn is_deleted(&self) -> bool { - (**self).is_deleted() + (*self.0).is_deleted() } } diff --git a/src/frontier/image.rs b/src/frontier/image.rs index 5642297..18bf620 100644 --- a/src/frontier/image.rs +++ b/src/frontier/image.rs @@ -1,16 +1,17 @@ //! Frontier that targets an image. -use super::{Frontier, Pixel}; +use super::{Frontier, Pixel, Target}; use crate::color::{ColorSpace, Rgb8}; -use crate::metric::soft::SoftKdTree; -use crate::metric::NearestNeighbors; +use crate::soft::SoftKdTree; + +use acap::NearestNeighbors; use image::RgbImage; /// A [Frontier] that places colors on the closest pixel of a target image. #[derive(Debug)] -pub struct ImageFrontier { +pub struct ImageFrontier { nodes: SoftKdTree>, width: u32, height: u32, @@ -58,7 +59,7 @@ impl Frontier for ImageFrontier { fn place(&mut self, rgb8: Rgb8) -> Option<(u32, u32)> { let color = C::from(rgb8); - if let Some(node) = self.nodes.nearest(&color).map(|n| n.item) { + if let Some(node) = self.nodes.nearest(&Target(color)).map(|n| n.item) { let pos = node.pos; node.delete(); diff --git a/src/frontier/mean.rs b/src/frontier/mean.rs index 6a32b97..3c441b8 100644 --- a/src/frontier/mean.rs +++ b/src/frontier/mean.rs @@ -1,19 +1,19 @@ //! Mean selection frontier. -use super::{neighbors, Frontier, Pixel}; +use super::{neighbors, Frontier, RcPixel, Target}; use crate::color::{ColorSpace, Rgb8}; -use crate::metric::soft::SoftKdForest; -use crate::metric::NearestNeighbors; +use crate::soft::SoftKdForest; + +use acap::NearestNeighbors; use std::iter; -use std::rc::Rc; /// A pixel on a mean frontier. #[derive(Debug)] enum MeanPixel { Empty, - Fillable(Rc>), + Fillable(RcPixel), Filled(C), } @@ -27,9 +27,10 @@ impl MeanPixel { } /// A [Frontier] that looks at the average color of each pixel's neighbors. +#[derive(Debug)] pub struct MeanFrontier { pixels: Vec>, - forest: SoftKdForest>>, + forest: SoftKdForest>, width: u32, height: u32, len: usize, @@ -45,7 +46,7 @@ impl MeanFrontier { pixels.push(MeanPixel::Empty); } - let pixel0 = Rc::new(Pixel::new(x0, y0, C::from(Rgb8::from([0, 0, 0])))); + let pixel0 = RcPixel::new(x0, y0, C::from(Rgb8::from([0, 0, 0]))); let i = (x0 + y0 * width) as usize; pixels[i] = MeanPixel::Fillable(pixel0.clone()); @@ -99,7 +100,7 @@ impl MeanFrontier { .map(MeanPixel::filled_color) .flatten(), ); - let pixel = Rc::new(Pixel::new(x, y, color)); + let pixel = RcPixel::new(x, y, color); self.pixels[i] = MeanPixel::Fillable(pixel.clone()); pixels.push(pixel); } @@ -135,7 +136,7 @@ impl Frontier for MeanFrontier { fn place(&mut self, rgb8: Rgb8) -> Option<(u32, u32)> { let color = C::from(rgb8); - let (x, y) = self.forest.nearest(&color).map(|n| n.item.pos)?; + let (x, y) = self.forest.nearest(&Target(color)).map(|n| n.item.pos)?; self.fill(x, y, color); diff --git a/src/frontier/min.rs b/src/frontier/min.rs index 269f3b7..95b3321 100644 --- a/src/frontier/min.rs +++ b/src/frontier/min.rs @@ -1,19 +1,18 @@ //! Minimum selection frontier. -use super::{neighbors, Frontier, Pixel}; +use super::{neighbors, Frontier, RcPixel, Target}; use crate::color::{ColorSpace, Rgb8}; -use crate::metric::soft::SoftKdForest; -use crate::metric::NearestNeighbors; +use crate::soft::SoftKdForest; -use rand::Rng; +use acap::NearestNeighbors; -use std::rc::Rc; +use rand::Rng; /// A pixel on a min frontier. #[derive(Debug)] struct MinPixel { - pixel: Option>>, + pixel: Option>, filled: bool, } @@ -31,7 +30,7 @@ impl MinPixel { pub struct MinFrontier { rng: R, pixels: Vec>, - forest: SoftKdForest>>, + forest: SoftKdForest>, width: u32, height: u32, x0: u32, @@ -94,7 +93,7 @@ impl MinFrontier { return None; } - let rc = Rc::new(Pixel::new(x, y, color)); + let rc = RcPixel::new(x, y, color); pixel.pixel = Some(rc.clone()); pixel.filled = true; @@ -144,7 +143,7 @@ impl Frontier for MinFrontier { let color = C::from(rgb8); let (x, y) = self .forest - .nearest(&color) + .nearest(&Target(color)) .map(|n| n.item.pos) .map(|(x, y)| self.free_neighbor(x, y).unwrap()) .unwrap_or((self.x0, self.y0)); diff --git a/src/main.rs b/src/main.rs index a2f5322..ce54939 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,8 @@ pub mod color; +pub mod forest; pub mod frontier; pub mod hilbert; -pub mod metric; +pub mod soft; use crate::color::source::{AllColors, ColorSource, ImageColors}; use crate::color::{order, ColorSpace, LabSpace, LuvSpace, Rgb8, RgbSpace}; diff --git a/src/metric.rs b/src/metric.rs deleted file mode 100644 index d9938e4..0000000 --- a/src/metric.rs +++ /dev/null @@ -1,546 +0,0 @@ -//! [Metric spaces](https://en.wikipedia.org/wiki/Metric_space). - -pub mod approx; -pub mod forest; -pub mod kd; -pub mod soft; -pub mod vp; - -use std::cmp::Ordering; -use std::collections::BinaryHeap; -use std::iter::FromIterator; - -/// A wrapper that converts a partial ordering into a total one by panicking. -#[derive(Debug, PartialEq, PartialOrd)] -struct Ordered(T); - -impl Ord for Ordered { - fn cmp(&self, other: &Self) -> Ordering { - self.partial_cmp(other).unwrap() - } -} - -impl Eq for Ordered {} - -/// An [order embedding](https://en.wikipedia.org/wiki/Order_embedding) for distances. -/// -/// Implementations of this trait must satisfy, for all non-negative distances `x` and `y`: -/// -/// * `x == Self::from(x).into()` -/// * `x <= y` iff `Self::from(x) <= Self::from(y)` -/// -/// This trait exists to optimize the common case where distances can be compared more efficiently -/// than their exact values can be computed. For example, taking the square root can be avoided -/// when comparing Euclidean distances (see [SquaredDistance]). -pub trait Distance: Copy + From + Into + PartialOrd {} - -impl Distance for f64 {} - -/// A squared distance, to avoid computing square roots unless absolutely necessary. -#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] -pub struct SquaredDistance(f64); - -impl SquaredDistance { - /// Create a SquaredDistance from an already squared value. - pub fn from_squared(value: f64) -> Self { - Self(value) - } -} - -impl From for SquaredDistance { - fn from(value: f64) -> Self { - Self::from_squared(value * value) - } -} - -impl From for f64 { - fn from(value: SquaredDistance) -> Self { - value.0.sqrt() - } -} - -impl Distance for SquaredDistance {} - -/// A [metric space](https://en.wikipedia.org/wiki/Metric_space). -pub trait Metric { - /// The type used to represent distances. - type Distance: Distance; - - /// Computes the distance between this point and another point. This function must satisfy - /// three conditions: - /// - /// * `x.distance(y) == 0` iff `x == y` (identity of indiscernibles) - /// * `x.distance(y) == y.distance(x)` (symmetry) - /// * `x.distance(z) <= x.distance(y) + y.distance(z)` (triangle inequality) - fn distance(&self, other: &T) -> Self::Distance; -} - -/// Blanket [Metric] implementation for references. -impl<'a, 'b, T, U: Metric> Metric<&'a T> for &'b U { - type Distance = U::Distance; - - fn distance(&self, other: &&'a T) -> Self::Distance { - (*self).distance(other) - } -} - -/// The standard [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance) metric. -impl Metric for [f64] { - type Distance = SquaredDistance; - - fn distance(&self, other: &Self) -> Self::Distance { - debug_assert!(self.len() == other.len()); - - let mut sum = 0.0; - for i in 0..self.len() { - let diff = self[i] - other[i]; - sum += diff * diff; - } - - Self::Distance::from_squared(sum) - } -} - -/// A nearest neighbor to a target. -#[derive(Clone, Copy, Debug, PartialEq)] -pub struct Neighbor { - /// The found item. - pub item: T, - /// The distance from the target. - pub distance: f64, -} - -impl Neighbor { - /// Create a new Neighbor. - pub fn new(item: T, distance: f64) -> Self { - Self { item, distance } - } -} - -/// A candidate nearest neighbor found during a search. -#[derive(Debug)] -struct Candidate { - item: T, - distance: D, -} - -impl Candidate { - fn new(target: U, item: T) -> Self - where - U: Metric, - { - let distance = target.distance(&item); - Self { item, distance } - } - - fn into_neighbor(self) -> Neighbor { - Neighbor::new(self.item, self.distance.into()) - } -} - -impl PartialOrd for Candidate { - fn partial_cmp(&self, other: &Self) -> Option { - self.distance.partial_cmp(&other.distance) - } -} - -impl Ord for Candidate { - fn cmp(&self, other: &Self) -> Ordering { - self.partial_cmp(other).unwrap() - } -} - -impl PartialEq for Candidate { - fn eq(&self, other: &Self) -> bool { - self.distance.eq(&other.distance) - } -} - -impl Eq for Candidate {} - -/// Accumulates nearest neighbor search results. -pub trait Neighborhood> { - /// Returns the target of the nearest neighbor search. - fn target(&self) -> U; - - /// Check whether a distance is within this neighborhood. - fn contains(&self, distance: f64) -> bool { - distance < 0.0 || self.contains_distance(distance.into()) - } - - /// Check whether a distance is within this neighborhood. - fn contains_distance(&self, distance: U::Distance) -> bool; - - /// Consider a new candidate neighbor. - fn consider(&mut self, item: T) -> U::Distance; -} - -/// A [Neighborhood] with at most one result. -#[derive(Debug)] -struct SingletonNeighborhood> { - /// The target of the nearest neighbor search. - target: U, - /// The current threshold distance to the farthest result. - threshold: Option, - /// The current nearest neighbor, if any. - candidate: Option>, -} - -impl SingletonNeighborhood -where - U: Copy + Metric, -{ - /// Create a new single metric result tracker. - /// - /// * `target`: The target fo the nearest neighbor search. - /// * `threshold`: The maximum allowable distance. - fn new(target: U, threshold: Option) -> Self { - Self { - target, - threshold: threshold.map(U::Distance::from), - candidate: None, - } - } - - /// Consider a candidate. - fn push(&mut self, candidate: Candidate) -> U::Distance { - let distance = candidate.distance; - - if self.contains_distance(distance) { - self.threshold = Some(distance); - self.candidate = Some(candidate); - } - - distance - } - - /// Convert this result into an optional neighbor. - fn into_option(self) -> Option> { - self.candidate.map(Candidate::into_neighbor) - } -} - -impl Neighborhood for SingletonNeighborhood -where - U: Copy + Metric, -{ - fn target(&self) -> U { - self.target - } - - fn contains_distance(&self, distance: U::Distance) -> bool { - self.threshold.map(|t| distance <= t).unwrap_or(true) - } - - fn consider(&mut self, item: T) -> U::Distance { - self.push(Candidate::new(self.target, item)) - } -} - -/// A [Neighborhood] of up to `k` results, using a binary heap. -#[derive(Debug)] -struct HeapNeighborhood> { - /// The target of the nearest neighbor search. - target: U, - /// The number of nearest neighbors to find. - k: usize, - /// The current threshold distance to the farthest result. - threshold: Option, - /// A max-heap of the best candidates found so far. - heap: BinaryHeap>, -} - -impl HeapNeighborhood -where - U: Copy + Metric, -{ - /// Create a new metric result tracker. - /// - /// * `target`: The target fo the nearest neighbor search. - /// * `k`: The number of nearest neighbors to find. - /// * `threshold`: The maximum allowable distance. - fn new(target: U, k: usize, threshold: Option) -> Self { - Self { - target, - k, - threshold: threshold.map(U::Distance::from), - heap: BinaryHeap::with_capacity(k), - } - } - - /// Consider a candidate. - fn push(&mut self, candidate: Candidate) -> U::Distance { - let distance = candidate.distance; - - if self.contains_distance(distance) { - let heap = &mut self.heap; - - if heap.len() == self.k { - heap.pop(); - } - - heap.push(candidate); - - if heap.len() == self.k { - self.threshold = self.heap.peek().map(|c| c.distance) - } - } - - distance - } - - /// Convert these results into a vector of neighbors. - fn into_vec(self) -> Vec> { - self.heap - .into_sorted_vec() - .into_iter() - .map(Candidate::into_neighbor) - .collect() - } -} - -impl Neighborhood for HeapNeighborhood -where - U: Copy + Metric, -{ - fn target(&self) -> U { - self.target - } - - fn contains_distance(&self, distance: U::Distance) -> bool { - self.k > 0 && self.threshold.map(|t| distance <= t).unwrap_or(true) - } - - fn consider(&mut self, item: T) -> U::Distance { - self.push(Candidate::new(self.target, item)) - } -} - -/// A [nearest neighbor search](https://en.wikipedia.org/wiki/Nearest_neighbor_search) index. -/// -/// Type parameters: -/// * `T`: The search result type. -/// * `U`: The query type. -pub trait NearestNeighbors = T> { - /// Returns the nearest neighbor to `target` (or `None` if this index is empty). - fn nearest(&self, target: &U) -> Option> { - self.search(SingletonNeighborhood::new(target, None)) - .into_option() - } - - /// Returns the nearest neighbor to `target` within the distance `threshold`, if one exists. - fn nearest_within(&self, target: &U, threshold: f64) -> Option> { - self.search(SingletonNeighborhood::new(target, Some(threshold))) - .into_option() - } - - /// Returns the up to `k` nearest neighbors to `target`. - fn k_nearest(&self, target: &U, k: usize) -> Vec> { - self.search(HeapNeighborhood::new(target, k, None)) - .into_vec() - } - - /// Returns the up to `k` nearest neighbors to `target` within the distance `threshold`. - fn k_nearest_within(&self, target: &U, k: usize, threshold: f64) -> Vec> { - self.search(HeapNeighborhood::new(target, k, Some(threshold))) - .into_vec() - } - - /// Search for nearest neighbors and add them to a neighborhood. - fn search<'a, 'b, N>(&'a self, neighborhood: N) -> N - where - T: 'a, - U: 'b, - N: Neighborhood<&'a T, &'b U>; -} - -/// A [NearestNeighbors] implementation that does exhaustive search. -#[derive(Debug)] -pub struct ExhaustiveSearch(Vec); - -impl ExhaustiveSearch { - /// Create an empty ExhaustiveSearch index. - pub fn new() -> Self { - Self(Vec::new()) - } - - /// Add a new item to the index. - pub fn push(&mut self, item: T) { - self.0.push(item); - } - - /// Get the size of this index. - pub fn len(&self) -> usize { - self.0.len() - } - - /// Check if this index is empty. - pub fn is_empty(&self) -> bool { - self.0.is_empty() - } -} - -impl Default for ExhaustiveSearch { - fn default() -> Self { - Self::new() - } -} - -impl FromIterator for ExhaustiveSearch { - fn from_iter>(items: I) -> Self { - Self(items.into_iter().collect()) - } -} - -impl IntoIterator for ExhaustiveSearch { - type Item = T; - type IntoIter = std::vec::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() - } -} - -impl Extend for ExhaustiveSearch { - fn extend>(&mut self, iter: I) { - for value in iter { - self.push(value); - } - } -} - -impl> NearestNeighbors for ExhaustiveSearch { - fn search<'a, 'b, N>(&'a self, mut neighborhood: N) -> N - where - T: 'a, - U: 'b, - N: Neighborhood<&'a T, &'b U>, - { - for e in &self.0 { - neighborhood.consider(e); - } - neighborhood - } -} - -#[cfg(test)] -pub mod tests { - use super::*; - - use rand::prelude::*; - - #[derive(Clone, Copy, Debug, PartialEq)] - pub struct Point(pub [f64; 3]); - - impl Metric for Point { - type Distance = SquaredDistance; - - fn distance(&self, other: &Self) -> Self::Distance { - self.0.distance(&other.0) - } - } - - /// Test a [NearestNeighbors] impl. - pub fn test_nearest_neighbors(from_iter: F) - where - T: NearestNeighbors, - F: Fn(Vec) -> T, - { - test_empty(&from_iter); - test_pythagorean(&from_iter); - test_random_points(&from_iter); - } - - fn test_empty(from_iter: &F) - where - T: NearestNeighbors, - F: Fn(Vec) -> T, - { - let points = Vec::new(); - let index = from_iter(points); - let target = Point([0.0, 0.0, 0.0]); - assert_eq!(index.nearest(&target), None); - assert_eq!(index.nearest_within(&target, 1.0), None); - assert!(index.k_nearest(&target, 0).is_empty()); - assert!(index.k_nearest(&target, 3).is_empty()); - assert!(index.k_nearest_within(&target, 0, 1.0).is_empty()); - assert!(index.k_nearest_within(&target, 3, 1.0).is_empty()); - } - - fn test_pythagorean(from_iter: &F) - where - T: NearestNeighbors, - F: Fn(Vec) -> T, - { - let points = vec![ - Point([3.0, 4.0, 0.0]), - Point([5.0, 0.0, 12.0]), - Point([0.0, 8.0, 15.0]), - Point([1.0, 2.0, 2.0]), - Point([2.0, 3.0, 6.0]), - Point([4.0, 4.0, 7.0]), - ]; - let index = from_iter(points); - let target = Point([0.0, 0.0, 0.0]); - - assert_eq!( - index.nearest(&target), - Some(Neighbor::new(&Point([1.0, 2.0, 2.0]), 3.0)) - ); - - assert_eq!(index.nearest_within(&target, 2.0), None); - assert_eq!( - index.nearest_within(&target, 4.0), - Some(Neighbor::new(&Point([1.0, 2.0, 2.0]), 3.0)) - ); - - assert!(index.k_nearest(&target, 0).is_empty()); - assert_eq!( - index.k_nearest(&target, 3), - vec![ - Neighbor::new(&Point([1.0, 2.0, 2.0]), 3.0), - Neighbor::new(&Point([3.0, 4.0, 0.0]), 5.0), - Neighbor::new(&Point([2.0, 3.0, 6.0]), 7.0), - ] - ); - - assert!(index.k_nearest(&target, 0).is_empty()); - assert_eq!( - index.k_nearest_within(&target, 3, 6.0), - vec![ - Neighbor::new(&Point([1.0, 2.0, 2.0]), 3.0), - Neighbor::new(&Point([3.0, 4.0, 0.0]), 5.0), - ] - ); - assert_eq!( - index.k_nearest_within(&target, 3, 8.0), - vec![ - Neighbor::new(&Point([1.0, 2.0, 2.0]), 3.0), - Neighbor::new(&Point([3.0, 4.0, 0.0]), 5.0), - Neighbor::new(&Point([2.0, 3.0, 6.0]), 7.0), - ] - ); - } - - fn test_random_points(from_iter: &F) - where - T: NearestNeighbors, - F: Fn(Vec) -> T, - { - let mut points = Vec::new(); - for _ in 0..255 { - points.push(Point([random(), random(), random()])); - } - let target = Point([random(), random(), random()]); - - let eindex = ExhaustiveSearch::from_iter(points.clone()); - let index = from_iter(points); - - assert_eq!(index.k_nearest(&target, 3), eindex.k_nearest(&target, 3)); - } - - #[test] - fn test_exhaustive_index() { - test_nearest_neighbors(ExhaustiveSearch::from_iter); - } -} diff --git a/src/metric/approx.rs b/src/metric/approx.rs deleted file mode 100644 index c23f9c7..0000000 --- a/src/metric/approx.rs +++ /dev/null @@ -1,131 +0,0 @@ -//! [Approximate nearest neighbor search](https://en.wikipedia.org/wiki/Nearest_neighbor_search#Approximate_nearest_neighbor). - -use super::{Metric, NearestNeighbors, Neighborhood}; - -/// An approximate [Neighborhood], for approximate nearest neighbor searches. -#[derive(Debug)] -struct ApproximateNeighborhood { - inner: N, - ratio: f64, - limit: usize, -} - -impl ApproximateNeighborhood { - fn new(inner: N, ratio: f64, limit: usize) -> Self { - Self { - inner, - ratio, - limit, - } - } -} - -impl Neighborhood for ApproximateNeighborhood -where - U: Metric, - N: Neighborhood, -{ - fn target(&self) -> U { - self.inner.target() - } - - fn contains(&self, distance: f64) -> bool { - if self.limit > 0 { - self.inner.contains(self.ratio * distance) - } else { - false - } - } - - fn contains_distance(&self, distance: U::Distance) -> bool { - self.contains(self.ratio * distance.into()) - } - - fn consider(&mut self, item: T) -> U::Distance { - self.limit = self.limit.saturating_sub(1); - self.inner.consider(item) - } -} - -/// An [approximate nearest neighbor search](https://en.wikipedia.org/wiki/Nearest_neighbor_search#Approximate_nearest_neighbor) -/// index. -/// -/// This wrapper converts an exact nearest neighbor search algorithm into an approximate one by -/// modifying the behavior of [Neighborhood::contains]. The approximation is controlled by two -/// parameters: -/// -/// * `ratio`: The [nearest neighbor distance ratio](https://en.wikipedia.org/wiki/Nearest_neighbor_search#Nearest_neighbor_distance_ratio), -/// which controls how much closer new candidates must be to be considered. For example, a ratio -/// of 2.0 means that a neighbor must be less than half of the current threshold away to be -/// considered. A ratio of 1.0 means an exact search. -/// -/// * `limit`: A limit on the number of candidates to consider. Typical nearest neighbor algorithms -/// find a close match quickly, so setting a limit bounds the worst-case search time while keeping -/// good accuracy. -#[derive(Debug)] -pub struct ApproximateSearch { - inner: T, - ratio: f64, - limit: usize, -} - -impl ApproximateSearch { - /// Create a new ApproximateSearch index. - /// - /// * `inner`: The [NearestNeighbors] implementation to wrap. - /// * `ratio`: The nearest neighbor distance ratio. - /// * `limit`: The maximum number of results to consider. - pub fn new(inner: T, ratio: f64, limit: usize) -> Self { - Self { - inner, - ratio, - limit, - } - } -} - -impl NearestNeighbors for ApproximateSearch -where - U: Metric, - V: NearestNeighbors, -{ - fn search<'a, 'b, N>(&'a self, neighborhood: N) -> N - where - T: 'a, - U: 'b, - N: Neighborhood<&'a T, &'b U>, - { - self.inner - .search(ApproximateNeighborhood::new( - neighborhood, - self.ratio, - self.limit, - )) - .inner - } -} - -#[cfg(test)] -mod tests { - use super::*; - - use crate::metric::kd::KdTree; - use crate::metric::tests::test_nearest_neighbors; - use crate::metric::vp::VpTree; - - use std::iter::FromIterator; - - #[test] - fn test_approx_kd_tree() { - test_nearest_neighbors(|iter| { - ApproximateSearch::new(KdTree::from_iter(iter), 1.0, std::usize::MAX) - }); - } - - #[test] - fn test_approx_vp_tree() { - test_nearest_neighbors(|iter| { - ApproximateSearch::new(VpTree::from_iter(iter), 1.0, std::usize::MAX) - }); - } -} diff --git a/src/metric/forest.rs b/src/metric/forest.rs deleted file mode 100644 index 887ff12..0000000 --- a/src/metric/forest.rs +++ /dev/null @@ -1,187 +0,0 @@ -//! [Dynamization](https://en.wikipedia.org/wiki/Dynamization) for nearest neighbor search. - -use super::kd::KdTree; -use super::vp::VpTree; -use super::{Metric, NearestNeighbors, Neighborhood}; - -use std::iter::{self, Extend, FromIterator}; - -/// The number of bits dedicated to the flat buffer. -const BUFFER_BITS: usize = 6; -/// The maximum size of the buffer. -const BUFFER_SIZE: usize = 1 << BUFFER_BITS; - -/// A dynamic wrapper for a static nearest neighbor search data structure. -/// -/// This type applies [dynamization](https://en.wikipedia.org/wiki/Dynamization) to an arbitrary -/// nearest neighbor search structure `T`, allowing new items to be added dynamically. -#[derive(Debug)] -pub struct Forest { - /// A flat buffer used for the first few items, to avoid repeatedly rebuilding small trees. - buffer: Vec, - /// The trees of the forest, with sizes in geometric progression. - trees: Vec>, -} - -impl Forest -where - U: FromIterator + IntoIterator, -{ - /// Create a new empty forest. - pub fn new() -> Self { - Self { - buffer: Vec::new(), - trees: Vec::new(), - } - } - - /// Add a new item to the forest. - pub fn push(&mut self, item: T) { - self.extend(iter::once(item)); - } - - /// Get the number of items in the forest. - pub fn len(&self) -> usize { - let mut len = self.buffer.len(); - for (i, slot) in self.trees.iter().enumerate() { - if slot.is_some() { - len += 1 << (i + BUFFER_BITS); - } - } - len - } - - /// Check if this forest is empty. - pub fn is_empty(&self) -> bool { - if !self.buffer.is_empty() { - return false; - } - - self.trees.iter().flatten().next().is_none() - } -} - -impl Default for Forest -where - U: FromIterator + IntoIterator, -{ - fn default() -> Self { - Self::new() - } -} - -impl Extend for Forest -where - U: FromIterator + IntoIterator, -{ - fn extend>(&mut self, items: I) { - self.buffer.extend(items); - if self.buffer.len() < BUFFER_SIZE { - return; - } - - let len = self.len(); - - for i in 0.. { - let bit = 1 << (i + BUFFER_BITS); - - if bit > len { - break; - } - - if i >= self.trees.len() { - self.trees.push(None); - } - - if len & bit == 0 { - if let Some(tree) = self.trees[i].take() { - self.buffer.extend(tree); - } - } else if self.trees[i].is_none() { - let offset = self.buffer.len() - bit; - self.trees[i] = Some(self.buffer.drain(offset..).collect()); - } - } - - debug_assert!(self.buffer.len() < BUFFER_SIZE); - debug_assert!(self.len() == len); - } -} - -impl FromIterator for Forest -where - U: FromIterator + IntoIterator, -{ - fn from_iter>(items: I) -> Self { - let mut forest = Self::new(); - forest.extend(items); - forest - } -} - -impl IntoIterator for Forest { - type Item = T::Item; - type IntoIter = std::vec::IntoIter; - - fn into_iter(mut self) -> Self::IntoIter { - self.buffer.extend(self.trees.into_iter().flatten().flatten()); - self.buffer.into_iter() - } -} - -impl NearestNeighbors for Forest -where - U: Metric, - V: NearestNeighbors, - V: IntoIterator, -{ - fn search<'a, 'b, N>(&'a self, mut neighborhood: N) -> N - where - T: 'a, - U: 'b, - N: Neighborhood<&'a T, &'b U>, - { - for item in &self.buffer { - neighborhood.consider(item); - } - - self.trees - .iter() - .flatten() - .fold(neighborhood, |n, t| t.search(n)) - } -} - -/// A forest of k-d trees. -pub type KdForest = Forest>; - -/// A forest of vantage-point trees. -pub type VpForest = Forest>; - -#[cfg(test)] -mod tests { - use super::*; - - use crate::metric::tests::test_nearest_neighbors; - use crate::metric::ExhaustiveSearch; - - #[test] - fn test_exhaustive_forest() { - test_nearest_neighbors(Forest::>::from_iter); - } - - #[test] - fn test_forest_forest() { - test_nearest_neighbors(Forest::>>::from_iter); - } - - #[test] - fn test_kd_forest() { - test_nearest_neighbors(KdForest::from_iter); - } - - #[test] - fn test_vp_forest() { - test_nearest_neighbors(VpForest::from_iter); - } -} diff --git a/src/metric/kd.rs b/src/metric/kd.rs deleted file mode 100644 index 6ea3809..0000000 --- a/src/metric/kd.rs +++ /dev/null @@ -1,224 +0,0 @@ -//! [k-d trees](https://en.wikipedia.org/wiki/K-d_tree). - -use super::{Metric, NearestNeighbors, Neighborhood, Ordered}; - -use std::iter::FromIterator; - -/// A point in Cartesian space. -pub trait Cartesian: Metric<[f64]> { - /// Returns the number of dimensions necessary to describe this point. - fn dimensions(&self) -> usize; - - /// Returns the value of the `i`th coordinate of this point (`i < self.dimensions()`). - fn coordinate(&self, i: usize) -> f64; -} - -/// Blanket [Cartesian] implementation for references. -impl<'a, T: Cartesian> Cartesian for &'a T { - fn dimensions(&self) -> usize { - (*self).dimensions() - } - - fn coordinate(&self, i: usize) -> f64 { - (*self).coordinate(i) - } -} - -/// Blanket [Metric<[f64]>](Metric) implementation for [Cartesian] references. -impl<'a, T: Cartesian> Metric<[f64]> for &'a T { - type Distance = T::Distance; - - fn distance(&self, other: &[f64]) -> Self::Distance { - (*self).distance(other) - } -} - -/// Standard cartesian space. -impl Cartesian for [f64] { - fn dimensions(&self) -> usize { - self.len() - } - - fn coordinate(&self, i: usize) -> f64 { - self[i] - } -} - -/// Marker trait for cartesian metric spaces. -pub trait CartesianMetric: - Cartesian + Metric>::Distance> -{ -} - -/// Blanket [CartesianMetric] implementation for cartesian spaces with compatible metric distance -/// types. -impl CartesianMetric for U -where - T: ?Sized, - U: ?Sized + Cartesian + Metric>::Distance>, -{ -} - -/// A node in a k-d tree. -#[derive(Debug)] -struct KdNode { - /// The value stored in this node. - item: T, - /// The size of the left subtree. - left_len: usize, -} - -impl KdNode { - /// Create a new KdNode. - fn new(item: T) -> Self { - Self { item, left_len: 0 } - } - - /// Build a k-d tree recursively. - fn build(slice: &mut [KdNode], i: usize) { - if slice.is_empty() { - return; - } - - slice.sort_unstable_by_key(|n| Ordered(n.item.coordinate(i))); - - let mid = slice.len() / 2; - slice.swap(0, mid); - - let (node, children) = slice.split_first_mut().unwrap(); - let (left, right) = children.split_at_mut(mid); - node.left_len = left.len(); - - let j = (i + 1) % node.item.dimensions(); - Self::build(left, j); - Self::build(right, j); - } - - /// Recursively search for nearest neighbors. - fn recurse<'a, U, N>( - slice: &'a [KdNode], - i: usize, - closest: &mut [f64], - neighborhood: &mut N, - ) where - T: 'a, - U: CartesianMetric<&'a T>, - N: Neighborhood<&'a T, U>, - { - let (node, children) = slice.split_first().unwrap(); - neighborhood.consider(&node.item); - - let target = neighborhood.target(); - let ti = target.coordinate(i); - let ni = node.item.coordinate(i); - let j = (i + 1) % node.item.dimensions(); - - let (left, right) = children.split_at(node.left_len); - let (near, far) = if ti <= ni { - (left, right) - } else { - (right, left) - }; - - if !near.is_empty() { - Self::recurse(near, j, closest, neighborhood); - } - - if !far.is_empty() { - let saved = closest[i]; - closest[i] = ni; - if neighborhood.contains_distance(target.distance(closest)) { - Self::recurse(far, j, closest, neighborhood); - } - closest[i] = saved; - } - } -} - -/// A [k-d tree](https://en.wikipedia.org/wiki/K-d_tree). -#[derive(Debug)] -pub struct KdTree(Vec>); - -impl FromIterator for KdTree { - /// Create a new k-d tree from a set of points. - fn from_iter>(items: I) -> Self { - let mut nodes: Vec<_> = items.into_iter().map(KdNode::new).collect(); - KdNode::build(nodes.as_mut_slice(), 0); - Self(nodes) - } -} - -impl NearestNeighbors for KdTree -where - T: Cartesian, - U: CartesianMetric, -{ - fn search<'a, 'b, N>(&'a self, mut neighborhood: N) -> N - where - T: 'a, - U: 'b, - N: Neighborhood<&'a T, &'b U>, - { - if !self.0.is_empty() { - let target = neighborhood.target(); - let dims = target.dimensions(); - let mut closest: Vec<_> = (0..dims).map(|i| target.coordinate(i)).collect(); - - KdNode::recurse(&self.0, 0, &mut closest, &mut neighborhood); - } - - neighborhood - } -} - -/// An iterator that the moves values out of a k-d tree. -#[derive(Debug)] -pub struct IntoIter(std::vec::IntoIter>); - -impl Iterator for IntoIter { - type Item = T; - - fn next(&mut self) -> Option { - self.0.next().map(|n| n.item) - } -} - -impl IntoIterator for KdTree { - type Item = T; - type IntoIter = IntoIter; - - fn into_iter(self) -> Self::IntoIter { - IntoIter(self.0.into_iter()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - use crate::metric::tests::{test_nearest_neighbors, Point}; - use crate::metric::SquaredDistance; - - impl Metric<[f64]> for Point { - type Distance = SquaredDistance; - - fn distance(&self, other: &[f64]) -> Self::Distance { - self.0.distance(other) - } - } - - impl Cartesian for Point { - fn dimensions(&self) -> usize { - self.0.dimensions() - } - - fn coordinate(&self, i: usize) -> f64 { - self.0.coordinate(i) - } - } - - #[test] - fn test_kd_tree() { - test_nearest_neighbors(KdTree::from_iter); - } -} diff --git a/src/metric/soft.rs b/src/metric/soft.rs deleted file mode 100644 index d443bfd..0000000 --- a/src/metric/soft.rs +++ /dev/null @@ -1,292 +0,0 @@ -//! [Soft deletion](https://en.wiktionary.org/wiki/soft_deletion) for nearest neighbor search. - -use super::forest::{KdForest, VpForest}; -use super::kd::KdTree; -use super::vp::VpTree; -use super::{Metric, NearestNeighbors, Neighborhood}; - -use std::iter; -use std::iter::FromIterator; -use std::mem; - -/// A trait for objects that can be soft-deleted. -pub trait SoftDelete { - /// Check whether this item is deleted. - fn is_deleted(&self) -> bool; -} - -/// Blanket [SoftDelete] implementation for references. -impl<'a, T: SoftDelete> SoftDelete for &'a T { - fn is_deleted(&self) -> bool { - (*self).is_deleted() - } -} - -/// [Neighborhood] wrapper that ignores soft-deleted items. -#[derive(Debug)] -struct SoftNeighborhood(N); - -impl Neighborhood for SoftNeighborhood -where - T: SoftDelete, - U: Metric, - N: Neighborhood, -{ - fn target(&self) -> U { - self.0.target() - } - - fn contains(&self, distance: f64) -> bool { - self.0.contains(distance) - } - - fn contains_distance(&self, distance: U::Distance) -> bool { - self.0.contains_distance(distance) - } - - fn consider(&mut self, item: T) -> U::Distance { - if item.is_deleted() { - self.target().distance(&item) - } else { - self.0.consider(item) - } - } -} - -/// A [NearestNeighbors] implementation that supports [soft deletes](https://en.wiktionary.org/wiki/soft_deletion). -#[derive(Debug)] -pub struct SoftSearch(T); - -impl SoftSearch -where - T: SoftDelete, - U: FromIterator + IntoIterator, -{ - /// Create a new empty soft index. - pub fn new() -> Self { - Self(iter::empty().collect()) - } - - /// Push a new item into this index. - pub fn push(&mut self, item: T) - where - U: Extend, - { - self.0.extend(iter::once(item)); - } - - /// Rebuild this index, discarding deleted items. - pub fn rebuild(&mut self) { - let items = mem::replace(&mut self.0, iter::empty().collect()); - self.0 = items.into_iter().filter(|e| !e.is_deleted()).collect(); - } -} - -impl Default for SoftSearch -where - T: SoftDelete, - U: FromIterator + IntoIterator, -{ - fn default() -> Self { - Self::new() - } -} - -impl> Extend for SoftSearch { - fn extend>(&mut self, iter: I) { - self.0.extend(iter); - } -} - -impl> FromIterator for SoftSearch { - fn from_iter>(iter: I) -> Self { - Self(U::from_iter(iter)) - } -} - -impl IntoIterator for SoftSearch { - type Item = T::Item; - type IntoIter = T::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() - } -} - -impl NearestNeighbors for SoftSearch -where - T: SoftDelete, - U: Metric, - V: NearestNeighbors, -{ - fn search<'a, 'b, N>(&'a self, neighborhood: N) -> N - where - T: 'a, - U: 'b, - N: Neighborhood<&'a T, &'b U>, - { - self.0.search(SoftNeighborhood(neighborhood)).0 - } -} - -/// A k-d forest that supports soft deletes. -pub type SoftKdForest = SoftSearch>; - -/// A k-d tree that supports soft deletes. -pub type SoftKdTree = SoftSearch>; - -/// A VP forest that supports soft deletes. -pub type SoftVpForest = SoftSearch>; - -/// A VP tree that supports soft deletes. -pub type SoftVpTree = SoftSearch>; - -#[cfg(test)] -mod tests { - use super::*; - - use crate::metric::kd::Cartesian; - use crate::metric::tests::Point; - use crate::metric::Neighbor; - - #[derive(Debug, PartialEq)] - struct SoftPoint { - point: Point, - deleted: bool, - } - - impl SoftPoint { - fn new(x: f64, y: f64, z: f64) -> Self { - Self { - point: Point([x, y, z]), - deleted: false, - } - } - - fn deleted(x: f64, y: f64, z: f64) -> Self { - Self { - point: Point([x, y, z]), - deleted: true, - } - } - } - - impl SoftDelete for SoftPoint { - fn is_deleted(&self) -> bool { - self.deleted - } - } - - impl Metric for SoftPoint { - type Distance = ::Distance; - - fn distance(&self, other: &Self) -> Self::Distance { - self.point.distance(&other.point) - } - } - - impl Metric<[f64]> for SoftPoint { - type Distance = ::Distance; - - fn distance(&self, other: &[f64]) -> Self::Distance { - self.point.distance(other) - } - } - - impl Cartesian for SoftPoint { - fn dimensions(&self) -> usize { - self.point.dimensions() - } - - fn coordinate(&self, i: usize) -> f64 { - self.point.coordinate(i) - } - } - - impl Metric for Point { - type Distance = ::Distance; - - fn distance(&self, other: &SoftPoint) -> Self::Distance { - self.distance(&other.point) - } - } - - fn test_index(index: &T) - where - T: NearestNeighbors, - { - let target = Point([0.0, 0.0, 0.0]); - - assert_eq!( - index.nearest(&target), - Some(Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0)) - ); - - assert_eq!(index.nearest_within(&target, 2.0), None); - assert_eq!( - index.nearest_within(&target, 4.0), - Some(Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0)) - ); - - assert_eq!( - index.k_nearest(&target, 3), - vec![ - Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0), - Neighbor::new(&SoftPoint::new(3.0, 4.0, 0.0), 5.0), - Neighbor::new(&SoftPoint::new(2.0, 3.0, 6.0), 7.0), - ] - ); - - assert_eq!( - index.k_nearest_within(&target, 3, 6.0), - vec![ - Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0), - Neighbor::new(&SoftPoint::new(3.0, 4.0, 0.0), 5.0), - ] - ); - assert_eq!( - index.k_nearest_within(&target, 3, 8.0), - vec![ - Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0), - Neighbor::new(&SoftPoint::new(3.0, 4.0, 0.0), 5.0), - Neighbor::new(&SoftPoint::new(2.0, 3.0, 6.0), 7.0), - ] - ); - } - - fn test_soft_index(index: &mut SoftSearch) - where - T: Extend, - T: FromIterator, - T: IntoIterator, - T: NearestNeighbors, - { - let points = vec![ - SoftPoint::deleted(0.0, 0.0, 0.0), - SoftPoint::new(3.0, 4.0, 0.0), - SoftPoint::new(5.0, 0.0, 12.0), - SoftPoint::new(0.0, 8.0, 15.0), - SoftPoint::new(1.0, 2.0, 2.0), - SoftPoint::new(2.0, 3.0, 6.0), - SoftPoint::new(4.0, 4.0, 7.0), - ]; - - for point in points { - index.push(point); - } - test_index(index); - - index.rebuild(); - test_index(index); - } - - #[test] - fn test_soft_kd_forest() { - test_soft_index(&mut SoftKdForest::new()); - } - - #[test] - fn test_soft_vp_forest() { - test_soft_index(&mut SoftVpForest::new()); - } -} diff --git a/src/metric/vp.rs b/src/metric/vp.rs deleted file mode 100644 index d6e05df..0000000 --- a/src/metric/vp.rs +++ /dev/null @@ -1,137 +0,0 @@ -//! [Vantage-point trees](https://en.wikipedia.org/wiki/Vantage-point_tree). - -use super::{Metric, NearestNeighbors, Neighborhood, Ordered}; - -use std::iter::FromIterator; - -/// A node in a VP tree. -#[derive(Debug)] -struct VpNode { - /// The vantage point itself. - item: T, - /// The radius of this node. - radius: f64, - /// The size of the subtree inside the radius. - inside_len: usize, -} - -impl VpNode { - /// Create a new VpNode. - fn new(item: T) -> Self { - Self { - item, - radius: 0.0, - inside_len: 0, - } - } - - /// Build a VP tree recursively. - fn build(slice: &mut [VpNode]) { - if let Some((node, children)) = slice.split_first_mut() { - let item = &node.item; - children.sort_by_cached_key(|n| Ordered(item.distance(&n.item))); - - let (inside, outside) = children.split_at_mut(children.len() / 2); - if let Some(last) = inside.last() { - node.radius = item.distance(&last.item).into(); - } - node.inside_len = inside.len(); - - Self::build(inside); - Self::build(outside); - } - } - - /// Recursively search for nearest neighbors. - fn recurse<'a, U, N>(slice: &'a [VpNode], neighborhood: &mut N) - where - T: 'a, - U: Metric<&'a T>, - N: Neighborhood<&'a T, U>, - { - let (node, children) = slice.split_first().unwrap(); - let (inside, outside) = children.split_at(node.inside_len); - - let distance = neighborhood.consider(&node.item).into(); - - if distance <= node.radius { - if !inside.is_empty() && neighborhood.contains(distance - node.radius) { - Self::recurse(inside, neighborhood); - } - if !outside.is_empty() && neighborhood.contains(node.radius - distance) { - Self::recurse(outside, neighborhood); - } - } else { - if !outside.is_empty() && neighborhood.contains(node.radius - distance) { - Self::recurse(outside, neighborhood); - } - if !inside.is_empty() && neighborhood.contains(distance - node.radius) { - Self::recurse(inside, neighborhood); - } - } - } -} - -/// A [vantage-point tree](https://en.wikipedia.org/wiki/Vantage-point_tree). -#[derive(Debug)] -pub struct VpTree(Vec>); - -impl FromIterator for VpTree { - fn from_iter>(items: I) -> Self { - let mut nodes: Vec<_> = items.into_iter().map(VpNode::new).collect(); - VpNode::build(nodes.as_mut_slice()); - Self(nodes) - } -} - -impl NearestNeighbors for VpTree -where - T: Metric, - U: Metric, -{ - fn search<'a, 'b, N>(&'a self, mut neighborhood: N) -> N - where - T: 'a, - U: 'b, - N: Neighborhood<&'a T, &'b U>, - { - if !self.0.is_empty() { - VpNode::recurse(&self.0, &mut neighborhood); - } - - neighborhood - } -} - -/// An iterator that moves values out of a VP tree. -#[derive(Debug)] -pub struct IntoIter(std::vec::IntoIter>); - -impl Iterator for IntoIter { - type Item = T; - - fn next(&mut self) -> Option { - self.0.next().map(|n| n.item) - } -} - -impl IntoIterator for VpTree { - type Item = T; - type IntoIter = IntoIter; - - fn into_iter(self) -> Self::IntoIter { - IntoIter(self.0.into_iter()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - use crate::metric::tests::test_nearest_neighbors; - - #[test] - fn test_vp_tree() { - test_nearest_neighbors(VpTree::from_iter); - } -} diff --git a/src/soft.rs b/src/soft.rs new file mode 100644 index 0000000..3edaa10 --- /dev/null +++ b/src/soft.rs @@ -0,0 +1,289 @@ +//! [Soft deletion](https://en.wiktionary.org/wiki/soft_deletion) for nearest neighbor search. + +use super::forest::{KdForest, VpForest}; + +use acap::distance::Proximity; +use acap::kd::FlatKdTree; +use acap::vp::FlatVpTree; +use acap::{NearestNeighbors, Neighborhood}; + +use std::iter; +use std::iter::FromIterator; +use std::mem; + +/// A trait for objects that can be soft-deleted. +pub trait SoftDelete { + /// Check whether this item is deleted. + fn is_deleted(&self) -> bool; +} + +/// Blanket [SoftDelete] implementation for references. +impl<'a, T: SoftDelete> SoftDelete for &'a T { + fn is_deleted(&self) -> bool { + (*self).is_deleted() + } +} + +/// [Neighborhood] wrapper that ignores soft-deleted items. +#[derive(Debug)] +struct SoftNeighborhood(N); + +impl Neighborhood for SoftNeighborhood +where + V: SoftDelete, + K: Proximity, + N: Neighborhood, +{ + fn target(&self) -> K { + self.0.target() + } + + fn contains(&self, distance: D) -> bool + where + D: PartialOrd + { + self.0.contains(distance) + } + + fn consider(&mut self, item: V) -> K::Distance { + if item.is_deleted() { + self.target().distance(&item) + } else { + self.0.consider(item) + } + } +} + +/// A [NearestNeighbors] implementation that supports [soft deletes](https://en.wiktionary.org/wiki/soft_deletion). +#[derive(Debug)] +pub struct SoftSearch(T); + +impl SoftSearch +where + T: SoftDelete, + U: FromIterator + IntoIterator, +{ + /// Create a new empty soft index. + pub fn new() -> Self { + Self(iter::empty().collect()) + } + + /// Push a new item into this index. + pub fn push(&mut self, item: T) + where + U: Extend, + { + self.0.extend(iter::once(item)); + } + + /// Rebuild this index, discarding deleted items. + pub fn rebuild(&mut self) { + let items = mem::replace(&mut self.0, iter::empty().collect()); + self.0 = items.into_iter().filter(|e| !e.is_deleted()).collect(); + } +} + +impl Default for SoftSearch +where + T: SoftDelete, + U: FromIterator + IntoIterator, +{ + fn default() -> Self { + Self::new() + } +} + +impl> Extend for SoftSearch { + fn extend>(&mut self, iter: I) { + self.0.extend(iter); + } +} + +impl> FromIterator for SoftSearch { + fn from_iter>(iter: I) -> Self { + Self(U::from_iter(iter)) + } +} + +impl IntoIterator for SoftSearch { + type Item = T::Item; + type IntoIter = T::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl NearestNeighbors for SoftSearch +where + K: Proximity, + V: SoftDelete, + T: NearestNeighbors, +{ + fn search<'k, 'v, N>(&'v self, neighborhood: N) -> N + where + K: 'k, + V: 'v, + N: Neighborhood<&'k K, &'v V> + { + self.0.search(SoftNeighborhood(neighborhood)).0 + } +} + +/// A k-d forest that supports soft deletes. +pub type SoftKdForest = SoftSearch>; + +/// A k-d tree that supports soft deletes. +pub type SoftKdTree = SoftSearch>; + +/// A VP forest that supports soft deletes. +pub type SoftVpForest = SoftSearch>; + +/// A VP tree that supports soft deletes. +pub type SoftVpTree = SoftSearch>; + +#[cfg(test)] +mod tests { + use super::*; + + use acap::coords::Coordinates; + use acap::euclid::{euclidean_distance, Euclidean, EuclideanDistance}; + use acap::Neighbor; + + type Point = Euclidean<[f32; 3]>; + + #[derive(Debug, PartialEq)] + struct SoftPoint { + point: [f32; 3], + deleted: bool, + } + + impl SoftPoint { + fn new(x: f32, y: f32, z: f32) -> Self { + Self { + point: [x, y, z], + deleted: false, + } + } + + fn deleted(x: f32, y: f32, z: f32) -> Self { + Self { + point: [x, y, z], + deleted: true, + } + } + } + + impl SoftDelete for SoftPoint { + fn is_deleted(&self) -> bool { + self.deleted + } + } + + impl Proximity for SoftPoint { + type Distance = EuclideanDistance; + + fn distance(&self, other: &Self) -> Self::Distance { + euclidean_distance(&self.point, &other.point) + } + } + + impl Coordinates for SoftPoint { + type Value = ::Value; + + fn dims(&self) -> usize { + self.point.dims() + } + + fn coord(&self, i: usize) -> Self::Value { + self.point.coord(i) + } + } + + impl Proximity for Point { + type Distance = EuclideanDistance; + + fn distance(&self, other: &SoftPoint) -> Self::Distance { + euclidean_distance(&self, &other.point) + } + } + + fn test_index(index: &T) + where + T: NearestNeighbors, + { + let target = Euclidean([0.0, 0.0, 0.0]); + + assert_eq!( + index.nearest(&target).expect("No nearest neighbor found"), + Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0) + ); + + assert_eq!(index.nearest_within(&target, 2.0), None); + assert_eq!( + index.nearest_within(&target, 4.0).expect("No nearest neighbor found within 4.0"), + Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0) + ); + + assert_eq!( + index.k_nearest(&target, 3), + vec![ + Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0), + Neighbor::new(&SoftPoint::new(3.0, 4.0, 0.0), 5.0), + Neighbor::new(&SoftPoint::new(2.0, 3.0, 6.0), 7.0), + ] + ); + + assert_eq!( + index.k_nearest_within(&target, 3, 6.0), + vec![ + Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0), + Neighbor::new(&SoftPoint::new(3.0, 4.0, 0.0), 5.0), + ] + ); + assert_eq!( + index.k_nearest_within(&target, 3, 8.0), + vec![ + Neighbor::new(&SoftPoint::new(1.0, 2.0, 2.0), 3.0), + Neighbor::new(&SoftPoint::new(3.0, 4.0, 0.0), 5.0), + Neighbor::new(&SoftPoint::new(2.0, 3.0, 6.0), 7.0), + ] + ); + } + + fn test_soft_index(index: &mut SoftSearch) + where + T: Extend, + T: FromIterator, + T: IntoIterator, + T: NearestNeighbors, + { + let points = vec![ + SoftPoint::deleted(0.0, 0.0, 0.0), + SoftPoint::new(3.0, 4.0, 0.0), + SoftPoint::new(5.0, 0.0, 12.0), + SoftPoint::new(0.0, 8.0, 15.0), + SoftPoint::new(1.0, 2.0, 2.0), + SoftPoint::new(2.0, 3.0, 6.0), + SoftPoint::new(4.0, 4.0, 7.0), + ]; + + for point in points { + index.push(point); + } + test_index(index); + + index.rebuild(); + test_index(index); + } + + #[test] + fn test_soft_kd_forest() { + test_soft_index(&mut SoftKdForest::new()); + } + + #[test] + fn test_soft_vp_forest() { + test_soft_index(&mut SoftVpForest::new()); + } +} -- cgit v1.2.3