From 5f85a59d4be37d350bcf1ee62c25ac1f84d71770 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Mon, 6 Jul 2020 22:24:02 -0400 Subject: kd: Use a more traditional k-d tree implementation The slight extra pruning possible in the previous implementation didn't seem to be worth it. The new, simpler implementation is also about 30% faster in most of the benchmarks. This gets rid of Coordinate{Proximity,Metric} as they're not necessary any more (and the old ExactNeighbors impl was too restrictive anyway). --- src/chebyshev.rs | 14 ++++----- src/coords.rs | 25 +-------------- src/euclid.rs | 20 ++++++------ src/exhaustive.rs | 4 +-- src/kd.rs | 92 +++++++++++++++++++++---------------------------------- src/lib.rs | 6 ++-- src/lp.rs | 19 ++++++++++-- src/taxi.rs | 14 ++++----- src/vp.rs | 8 ++--- 9 files changed, 84 insertions(+), 118 deletions(-) diff --git a/src/chebyshev.rs b/src/chebyshev.rs index f6eba8a..a01b24f 100644 --- a/src/chebyshev.rs +++ b/src/chebyshev.rs @@ -1,7 +1,8 @@ //! [Chebyshev distance](https://en.wikipedia.org/wiki/Chebyshev_distance). -use crate::coords::{CoordinateMetric, CoordinateProximity, Coordinates}; +use crate::coords::Coordinates; use crate::distance::{Metric, Proximity}; +use crate::lp::Minkowski; use num_traits::{zero, Signed}; @@ -104,15 +105,12 @@ impl Metric for Chebyshev {} impl Metric> for T {} -impl CoordinateProximity for Chebyshev { - type Distance = T::Value; +/// Chebyshev distance is a [Minkowski] distance. +impl Minkowski for Chebyshev {} - fn distance_to_coords(&self, coords: &[T::Value]) -> Self::Distance { - chebyshev_distance(self, coords) - } -} +impl Minkowski for Chebyshev {} -impl CoordinateMetric for Chebyshev {} +impl Minkowski> for T {} #[cfg(test)] mod tests { diff --git a/src/coords.rs b/src/coords.rs index 2e292ae..7c83946 100644 --- a/src/coords.rs +++ b/src/coords.rs @@ -1,6 +1,6 @@ //! [Coordinate spaces](https://en.wikipedia.org/wiki/Cartesian_coordinate_system). 
-use crate::distance::{Distance, Value}; +use crate::distance::Value; /// A coordinate space. pub trait Coordinates { @@ -88,26 +88,3 @@ impl Coordinates for &T { (*self).coord(i) } } - -/// Types that support computing distances to raw slices of coordinates. -pub trait CoordinateProximity { - type Distance: Distance; - - /// Compute the distance to a point specified by its coordinates. - fn distance_to_coords(&self, coords: &[T]) -> Self::Distance; -} - -/// Blanket [`CoordinateProximity`] implementation for references. -impl, U> CoordinateProximity for &T { - type Distance = T::Distance; - - fn distance_to_coords(&self, coords: &[U]) -> Self::Distance { - (*self).distance_to_coords(coords) - } -} - -/// Marker trait for coordinate proximities that are [metrics][crate::distance::Metric]. -pub trait CoordinateMetric: CoordinateProximity {} - -/// Blanket [`CoordinateMetric`] implementation for references. -impl, U> CoordinateMetric for &T {} diff --git a/src/euclid.rs b/src/euclid.rs index 3833146..3ec0af9 100644 --- a/src/euclid.rs +++ b/src/euclid.rs @@ -1,7 +1,8 @@ //! [Euclidean space](https://en.wikipedia.org/wiki/Euclidean_space). -use crate::coords::{CoordinateMetric, CoordinateProximity, Coordinates}; +use crate::coords::Coordinates; use crate::distance::{Distance, Metric, Proximity, Value}; +use crate::lp::Minkowski; use num_traits::zero; @@ -128,19 +129,20 @@ where EuclideanDistance: Distance, {} -impl CoordinateProximity for Euclidean +/// Euclidean distance is a [Minkowski] distance. 
+impl Minkowski for Euclidean where T: Coordinates, EuclideanDistance: Distance, -{ - type Distance = EuclideanDistance; +{} - fn distance_to_coords(&self, coords: &[T::Value]) -> Self::Distance { - euclidean_distance(self, coords) - } -} +impl Minkowski for Euclidean +where + T: Coordinates, + EuclideanDistance: Distance, +{} -impl CoordinateMetric for Euclidean +impl Minkowski> for T where T: Coordinates, EuclideanDistance: Distance, diff --git a/src/exhaustive.rs b/src/exhaustive.rs index 221641c..37af4c6 100644 --- a/src/exhaustive.rs +++ b/src/exhaustive.rs @@ -80,10 +80,10 @@ impl, V> ExactNeighbors for ExhaustiveSearch {} pub mod tests { use super::*; - use crate::tests::test_nearest_neighbors; + use crate::tests::test_exact_neighbors; #[test] fn test_exhaustive_index() { - test_nearest_neighbors(ExhaustiveSearch::from_iter); + test_exact_neighbors(ExhaustiveSearch::from_iter); } } diff --git a/src/kd.rs b/src/kd.rs index 291028e..dae73ec 100644 --- a/src/kd.rs +++ b/src/kd.rs @@ -1,10 +1,13 @@ //! [k-d trees](https://en.wikipedia.org/wiki/K-d_tree). -use crate::coords::{CoordinateMetric, CoordinateProximity, Coordinates}; -use crate::distance::{Metric, Proximity}; +use crate::coords::Coordinates; +use crate::distance::Proximity; +use crate::lp::Minkowski; use crate::util::Ordered; use crate::{ExactNeighbors, NearestNeighbors, Neighborhood}; +use num_traits::Signed; + use std::iter::FromIterator; use std::ops::Deref; @@ -86,7 +89,7 @@ pub trait KdProximity where Self: Coordinates, Self: Proximity, - Self: CoordinateProximity>::Distance>, + Self::Value: PartialOrd, V: Coordinates, {} @@ -95,31 +98,14 @@ impl KdProximity for K where K: Coordinates, K: Proximity, - K: CoordinateProximity>::Distance>, - V: Coordinates, -{} - -/// Marker trait for [`Metric`] implementations that are compatible with k-d tree. -pub trait KdMetric -where - Self: KdProximity, - Self: Metric, - Self: CoordinateMetric, - V: Coordinates, -{} - -/// Blanket [`KdMetric`] implementation. 
-impl KdMetric for K -where - K: KdProximity, - K: Metric, - K: CoordinateMetric, + K::Value: PartialOrd, V: Coordinates, {} trait KdSearch: Copy where K: KdProximity, + K::Value: PartialOrd, V: Coordinates + Copy, N: Neighborhood, { @@ -133,41 +119,29 @@ where fn right(self) -> Option; /// Recursively search for nearest neighbors. - fn search(self, level: usize, closest: &mut [V::Value], neighborhood: &mut N) { + fn search(self, level: usize, neighborhood: &mut N) { let item = self.item(); neighborhood.consider(item); let target = neighborhood.target(); - if target.coord(level) <= item.coord(level) { - self.search_near(self.left(), level, closest, neighborhood); - self.search_far(self.right(), level, closest, neighborhood); + let bound = target.coord(level) - item.coord(level); + let (near, far) = if bound.is_negative() { + (self.left(), self.right()) } else { - self.search_near(self.right(), level, closest, neighborhood); - self.search_far(self.left(), level, closest, neighborhood); - } - } + (self.right(), self.left()) + }; + + let next = (level + 1) % self.item().dims(); - /// Search the subtree closest to the target. - fn search_near(self, near: Option, level: usize, closest: &mut [V::Value], neighborhood: &mut N) { if let Some(near) = near { - let next = (level + 1) % self.item().dims(); - near.search(next, closest, neighborhood); + near.search(next, neighborhood); } - } - /// Search the subtree farthest from the target. 
- fn search_far(self, far: Option, level: usize, closest: &mut [V::Value], neighborhood: &mut N) { if let Some(far) = far { - // Update the closest possible point - let item = self.item(); - let target = neighborhood.target(); - let saved = std::mem::replace(&mut closest[level], item.coord(level)); - if neighborhood.contains(target.distance_to_coords(closest)) { - let next = (level + 1) % item.dims(); - far.search(next, closest, neighborhood); + if neighborhood.contains(bound.abs()) { + far.search(next, neighborhood); } - closest[level] = saved; } } } @@ -175,6 +149,7 @@ where impl<'a, K, V, N> KdSearch for &'a KdNode where K: KdProximity<&'a V>, + K::Value: PartialOrd, V: Coordinates, N: Neighborhood, { @@ -315,6 +290,7 @@ impl IntoIterator for KdTree { impl NearestNeighbors for KdTree where K: KdProximity, + K::Value: PartialOrd, V: Coordinates, { fn search<'k, 'v, N>(&'v self, mut neighborhood: N) -> N @@ -324,16 +300,17 @@ where N: Neighborhood<&'k K, &'v V>, { if let Some(root) = &self.root { - let mut closest = neighborhood.target().as_vec(); - root.search(0, &mut closest, &mut neighborhood); + root.search(0, &mut neighborhood); } neighborhood } } +/// k-d trees are exact for [Minkowski] distances. 
impl ExactNeighbors for KdTree where - K: KdMetric, + K: KdProximity + Minkowski, + K::Value: PartialOrd, V: Coordinates, {} @@ -389,6 +366,7 @@ impl FlatKdNode { impl<'a, K, V, N> KdSearch for &'a [FlatKdNode] where K: KdProximity<&'a V>, + K::Value: PartialOrd, V: Coordinates, N: Neighborhood, { @@ -465,6 +443,7 @@ impl IntoIterator for FlatKdTree { impl NearestNeighbors for FlatKdTree where K: KdProximity, + K::Value: PartialOrd, V: Coordinates, { fn search<'k, 'v, N>(&'v self, mut neighborhood: N) -> N @@ -474,18 +453,17 @@ where N: Neighborhood<&'k K, &'v V>, { if !self.nodes.is_empty() { - let mut closest = neighborhood.target().as_vec(); - self.nodes - .as_slice() - .search(0, &mut closest, &mut neighborhood); + self.nodes.as_slice().search(0, &mut neighborhood); } neighborhood } } +/// k-d trees are exact for [Minkowski] distances. impl ExactNeighbors for FlatKdTree where - K: KdMetric, + K: KdProximity + Minkowski, + K::Value: PartialOrd, V: Coordinates, {} @@ -493,16 +471,16 @@ where mod tests { use super::*; - use crate::tests::test_nearest_neighbors; + use crate::tests::test_exact_neighbors; #[test] fn test_kd_tree() { - test_nearest_neighbors(KdTree::from_iter); + test_exact_neighbors(KdTree::from_iter); } #[test] fn test_unbalanced_kd_tree() { - test_nearest_neighbors(|points| { + test_exact_neighbors(|points| { let mut tree = KdTree::new(); for point in points { tree.push(point); @@ -513,6 +491,6 @@ mod tests { #[test] fn test_flat_kd_tree() { - test_nearest_neighbors(FlatKdTree::from_iter); + test_exact_neighbors(FlatKdTree::from_iter); } } diff --git a/src/lib.rs b/src/lib.rs index d6e5579..57f3dac 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -464,10 +464,10 @@ pub mod tests { type Point = Euclidean<[f32; 3]>; - /// Test a [NearestNeighbors] implementation. - pub fn test_nearest_neighbors(from_iter: F) + /// Test an [ExactNeighbors] implementation. 
+ pub fn test_exact_neighbors(from_iter: F) where - T: NearestNeighbors, + T: ExactNeighbors, F: Fn(Vec) -> T, { test_empty(&from_iter); diff --git a/src/lp.rs b/src/lp.rs index 4afd209..db9e65c 100644 --- a/src/lp.rs +++ b/src/lp.rs @@ -1,6 +1,10 @@ -//! [`$L^p$` spaces](https://en.wikipedia.org/wiki/Lp_space). +//! [`$\ell^p$`]/[Minkowski] distance. +//! +//! [`$\ell^p$`]: https://en.wikipedia.org/wiki/Lp_space +//! [Minkowski]: https://en.wikipedia.org/wiki/Minkowski_distance use crate::coords::Coordinates; +use crate::distance::Proximity; use num_traits::real::Real; use num_traits::zero; @@ -25,7 +29,7 @@ pub use crate::chebyshev::Chebyshev as Linf; /// Compute the L<sup>∞</sup> distance between two points. pub use crate::chebyshev::chebyshev_distance as linf_distance; -/// Compute the [`$L^p$` distance] between two points. +/// Compute the [`$\ell^p$`]/[Minkowski] distance between two points. /// /// ```math /// \begin{aligned} @@ -34,7 +38,8 @@ pub use crate::chebyshev::chebyshev_distance as linf_distance; /// \end{aligned} /// ``` /// -/// [`$L^p$` distance]: https://en.wikipedia.org/wiki/Lp_space +/// [`$\ell^p$`]: https://en.wikipedia.org/wiki/Lp_space +/// [Minkowski]: https://en.wikipedia.org/wiki/Minkowski_distance pub fn lp_distance(p: T::Value, x: T, y: U) -> T::Value where T: Coordinates, @@ -51,6 +56,14 @@ where sum.powf(p.recip()) } +/// Marker trait for [Minkowski distances]. +/// +/// [Minkowski distances]: https://en.wikipedia.org/wiki/Minkowski_distance +pub trait Minkowski: Proximity {} + +/// Blanket [`Minkowski`] implementation for references. +impl<'k, 'v, K: Minkowski, V> Minkowski<&'v V> for &'k K {} + #[cfg(test)] mod tests { use super::*; diff --git a/src/taxi.rs b/src/taxi.rs index 7c33ecb..e189a36 100644 --- a/src/taxi.rs +++ b/src/taxi.rs @@ -1,7 +1,8 @@ //! [Taxicab (Manhattan) distance](https://en.wikipedia.org/wiki/Taxicab_geometry). 
-use crate::coords::{CoordinateMetric, CoordinateProximity, Coordinates}; +use crate::coords::Coordinates; use crate::distance::{Metric, Proximity}; +use crate::lp::Minkowski; use num_traits::{zero, Signed}; @@ -100,15 +101,12 @@ impl Metric for Taxicab {} impl Metric> for T {} -impl CoordinateProximity for Taxicab { - type Distance = T::Value; +/// Taxicab distance is a [Minkowski] distance. +impl Minkowski for Taxicab {} - fn distance_to_coords(&self, coords: &[T::Value]) -> Self::Distance { - taxicab_distance(self, coords) - } -} +impl Minkowski for Taxicab {} -impl CoordinateMetric for Taxicab {} +impl Minkowski> for T {} #[cfg(test)] mod tests { diff --git a/src/vp.rs b/src/vp.rs index 85d3972..c44b323 100644 --- a/src/vp.rs +++ b/src/vp.rs @@ -532,16 +532,16 @@ where mod tests { use super::*; - use crate::tests::test_nearest_neighbors; + use crate::tests::test_exact_neighbors; #[test] fn test_vp_tree() { - test_nearest_neighbors(VpTree::from_iter); + test_exact_neighbors(VpTree::from_iter); } #[test] fn test_unbalanced_vp_tree() { - test_nearest_neighbors(|points| { + test_exact_neighbors(|points| { let mut tree = VpTree::new(); for point in points { tree.push(point); @@ -552,6 +552,6 @@ mod tests { #[test] fn test_flat_vp_tree() { - test_nearest_neighbors(FlatVpTree::from_iter); + test_exact_neighbors(FlatVpTree::from_iter); } } -- cgit v1.2.3