From a6e586d3f1368e4ecbe56b6481e8bca2ec8e7bb9 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Wed, 27 May 2020 13:55:59 -0400 Subject: kd: Implement k-d trees --- benches/benches.rs | 7 ++ src/kd.rs | 357 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + 3 files changed, 365 insertions(+) create mode 100644 src/kd.rs diff --git a/benches/benches.rs b/benches/benches.rs index a8676ba..8464179 100644 --- a/benches/benches.rs +++ b/benches/benches.rs @@ -2,6 +2,7 @@ use acap::euclid::Euclidean; use acap::exhaustive::ExhaustiveSearch; +use acap::kd::KdTree; use acap::vp::{FlatVpTree, VpTree}; use acap::NearestNeighbors; @@ -37,6 +38,7 @@ fn bench_from_iter(c: &mut Criterion) { group.bench_function("ExhaustiveSearch", |b| b.iter(|| ExhaustiveSearch::from_iter(points.clone()))); group.bench_function("VpTree", |b| b.iter(|| VpTree::from_iter(points.clone()))); group.bench_function("FlatVpTree", |b| b.iter(|| FlatVpTree::from_iter(points.clone()))); + group.bench_function("KdTree", |b| b.iter(|| KdTree::from_iter(points.clone()))); group.finish(); } @@ -47,29 +49,34 @@ fn bench_nearest_neighbors(c: &mut Criterion) { let exhaustive = ExhaustiveSearch::from_iter(points.clone()); let vp_tree = VpTree::from_iter(points.clone()); let flat_vp_tree = FlatVpTree::from_iter(points.clone()); + let kd_tree = KdTree::from_iter(points.clone()); let mut nearest = c.benchmark_group("NearestNeighbors::nearest"); nearest.bench_function("ExhaustiveSearch", |b| b.iter(|| exhaustive.nearest(&target))); nearest.bench_function("VpTree", |b| b.iter(|| vp_tree.nearest(&target))); nearest.bench_function("FlatVpTree", |b| b.iter(|| flat_vp_tree.nearest(&target))); + nearest.bench_function("KdTree", |b| b.iter(|| kd_tree.nearest(&target))); nearest.finish(); let mut nearest_within = c.benchmark_group("NearestNeighbors::nearest_within"); nearest_within.bench_function("ExhaustiveSearch", |b| b.iter(|| exhaustive.nearest_within(&target, 0.1))); nearest_within.bench_function("VpTree", |b| b.iter(|| vp_tree.nearest_within(&target, 0.1))); nearest_within.bench_function("FlatVpTree", |b| b.iter(|| flat_vp_tree.nearest_within(&target, 0.1))); + nearest_within.bench_function("KdTree", |b| b.iter(|| kd_tree.nearest_within(&target, 0.1))); nearest_within.finish(); let mut k_nearest = c.benchmark_group("NearestNeighbors::k_nearest"); k_nearest.bench_function("ExhaustiveSearch", |b| b.iter(|| exhaustive.k_nearest(&target, 3))); k_nearest.bench_function("VpTree", |b| b.iter(|| vp_tree.k_nearest(&target, 3))); k_nearest.bench_function("FlatVpTree", |b| b.iter(|| flat_vp_tree.k_nearest(&target, 3))); + k_nearest.bench_function("KdTree", |b| b.iter(|| kd_tree.k_nearest(&target, 3))); k_nearest.finish(); let mut k_nearest_within = c.benchmark_group("NearestNeighbors::k_nearest_within"); k_nearest_within.bench_function("ExhaustiveSearch", |b| b.iter(|| exhaustive.k_nearest_within(&target, 3, 0.1))); k_nearest_within.bench_function("VpTree", |b| b.iter(|| vp_tree.k_nearest_within(&target, 3, 0.1))); k_nearest_within.bench_function("FlatVpTree", |b| b.iter(|| flat_vp_tree.k_nearest_within(&target, 3, 0.1))); + k_nearest_within.bench_function("KdTree", |b| b.iter(|| kd_tree.k_nearest_within(&target, 3, 0.1))); k_nearest_within.finish(); } diff --git a/src/kd.rs b/src/kd.rs new file mode 100644 index 0000000..97616e7 --- /dev/null +++ b/src/kd.rs @@ -0,0 +1,357 @@ +//! k-d trees. + +use crate::coords::{Coordinates, CoordinateMetric, CoordinateProximity}; +use crate::distance::{Metric, Proximity}; +use crate::util::Ordered; +use crate::{ExactNeighbors, NearestNeighbors, Neighborhood}; + +use std::iter::FromIterator; +use std::ops::Deref; + +/// A node in a k-d tree. +#[derive(Debug)] +struct KdNode { + /// The vantage point itself. + item: T, + /// The left subtree, if any. + left: Option>, + /// The right subtree, if any. + right: Option>, +} + +impl KdNode { + /// Create a new KdNode. + fn new(item: T) -> Self { + Self { + item, + left: None, + right: None, + } + } + + /// Create a balanced tree. + fn balanced>(items: I) -> Option { + let mut nodes: Vec<_> = items + .into_iter() + .map(Self::new) + .map(Box::new) + .map(Some) + .collect(); + + Self::balanced_recursive(&mut nodes, 0) + .map(|node| *node) + } + + /// Create a balanced subtree. + fn balanced_recursive(nodes: &mut [Option>], level: usize) -> Option> { + if nodes.is_empty() { + return None; + } + + nodes.sort_by_cached_key(|x| Ordered::new(x.as_ref().unwrap().item.coord(level))); + + let (left, right) = nodes.split_at_mut(nodes.len() / 2); + let (node, right) = right.split_first_mut().unwrap(); + let mut node = node.take().unwrap(); + + let next = (level + 1) % node.item.dims(); + node.left = Self::balanced_recursive(left, next); + node.right = Self::balanced_recursive(right, next); + + Some(node) + } + + /// Push a new item into this subtree. + fn push(&mut self, item: T, level: usize) { + let next = (level + 1) % item.dims(); + + if item.coord(level) <= self.item.coord(level) { + if let Some(left) = &mut self.left { + left.push(item, next); + } else { + self.left = Some(Box::new(Self::new(item))); + } + } else { + if let Some(right) = &mut self.right { + right.push(item, next); + } else { + self.right = Some(Box::new(Self::new(item))); + } + } + } +} + +/// Marker trait for [Proximity] implementations that are compatible with k-d trees. +pub trait KdProximity +where + Self: Coordinates, + Self: Proximity, + Self: CoordinateProximity>::Distance>, + V: Coordinates, +{} + +/// Blanket [KdProximity] implementation. +impl KdProximity for K +where + K: Coordinates, + K: Proximity, + K: CoordinateProximity>::Distance>, + V: Coordinates, +{} + +/// Marker trait for [Metric] implementations that are compatible with k-d tree. +pub trait KdMetric +where + Self: KdProximity, + Self: Metric, + Self: CoordinateMetric, + V: Coordinates, +{} + +/// Blanket [KdMetric] implementation. +impl KdMetric for K +where + K: KdProximity, + K: Metric, + K: CoordinateMetric, + V: Coordinates, +{} + +trait KdSearch: Copy +where + K: KdProximity, + V: Coordinates + Copy, + N: Neighborhood, +{ + /// Get this node's item. + fn item(self) -> V; + + /// Get the left subtree. + fn left(self) -> Option; + + /// Get the right subtree. + fn right(self) -> Option; + + /// Recursively search for nearest neighbors. + fn search(self, level: usize, closest: &mut [V::Value], neighborhood: &mut N) { + let item = self.item(); + neighborhood.consider(item); + + let target = neighborhood.target(); + + if target.coord(level) <= item.coord(level) { + self.search_near(self.left(), level, closest, neighborhood); + self.search_far(self.right(), level, closest, neighborhood); + } else { + self.search_near(self.right(), level, closest, neighborhood); + self.search_far(self.left(), level, closest, neighborhood); + } + } + + /// Search the subtree closest to the target. + fn search_near(self, near: Option, level: usize, closest: &mut [V::Value], neighborhood: &mut N) { + if let Some(near) = near { + let next = (level + 1) % self.item().dims(); + near.search(next, closest, neighborhood); + } + } + + /// Search the subtree farthest from the target. + fn search_far(self, far: Option, level: usize, closest: &mut [V::Value], neighborhood: &mut N) { + if let Some(far) = far { + // Update the closest possible point + let item = self.item(); + let target = neighborhood.target(); + let saved = std::mem::replace(&mut closest[level], item.coord(level)); + if neighborhood.contains(target.distance_to_coords(closest)) { + let next = (level + 1) % item.dims(); + far.search(next, closest, neighborhood); + } + closest[level] = saved; + } + } +} + +impl<'a, K, V, N> KdSearch for &'a KdNode +where + K: KdProximity<&'a V>, + V: Coordinates, + N: Neighborhood, +{ + fn item(self) -> &'a V { + &self.item + } + + fn left(self) -> Option { + self.left.as_ref().map(Box::deref) + } + + fn right(self) -> Option { + self.right.as_ref().map(Box::deref) + } +} + +/// A [k-d tree](https://en.wikipedia.org/wiki/K-d_tree). +#[derive(Debug)] +pub struct KdTree { + root: Option>, +} + +impl KdTree { + /// Create an empty tree. + pub fn new() -> Self { + Self { + root: None, + } + } + + /// Create a balanced tree out of a sequence of items. + pub fn balanced>(items: I) -> Self { + Self { + root: KdNode::balanced(items), + } + } + + /// Rebalance this k-d tree. + pub fn balance(&mut self) { + let mut nodes = Vec::new(); + if let Some(root) = self.root.take() { + nodes.push(Some(Box::new(root))); + } + + let mut i = 0; + while i < nodes.len() { + let node = nodes[i].as_mut().unwrap(); + let inside = node.left.take(); + let outside = node.right.take(); + if inside.is_some() { + nodes.push(inside); + } + if outside.is_some() { + nodes.push(outside); + } + + i += 1; + } + + self.root = KdNode::balanced_recursive(&mut nodes, 0) + .map(|node| *node); + } + + /// Push a new item into the tree. + /// + /// Inserting elements individually tends to unbalance the tree. Use [KdTree::balanced] if + /// possible to create a balanced tree from a batch of items. + pub fn push(&mut self, item: T) { + if let Some(root) = &mut self.root { + root.push(item, 0); + } else { + self.root = Some(KdNode::new(item)); + } + } +} + +impl Extend for KdTree { + fn extend>(&mut self, items: I) { + if self.root.is_some() { + for item in items { + self.push(item); + } + } else { + self.root = KdNode::balanced(items); + } + } +} + +impl FromIterator for KdTree { + fn from_iter>(items: I) -> Self { + Self::balanced(items) + } +} + +/// An iterator that moves values out of a k-d tree. +#[derive(Debug)] +pub struct IntoIter { + stack: Vec>, +} + +impl IntoIter { + fn new(node: Option>) -> Self { + Self { + stack: node.into_iter().collect(), + } + } +} + +impl Iterator for IntoIter { + type Item = T; + + fn next(&mut self) -> Option { + self.stack.pop().map(|node| { + if let Some(left) = node.left { + self.stack.push(*left); + } + if let Some(right) = node.right { + self.stack.push(*right); + } + node.item + }) + } +} + +impl IntoIterator for KdTree { + type Item = T; + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter::new(self.root) + } +} + +impl NearestNeighbors for KdTree +where + K: KdProximity, + V: Coordinates, +{ + fn search<'k, 'v, N>(&'v self, mut neighborhood: N) -> N + where + K: 'k, + V: 'v, + N: Neighborhood<&'k K, &'v V>, + { + if let Some(root) = &self.root { + let mut closest = neighborhood.target().as_vec(); + root.search(0, &mut closest, &mut neighborhood); + } + neighborhood + } +} + +impl ExactNeighbors for KdTree +where + K: KdMetric, + V: Coordinates, +{} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::tests::test_nearest_neighbors; + + #[test] + fn test_kd_tree() { + test_nearest_neighbors(KdTree::from_iter); + } + + #[test] + fn test_unbalanced_kd_tree() { + test_nearest_neighbors(|points| { + let mut tree = KdTree::new(); + for point in points { + tree.push(point); + } + tree + }); + } +} diff --git a/src/lib.rs b/src/lib.rs index e7312bf..8f7487b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,6 +6,7 @@ pub mod coords; pub mod distance; pub mod euclid; pub mod exhaustive; +pub mod kd; pub mod vp; mod util; -- cgit v1.2.3