From 660a6dc83a761c76b1b3a3c6d71c18800e0b47a3 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Sun, 29 Jun 2014 15:57:50 -0400 Subject: prtree: Improve cache locality during tree building. This gives about a 25% speedup by storing the node color in a smaller wrapper struct instead of inside the node itself. --- libdimension/bvh.c | 5 +-- libdimension/bvh.h | 18 ++++---- libdimension/prtree.c | 117 +++++++++++++++++++++++++++++--------------------- 3 files changed, 79 insertions(+), 61 deletions(-) (limited to 'libdimension') diff --git a/libdimension/bvh.c b/libdimension/bvh.c index 462cbee..d1bd0fc 100644 --- a/libdimension/bvh.c +++ b/libdimension/bvh.c @@ -358,10 +358,9 @@ dmnsn_bvh_bounding_box(const dmnsn_bvh *bvh) } dmnsn_bvh_node * -dmnsn_new_bvh_node(size_t max_children) +dmnsn_new_bvh_node(unsigned int max_children) { - dmnsn_bvh_node *node = dmnsn_malloc(sizeof(dmnsn_bvh_node) - + max_children*sizeof(dmnsn_bvh_node *)); + dmnsn_bvh_node *node = dmnsn_malloc(sizeof(dmnsn_bvh_node) + max_children*sizeof(dmnsn_bvh_node *)); node->bounding_box = dmnsn_zero_bounding_box(); node->object = NULL; node->nchildren = 0; diff --git a/libdimension/bvh.h b/libdimension/bvh.h index 27652c1..f965632 100644 --- a/libdimension/bvh.h +++ b/libdimension/bvh.h @@ -1,5 +1,5 @@ /************************************************************************* - * Copyright (C) 2012 Tavian Barnes * + * Copyright (C) 2012-2014 Tavian Barnes * * * * This file is part of The Dimension Library. * * * @@ -54,16 +54,15 @@ DMNSN_INTERNAL dmnsn_bounding_box dmnsn_bvh_bounding_box(const dmnsn_bvh *bvh); /// A non-flat BVH representation, used by BVH implementations. typedef struct dmnsn_bvh_node { - dmnsn_bounding_box bounding_box; /// The bounding box of this node. - dmnsn_object *object; /// The object, for leaf nodes. - int data; /// Extra field for implementation use. - size_t nchildren; /// How many children this node has. - size_t max_children; /// Maximum number of children. - struct dmnsn_bvh_node *children[]; /// Flexible array of children. + dmnsn_bounding_box bounding_box; ///< The bounding box of this node. + dmnsn_object *object; ///< The object, for leaf nodes. + unsigned int nchildren; ///< How many children this node has. + unsigned int max_children; ///< Maximum number of children. + struct dmnsn_bvh_node *children[]; ///< Flexible array of children. } dmnsn_bvh_node; /// Create a BVH node. -DMNSN_INTERNAL dmnsn_bvh_node *dmnsn_new_bvh_node(size_t max_children); +DMNSN_INTERNAL dmnsn_bvh_node *dmnsn_new_bvh_node(unsigned int max_children); /// Create a BVH leaf node. DMNSN_INTERNAL dmnsn_bvh_node *dmnsn_new_bvh_leaf_node(dmnsn_object *object); @@ -72,5 +71,4 @@ DMNSN_INTERNAL dmnsn_bvh_node *dmnsn_new_bvh_leaf_node(dmnsn_object *object); DMNSN_INTERNAL void dmnsn_delete_bvh_node(dmnsn_bvh_node *node); /// Add a child to a BVH node. -DMNSN_INTERNAL void dmnsn_bvh_node_add(dmnsn_bvh_node *parent, - dmnsn_bvh_node *child); +DMNSN_INTERNAL void dmnsn_bvh_node_add(dmnsn_bvh_node *parent, dmnsn_bvh_node *child); diff --git a/libdimension/prtree.c b/libdimension/prtree.c index c525988..bb7beec 100644 --- a/libdimension/prtree.c +++ b/libdimension/prtree.c @@ -32,19 +32,29 @@ #define DMNSN_PSEUDO_B 6 /// The side of the split that a node ended up on. -typedef enum dmnsn_prnode_location { +typedef enum dmnsn_prnode_color { DMNSN_PRTREE_LEAF, ///< Priority leaf. DMNSN_PRTREE_LEFT, ///< Left child. DMNSN_PRTREE_RIGHT ///< Right child. -} dmnsn_prnode_location; +} dmnsn_prnode_color; + +/** + * A BVH node with associated color. Compared to storing the color in the + * \p dmnsn_bvh_node itself, this method has decreased cache performance during + * sorting (due to an extra pointer chase), but increased performance during + * tree building (because it's much smaller than a full \p dmnsn_bvh_node). + * Overall it gives about a 25% improvement. + */ +typedef struct dmnsn_colored_prnode { + dmnsn_prnode_color color; + dmnsn_bvh_node *node; +} dmnsn_colored_prnode; /// Construct an empty PR-node. static inline dmnsn_bvh_node * dmnsn_new_prnode(void) { - dmnsn_bvh_node *node = dmnsn_new_bvh_node(DMNSN_PRTREE_B); - node->data = DMNSN_PRTREE_LEFT; // Mustn't be _LEAF - return node; + return dmnsn_new_bvh_node(DMNSN_PRTREE_B); } /// Comparator types. @@ -62,48 +72,48 @@ enum { static int dmnsn_xmin_comp(const void *l, const void *r) { - double lval = (*(const dmnsn_bvh_node **)l)->bounding_box.min.x; - double rval = (*(const dmnsn_bvh_node **)r)->bounding_box.min.x; + double lval = (*(const dmnsn_colored_prnode **)l)->node->bounding_box.min.x; + double rval = (*(const dmnsn_colored_prnode **)r)->node->bounding_box.min.x; return (lval > rval) - (lval < rval); } static int dmnsn_ymin_comp(const void *l, const void *r) { - double lval = (*(const dmnsn_bvh_node **)l)->bounding_box.min.y; - double rval = (*(const dmnsn_bvh_node **)r)->bounding_box.min.y; + double lval = (*(const dmnsn_colored_prnode **)l)->node->bounding_box.min.y; + double rval = (*(const dmnsn_colored_prnode **)r)->node->bounding_box.min.y; return (lval > rval) - (lval < rval); } static int dmnsn_zmin_comp(const void *l, const void *r) { - double lval = (*(const dmnsn_bvh_node **)l)->bounding_box.min.z; - double rval = (*(const dmnsn_bvh_node **)r)->bounding_box.min.z; + double lval = (*(const dmnsn_colored_prnode **)l)->node->bounding_box.min.z; + double rval = (*(const dmnsn_colored_prnode **)r)->node->bounding_box.min.z; return (lval > rval) - (lval < rval); } static int dmnsn_xmax_comp(const void *l, const void *r) { - double lval = (*(const dmnsn_bvh_node **)l)->bounding_box.max.x; - double rval = (*(const dmnsn_bvh_node **)r)->bounding_box.max.x; + double lval = (*(const dmnsn_colored_prnode **)l)->node->bounding_box.max.x; + double rval = (*(const dmnsn_colored_prnode **)r)->node->bounding_box.max.x; return (lval < rval) - (lval > rval); } static int dmnsn_ymax_comp(const void *l, const void *r) { - double lval = (*(const dmnsn_bvh_node **)l)->bounding_box.max.y; - double rval = (*(const dmnsn_bvh_node **)r)->bounding_box.max.y; + double lval = (*(const dmnsn_colored_prnode **)l)->node->bounding_box.max.y; + double rval = (*(const dmnsn_colored_prnode **)r)->node->bounding_box.max.y; return (lval < rval) - (lval > rval); } static int dmnsn_zmax_comp(const void *l, const void *r) { - double lval = (*(const dmnsn_bvh_node **)l)->bounding_box.max.z; - double rval = (*(const dmnsn_bvh_node **)r)->bounding_box.max.z; + double lval = (*(const dmnsn_colored_prnode **)l)->node->bounding_box.max.z; + double rval = (*(const dmnsn_colored_prnode **)r)->node->bounding_box.max.z; return (lval < rval) - (lval > rval); } @@ -119,27 +129,27 @@ static dmnsn_array_comparator_fn *const dmnsn_comparators[DMNSN_PSEUDO_B] = { /// Add the priority leaves for this level. static void -dmnsn_add_priority_leaves(dmnsn_bvh_node **sorted_leaves[DMNSN_PSEUDO_B], +dmnsn_add_priority_leaves(dmnsn_colored_prnode **sorted_leaves[DMNSN_PSEUDO_B], size_t start, size_t end, dmnsn_array *new_leaves) { for (size_t i = 0; i < DMNSN_PSEUDO_B; ++i) { dmnsn_bvh_node *leaf = NULL; - dmnsn_bvh_node **leaves = sorted_leaves[i]; + dmnsn_colored_prnode **leaves = sorted_leaves[i]; for (size_t j = start; j < end && (!leaf || leaf->nchildren < DMNSN_PRTREE_B); ++j) { // Skip all the previously found extreme nodes - if (leaves[j]->data == DMNSN_PRTREE_LEAF) { + if (leaves[j]->color == DMNSN_PRTREE_LEAF) { continue; } if (!leaf) { leaf = dmnsn_new_prnode(); } - leaves[j]->data = DMNSN_PRTREE_LEAF; - dmnsn_bvh_node_add(leaf, leaves[j]); + leaves[j]->color = DMNSN_PRTREE_LEAF; + dmnsn_bvh_node_add(leaf, leaves[j]->node); } if (leaf) { @@ -152,11 +162,11 @@ dmnsn_add_priority_leaves(dmnsn_bvh_node **sorted_leaves[DMNSN_PSEUDO_B], /// Get rid of the extreme nodes. static void -dmnsn_filter_priority_leaves(dmnsn_bvh_node **leaves, size_t start, size_t *endp) +dmnsn_filter_priority_leaves(dmnsn_colored_prnode **leaves, size_t start, size_t *endp) { size_t i, skip, end; for (i = start, skip = 0, end = *endp; i < end; ++i) { - if (leaves[i]->data == DMNSN_PRTREE_LEAF) { + if (leaves[i]->color == DMNSN_PRTREE_LEAF) { ++skip; } else { leaves[i - skip] = leaves[i]; @@ -168,14 +178,14 @@ dmnsn_filter_priority_leaves(dmnsn_bvh_node **leaves, size_t start, size_t *endp /// Split the leaves and mark the left and right child nodes. static void -dmnsn_split_sorted_leaves_easy(dmnsn_bvh_node **leaves, size_t start, size_t *midp, size_t end) +dmnsn_split_sorted_leaves_easy(dmnsn_colored_prnode **leaves, size_t start, size_t *midp, size_t end) { size_t i, mid = start + (end - start + 1)/2; for (i = start; i < mid; ++i) { - leaves[i]->data = DMNSN_PRTREE_LEFT; + leaves[i]->color = DMNSN_PRTREE_LEFT; } for (; i < end; ++i) { - leaves[i]->data = DMNSN_PRTREE_RIGHT; + leaves[i]->color = DMNSN_PRTREE_RIGHT; } *midp = mid; @@ -183,14 +193,14 @@ dmnsn_split_sorted_leaves_easy(dmnsn_bvh_node **leaves, size_t start, size_t *mi /// Split the leaves using the coloring from dmnsn_split_sorted_leaves_easy(). static void -dmnsn_split_sorted_leaves_hard(dmnsn_bvh_node **leaves, dmnsn_bvh_node **buffer, size_t start, size_t end) +dmnsn_split_sorted_leaves_hard(dmnsn_colored_prnode **leaves, dmnsn_colored_prnode **buffer, size_t start, size_t end) { size_t i, j, skip; for (i = start, j = 0, skip = 0; i < end; ++i) { - if (leaves[i]->data == DMNSN_PRTREE_LEFT) { + if (leaves[i]->color == DMNSN_PRTREE_LEFT) { leaves[i - skip] = leaves[i]; } else { - if (leaves[i]->data == DMNSN_PRTREE_RIGHT) { + if (leaves[i]->color == DMNSN_PRTREE_RIGHT) { buffer[j] = leaves[i]; ++j; } @@ -206,9 +216,9 @@ dmnsn_split_sorted_leaves_hard(dmnsn_bvh_node **leaves, dmnsn_bvh_node **buffer, /// Split the sorted lists into the left and right subtrees. static void -dmnsn_split_sorted_leaves(dmnsn_bvh_node **sorted_leaves[DMNSN_PSEUDO_B], +dmnsn_split_sorted_leaves(dmnsn_colored_prnode **sorted_leaves[DMNSN_PSEUDO_B], size_t start, size_t *midp, size_t *endp, - dmnsn_bvh_node **buffer, int i) + dmnsn_colored_prnode **buffer, int i) { size_t orig_end = *endp; @@ -231,9 +241,9 @@ dmnsn_split_sorted_leaves(dmnsn_bvh_node **sorted_leaves[DMNSN_PSEUDO_B], /// Recursively constructs an implicit pseudo-PR-tree and collects the priority /// leaves. static void -dmnsn_priority_leaves_recursive(dmnsn_bvh_node **sorted_leaves[DMNSN_PSEUDO_B], +dmnsn_priority_leaves_recursive(dmnsn_colored_prnode **sorted_leaves[DMNSN_PSEUDO_B], size_t start, size_t end, - dmnsn_bvh_node **buffer, + dmnsn_colored_prnode **buffer, dmnsn_array *new_leaves, int comparator) { @@ -257,18 +267,22 @@ dmnsn_priority_leaves_recursive(dmnsn_bvh_node **sorted_leaves[DMNSN_PSEUDO_B], #define DMNSN_PARALLEL_SORT_THRESHOLD 1024 typedef struct { - dmnsn_bvh_node **leaves_arr; - dmnsn_bvh_node ***sorted_leaves; + dmnsn_colored_prnode *colored_leaves; + dmnsn_colored_prnode ***sorted_leaves; size_t nleaves; } dmnsn_sort_leaves_payload; -static dmnsn_bvh_node ** -dmnsn_sort_leaf_array(dmnsn_bvh_node **leaves, size_t nleaves, int comparator) +static dmnsn_colored_prnode ** +dmnsn_sort_leaf_array(dmnsn_colored_prnode *colored_leaves, size_t nleaves, int comparator) { - size_t leaves_size = nleaves*sizeof(dmnsn_bvh_node *); - dmnsn_bvh_node **sorted_leaves = dmnsn_malloc(leaves_size); - memcpy(sorted_leaves, leaves, leaves_size); - qsort(sorted_leaves, nleaves, sizeof(dmnsn_bvh_node *), dmnsn_comparators[comparator]); + dmnsn_colored_prnode **sorted_leaves = dmnsn_malloc(nleaves*sizeof(dmnsn_colored_prnode *)); + + for (size_t i = 0; i < nleaves; ++i) { + sorted_leaves[i] = colored_leaves + i; + } + + qsort(sorted_leaves, nleaves, sizeof(dmnsn_colored_prnode *), dmnsn_comparators[comparator]); + return sorted_leaves; } @@ -278,7 +292,7 @@ dmnsn_sort_leaves(void *ptr, unsigned int thread, unsigned int nthreads) dmnsn_sort_leaves_payload *payload = ptr; for (unsigned int i = thread; i < DMNSN_PSEUDO_B; i += nthreads) { - payload->sorted_leaves[i] = dmnsn_sort_leaf_array(payload->leaves_arr, payload->nleaves, i); + payload->sorted_leaves[i] = dmnsn_sort_leaf_array(payload->colored_leaves, payload->nleaves, i); } return 0; @@ -289,24 +303,31 @@ static dmnsn_array * dmnsn_priority_leaves(const dmnsn_array *leaves, unsigned int nthreads) { dmnsn_bvh_node **leaves_arr = dmnsn_array_first(leaves); - dmnsn_bvh_node **sorted_leaves[DMNSN_PSEUDO_B]; size_t nleaves = dmnsn_array_size(leaves); + dmnsn_colored_prnode *colored_leaves = dmnsn_malloc(nleaves*sizeof(dmnsn_colored_prnode)); + for (size_t i = 0; i < nleaves; ++i) { + colored_leaves[i].color = DMNSN_PRTREE_LEFT; // Mustn't be _LEAF + colored_leaves[i].node = leaves_arr[i]; + } + + dmnsn_colored_prnode **sorted_leaves[DMNSN_PSEUDO_B]; + if (nleaves >= DMNSN_PARALLEL_SORT_THRESHOLD && nthreads > 1) { dmnsn_sort_leaves_payload payload = { - .leaves_arr = leaves_arr, + .colored_leaves = colored_leaves, .sorted_leaves = sorted_leaves, .nleaves = nleaves, }; dmnsn_execute_concurrently(NULL, dmnsn_sort_leaves, &payload, nthreads); } else { for (size_t i = 0; i < DMNSN_PSEUDO_B; ++i) { - sorted_leaves[i] = dmnsn_sort_leaf_array(leaves_arr, nleaves, i); + sorted_leaves[i] = dmnsn_sort_leaf_array(colored_leaves, nleaves, i); } } size_t buffer_size = nleaves/2; - dmnsn_bvh_node **buffer = dmnsn_malloc(buffer_size*sizeof(dmnsn_bvh_node *)); + dmnsn_colored_prnode **buffer = dmnsn_malloc(buffer_size*sizeof(dmnsn_colored_prnode *)); dmnsn_array *new_leaves = DMNSN_NEW_ARRAY(dmnsn_bvh_node *); @@ -316,6 +337,7 @@ dmnsn_priority_leaves(const dmnsn_array *leaves, unsigned int nthreads) for (size_t i = 0; i < DMNSN_PSEUDO_B; ++i) { dmnsn_free(sorted_leaves[i]); } + dmnsn_free(colored_leaves); return new_leaves; } @@ -331,7 +353,6 @@ dmnsn_new_prtree(const dmnsn_array *objects) dmnsn_array *leaves = DMNSN_NEW_ARRAY(dmnsn_bvh_node *); DMNSN_ARRAY_FOREACH (dmnsn_object **, object, objects) { dmnsn_bvh_node *node = dmnsn_new_bvh_leaf_node(*object); - node->data = DMNSN_PRTREE_LEFT; // Mustn't be _LEAF dmnsn_array_push(leaves, &node); } -- cgit v1.2.3