From 0af7da3050f4952e106ce1acbceedaf85735ab17 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Mon, 14 Oct 2013 17:28:20 -0400 Subject: prtree: Sort large workloads in parallel. Performance benefit is around 33% for more than 1000 objects. --- libdimension/bench/prtree.c | 1 + libdimension/prtree.c | 63 ++++++++++++++++++++++++++++++++++++++------- libdimension/tests/prtree.c | 1 + 3 files changed, 55 insertions(+), 10 deletions(-) diff --git a/libdimension/bench/prtree.c b/libdimension/bench/prtree.c index 953c7b4..9c1a5b1 100644 --- a/libdimension/bench/prtree.c +++ b/libdimension/bench/prtree.c @@ -21,6 +21,7 @@ #include "../prtree.c" #include "../threads.c" #include "../future.c" +#include "../platform.c" #include #include diff --git a/libdimension/prtree.c b/libdimension/prtree.c index c012e6f..0d459eb 100644 --- a/libdimension/prtree.c +++ b/libdimension/prtree.c @@ -260,28 +260,69 @@ dmnsn_priority_leaves_recursive(dmnsn_bvh_node **sorted_leaves[DMNSN_PSEUDO_B], } } +/** Sort each dimension in parallel with more than this many leaves. */ +#define DMNSN_PARALLEL_SORT_THRESHOLD 1024 + +typedef struct { + dmnsn_bvh_node **leaves_arr; + dmnsn_bvh_node ***sorted_leaves; + size_t nleaves; +} dmnsn_sort_leaves_payload; + +static dmnsn_bvh_node ** +dmnsn_sort_leaf_array(dmnsn_bvh_node **leaves, size_t nleaves, int comparator) +{ + size_t leaves_size = nleaves*sizeof(dmnsn_bvh_node *); + dmnsn_bvh_node **sorted_leaves = dmnsn_malloc(leaves_size); + memcpy(sorted_leaves, leaves, leaves_size); + qsort(sorted_leaves, nleaves, sizeof(dmnsn_bvh_node *), + dmnsn_comparators[comparator]); + return sorted_leaves; +} + +static int +dmnsn_sort_leaves(void *ptr, unsigned int thread, unsigned int nthreads) +{ + dmnsn_sort_leaves_payload *payload = ptr; + + for (unsigned int i = thread; i < DMNSN_PSEUDO_B; i += nthreads) { + payload->sorted_leaves[i] = + dmnsn_sort_leaf_array(payload->leaves_arr, payload->nleaves, i); + } + + return 0; +} + /** Constructs an implicit pseudo-PR-tree and returns the priority leaves. */ static dmnsn_array * -dmnsn_priority_leaves(const dmnsn_array *leaves) +dmnsn_priority_leaves(const dmnsn_array *leaves, unsigned int nthreads) { dmnsn_bvh_node **leaves_arr = dmnsn_array_first(leaves); dmnsn_bvh_node **sorted_leaves[DMNSN_PSEUDO_B]; size_t nleaves = dmnsn_array_size(leaves); - size_t leaves_size = nleaves*sizeof(dmnsn_bvh_node *); - for (size_t i = 0; i < DMNSN_PSEUDO_B; ++i) { - sorted_leaves[i] = dmnsn_malloc(leaves_size); - memcpy(sorted_leaves[i], leaves_arr, leaves_size); - qsort(sorted_leaves[i], nleaves, sizeof(dmnsn_bvh_node *), - dmnsn_comparators[i]); + if (nleaves >= DMNSN_PARALLEL_SORT_THRESHOLD && nthreads > 1) { + dmnsn_sort_leaves_payload payload = { + .leaves_arr = leaves_arr, + .sorted_leaves = sorted_leaves, + .nleaves = nleaves, + }; + dmnsn_execute_concurrently(dmnsn_sort_leaves, &payload, nthreads); + } else { + for (size_t i = 0; i < DMNSN_PSEUDO_B; ++i) { + sorted_leaves[i] = dmnsn_sort_leaf_array(leaves_arr, nleaves, i); + } } + size_t buffer_size = nleaves/2; + dmnsn_bvh_node **buffer = dmnsn_malloc(buffer_size*sizeof(dmnsn_bvh_node *)); + dmnsn_array *new_leaves = dmnsn_new_array(sizeof(dmnsn_bvh_node *)); - dmnsn_bvh_node **buffer = dmnsn_malloc(leaves_size/2); + dmnsn_priority_leaves_recursive(sorted_leaves, nleaves, buffer, new_leaves, 0); - dmnsn_free(buffer); + dmnsn_free(buffer); for (size_t i = 0; i < DMNSN_PSEUDO_B; ++i) { dmnsn_free(sorted_leaves[i]); } @@ -304,8 +345,10 @@ dmnsn_new_prtree(const dmnsn_array *objects) dmnsn_array_push(leaves, &node); } + unsigned int ncpus = dmnsn_ncpus(); + unsigned int nthreads = ncpus < DMNSN_PSEUDO_B ? ncpus : DMNSN_PSEUDO_B; while (dmnsn_array_size(leaves) > 1) { - dmnsn_array *new_leaves = dmnsn_priority_leaves(leaves); + dmnsn_array *new_leaves = dmnsn_priority_leaves(leaves, nthreads); dmnsn_delete_array(leaves); leaves = new_leaves; } diff --git a/libdimension/tests/prtree.c b/libdimension/tests/prtree.c index 7e69acd..2642f48 100644 --- a/libdimension/tests/prtree.c +++ b/libdimension/tests/prtree.c @@ -25,6 +25,7 @@ #include "../prtree.c" #include "../threads.c" #include "../future.c" +#include "../platform.c" #include #include -- cgit v1.2.3