K-d tree: Difference between revisions

Content added Content deleted
m ({{out}})
m (Made the code compilable on gcc and g++ versions 4.8, the thisn is a bit ugly though.)
Line 31: Line 31:
=={{header|C}}==
=={{header|C}}==
Using a Quickselectesque median algorithm. Compared to unbalanced trees (random insertion), it takes slightly longer (maybe half a second or so) to construct a million-node tree, though average look up visits about 1/3 fewer nodes.
Using a Quickselectesque median algorithm. Compared to unbalanced trees (random insertion), it takes slightly longer (maybe half a second or so) to construct a million-node tree, though average look up visits about 1/3 fewer nodes.
<lang c>#include <stdio.h>
<lang c>

#include <stdio.h>
#include <stdlib.h>
#include <stdlib.h>
#include <string.h>
#include <string.h>
Line 39: Line 41:
#define MAX_DIM 3
#define MAX_DIM 3
struct kd_node_t{
struct kd_node_t{
double x[MAX_DIM];
double x[MAX_DIM];
struct kd_node_t *left, *right;
struct kd_node_t *left, *right;
};
};


inline double
inline double
dist(struct kd_node_t *a, struct kd_node_t *b, int dim)
dist(struct kd_node_t *a, struct kd_node_t *b, int dim)
{
{
double t, d = 0;
double t, d = 0;
while (dim--) {
while (dim--) {
t = a->x[dim] - b->x[dim];
t = a->x[dim] - b->x[dim];
d += t * t;
d += t * t;
}
}
return d;
return d;
}
}
inline void swap(struct kd_node_t *x, struct kd_node_t *y) {
double tmp[MAX_DIM];
memcpy(tmp, x->x, sizeof(tmp));
memcpy(x->x, y->x, sizeof(tmp));
memcpy(y->x, tmp, sizeof(tmp));
}



/* see quickselect method */
/* see quickselect method */
struct kd_node_t*
struct kd_node_t*
find_median(struct kd_node_t *start, struct kd_node_t *end, int idx)
find_median(struct kd_node_t *start, struct kd_node_t *end, int idx)
{
{
if (end <= start) return NULL;
if (end <= start) return NULL;
if (end == start + 1)
if (end == start + 1)
return start;
return start;


inline void swap(struct kd_node_t *x, struct kd_node_t *y) {
struct kd_node_t *p, *store, *md = start + (end - start) / 2;
double tmp[MAX_DIM];
double pivot;
while (1) {
memcpy(tmp, x->x, sizeof(tmp));
pivot = md->x[idx];
memcpy(x->x, y->x, sizeof(tmp));
memcpy(y->x, tmp, sizeof(tmp));
}


struct kd_node_t *p, *store, *md = start + (end - start) / 2;
swap(md, end - 1);
for (store = p = start; p < end; p++) {
double pivot;
if (p->x[idx] < pivot) {
while (1) {
if (p != store)
pivot = md->x[idx];
swap(p, store);
store++;
}
}
swap(store, end - 1);


/* median has duplicate values */
swap(md, end - 1);
if (store->x[idx] == md->x[idx])
for (store = p = start; p < end; p++) {
return md;
if (p->x[idx] < pivot) {
if (p != store)
swap(p, store);
store++;
}
}
swap(store, end - 1);


if (store > md) end = store;
/* median has duplicate values */
else start = store;
if (store->x[idx] == md->x[idx])
}
return md;

if (store > md) end = store;
else start = store;
}
}
}


struct kd_node_t*
struct kd_node_t*
make_tree(struct kd_node_t *t, int len, int i, int dim)
make_tree(struct kd_node_t *t, int len, int i, int dim)
{
{
struct kd_node_t *n;
struct kd_node_t *n;


if (!len) return 0;
if (!len) return 0;


if ((n = find_median(t, t + len, i))) {
if ((n = find_median(t, t + len, i))) {
i = (i + 1) % dim;
i = (i + 1) % dim;
n->left = make_tree(t, n - t, i, dim);
n->left = make_tree(t, n - t, i, dim);
n->right = make_tree(n + 1, t + len - (n + 1), i, dim);
n->right = make_tree(n + 1, t + len - (n + 1), i, dim);
}
}
return n;
return n;
}
}


Line 112: Line 114:


void nearest(struct kd_node_t *root, struct kd_node_t *nd, int i, int dim,
void nearest(struct kd_node_t *root, struct kd_node_t *nd, int i, int dim,
struct kd_node_t **best, double *best_dist)
struct kd_node_t **best, double *best_dist)
{
{
double d, dx, dx2;
double d, dx, dx2;


if (!root) return;
if (!root) return;
d = dist(root, nd, dim);
d = dist(root, nd, dim);
dx = root->x[i] - nd->x[i];
dx = root->x[i] - nd->x[i];
dx2 = dx * dx;
dx2 = dx * dx;


visited ++;
visited ++;


if (!*best || d < *best_dist) {
if (!*best || d < *best_dist) {
*best_dist = d;
*best_dist = d;
*best = root;
*best = root;
}
}


/* if chance of exact match is high */
/* if chance of exact match is high */
if (!*best_dist) return;
if (!*best_dist) return;


if (++i >= dim) i = 0;
if (++i >= dim) i = 0;


nearest(dx > 0 ? root->left : root->right, nd, i, dim, best, best_dist);
nearest(dx > 0 ? root->left : root->right, nd, i, dim, best, best_dist);
if (dx2 >= *best_dist) return;
if (dx2 >= *best_dist) return;
nearest(dx > 0 ? root->right : root->left, nd, i, dim, best, best_dist);
nearest(dx > 0 ? root->right : root->left, nd, i, dim, best, best_dist);
}
}


#define N 1000000
#define N 1000000
#define rand1() (rand() / (double)RAND_MAX)
#define rand1() (rand() / (double)RAND_MAX)
#define rand_pt(v) { v.x[0] = rand1(); v.x[1] = rand1(); v.x[2] = rand1(); }
#define rand_pt(v) { v.x[0] = rand1(); v.x[1] = rand1(); v.x[2] = rand1(); }
int main(void)
int main(void)
{
{
int i;
int i;
struct kd_node_t wp[] = {
struct kd_node_t wp[] = {
{{2, 3}}, {{5, 4}}, {{9, 6}}, {{4, 7}}, {{8, 1}}, {{7, 2}}
{{2, 3}}, {{5, 4}}, {{9, 6}}, {{4, 7}}, {{8, 1}}, {{7, 2}}
};
};
struct kd_node_t this = {{9, 2}};
struct kd_node_t thisn = {{9, 2}};
struct kd_node_t *root, *found, *million;
struct kd_node_t *root, *found, *million;
double best_dist;
double best_dist;


root = make_tree(wp, sizeof(wp) / sizeof(wp[1]), 0, 2);
root = make_tree(wp, sizeof(wp) / sizeof(wp[1]), 0, 2);


visited = 0;
visited = 0;
found = 0;
found = 0;
nearest(root, &this, 0, 2, &found, &best_dist);
nearest(root, &thisn, 0, 2, &found, &best_dist);


printf(">> WP tree\nsearching for (%g, %g)\n"
printf(">> WP tree\nsearching for (%g, %g)\n"
"found (%g, %g) dist %g\nseen %d nodes\n\n",
"found (%g, %g) dist %g\nseen %d nodes\n\n",
this.x[0], this.x[1],
thisn.x[0], thisn.x[1],
found->x[0], found->x[1], sqrt(best_dist), visited);
found->x[0], found->x[1], sqrt(best_dist), visited);


million = calloc(N, sizeof(struct kd_node_t));
million =(struct kd_node_t*) calloc(N, sizeof(struct kd_node_t));
srand(time(0));
srand(time(0));
for (i = 0; i < N; i++) rand_pt(million[i]);
for (i = 0; i < N; i++) rand_pt(million[i]);


root = make_tree(million, N, 0, 3);
root = make_tree(million, N, 0, 3);
rand_pt(this);
rand_pt(thisn);


visited = 0;
visited = 0;
found = 0;
found = 0;
nearest(root, &this, 0, 3, &found, &best_dist);
nearest(root, &thisn, 0, 3, &found, &best_dist);


printf(">> Million tree\nsearching for (%g, %g, %g)\n"
printf(">> Million tree\nsearching for (%g, %g, %g)\n"
"found (%g, %g, %g) dist %g\nseen %d nodes\n",
"found (%g, %g, %g) dist %g\nseen %d nodes\n",
this.x[0], this.x[1], this.x[2],
thisn.x[0], thisn.x[1], thisn.x[2],
found->x[0], found->x[1], found->x[2],
found->x[0], found->x[1], found->x[2],
sqrt(best_dist), visited);
sqrt(best_dist), visited);


/* search many random points in million tree to see average behavior.
/* search many random points in million tree to see average behavior.
tree size vs avg nodes visited:
tree size vs avg nodes visited:
10 ~ 7
10 ~ 7
100 ~ 16.5
100 ~ 16.5
1000 ~ 25.5
1000 ~ 25.5
10000 ~ 32.8
10000 ~ 32.8
100000 ~ 38.3
100000 ~ 38.3
1000000 ~ 42.6
1000000 ~ 42.6
10000000 ~ 46.7 */
10000000 ~ 46.7 */
int sum = 0, test_runs = 100000;
int sum = 0, test_runs = 100000;
for (i = 0; i < test_runs; i++) {
for (i = 0; i < test_runs; i++) {
found = 0;
found = 0;
visited = 0;
visited = 0;
rand_pt(this);
rand_pt(thisn);
nearest(root, &this, 0, 3, &found, &best_dist);
nearest(root, &thisn, 0, 3, &found, &best_dist);
sum += visited;
sum += visited;
}
}
printf("\n>> Million tree\n"
printf("\n>> Million tree\n"
"visited %d nodes for %d random findings (%f per lookup)\n",
"visited %d nodes for %d random findings (%f per lookup)\n",
sum, test_runs, sum/(double)test_runs);
sum, test_runs, sum/(double)test_runs);


// free(million);
// free(million);


return 0;
return 0;
}
}</lang>
</lang>
{{out}}
{{out}}
<pre>>> WP tree
<pre>>> WP tree