Talk:Closest-pair problem/C: Difference between revisions

From Rosetta Code
Content added Content deleted
(→‎Propose replacing code: clean up a bit)
(→‎Propose replacing code: clean up, removed unneeded "optimizations")
Line 18: Line 18:
/* note: even though l_list and r_list are used by each recursion of closest(),
/* note: even though l_list and r_list are used by each recursion of closest(),
they are always used in sequentially, no need to allocate them repeatedly */
they are always used in sequentially, no need to allocate them repeatedly */
int *l_list, *r_list;
point *l_list, *r_list;


inline double dist(point a, point b)
inline double dist(point a, point b)
Line 35: Line 35:
{
{
int i, j;
int i, j;
point pi;
double d, min_d = MAXDOUBLE;
double d, min_d = MAXDOUBLE;


for (i = 0, pi = pts; i < max_n; pi++, i++) {
for (i = 0; i < max_n; i++) {
for (j = i + 1; j < max_n; j++) {
for (j = i + 1; j < max_n; j++) {
d = dist(pi, pts + j);
d = dist(pts + i, pts + j);
if (d >= min_d ) continue;
if (d >= min_d ) continue;
*a = pts + i;

*a = pi;
*b = pts + j;
*b = pts + j;
min_d = d;
min_d = d;
Line 51: Line 49:
}
}


void sort_y(point pts, int n, int *list)
void sort_y(point *list, int n)
{
{
int cmp_y(const void *a, const void *b) {
int cmp_y(const void *a, const void *b) {
return cmp_dbl(pts[*(int*)a].y, pts[*(int*)b].y);
return cmp_dbl( ((point)a)->y, ((point)b)->y );
}
}
qsort(list, n, sizeof(int), cmp_y);
qsort(list, n, sizeof(point), cmp_y);
}
}


Line 62: Line 60:
{
{
int left, right, lsize, rsize, i;
int left, right, lsize, rsize, i;
point pl, a1, b1;
point a1, b1;
double min_d, d, dsqrt, median;
double min_d, d, dsqrt, median;


Line 83: Line 81:


/* find points within +- min distance on X from the center line,
/* find points within +- min distance on X from the center line,
list their indices */
list their pointers */
for ( lsize = 0, left--;
for ( lsize = 0, left--;
median - pts[left].x < dsqrt && left;
median - pts[left].x < dsqrt && left;
l_list[lsize++] = left--);
l_list[lsize++] = pts + left--);


for ( rsize = 0;
for ( rsize = 0;
pts[right].x - median < dsqrt && right < n;
pts[right].x - median < dsqrt && right < n;
r_list[rsize++] = right++);
r_list[rsize++] = pts + right++);


/* sort the indices by y: don't touch the point data which is sorted by x */
/* sort the pointers by y: don't touch the point data which is sorted by x */
sort_y(pts, lsize, l_list);
sort_y(l_list, lsize);
sort_y(pts, rsize, r_list);
sort_y(r_list, rsize);


/* climb up left and right list and compare distance */
/* climb up left and right list and compare distance */
for (left = right = 0; left < lsize; left ++) {
for (left = right = 0; left < lsize; left ++) {
/* next point in left list */
/* next point in left list */
pl = pts + l_list[left];
median = l_list[left]->y;
median = pl->y;


/* climb up right list until the y is not too low */
/* climb up right list until the y is not too low */
while (median > dsqrt + pts[r_list[right]].y && right < rsize)
while (right < rsize && median > dsqrt + r_list[right]->y)
right++;
right++;


Line 110: Line 107:
for (i = right; i < rsize; i++) {
for (i = right; i < rsize; i++) {
/* right y is too high, break */
/* right y is too high, break */
if (pts[ r_list[i] ].y > median + dsqrt) break;
if (r_list[i]->y > median + dsqrt) break;


if (min_d > (d = dist(pts + r_list[i], pl))) {
if (min_d > (d = dist(r_list[i], l_list[left]))) {
*a = pl;
*a = l_list[left];
*b = pts + r_list[i];
*b = r_list[i];
min_d = d;
min_d = d;
dsqrt = sqrt(min_d);
dsqrt = sqrt(min_d);
Line 131: Line 128:


point pts = malloc(sizeof(point_t) * NP);
point pts = malloc(sizeof(point_t) * NP);
l_list = malloc(sizeof(int) * NP);
l_list = malloc(sizeof(point) * (NP / 2));
r_list = malloc(sizeof(int) * NP);
r_list = malloc(sizeof(point) * ((NP + 1) / 2));


for(i = 0; i < NP; i++) {
for(i = 0; i < NP; i++) {

Revision as of 22:17, 17 June 2011

your code does NOT RUN when i compile with devc or Turbo C???? (unsigned comment added by 113.22.126.190 at 21:06, 24 October 2010)

Correct it? Try setting compiler flags for compatibility with a specific C standard? --Michael Mol 13:11, 25 October 2010 (UTC)
"The code does not run"... is like pretending to derive solar physics by the statement "the sun shines"... By the way "devc" (Dev C++ IDE?) usually is used with (an old version of) gcc. On my test machine (GNU/Linux) it runs, except for some "evil dataset" that someone gave me once upon a time... I've inspected the code with valgrind, debugged it, ... but I was not able to unwind the flow that gives the problem... I know this code hides some oddity somewhere. Likely the better thing is to rewrite it from scratch, but I've not the courage yet! :) — ShinTakezou 17:52, 30 May 2011 (UTC)

Propose replacing code

I suggest replace current code sample with rewritten code below. Reasons: 1. It doesn't segfault with 200,000 points or more; 2. It's shorter and quite a bit faster; 3. It's cleaner IMO. <lang C>#include <stdio.h>

  1. include <stdlib.h>
  2. include <values.h>
  3. include <math.h>

typedef struct { double x, y; } point_t, *point;

/* note: even though l_list and r_list are used by each recursion of closest(),

  they are always used in sequentially, no need to allocate them repeatedly */

point *l_list, *r_list;

inline double dist(point a, point b) {

       double dx = a->x - b->x, dy = a->y - b->y;
       return dx * dx + dy * dy;

}

inline int cmp_dbl(double a, double b) {

       return a < b ? -1 : a > b ? 1 : 0;

}


double brute_force(point pts, int max_n, point *a, point *b) {

       int i, j;
       double d, min_d = MAXDOUBLE;
       for (i = 0; i < max_n; i++) {
               for (j = i + 1; j < max_n; j++) {
                       d = dist(pts + i, pts + j);
                       if (d >= min_d ) continue;
                       *a = pts + i;
                       *b = pts + j;
                       min_d = d;
               }
       }
       return min_d;

}

void sort_y(point *list, int n) {

       int cmp_y(const void *a, const void *b) {
               return cmp_dbl( ((point)a)->y, ((point)b)->y );
       }
       qsort(list, n, sizeof(point), cmp_y);

}

double closest(point pts, int n, point *a, point *b) {

       int left, right, lsize, rsize, i;
       point a1, b1;
       double min_d, d, dsqrt, median;
       /* problem small enough, don't divide, just conquer */
       if (n <= 8) return brute_force(pts, n, a, b);
       /* get left and right results */
       left = right = n / 2;
       min_d = closest(pts, left, a, b);
       d = closest(pts + left, n - left, &a1, &b1);
       if (min_d > d) {
               min_d = d;
               *a = a1;
               *b = b1;
       }
       median = pts[left].x;
       dsqrt = sqrt(min_d);
       /* find points within +- min distance on X from the center line,
          list their pointers */
       for (   lsize = 0, left--;
               median - pts[left].x  < dsqrt && left;
               l_list[lsize++] = pts + left--);
       for (   rsize = 0;
               pts[right].x - median < dsqrt && right < n;
               r_list[rsize++] = pts + right++);
       /* sort the pointers by y: don't touch the point data which is sorted by x */
       sort_y(l_list, lsize);
       sort_y(r_list, rsize);
       /* climb up left and right list and compare distance */
       for (left = right = 0; left < lsize; left ++) {
               /* next point in left list */
               median = l_list[left]->y;
               /* climb up right list until the y is not too low */
               while (right < rsize && median > dsqrt + r_list[right]->y)
                       right++;
               if (right >= rsize) break;
               for (i = right; i < rsize; i++) {
                       /* right y is too high, break */
                       if (r_list[i]->y > median + dsqrt) break;
                       if (min_d > (d = dist(r_list[i], l_list[left]))) {
                               *a = l_list[left];
                               *b = r_list[i];
                               min_d = d;
                               dsqrt = sqrt(min_d);
                       }
               }
       }
       return min_d;

}

  1. define NP 1000000

int main(int argc, char **argv) {

       int i;
       point a, b;
       point pts = malloc(sizeof(point_t) * NP);
       l_list = malloc(sizeof(point) * (NP / 2));
       r_list = malloc(sizeof(point) * ((NP + 1) / 2));
       for(i = 0; i < NP; i++) {
               pts[i].x = 20 * (double) rand()/RAND_MAX;
               pts[i].y = 20 * (double) rand()/RAND_MAX;
       }

/*

       printf("brute force: %g, ", sqrt(brute_force(pts, NP, &a, &b)));
       printf("between (%f,%f) and (%f,%f)\n", a->x, a->y, a->x, a->y);
  • /
       int cmp_x(const void *a, const void *b) {
               return cmp_dbl( ((point)a)->x, ((point)b)->x );
       }
       qsort(pts, NP, sizeof(point_t), cmp_x);
       printf("min: %g; ", sqrt(closest(pts, NP, &a, &b)));
       printf("point (%f,%f) and (%f,%f)\n", a->x, a->y, b->x, b->y);
       return 0;

}</lang> --Ledrug 21:34, 17 June 2011 (UTC)