Closest-pair problem/C

This example is in need of improvement:

They exist "evil" datasets able to crash it.

<lang c>#include <stdio.h>

include <stdlib.h>
include <string.h>
include <math.h>
include <assert.h>

typedef struct {

 double x, y;

} point_t;

inline double distance(point_t *p1, point_t *p2) {

 return sqrt((p1->x - p2->x)*(p1->x - p2->x) +

(p1->y - p2->y)*(p1->y - p2->y)); }</lang>

We need an ad hoc sort function; here is a modified version of what you can find at Quicksort.

<lang c>typedef int(*comparator)(const void *, const void *); void quick(point_t *P, int *left, int *right, comparator compar) {

 if (right > left) {
   int pivot = left[(right-left)/2];
   int *r = right, *l = left;
   do {
     while (compar(&P[*l], &P[pivot]) < 0) l++;
     while (compar(&P[*r], &P[pivot]) > 0) r--;
     if (l <= r) {
       int t = *l;
       *l++ = *r;
       *r-- = t;
     }
   } while (l <= r);
   quick(P, left, r, compar);
   quick(P, l, right, compar);
 }

} void m_qsort(point_t *P, int *array, int length, comparator compar) {

 quick(P, array, array+length-1, compar);

}

int sort_index_by_x(const void *ra, const void *rb) {

 const point_t *a = ra, *b = rb;
 double d = a->x - b->x;
 return (d<0) ? -1 : ( (d==0) ? 0 : 1);

}

int sort_index_by_y(const void *ra, const void *rb) {

 const point_t *a = ra, *b = rb;
 double d = a->y - b->y;
 return (d<0) ? -1 : ( (d==0) ? 0 : 1 );

}</lang>

<lang c>double closest_pair_simple_(point_t *P, int *pts, int num, int *pia, int *pib) {

 int i, j;
 if ( num < 2 ) return HUGE_VAL;
 double ird = distance(&P[pts[0]], &P[pts[1]]);
 *pia = pts[0]; *pib = pts[1];
 for(i=0; i < (num-1); i++) {
   for(j=i+1; j < num; j++) {
     double t;
     t = distance(&P[pts[i]], &P[pts[j]]);
     if ( t < ird ) {

ird = t; *pia = pts[i]; *pib = pts[j];

     }
   }
 }
 return ird;

}</lang>

This is the entry point for the simple algorithm.

<lang c>double closest_pair_simple(point_t *P, int num, int *pia, int *pib) {

 int *pts, i;
 double d;

 pts = malloc(sizeof(int)*num);  assert(pts != NULL);
 for(i=0; i < num; i++) pts[i] = i;
 d = closest_pair_simple_(P, pts, num, pia, pib);
 free(pts);
 return d;

}</lang>

<lang c>double closest_pair_(point_t *P, int *xP, int *yP, int N, int *iA, int *iB) {

 int i, k, j;
 int *xL, *xR, *yL, *yR, *yS;
 int lA, lB, rA, rB, midx;
 double dL, dR, dmin, xm, closest;
 int nS;

 if ( N <= 3 ) return closest_pair_simple_(P, xP, N, iA, iB);

 midx = ceil((double)N/2.0) - 1;
 
 xL = malloc(sizeof(int)*(midx+1));       assert(xL != NULL);
 xR = malloc(sizeof(int)*(N-(midx+1)));   assert(xR != NULL);
 yL = malloc(sizeof(int)*(midx+1));       assert(yL != NULL);
 yR = malloc(sizeof(int)*(N-(midx+1)));   assert(yR != NULL);

 for(i=0;i <= midx; i++) xL[i] = xP[i];
 for(i=midx+1; i < N; i++) xR[i-(midx+1)] = xP[i];

 xm = P[xP[midx]].x;

 for(i=0, k=0, j=0; i < N; i++) {
   if ( P[yP[i]].x <= xm ) {
     yL[k++] = yP[i];
   } else {
     yR[j++] = yP[i];
   }
 }

 dL = closest_pair_(P, xL, yL, midx+1, &lA, &lB);
 dR = closest_pair_(P, xR, yR, (N-(midx+1)), &rA, &rB);

 if ( dL < dR ) {
   dmin = dL;
   *iA = lA; *iB = lB;
 } else {
   dmin = dR;
   *iA = rA; *iB = rB;
 }

 yS = malloc(sizeof(int)*N);    assert(yS != NULL);
 nS = 0;
 for(i=0; i < N; i++) {
   if ( fabs(xm - P[yP[i]].x) < dmin ) {
     yS[nS++] = yP[i];
    }
 }

 closest = dmin;
 if (nS > 1) { 
   for(i=0; i < (nS-1); i++) {
     k = i + 1;
     while( (k < nS) && ( (P[yS[k]].y - P[yS[i]].y) < dmin ) ) {

double d = distance(&P[yS[i]], &P[yS[k]]); if ( d < closest ) { closest = d; *iA = yS[i]; *iB = yS[k]; } k++;

     }
   }
 }

 free(xR); free(xL);
 free(yL); free(yR); free(yS);
 return closest;

}</lang>

This is the entry point for the divide&conquer algorithm.

<lang c>double closest_pair(point_t *P, int N, int *iA, int *iB) {

 int *xP, *yP, i;
 double d;

 xP = malloc(sizeof(int)*N); assert(xP != NULL);
 yP = malloc(sizeof(int)*N); assert(yP != NULL);

 for(i=0; i < N; i++) {
   xP[i] = yP[i] = i;
 }
 m_qsort(P, xP, N, sort_index_by_x);
 m_qsort(P, yP, N, sort_index_by_y);
 d = closest_pair_(P, xP, yP, N, iA, iB);
 free(xP); free(yP);
 return d;

}</lang>

Testing

<lang c>#define NP 10000

int main() {

 point_t *points;
 int i;
 int p[2];
 double c;

 srand(31415);

 points = malloc(sizeof(point_t)*NP);

 for(i=0; i < NP; i++) {
   points[i].x = 20.0*((double)rand()/(RAND_MAX+1.0)) - 10.0;
   points[i].y = 20.0*((double)rand()/(RAND_MAX+1.0)) - 10.0;
 }

 c = closest_pair_simple(points, NP, p, p+1);
 printf("%lf %d %d (%lf)\n", c, p[0], p[1], distance(&points[p[0]], &points[p[1]]));
 c = closest_pair(points, NP, p, p+1);
 printf("%lf %d %d (%lf)\n", c, p[0], p[1], distance(&points[p[0]], &points[p[1]]));

 free(points);
 return EXIT_SUCCESS;

}</lang>

The divide&conquer algorithm gave 0.01user 0.00system 0:00.11elapsed, while the brute-force one gave 1.83user 0.00system 0:01.88elapsed.