Closest-pair problem/C: Difference between revisions

Content added Content deleted
(replaced code)
Line 1: Line 1:
{{collection|Closest pair problem}}
{{collection|Closest pair problem}}


<lang C>#include <stdio.h>
{{improve|C|They exist "evil" datasets able to crash it.}}

<lang c>#include <stdio.h>
#include <stdlib.h>
#include <stdlib.h>
#include <values.h>
#include <math.h>
#include <string.h>
#include <string.h>
#include <math.h>
#include <assert.h>


typedef struct {
typedef struct { double x, y; } point_t, *point;
double x, y;
} point_t;


inline double distance(point_t *p1, point_t *p2)
inline double dist(point a, point b)
{
{
return sqrt((p1->x - p2->x)*(p1->x - p2->x) +
double dx = a->x - b->x, dy = a->y - b->y;
(p1->y - p2->y)*(p1->y - p2->y));
return dx * dx + dy * dy;
}
}</lang>

We need an ''ad hoc'' sort function; here is a modified version of what you can find at [[Quicksort]].


inline int cmp_dbl(double a, double b)
<lang c>typedef int(*comparator)(const void *, const void *);
void quick(point_t *P, int *left, int *right, comparator compar)
{
{
return a < b ? -1 : a > b ? 1 : 0;
if (right > left) {
int pivot = left[(right-left)/2];
int *r = right, *l = left;
do {
while (compar(&P[*l], &P[pivot]) < 0) l++;
while (compar(&P[*r], &P[pivot]) > 0) r--;
if (l <= r) {
int t = *l;
*l++ = *r;
*r-- = t;
}
} while (l <= r);
quick(P, left, r, compar);
quick(P, l, right, compar);
}
}
}

void m_qsort(point_t *P, int *array, int length, comparator compar)
int cmp_x(const void *a, const void *b) {
{
return cmp_dbl( (*((point*)a))->x, (*((point*)b))->x );
quick(P, array, array+length-1, compar);
}
}


int cmp_y(const void *a, const void *b) {

return cmp_dbl( (*((point*)a))->y, (*((point*)b))->y );
int sort_index_by_x(const void *ra, const void *rb) {
const point_t *a = ra, *b = rb;
double d = a->x - b->x;
return (d<0) ? -1 : ( (d==0) ? 0 : 1);
}
}


double brute_force(point* pts, int max_n, point *a, point *b)
int sort_index_by_y(const void *ra, const void *rb) {
{
const point_t *a = ra, *b = rb;
double d = a->y - b->y;
int i, j;
return (d<0) ? -1 : ( (d==0) ? 0 : 1 );
double d, min_d = MAXDOUBLE;
}</lang>


for (i = 0; i < max_n; i++) {
<lang c>double closest_pair_simple_(point_t *P, int *pts, int num, int *pia, int *pib) {
for (j = i + 1; j < max_n; j++) {
int i, j;
d = dist(pts[i], pts[j]);
if ( num < 2 ) return HUGE_VAL;
if (d >= min_d ) continue;
double ird = distance(&P[pts[0]], &P[pts[1]]);
*pia = pts[0]; *pib = pts[1];
*a = pts[i];
*b = pts[j];
for(i=0; i < (num-1); i++) {
min_d = d;
for(j=i+1; j < num; j++) {
double t;
}
}
t = distance(&P[pts[i]], &P[pts[j]]);
if ( t < ird ) {
return min_d;
}
ird = t;
*pia = pts[i]; *pib = pts[j];
}
}
}
return ird;
}</lang>

This is the ''entry point'' for the simple algorithm.

<lang c>double closest_pair_simple(point_t *P, int num, int *pia, int *pib) {
int *pts, i;
double d;

pts = malloc(sizeof(int)*num); assert(pts != NULL);
for(i=0; i < num; i++) pts[i] = i;
d = closest_pair_simple_(P, pts, num, pia, pib);
free(pts);
return d;
}</lang>


<lang c>double closest_pair_(point_t *P, int *xP, int *yP, int N, int *iA, int *iB)
double closest(point* sx, int nx, point* sy, int ny, point *a, point *b)
{
{
int i, k, j;
int left, right, i;
int *xL, *xR, *yL, *yR, *yS;
double d, min_d, x0, x1, mid, x;
int lA, lB, rA, rB, midx;
point a1, b1;
point *s_yy;
double dL, dR, dmin, xm, closest;
int nS;


if ( N <= 3 ) return closest_pair_simple_(P, xP, N, iA, iB);
if (nx <= 8) return brute_force(sx, nx, a, b);


midx = ceil((double)N/2.0) - 1;
s_yy = malloc(sizeof(point) * ny);
mid = sx[nx/2]->x;
xL = malloc(sizeof(int)*(midx+1)); assert(xL != NULL);
xR = malloc(sizeof(int)*(N-(midx+1))); assert(xR != NULL);
yL = malloc(sizeof(int)*(midx+1)); assert(yL != NULL);
yR = malloc(sizeof(int)*(N-(midx+1))); assert(yR != NULL);


/* adding points to the y-sorted list; if a point's x is less than mid,
for(i=0;i <= midx; i++) xL[i] = xP[i];
add to the begining; if more, add to the end backwards, hence the
for(i=midx+1; i < N; i++) xR[i-(midx+1)] = xP[i];
need to reverse it */
left = -1; right = ny;
for (i = 0; i < ny; i++)
if (sy[i]->x < mid) s_yy[++left] = sy[i];
else s_yy[--right]= sy[i];


/* reverse the higher part of the list */
xm = P[xP[midx]].x;
for (i = ny - 1; right < i; right ++, i--) {
a1 = s_yy[right]; s_yy[right] = s_yy[i]; s_yy[i] = a1;
}


for(i=0, k=0, j=0; i < N; i++) {
min_d = closest(sx, nx/2, s_yy, left + 1, a, b);
d = closest(sx + nx/2, nx - nx/2, s_yy + left + 1, ny - left - 1, &a1, &b1);
if ( P[yP[i]].x <= xm ) {
yL[k++] = yP[i];
} else {
yR[j++] = yP[i];
}
}


if (d < min_d) { min_d = d; *a = a1; *b = b1; }
dL = closest_pair_(P, xL, yL, midx+1, &lA, &lB);
d = sqrt(min_d);
dR = closest_pair_(P, xR, yR, (N-(midx+1)), &rA, &rB);


/* get all the points within distance d of the center line */
if ( dL < dR ) {
dmin = dL;
left = -1; right = ny;
*iA = lA; *iB = lB;
for (i = 0; i < ny; i++) {
x = sy[i]->x - mid;
} else {
if (x <= -d || x >= d) continue;
dmin = dR;
*iA = rA; *iB = rB;
}


if (x < 0) s_yy[++left] = sy[i];
yS = malloc(sizeof(int)*N); assert(yS != NULL);
else s_yy[--right] = sy[i];
nS = 0;
for(i=0; i < N; i++) {
}
if ( fabs(xm - P[yP[i]].x) < dmin ) {
yS[nS++] = yP[i];
}
}


/* compare each left point to right point */
closest = dmin;
if (nS > 1) {
while (left >= 0) {
for(i=0; i < (nS-1); i++) {
x0 = s_yy[left]->y + d;
k = i + 1;
while( (k < nS) && ( (P[yS[k]].y - P[yS[i]].y) < dmin ) ) {
double d = distance(&P[yS[i]], &P[yS[k]]);
if ( d < closest ) {
closest = d;
*iA = yS[i];
*iB = yS[k];
}
k++;
}
}
}


while (right < ny && s_yy[right]->y > x0) right ++;
free(xR); free(xL);
if (right >= ny) break;
free(yL); free(yR); free(yS);
return closest;
}</lang>


x1 = s_yy[left]->y - d;
This is the ''entry point'' for the divide&amp;conquer algorithm.
for (i = right; i < ny && s_yy[i]->y > x1; i++)
if ((x = dist(s_yy[left], s_yy[i])) < min_d) {
min_d = x;
d = sqrt(min_d);
*a = s_yy[left];
*b = s_yy[i];
}


left --;
<lang c>double closest_pair(point_t *P, int N, int *iA, int *iB)
}
{
int *xP, *yP, i;
double d;


free(s_yy);
xP = malloc(sizeof(int)*N); assert(xP != NULL);
return min_d;
yP = malloc(sizeof(int)*N); assert(yP != NULL);
}

for(i=0; i < N; i++) {
xP[i] = yP[i] = i;
}
m_qsort(P, xP, N, sort_index_by_x);
m_qsort(P, yP, N, sort_index_by_y);
d = closest_pair_(P, xP, yP, N, iA, iB);
free(xP); free(yP);
return d;
}</lang>

'''Testing'''

<lang c>#define NP 10000


#define NP 1000000
int main()
int main()
{
{
int i;
point_t *points;
point a, b;
int i;
int p[2];
double c;


point pts = malloc(sizeof(point_t) * NP);
srand(31415);
point* s_x = malloc(sizeof(point) * NP);
point* s_y = malloc(sizeof(point) * NP);


for(i = 0; i < NP; i++) {
points = malloc(sizeof(point_t)*NP);
s_x[i] = pts + i;
pts[i].x = 100 * (double) rand()/RAND_MAX;
pts[i].y = 100 * (double) rand()/RAND_MAX;
}


/* printf("brute force: %g, ", sqrt(brute_force(s_x, NP, &a, &b)));
for(i=0; i < NP; i++) {
printf("between (%f,%f) and (%f,%f)\n", a->x, a->y, b->x, b->y); */
points[i].x = 20.0*((double)rand()/(RAND_MAX+1.0)) - 10.0;
points[i].y = 20.0*((double)rand()/(RAND_MAX+1.0)) - 10.0;
}


c = closest_pair_simple(points, NP, p, p+1);
memcpy(s_y, s_x, sizeof(point) * NP);
qsort(s_x, NP, sizeof(point), cmp_x);
printf("%lf %d %d (%lf)\n", c, p[0], p[1], distance(&points[p[0]], &points[p[1]]));
c = closest_pair(points, NP, p, p+1);
qsort(s_y, NP, sizeof(point), cmp_y);
printf("%lf %d %d (%lf)\n", c, p[0], p[1], distance(&points[p[0]], &points[p[1]]));


printf("min: %g; ", sqrt(closest(s_x, NP, s_y, NP, &a, &b)));
free(points);
printf("point (%f,%f) and (%f,%f)\n", a->x, a->y, b->x, b->y);
return EXIT_SUCCESS;
}</lang>


/* not freeing the memory, let OS deal with it. Habit. */
The divide&amp;conquer algorithm gave 0.01user 0.00system 0:00.11elapsed, while the brute-force one gave 1.83user 0.00system 0:01.88elapsed.
return 0;
}</lang>Compile and run (1000000 points, bruteforce method not shown because it takes forever):<lang>
% gcc -Wall -lm -O2 test.c
% time ./a.out
min: 5.6321e-05; point (19.657247,79.900855) and (19.657303,79.900862)
./a.out 7.16s user 0.08s system 96% cpu 7.480 total
</lang>