K-means++ clustering

You are encouraged to solve this task according to the task description, using any language you may know.
K-means++ clustering is a classification of data, so that points assigned to the same cluster are similar (in some sense). It is identical to the K-means algorithm, except for the selection of initial conditions.
[Example image: a circular distribution of data partitioned into 7 colored clusters, similar to the top of a beach ball. The data was partitioned into 7 clusters using the K-means algorithm.]

The task is to implement the K-means++ algorithm. Produce a function which takes two arguments: the number of clusters K, and the dataset to classify. K is a positive integer and the dataset is a list of points in the Cartesian plane. The output is a list of clusters (related sets of points, according to the algorithm).

For extra credit (in order):

  1. Provide a function to exercise your code, which generates a list of random points.
  2. Provide a visualization of your results, including centroids (see example image).
  3. Generalize the function to polar coordinates (in radians).
  4. Generalize the function to points in an arbitrary N space (i.e. instead of x,y pairs, points are N-tuples representing coordinates in ℝ^N).
    If this is different or more difficult than the [naive] solution for ℝ^2, discuss what had to change to support N dimensions.

Extra credit is only awarded if the examples given demonstrate the feature in question. To earn credit for 1. and 2., visualize 6 clusters of 30,000 points in ℝ^2. It is not necessary to provide visualization for spaces higher than ℝ^2, but the examples should demonstrate features 3. and 4. if the solution has them.
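The heart of K-means++ is its seeding rule: the first center is picked uniformly at random, and each subsequent center is sampled from the data with probability proportional to the squared distance between each point and its nearest already-chosen center. As a reference point for the entries below, here is a minimal sketch of that rule (in Python; points are assumed to be plain N-tuples, the distance is squared Euclidean, and the function name kmeanspp_seeds is illustrative, not prescribed by the task):

import random

def kmeanspp_seeds(points, k):
    """Pick k initial centers by the K-means++ D^2-weighting rule (sketch)."""
    centers = [random.choice(points)]
    while len(centers) < k:
        # Squared distance from each point to its nearest chosen center.
        d2 = [min(sum((a - b) ** 2 for a, b in zip(pt, ctr))
                  for ctr in centers)
              for pt in points]
        # Sample the next center with probability proportional to d2.
        r = random.uniform(0, sum(d2))
        acc = 0.0
        for pt, w in zip(points, d2):
            acc += w
            if acc >= r:
                centers.append(pt)
                break
    return centers

Lloyd's algorithm then proceeds from these seeds exactly as in plain K-means: assign every point to its nearest center, recompute each center as the centroid of its points, and repeat until the assignments (almost) stop changing.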

C

K++-C.png

Output is in EPS. A 100,000-point EPS file can take quite a while to display.

To extend the code to handle dimensions higher than 2, just make point_t have more coordinates, and change the dist2 distance function (and the point generation, of course) accordingly. Neither is difficult to do, but it would make the code somewhat longer and less focused on demonstrating the algorithm, so I chose simplicity and clarity over extra credit.
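To make the dimension change concrete, here is a small sketch (in Python, purely for brevity) of what the squared-distance function becomes when points are N-tuples; the C dist2 below is simply the N = 2 special case:

def dist2(a, b):
    """Squared Euclidean distance between two N-tuples of coordinates."""
    return sum((ai - bi) ** 2 for ai, bi in zip(a, b))

assert dist2((0.0, 0.0), (3.0, 4.0)) == 25.0            # the 2-D case
assert dist2((1.0, 2.0, 2.0), (0.0, 0.0, 0.0)) == 9.0   # a 3-D point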

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

typedef struct { double x, y; int group; } point_t, *point;

double randf(double m)
{
    return m * rand() / (RAND_MAX - 1.);
}

point gen_xy(int count, double radius)
{
    double ang, r;
    point p, pt = malloc(sizeof(point_t) * count);

    /* note: this is not a uniform 2-d distribution */
    for (p = pt + count; p-- > pt;) {
        ang = randf(2 * M_PI);
        r = randf(radius);
        p->x = r * cos(ang);
        p->y = r * sin(ang);
    }

    return pt;
}

inline double dist2(point a, point b)
{
    double x = a->x - b->x, y = a->y - b->y;
    return x*x + y*y;
}

inline int
nearest(point pt, point cent, int n_cluster, double *d2)
{
    int i, min_i;
    point c;
    double d, min_d;

#   define for_n for (c = cent, i = 0; i < n_cluster; i++, c++)
    min_d = HUGE_VAL;
    min_i = pt->group;
    for_n {
        if (min_d > (d = dist2(c, pt))) {
            min_d = d; min_i = i;
        }
    }
    if (d2) *d2 = min_d;
    return min_i;
}

void kpp(point pts, int len, point cent, int n_cent)
{
#   define for_len for (j = 0, p = pts; j < len; j++, p++)
    int i, j;
    int n_cluster;
    double sum, *d = malloc(sizeof(double) * len);

    point p, c;
    cent[0] = pts[ rand() % len ];
    for (n_cluster = 1; n_cluster < n_cent; n_cluster++) {
        /* sum of squared distances to the nearest chosen center */
        sum = 0;
        for_len {
            nearest(p, cent, n_cluster, d + j);
            sum += d[j];
        }
        /* pick the next center with probability proportional to d^2 */
        sum = randf(sum);
        for_len {
            if ((sum -= d[j]) > 0) continue;
            cent[n_cluster] = pts[j];
            break;
        }
    }
    for_len p->group = nearest(p, cent, n_cluster, 0);
    free(d);
}

point lloyd(point pts, int len, int n_cluster)
{
    int i, j, min_i;
    int changed;

    point cent = malloc(sizeof(point_t) * n_cluster), p, c;

    /* assign init grouping randomly */
    //for_len p->group = j % n_cluster;

    /* or call k++ init */
    kpp(pts, len, cent, n_cluster);

    do {
        /* group element for centroids are used as counters */
        for_n { c->group = 0; c->x = c->y = 0; }
        for_len {
            c = cent + p->group;
            c->group++;
            c->x += p->x; c->y += p->y;
        }
        for_n { c->x /= c->group; c->y /= c->group; }

        changed = 0;
        /* find closest centroid of each point */
        for_len {
            min_i = nearest(p, cent, n_cluster, 0);
            if (min_i != p->group) {
                changed++;
                p->group = min_i;
            }
        }
    } while (changed > (len >> 10)); /* stop when 99.9% of points are good */

    for_n { c->group = i; }

    return cent;
}

void print_eps(point pts, int len, point cent, int n_cluster)
{
#   define W 400
#   define H 400
    int i, j;
    point p, c;
    double min_x, max_x, min_y, max_y, scale, cx, cy;
    double *colors = malloc(sizeof(double) * n_cluster * 3);

    for_n {
        colors[3*i + 0] = (3 * (i + 1) % 11)/11.;
        colors[3*i + 1] = (7 * i % 11)/11.;
        colors[3*i + 2] = (9 * i % 11)/11.;
    }

    max_x = max_y = -(min_x = min_y = HUGE_VAL);
    for_len {
        if (max_x < p->x) max_x = p->x;
        if (min_x > p->x) min_x = p->x;
        if (max_y < p->y) max_y = p->y;
        if (min_y > p->y) min_y = p->y;
    }
    scale = W / (max_x - min_x);
    if (scale > H / (max_y - min_y)) scale = H / (max_y - min_y);
    cx = (max_x + min_x) / 2;
    cy = (max_y + min_y) / 2;

    printf("%%!PS-Adobe-3.0\n%%%%BoundingBox: -5 -5 %d %d\n", W + 10, H + 10);
    printf( "/l {rlineto} def /m {rmoveto} def\n"
            "/c { .25 sub exch .25 sub exch .5 0 360 arc fill } def\n"
            "/s { moveto -2 0 m 2 2 l 2 -2 l -2 -2 l closepath "
            " gsave 1 setgray fill grestore gsave 3 setlinewidth"
            " 1 setgray stroke grestore 0 setgray stroke }def\n"
    );
    for_n {
        printf("%g %g %g setrgbcolor\n",
            colors[3*i], colors[3*i + 1], colors[3*i + 2]);
        for_len {
            if (p->group != i) continue;
            printf("%.3f %.3f c\n",
                (p->x - cx) * scale + W / 2,
                (p->y - cy) * scale + H / 2);
        }
        printf("\n0 setgray %g %g s\n",
            (c->x - cx) * scale + W / 2,
            (c->y - cy) * scale + H / 2);
    }
    printf("\n%%%%EOF");
    free(colors);
#   undef for_n
#   undef for_len
}

#define PTS 100000
#define K 11
int main()
{
    int i;
    point v = gen_xy(PTS, 10);
    point c = lloyd(v, PTS, K);
    print_eps(v, PTS, c, K);
    // free(v); free(c);
    return 0;
}

D

Translation of: C
import std.stdio, std.math, std.random, std.typecons, std.algorithm;

// On Windows this uses the printf from the Microsoft C runtime,
// that doesn't handle real type and some of the C99 format
// specifiers, but it's faster for bulk printing.
extern(C) nothrow int printf(const char*, ...);

struct Point {
    immutable double x, y; // Or float.
    size_t cluster;
}

Point[] generatePoints(in size_t nPoints,
                       in double radius,
                       ref Xorshift rnd)
in {
    assert(nPoints > 0);
    assert(radius > 0);
} out(result) {
    assert(result.length == nPoints);
    foreach (const ref p; result) {
        assert(p.cluster == 0);
        assert(!p.x.isNaN && !p.y.isNaN);
    }
} body {
    Point[] points;
    points.reserve(nPoints);

    // This is not a uniform 2D distribution.
    foreach (immutable i; 0 .. nPoints) {
        immutable r = uniform(0.0, radius, rnd);
        immutable ang = uniform(0.0, 2 * PI, rnd);
        points ~= Point(r * ang.cos, r * ang.sin); // Sincos?
    }

    return points;
}


struct ClusterCenter {
    double x, y;
    void opAssign(in ref Point p) pure nothrow @nogc {
        this.x = p.x;
        this.y = p.y;
    }
}


const(ClusterCenter)[] lloyd(Point[] points,
                             in size_t nclusters,
                             ref Xorshift rnd)
in {
    assert(points.length >= nclusters);
    assert(nclusters > 0);
    foreach (const ref p; points)
        assert(!p.x.isNaN && !p.y.isNaN);
} out(result) {
    assert(result.length == nclusters);
    foreach (const ref cc; result)
        assert(!cc.x.isNaN && !cc.y.isNaN);
} body {
    /// Distance and index of the closest cluster center.
    static Tuple!(size_t, double)
    nearestClusterCenter(in ref Point point,
                         in ClusterCenter[] centers) pure nothrow @nogc
    in {
        assert(centers.length > 0);
    } out(result) {
        assert(result[0] < centers.length);
        immutable ClusterCenter c = centers[result[0]];
        immutable d = (c.x - point.x) ^^ 2 + (c.y - point.y) ^^ 2;
        assert(feqrel(result[1], d) > 45); // Arbitrary.
    } body {
        static double sqrDistance2D(in ref ClusterCenter a,
                                    in ref Point b) pure nothrow @nogc {
            return (a.x - b.x) ^^ 2 + (a.y - b.y) ^^ 2;
        }

        size_t minIndex = point.cluster;
        double minDist = double.max;

        foreach (immutable i, const ref cc; centers) {
            immutable d = sqrDistance2D(cc, point);
            if (minDist > d) {
                minDist = d;
                minIndex = i;
            }
        }

        return tuple(minIndex, minDist);
    }


    static void kMeansPP(Point[] points,
                         ClusterCenter[] centers,
                         ref Xorshift rnd)
    in {
        assert(points.length >= centers.length);
        assert(centers.length > 0);
    } body {
        centers[0] = points[uniform(0, $, rnd)];
        auto d = new double[points.length];

        foreach (immutable i; 1 .. centers.length) {
            double sum = 0;
            foreach (immutable j, const ref p; points) {
                d[j] = nearestClusterCenter(p, centers[0 .. i])[1];
                sum += d[j];
            }

            sum = uniform(0.0, sum, rnd);

            foreach (immutable j, immutable dj; d) {
                sum -= dj;
                if (sum > 0)
                    continue;
                centers[i] = points[j];
                break;
            }
        }

        foreach (ref p; points)
            // Implicit cast of Hconst!ClusterCenter
            // to ClusterCenter[].
            p.cluster = nearestClusterCenter(p, centers)[0];
    }


    auto centers = new ClusterCenter[nclusters];
    kMeansPP(points, centers, rnd);
    auto clusterSizes = new size_t[centers.length];

    size_t changed;
    do {
        // Find clusters centroids.
        centers[] = ClusterCenter(0, 0);
        clusterSizes[] = 0;

        foreach (immutable i, const ref p; points)
            with (centers[p.cluster]) {
                clusterSizes[p.cluster]++;
                x += p.x;
                y += p.y;
            }

        foreach (immutable i, ref cc; centers) {
            cc.x /= clusterSizes[i];
            cc.y /= clusterSizes[i];
        }

        // Find closest centroid of each point.
        changed = 0;
        foreach (ref p; points) {
            immutable minI = nearestClusterCenter(p, centers)[0];
            if (minI != p.cluster) {
                changed++;
                p.cluster = minI;
            }
        }
        // Stop when 99.9% of points are good.
    } while (changed > (points.length >> 10));

    return centers;
}


void printEps(in Point[] points, in ClusterCenter[] centers,
              in size_t W = 400, in size_t H = 400) nothrow
in {
    assert(points.length >= centers.length);
    assert(centers.length > 0);
    assert(W > 0 && H > 0);
    foreach (const ref p; points)
        assert(!p.x.isNaN && !p.y.isNaN);
    foreach (const ref cc; centers)
        assert(!cc.x.isNaN && !cc.y.isNaN);
} body {
    auto findBoundingBox() nothrow @nogc {
        double min_x, max_x, min_y, max_y;
        max_x = max_y = -double.max;
        min_x = min_y = double.max;

        foreach (const ref p; points) {
            if (max_x < p.x) max_x = p.x;
            if (min_x > p.x) min_x = p.x;
            if (max_y < p.y) max_y = p.y;
            if (min_y > p.y) min_y = p.y;
        }
        assert(max_x > min_x && max_y > min_y);

        return tuple(min(W / (max_x - min_x), H / (max_y - min_y)),
                     (max_x + min_x) / 2, (max_y + min_y) / 2);
    }
    //immutable (scale, cx, cy) = findBoundingBox();
    immutable sc_cx_cy = findBoundingBox();
    immutable double scale = sc_cx_cy[0];
    immutable double cx = sc_cx_cy[1];
    immutable double cy = sc_cx_cy[2];

    static immutable struct Color { immutable double r, g, b; }

    immutable size_t k = centers.length;
    Color[] colors;
    colors.reserve(centers.length);
    foreach (immutable i; 0 .. centers.length)
        colors ~= Color((3 * (i + 1) % k) / double(k),
                        (7 * i % k) / double(k),
                        (9 * i % k) / double(k));

    printf("%%!PS-Adobe-3.0\n%%%%BoundingBox: -5 -5 %d %d\n",
           W + 10, H + 10);

    printf("/l {rlineto} def /m {rmoveto} def\n" ~
           "/c { .25 sub exch .25 sub exch .5 0 360 arc fill } def\n" ~
           "/s { moveto -2 0 m 2 2 l 2 -2 l -2 -2 l closepath " ~
           " gsave 1 setgray fill grestore gsave 3 setlinewidth" ~
           " 1 setgray stroke grestore 0 setgray stroke }def\n");

    foreach (immutable i, const ref cc; centers) {
        printf("%g %g %g setrgbcolor\n", colors[i].tupleof);

        foreach (const ref p; points) {
            if (p.cluster != i)
                continue;
            printf("%.3f %.3f c\n",
                   (p.x - cx) * scale + W / 2,
                   (p.y - cy) * scale + H / 2);
        }

        printf("\n0 setgray %g %g s\n",
               (cc.x - cx) * scale + W / 2,
               (cc.y - cy) * scale + H / 2);
    }

    "\n%%%%EOF".printf;
}


void main() {
    enum size_t nPoints = 100_000;
    enum size_t nClusters = 11; // k.
    auto rnd = 1.Xorshift; // For speed and repeatability.

    auto points = generatePoints(nPoints, 10, rnd);
    const clusterCenters = lloyd(points, nClusters, rnd);
    printEps(points, clusterCenters);
}

Compiled with ldc2, it's about as fast as the C entry.

Euler Math Toolbox

 
>type kmeanscluster
function kmeanscluster (x: numerical, k: index)
    n=rows(x); m=cols(x);
    i=floor((0:k)/k*(n-1))+1;
    means=zeros(k,m);
    loop 1 to k;
        means[#]=sum(x[i[#]:(i[#+1]-1)]')'/(i[#+1]-i[#]);
    end;
    j=1:n;
    loop 1 to n;
        d=sum((x[#]-means)^2);
        j[#]=extrema(d')[2];
    end;
    repeat
        loop 1 to k;
            i=nonzeros(j==#);
            if cols(i)==0 then means[#]=1;
            else means[#]=(sum(x[i]')/cols(i))';
            endif;
        end;
        jold=j;
        loop 1 to n;
            d=sum((x[#]-means)^2);
            j[#]=extrema(d')[2];
        end;
        if all(jold==j) then break; endif;
    end
    return j
endfunction
 

Let us apply it to random data.

 
>load clustering.e
 Functions for clustering data.
>np=5; m=3*normal(np,2);
% Spread n points randomly around these points.
>n=5000; x=m[intrandom(1,n,np)]+normal(n,2);
% The function kmeanscluster contains the algorithm. It returns the
% indices of the clusters the points belong to.
>j=kmeanscluster(x,np);
% We plot each point with a color representing its cluster.
>P=x'; ...
>  plot2d(P[1],P[2],r=totalmax(abs(m))+2,color=10+j,points=1,style="."); ...
>  loop 1 to np; plot2d(m[#,1],m[#,2],points=1,style="o#",add=1); end; ...
>  insimg;
 

ClusteringEulerMathToolbox.png

Go

package main

import (
    "fmt"
    "image"
    "image/color"
    "image/draw"
    "image/png"
    "math"
    "math/rand"
    "os"
    "time"
)

type r2 struct {
    x, y float64
}

type r2c struct {
    r2
    c int // cluster number
}

// kmpp implements K-means++, satisfying the basic task requirement
func kmpp(k int, data []r2c) {
    kMeans(data, kmppSeeds(k, data))
}

// kmppSeeds is the ++ part.
// It generates the initial means for the k-means algorithm.
func kmppSeeds(k int, data []r2c) []r2 {
    s := make([]r2, k)
    s[0] = data[rand.Intn(len(data))].r2
    d2 := make([]float64, len(data))
    for i := 1; i < k; i++ {
        var sum float64
        for j, p := range data {
            _, dMin := nearest(p, s[:i])
            d2[j] = dMin * dMin
            sum += d2[j]
        }
        target := rand.Float64() * sum
        j := 0
        for sum = d2[0]; sum < target; sum += d2[j] {
            j++
        }
        s[i] = data[j].r2
    }
    return s
}

// nearest finds the nearest mean to a given point.
// return values are the index of the nearest mean, and the distance from
// the point to the mean.
func nearest(p r2c, mean []r2) (int, float64) {
    iMin := 0
    dMin := math.Hypot(p.x-mean[0].x, p.y-mean[0].y)
    for i := 1; i < len(mean); i++ {
        d := math.Hypot(p.x-mean[i].x, p.y-mean[i].y)
        if d < dMin {
            dMin = d
            iMin = i
        }
    }
    return iMin, dMin
}

// kMeans algorithm. Lloyd's
func kMeans(data []r2c, mean []r2) {
    // initial assignment
    for i, p := range data {
        cMin, _ := nearest(p, mean)
        data[i].c = cMin
    }
    mLen := make([]int, len(mean))
    for {
        // update means
        for i := range mean {
            mean[i] = r2{}
            mLen[i] = 0
        }
        for _, p := range data {
            mean[p.c].x += p.x
            mean[p.c].y += p.y
            mLen[p.c]++
        }
        for i := range mean {
            inv := 1 / float64(mLen[i])
            mean[i].x *= inv
            mean[i].y *= inv
        }
        // make new assignments, count changes
        var changes int
        for i, p := range data {
            if cMin, _ := nearest(p, mean); cMin != p.c {
                changes++
                data[i].c = cMin
            }
        }
        if changes == 0 {
            return
        }
    }
}

// parameters for extra credit exercises
type ecParam struct {
    k          int
    nPoints    int
    xBox, yBox int
    stdv       int
}

// extra credit 1 and 2:
func main() {
    ec := &ecParam{6, 30000, 300, 200, 30}

    origin, data := genECData(ec)
    vis(ec, data, "origin")
    fmt.Println("Data set origins:")
    fmt.Println("    x      y")
    for _, o := range origin {
        fmt.Printf("%5.1f  %5.1f\n", o.x, o.y)
    }

    kmpp(ec.k, data)

    fmt.Println(
        "\nCluster centroids, mean distance from centroid, number of points:")
    fmt.Println("    x      y  distance  points")
    cent := make([]r2, ec.k)
    cLen := make([]int, ec.k)
    inv := make([]float64, ec.k)
    for _, p := range data {
        cent[p.c].x += p.x
        cent[p.c].y += p.y
        cLen[p.c]++
    }
    for i, iLen := range cLen {
        inv[i] = 1 / float64(iLen)
        cent[i].x *= inv[i]
        cent[i].y *= inv[i]
    }
    dist := make([]float64, ec.k)
    for _, p := range data {
        dist[p.c] += math.Hypot(p.x-cent[p.c].x, p.y-cent[p.c].y)
    }
    for i, iLen := range cLen {
        fmt.Printf("%5.1f  %5.1f  %8.1f  %6d\n",
            cent[i].x, cent[i].y, dist[i]*inv[i], iLen)
    }
    vis(ec, data, "clusters")
}

// genECData generates random data for extra credit tasks.
// k origin points are randomly selected in a bounding box.
// nPoints/k coordinates are then generated for each origin point.
// The x and y coordinates of the data are normally distributed
// with standard deviation stdv. Thus data coordinates are not
// constrained to the origin box; they can range to +/- max float64.
func genECData(ec *ecParam) (orig []r2, data []r2c) {
    rand.Seed(time.Now().UnixNano())
    orig = make([]r2, ec.k)
    data = make([]r2c, ec.nPoints)
    for i, n := 0, 0; i < ec.k; i++ {
        x := rand.Float64() * float64(ec.xBox)
        y := rand.Float64() * float64(ec.yBox)
        orig[i] = r2{x, y}
        for j := ec.nPoints / ec.k; j > 0; j-- {
            data[n].x = rand.NormFloat64()*float64(ec.stdv) + x
            data[n].y = rand.NormFloat64()*float64(ec.stdv) + y
            data[n].c = i
            n++
        }
    }
    return
}

// vis writes a .png for extra credit 2.
func vis(ec *ecParam, data []r2c, fn string) {
    colors := make([]color.NRGBA, ec.k)
    for i := range colors {
        i3 := i * 3
        third := i3 / ec.k
        frac := uint8((i3 % ec.k) * 255 / ec.k)
        switch third {
        case 0:
            colors[i] = color.NRGBA{frac, 255 - frac, 0, 255}
        case 1:
            colors[i] = color.NRGBA{0, frac, 255 - frac, 255}
        case 2:
            colors[i] = color.NRGBA{255 - frac, 0, frac, 255}
        }
    }
    bounds := image.Rect(-ec.stdv, -ec.stdv, ec.xBox+ec.stdv, ec.yBox+ec.stdv)
    im := image.NewNRGBA(bounds)
    draw.Draw(im, bounds, image.NewUniform(color.White), image.ZP, draw.Src)
    fMinX := float64(bounds.Min.X)
    fMaxX := float64(bounds.Max.X)
    fMinY := float64(bounds.Min.Y)
    fMaxY := float64(bounds.Max.Y)
    for _, p := range data {
        imx := math.Floor(p.x)
        imy := math.Floor(float64(ec.yBox) - p.y)
        if imx >= fMinX && imx < fMaxX && imy >= fMinY && imy < fMaxY {
            im.SetNRGBA(int(imx), int(imy), colors[p.c])
        }
    }
    f, err := os.Create(fn + ".png")
    if err != nil {
        fmt.Println(err)
        return
    }
    err = png.Encode(f, im)
    if err != nil {
        fmt.Println(err)
    }
    err = f.Close()
    if err != nil {
        fmt.Println(err)
    }
}

Text output:

Data set origins:
    x      y
256.8  188.6
 91.7   51.2
201.8  100.2
161.6  102.8
 78.9  152.9
 97.8   17.4

Cluster centroids, mean distance from centroid, number of points:
    x      y  distance  points
152.4  102.1      30.9    5654
104.8    8.7      31.4    4947
211.3   99.4      32.0    4961
 78.3   57.7      29.4    4817
257.7  191.4      36.5    4915
 76.9  156.5      35.0    4706

Visualization. Original clusters on left, discovered clusters on right.

GoOrigin.png

GoClusters.png

J

Solution:
   NB. Selection of initial centroids, per K-means++
   initialCentroids =: (] , randomCentroid)^:(<:@:]`(,:@:seedCentroid@:[))~
   seedCentroid     =: {~ ?@#
   randomCentroid   =: [ {~ [: wghtProb [: <./ distance/~
   distance         =: +/&.:*:@:-"1   NB. Extra credit #4 (N-dimensional is the same as 2-dimensional in J)
   wghtProb         =: 1&$: : ((%{:)@:(+/\)@:] I. [ ?@$ 0:)"0 1   NB. Due to Roger Hui http://j.mp/lj5Pnt

   NB. Having selected the initial centroids, the standard K-means algo follows
   centroids       =: ([ mean/.~ closestCentroid)^:(]`_:`initialCentroids)
   closestCentroid =: [: (i.<./)"1 distance/
   mean            =: +/ % #
Extra credit:
   randMatrix   =: ?@$&0     NB. Extra credit #1
   packPoints   =: <"1@:|:   NB. Extra credit #2: Visualization code due to Max Harms http://j.mp/l8L45V
   plotClusters =: dyad define   NB. as is the example image in this task
     require 'plot'
     pd 'reset;aspect 1;type dot;pensize 2'
     pd@:packPoints&> y
     pd 'type marker;markersize 1.5;color 0 0 0'
     pd@:packPoints x
     pd 'markersize 0.8;color 255 255 0'
     pd@:packPoints x
     pd 'show'
   )

   NB. Extra credit #3: Polar coordinates are not available in this version
   NB. but wouldn't be hard to provide with &.cartToPole .
Example:
   plotRandomClusters =: 3&$: : (dyad define)
     dataset =. randMatrix 2 {. y,2

     centers  =. x centroids dataset
     clusters =. centers (closestCentroid~ </. ]) dataset
     centers plotClusters clusters
   )

   plotRandomClusters 300           NB. 300 points, 3 clusters
   6 plotRandomClusters 30000       NB. 3e4 points, 6 clusters
   10 plotRandomClusters 17000 5    NB. 17e3 points, 10 clusters, 5 dimensions

Mathematica

Solution: the initial k-means code comes from http://mathematica.stackexchange.com/questions/7441/k-means-clustering, extended here to k-means++ by introducing the function initM. (Pictures of the result could not be uploaded.)

initM[list_List, k_Integer, distFunc_Symbol] :=
  Module[{m = {RandomChoice[list]}, n, d},
   While[Length[m] < k,
    n = First@Nearest[m, #] & /@ list;
    d = Apply[distFunc, Transpose[{n, list}], {1}];
    m = Append[m, RandomChoice[d -> list]]
    ];
   m
   ];
kmeanspp[list_, k_,
  opts : OptionsPattern[{DistanceFunction ->
      SquaredEuclideanDistance, "RandomSeed" -> {}}]] :=
 BlockRandom[SeedRandom[OptionValue["RandomSeed"]];
  Module[{m = initM[list, k, OptionValue[DistanceFunction]], update,
    partition, clusters}, update[] := m = Mean /@ clusters;
   partition[_] := (clusters =
      GatherBy[list,
       RandomChoice@
         Nearest[m, #, (# -> OptionValue[#] &@DistanceFunction)] &];
     update[]);
   FixedPoint[partition, list];
   {clusters, m}
   ]
  ];

Extra credit:
1. No changes are required for N dimensions; it just works.
2. Random data can be generated with

dim = 3;
points = 3000;
l = RandomReal[1, {points, dim}];

or

l = Select[ RandomReal[{-1, 1}, {points,2}], 
   EuclideanDistance[#, {0, 0}] <= 1 &];

or

x1 = RandomVariate[MultinormalDistribution[{0, 0}, {{1, 0}, {0, 20}}],
   points]; 
x2 = 
 RandomVariate[MultinormalDistribution[{10, 0}, {{1, 0}, {0, 20}}], 
  points];
l = Join[x1, x2];

3. Data can be visualized. In 2D:

dim = 2;
points = 30000;
l = RandomReal[1, {points, dim}];
k = 6
r1 = kmeanspp[l, k];
p1 = ListPlot[r1[[1]]];
p2 = ListPlot[r1[[2]],PlotMarkers -> {"#"}];
Show[{p1, p2}]

3D:

dim = 3;
points = 3000;
l = RandomReal[1, {points, dim}];
k = 6
r1 = kmeanspp[l, k];
p1 = ListPointPlot3D[r1[[1]]];
p2 = ListPointPlot3D[r1[[2]]];
Show[{p1, p2}]


Another version

KMeans[k_, data_] :=
 Module[{Renew, Label, Iteration},
  clusters = RandomSample[data, k];
  Label[clusters_] :=
   Flatten[Table[
     Ordering[
      Table[EuclideanDistance[data[[i]], clusters[[j]]], {j,
        Length[clusters]}], 1], {i, Length[data]}]];
  Renew[labels_] :=
   Module[{position},
    position = PositionIndex[labels];
    Return[Table[Mean[data[[position[[i]]]]], {i, Length[position]}]]];
  Iteration[labels_, clusters_] :=
   Module[{newlabels, newclusters},
    newclusters = Renew[labels];
    newlabels = Label[newclusters];
    If[newlabels == labels, labels,
     Iteration[newlabels, newclusters]]];
  Return[Iteration[clusters, Label[clusters]]]]

Perl 6

Works with: rakudo version 2015-09-16

We use Complex numbers to represent points in the plane. We feed the algorithm with three artificially made clouds of points so we can easily see if the output makes sense.
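The complex-number representation works because abs($a - $b) is the Euclidean distance between two points of the plane, so abs(...)**2 is exactly the squared distance used by the D²-weighted seeding. A quick sanity check (in Python, whose complex type behaves the same way):

# |a - b| of complex numbers is the Euclidean distance between them as 2-D points.
a, b = complex(3, 2), complex(0, 0)
print(abs(a - b) ** 2)  # ~13, i.e. 3**2 + 2**2 up to rounding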

sub infix:«-means++»(Int $K, @data) {
    my @means = @data.pick;
    until @means == $K {
        my @cumulD2 = [\+] @data.map: -> $x {
            min @means.map: { abs($x - $_)**2 }
        }
        my $rand = rand * @cumulD2[*-1];
        @means.push: @data[
            (^@data).first: { @cumulD2[$_] > $rand }
        ];
    }
    sub cluster { @data.classify: -> $x { @means.min: { abs($_ - $x) } } }
    loop (
        my %cluster;
        1e-10 < [+] (@means Z- keys (%cluster = cluster))».abs X** 2;
        @means = %cluster.values.map( { .elems R/ [+] @$_ } )
    ) { ; }
    return @means;
}

my @centers = 0, 5, 3 + 2i;
my @data = flat @centers.map: { ($_ + .5 - rand + (.5 - rand) * i) xx 100 }
@data.=pick(*);
.say for 3-means++ @data;
Output:
5.04622376429502+0.0145269848483031i
0.0185674577571743+0.0298199687431731i
2.954898072093+2.14922298688815i

Python

Translation of: D
from math import pi, sin, cos
from collections import namedtuple
from random import random, choice
from copy import copy

try:
    import psyco
    psyco.full()
except ImportError:
    pass


FLOAT_MAX = 1e100


class Point:
    __slots__ = ["x", "y", "group"]

    def __init__(self, x=0.0, y=0.0, group=0):
        self.x, self.y, self.group = x, y, group


def generate_points(npoints, radius):
    points = [Point() for _ in xrange(npoints)]

    # note: this is not a uniform 2-d distribution
    for p in points:
        r = random() * radius
        ang = random() * 2 * pi
        p.x = r * cos(ang)
        p.y = r * sin(ang)

    return points


def nearest_cluster_center(point, cluster_centers):
    """Distance and index of the closest cluster center"""
    def sqr_distance_2D(a, b):
        return (a.x - b.x) ** 2 + (a.y - b.y) ** 2

    min_index = point.group
    min_dist = FLOAT_MAX

    for i, cc in enumerate(cluster_centers):
        d = sqr_distance_2D(cc, point)
        if min_dist > d:
            min_dist = d
            min_index = i

    return (min_index, min_dist)


def kpp(points, cluster_centers):
    cluster_centers[0] = copy(choice(points))
    d = [0.0 for _ in xrange(len(points))]

    for i in xrange(1, len(cluster_centers)):
        sum = 0
        for j, p in enumerate(points):
            d[j] = nearest_cluster_center(p, cluster_centers[:i])[1]
            sum += d[j]

        sum *= random()

        for j, di in enumerate(d):
            sum -= di
            if sum > 0:
                continue
            cluster_centers[i] = copy(points[j])
            break

    for p in points:
        p.group = nearest_cluster_center(p, cluster_centers)[0]


def lloyd(points, nclusters):
    cluster_centers = [Point() for _ in xrange(nclusters)]

    # call k++ init
    kpp(points, cluster_centers)

    lenpts10 = len(points) >> 10

    changed = 0
    while True:
        # group element for centroids are used as counters
        for cc in cluster_centers:
            cc.x = 0
            cc.y = 0
            cc.group = 0

        for p in points:
            cluster_centers[p.group].group += 1
            cluster_centers[p.group].x += p.x
            cluster_centers[p.group].y += p.y

        for cc in cluster_centers:
            cc.x /= cc.group
            cc.y /= cc.group

        # find closest centroid of each PointPtr
        changed = 0
        for p in points:
            min_i = nearest_cluster_center(p, cluster_centers)[0]
            if min_i != p.group:
                changed += 1
                p.group = min_i

        # stop when 99.9% of points are good
        if changed <= lenpts10:
            break

    for i, cc in enumerate(cluster_centers):
        cc.group = i

    return cluster_centers


def print_eps(points, cluster_centers, W=400, H=400):
    Color = namedtuple("Color", "r g b")

    colors = []
    for i in xrange(len(cluster_centers)):
        colors.append(Color((3 * (i + 1) % 11) / 11.0,
                            (7 * i % 11) / 11.0,
                            (9 * i % 11) / 11.0))

    max_x = max_y = -FLOAT_MAX
    min_x = min_y = FLOAT_MAX

    for p in points:
        if max_x < p.x: max_x = p.x
        if min_x > p.x: min_x = p.x
        if max_y < p.y: max_y = p.y
        if min_y > p.y: min_y = p.y

    scale = min(W / (max_x - min_x),
                H / (max_y - min_y))
    cx = (max_x + min_x) / 2
    cy = (max_y + min_y) / 2

    print "%%!PS-Adobe-3.0\n%%%%BoundingBox: -5 -5 %d %d" % (W + 10, H + 10)

    print ("/l {rlineto} def /m {rmoveto} def\n" +
           "/c { .25 sub exch .25 sub exch .5 0 360 arc fill } def\n" +
           "/s { moveto -2 0 m 2 2 l 2 -2 l -2 -2 l closepath " +
           " gsave 1 setgray fill grestore gsave 3 setlinewidth" +
           " 1 setgray stroke grestore 0 setgray stroke }def")

    for i, cc in enumerate(cluster_centers):
        print ("%g %g %g setrgbcolor" %
               (colors[i].r, colors[i].g, colors[i].b))

        for p in points:
            if p.group != i:
                continue
            print ("%.3f %.3f c" % ((p.x - cx) * scale + W / 2,
                                    (p.y - cy) * scale + H / 2))

        print ("\n0 setgray %g %g s" % ((cc.x - cx) * scale + W / 2,
                                        (cc.y - cy) * scale + H / 2))

    print "\n%%%%EOF"


def main():
    npoints = 30000
    k = 7  # # clusters

    points = generate_points(npoints, 10)
    cluster_centers = lloyd(points, k)
    print_eps(points, cluster_centers)


main()


Racket

The k-means clustering:

 
#lang racket
(require racket/dict
         math/distributions)

;; Divides the set of points into k clusters
;; using the standard k-means clustering algorithm
(define (k-means data k #:initialization (init k-means++))
  (define (iteration centroids)
    (map centroid (clusterize data centroids)))
  (fixed-point iteration (init data k) #:same-test small-shift?))

;; Finds the centroid for a set of points
(define (centroid pts)
  (vector-map (curryr / (length pts))
              (for/fold ([sum (car pts)]) ([x (in-list (cdr pts))])
                (vector-map + x sum))))

;; Divides the set of points into clusters
;; using given centroids
(define (clusterize data centroids)
  (for*/fold ([res (map list centroids)]) ([x (in-list data)])
    (define c (argmin (distanse-to x) centroids))
    (dict-set res c (cons x (dict-ref res c)))))

;; Stop criterion: all centroids change their positions
;; by less than 0.1% of the minimal distance between centroids.
(define (small-shift? c1 c2)
  (define min-distance
    (apply min
           (for*/list ([x (in-list c2)]
                       [y (in-list c2)] #:unless (equal? x y))
             ((metric) x y))))
  (for/and ([a (in-list c1)] [b (in-list c2)])
    (< ((metric) a b) (* 0.001 min-distance))))
 

Initialization methods

 
;; picks k points from a dataset randomly
(define (random-choice data k)
  (for/list ([i (in-range k)])
    (list-ref data (random (length data)))))

;; uses k-means++ algorithm
(define (k-means++ data k)
  (for/fold ([centroids (random-choice data 1)]) ([i (in-range (- k 1))])
    (define weights
      (for/list ([x (in-list data)])
        (apply min (map (distanse-to x) centroids))))
    (define new-centroid
      (sample (discrete-dist data weights)))
    (cons new-centroid centroids)))
 

Different metrics

 
(define (euclidean-distance a b)
  (for/sum ([x (in-vector a)] [y (in-vector b)])
    (sqr (- x y))))

(define (manhattan-distance a b)
  (for/sum ([x (in-vector a)] [y (in-vector b)])
    (abs (- x y))))

(define metric (make-parameter euclidean-distance))
(define (distanse-to x) (curry (metric) x))
 

The fixed point operator

 
(define (fixed-point f x0 #:same-test [same? equal?])
  (let loop ([x x0] [fx (f x0)])
    (if (same? x fx) fx (loop fx (f fx)))))
 

Creating sample clusters

Clustering using k-means++ method.
Clustering using random seeds.
Clustering using k-means++ method with Manhattan distance.
Clustering using k-means++ method almost always handles the difficult case.
Clustering using random seeds may give poor results.


 
(define (gaussian-cluster N
                          #:stdev (σ 1)
                          #:center (r0 #(0 0))
                          #:dim (d 2))
  (for/list ([i (in-range N)])
    (define r (for/vector ([j (in-range d)]) (sample (normal-dist 0 σ))))
    (vector-map + r r0)))

(define (uniform-cluster N
                         #:radius (R 1)
                         #:center (r0 #(0 0)))
  (for/list ([i (in-range N)])
    (define r (* R (sqrt (sample (uniform-dist)))))
    (define φ (* 2 pi (sample (uniform-dist))))
    (vector-map + r0 (vector (* r (cos φ)) (* r (sin φ))))))
 

Visualization

 
(require plot)

(define (show-clustering data k #:method (method k-means++))
  (define c (k-means data k #:initialization method))
  (display
   (plot
    (append
     (for/list ([d (clusterize data c)]
                [i (in-naturals)])
       (points d #:color i #:sym 'fullcircle1))
     (list (points c
                   #:sym 'fullcircle7
                   #:fill-color 'yellow
                   #:line-width 3)))
    #:title (format "Initializing by ~a" (object-name method)))))
 

Testing

 
(module+ test
  (define circle (uniform-cluster 30000))
  ;; using k-means++ method
  (show-clustering circle 6)
  ;; using standard k-means method
  (show-clustering circle 6 #:method random-choice)
  ;; using manhattan distance
  (parameterize ([metric manhattan-distance])
    (show-clustering circle 6)))
 

The difficult case.

 
(module+ test
  (define clouds
    (append
     (gaussian-cluster 1000 #:stdev 0.5 #:center #(0 0))
     (gaussian-cluster 1000 #:stdev 0.5 #:center #(2 3))
     (gaussian-cluster 1000 #:stdev 0.5 #:center #(2.5 -1))
     (gaussian-cluster 1000 #:stdev 0.5 #:center #(6 0))))

  ;; using k-means++ method
  (show-clustering clouds 4)
  ;; using standard k-means method
  (show-clustering clouds 4 #:method random-choice))
 

Multi-dimensional case.

 
(module+ test
  (define 5d-data
    (append
     (gaussian-cluster 1000 #:dim 5 #:center #(2 0 0 0 0))
     (gaussian-cluster 1000 #:dim 5 #:center #(0 2 0 0 0))
     (gaussian-cluster 1000 #:dim 5 #:center #(0 0 2 0 0))
     (gaussian-cluster 1000 #:dim 5 #:center #(0 0 0 2 0))
     (gaussian-cluster 1000 #:dim 5 #:center #(0 0 0 0 2))))

  (define centroids (k-means 5d-data 5))

  (map (curry vector-map round) centroids))
 

Output shows that centroids were found correctly.

(#(-0.0 2.0 -0.0 0.0 0.0)
 #(0.0 0.0 -0.0 2.0 -0.0)
 #(2.0 -0.0 -0.0 -0.0 -0.0)
 #(-0.0 -0.0 2.0 0.0 0.0)
 #(-0.0 -0.0 0.0 0.0 2.0))

Rust

Translation of: Python (the initial point selection part)
 
extern crate csv;
extern crate getopts;
extern crate gnuplot;
extern crate nalgebra;
extern crate num;
extern crate rand;
extern crate rustc_serialize;
extern crate test;

use getopts::Options;
use gnuplot::{Axes2D, AxesCommon, Color, Figure, Fix, PointSize, PointSymbol};
use nalgebra::{DVector, Iterable};
use rand::{Rng, SeedableRng, StdRng};
use rand::distributions::{IndependentSample, Range};
use std::f64::consts::PI;
use std::env;

type Point = DVector<f64>;

struct Cluster<'a> {
    members: Vec<&'a Point>,
    center: Point,
}

struct Stats {
    centroids: Vec<Point>,
    mean_d_from_centroid: DVector<f64>,
}

/// DVector doesn't implement BaseFloat, so a custom distance function is required.
fn sqdist(p1: &Point, p2: &Point) -> f64 {
    (p1.clone() - p2.clone()).iter().map(|x| x * x).fold(0f64, |a, b| a + b)
}

/// Returns (distance^2, index) tuple of winning point.
fn nearest(p: &Point, candidates: &Vec<Point>) -> (f64, usize) {
    let (dsquared, the_index) = candidates.iter()
        .enumerate()
        .fold((sqdist(p, &candidates[0]), 0),
              |(d, index), next| {
                  let dprime = sqdist(p, &candidates[next.0]);
                  if dprime < d {
                      (dprime, next.0)
                  } else {
                      (d, index)
                  }
              });
    (dsquared, the_index)
}

/// Computes starting centroids and makes initial assignments.
fn kpp(points: &Vec<Point>, k: usize, rng: &mut StdRng) -> Stats {
    let mut centroids: Vec<Point> = Vec::new();
    // Random point for first centroid guess:
    centroids.push(points[rng.gen::<usize>() % points.len()].clone());
    let mut dists: Vec<f64> = vec![0f64; points.len()];

    for _ in 1..k {
        let mut sum = 0f64;
        for (j, p) in points.iter().enumerate() {
            let (dsquared, _) = nearest(&p, &centroids);
            dists[j] = dsquared;
            sum += dsquared;
        }

        // This part chooses the next cluster center with a probability proportional to d^2
        sum *= rng.next_f64();
        for (j, d) in dists.iter().enumerate() {
            sum -= *d;
            if sum <= 0f64 {
                centroids.push(points[j].clone());
                break;
            }
        }
    }

    let clusters = assign_clusters(points, &centroids);
    compute_stats(&clusters)
}

fn assign_clusters<'a>(points: &'a Vec<Point>, centroids: &Vec<Point>) -> Vec<Cluster<'a>> {
    let mut clusters: Vec<Cluster> = Vec::new();

    for _ in 0..centroids.len() {
        clusters.push(Cluster {
            members: Vec::new(),
            center: DVector::new_zeros(points[0].len()),
        });
    }

    for p in points.iter() {
        let (_, nearest_index) = nearest(p, centroids);
        clusters[nearest_index].center = clusters[nearest_index].center.clone() + p.clone();
        clusters[nearest_index].members.push(p);
    }

    for i in 0..clusters.len() {
        clusters[i].center = clusters[i].center.clone() / clusters[i].members.len() as f64;
    }

    clusters
}

/// Computes centroids and mean-distance-from-centroid for each cluster.
fn compute_stats(clusters: &Vec<Cluster>) -> Stats {
    let mut centroids = Vec::new();
    let mut means_vec = Vec::new();

    for c in clusters.iter() {
        let pts = &c.members;
        let seed: DVector<f64> = DVector::new_zeros(pts[0].len());
        let centroid = pts.iter().fold(seed, |a, &b| a + b.clone()) / pts.len() as f64;
        means_vec.push(pts.iter().fold(0f64, |acc, pt| acc + sqdist(pt, &centroid).sqrt()) /
                       pts.len() as f64);
        centroids.push(centroid);
    }

    Stats {
        centroids: centroids,
        mean_d_from_centroid: DVector::from_slice(means_vec.len(), means_vec.as_slice()),
    }
}

fn lloyd<'a>(points: &'a Vec<Point>,
             k: usize,
             stoppage_delta: f64,
             max_iter: u32,
             rng: &mut StdRng)
             -> (Vec<Cluster<'a>>, Stats) {

    let mut clusters = Vec::new();
    // Choose starting centroids and make initial assignments
    let mut stats = kpp(points, k, rng);

    for i in 1..max_iter {
        let last_means: DVector<f64> = stats.mean_d_from_centroid.clone();
        clusters = assign_clusters(points, &stats.centroids);
        stats = compute_stats(&clusters);
        let err = sqdist(&stats.mean_d_from_centroid, &last_means).sqrt();
        if err < stoppage_delta {
            println!("Stoppage condition reached on iteration {}", i);
            return (clusters, stats);
        }
        // Console output
        print!("Iter {}: ", i);
        for (cen, mu) in stats.centroids.iter().zip(stats.mean_d_from_centroid.iter()) {
            print_dvec(cen);
            print!(" {:1.2} | ", mu);
        }
        print!("{:1.5}\n", err);
    }

    println!("Stoppage condition not reached by iteration {}", max_iter);
    (clusters, stats)
}

/// Uniform sampling on the unit disk.
fn generate_points(n: u32, rng: &mut StdRng) -> Vec<Point> {
    let r_range = Range::new(0f64, 1f64);
    let theta_range = Range::new(0f64, 2f64 * PI);
    let mut points: Vec<Point> = Vec::new();

    for _ in 0..n {
        let root_r = r_range.ind_sample(rng).sqrt();
        let theta = theta_range.ind_sample(rng);
        points.push(DVector::<f64>::from_slice(2, &[root_r * theta.cos(), root_r * theta.sin()]));
    }

    points
}

// Plot clusters (2d only). Closure idiom allows us to borrow and mutate the Axes2D.
fn viz(clusters: Vec<Cluster>, stats: Stats, k: usize, n: u32, e: f64) {
    let mut fg = Figure::new();
    {
        let prep = |fg: &mut Figure| {
            let axes: &mut Axes2D = fg.axes2d();
            let title: String = format!("k = {}, n = {}, e = {:4}", k, n, e);
            let centroids_x = stats.centroids.iter().map(|c| c[0]);
            let centroids_y = stats.centroids.iter().map(|c| c[1]);
            for cluster in clusters.iter() {
                axes.points(cluster.members.iter().map(|p| p[0]),
                            cluster.members
                                .iter()
                                .map(|p| p[1]),
                            &[PointSymbol('O'), PointSize(0.25)]);
            }
            axes.set_aspect_ratio(Fix(1.0))
                .points(centroids_x,
                        centroids_y,
                        &[PointSymbol('o'), PointSize(1.5), Color("black")])
                .set_title(&title[..], &[]);
        };
        prep(&mut fg);
    }
    fg.show();
}

fn print_dvec(v: &DVector<f64>) {
    print!("(");
    for elem in v.at.iter().take(v.len() - 1) {
        print!("{:+1.2}, ", elem)
    }
    print!("{:+1.2})", v.at.iter().last().unwrap());
}

fn print_usage(program: &str, opts: Options) {
    let brief = format!("Usage: {} [options]", program);
    print!("{}", opts.usage(&brief));
}

fn main() {
    let args: Vec<String> = env::args().collect();
    let mut k: usize = 7;
    let mut n: u32 = 30000;
    let mut e: f64 = 1e-3;
    let max_iterations = 100u32;

    let mut opts = Options::new();
    opts.optflag("?", "help", "Print this help menu");
    opts.optopt("k",
                "",
                "Number of clusters to assign (default: 7)",
                "<clusters>");
    opts.optopt("n",
                "",
                "Operate on this many points on the unit disk (default: 30000)",
                "<pts>");
    opts.optopt("e",
                "",
                "Min delta in norm of successive cluster centroids to continue (default: 1e-3)",
                "<eps>");
    opts.optopt("f", "", "Read points from file (overrides -n)", "<csv>");

    let program = args[0].clone();
    let matches = match opts.parse(&args[1..]) {
        Ok(m) => m,
        Err(f) => panic!(f.to_string()),
    };
    if matches.opt_present("?") {
        print_usage(&program, opts);
        return;
    }
    match matches.opt_str("k") {
        None => {}
        Some(x) => k = x.parse::<usize>().unwrap(),
    };
    match matches.opt_str("n") {
        None => {}
        Some(x) => n = x.parse::<u32>().unwrap(),
    };
    match matches.opt_str("e") {
        None => {}
        Some(x) => e = x.parse::<f64>().unwrap(),
    };

    let seed: &[_] = &[1, 2, 3, 4];
    let mut rng: StdRng = SeedableRng::from_seed(seed);

    let mut points: Vec<Point>;

    match matches.opt_str("f") {
        None => {
            // Proceed with random 2d data
            points = generate_points(n, &mut rng)
        }
        Some(file) => {
            points = Vec::new();
            let mut rdr = csv::Reader::from_file(file.clone()).unwrap();
            for row in rdr.records().map(|r| r.unwrap()) {
                // row is Vec<String>
                let floats: Vec<f64> = row.iter().map(|s| s.parse::<f64>().unwrap()).collect();
                points.push(DVector::<f64>::from_slice(floats.len(), floats.as_slice()));
            }
            assert!(points.iter().all(|v| v.len() == points[0].len()));
            n = points.len() as u32;
            println!("Read {} points from {}", points.len(), file.clone());
        }
    };

    assert!(points.len() >= k);
    let (clusters, stats) = lloyd(&points, k, e, max_iterations, &mut rng);

    println!(" k       centroid{}mean dist    pop",
             std::iter::repeat(" ").take((points[0].len() - 2) * 7 + 7).collect::<String>());
    println!("===  {}  ===========  =====",
             std::iter::repeat("=").take(points[0].len() * 7 + 2).collect::<String>());
    for i in 0..clusters.len() {
        print!(" {:>1}   ", i);
        print_dvec(&stats.centroids[i]);
        print!("   {:1.2}       {:>4}\n",
               stats.mean_d_from_centroid[i],
               clusters[i].members.len());
    }

    if points[0].len() == 2 {
        viz(clusters, stats, k, n, e)
    }
}
 

[Plots exist but file upload is broken at the moment.]

Output of run on 30k points on the unit disk:

Stoppage condition reached on iteration 10
 k       centroid       mean dist    pop
===  ================  ===========  =====
 0    (+0.34, -0.61)      0.27       4425
 1    (+0.70, -0.01)      0.26       4293
 2    (-0.37, -0.59)      0.27       4319
 3    (+0.35, +0.61)      0.26       4368
 4    (-0.00, +0.01)      0.25       4095
 5    (-0.34, +0.62)      0.26       4190
 6    (-0.71, +0.04)      0.26       4310

Extra credit 4: Use of the DVector type in the nalgebra crate gives some arithmetic vector operations for free, and generalizes to n dimensions with no work. Here is the output of running this program on the 4-D Fisher Iris data (I don't think this data clusters well):

k       centroid                     mean dist    pop
===  ==============================  ===========  =====
0    (+5.00, +3.43, +1.46, +0.25)      0.49         49
1    (+5.88, +2.74, +4.39, +1.43)      0.73         61
2    (+6.85, +3.08, +5.72, +2.05)      0.73         39

Tcl

Translation of: C
Library: Tcllib (Package: math::constants)
package require Tcl 8.5
package require math::constants
math::constants::constants pi
proc tcl::mathfunc::randf m {expr {$m * rand()}}

proc genXY {count radius} {
    global pi
    for {set i 0} {$i < $count} {incr i} {
        set ang [expr {randf(2 * $pi)}]
        set r [expr {randf($radius)}]
        lappend pt [list [expr {$r*cos($ang)}] [expr {$r*sin($ang)}] -1]
    }
    return $pt
}
proc dist2 {a b} {
    lassign $a ax ay
    lassign $b bx by
    return [expr {($ax-$bx)**2 + ($ay-$by)**2}]
}

proc nearest {pt cent {d2var ""}} {
    set minD 1e30
    set minI [lindex $pt 2]
    set i -1
    foreach c $cent {
        incr i
        set d [dist2 $c $pt]
        if {$minD > $d} {
            set minD $d
            set minI $i
        }
    }
    if {$d2var ne ""} {
        upvar 1 $d2var d2
        set d2 $minD
    }
    return $minI
}

proc kpp {ptsVar centVar numClusters} {
    upvar 1 $ptsVar pts $centVar cent
    set idx [expr {int([llength $pts] * rand())}]
    set cent [list [lindex $pts $idx]]
    for {set nCent 1} {$nCent < $numClusters} {incr nCent} {
        set sum 0
        set d {}
        foreach p $pts {
            nearest $p $cent dd
            set sum [expr {$sum + $dd}]
            lappend d $dd
        }
        set sum [expr {randf($sum)}]
        foreach p $pts dj $d {
            set sum [expr {$sum - $dj}]
            if {$sum <= 0} {
                lappend cent $p
                break
            }
        }
    }
    set i -1
    foreach p $pts {
        lset pts [incr i] 2 [nearest $p $cent]
    }
}

proc lloyd {ptsVar numClusters} {
    upvar 1 $ptsVar pts
    kpp pts cent $numClusters
    while 1 {
        # Find centroids for round
        set groupCounts [lrepeat [llength $cent] 0]
        foreach p $pts {
            lassign $p cx cy group
            lset groupCounts $group [expr {[lindex $groupCounts $group] + 1}]
            lset cent $group 0 [expr {[lindex $cent $group 0] + $cx}]
            lset cent $group 1 [expr {[lindex $cent $group 1] + $cy}]
        }
        set i -1
        foreach groupn $groupCounts {
            incr i
            lset cent $i 0 [expr {[lindex $cent $i 0] / $groupn}]
            lset cent $i 1 [expr {[lindex $cent $i 1] / $groupn}]
        }

        set changed 0
        set i -1
        foreach p $pts {
            incr i
            set minI [nearest $p $cent]
            if {$minI != [lindex $p 2]} {
                incr changed
                lset pts $i 2 $minI
            }
        }
        if {$changed < ([llength $pts] >> 10)} break
    }
    set i -1
    foreach c $cent {
        lset cent [incr i] 2 $i
    }
    return $cent
}

Demonstration/visualization code:

Library: Tk
package require Tk
image create photo disp -width 400 -height 400
pack [label .l -image disp]
update
proc plot {x y color} {
    disp put $color -to [expr {int(200+19.9*$x)}] [expr {int(200+19.9*$y)}]
}
apply {{} {
    set POINTS [genXY 100000 10]
    set CENTROIDS [lloyd POINTS 11]
    foreach c $CENTROIDS {
        lappend colors [list [list [format "#%02x%02x%02x" \
                [expr {64+int(128*rand())}] [expr {64+int(128*rand())}] \
                [expr {64+int(128*rand())}]]]]
    }
    foreach pt $POINTS {
        lassign $pt px py group
        plot $px $py [lindex $colors $group]
    }
    foreach c $CENTROIDS {
        lassign $c cx cy group
        plot $cx $cy black
    }
}}

XPL0

As with C, simplicity and clarity were chosen over extra credit. Also, the dataset is global, and the arrays are separate instead of being packed into two arguments and passed into the KMeans procedure. Hopefully the animated display, showing the convergence of the clusters, compensates somewhat for these sins. Alas, image uploads appear to be broken.

include c:\cxpl\codes;                  \intrinsic 'code' declarations

def     N = 30000;                      \number of points
def     K = 6;                          \number of clusters
int     Px(N), Py(N), Pc(N),            \coordinates of points and their cluster
        Cx(K), Cy(K);                   \coordinates of centroid of cluster


func Centroid;  \Find new centroids of points grouped with current centroids
int  Change, Cx0(K), Cy0(K), C, Count, I;
[Change:= false;
for C:= 0 to K-1 do                     \for each centroid...
    [Cx0(C):= Cx(C);  Cy0(C):= Cy(C);   \save current centroid
    Cx(C):= 0;  Cy(C):= 0;  Count:= 0;  \find new centroid
    for I:= 0 to N-1 do                 \for all points
        if Pc(I) = C then               \ grouped with current centroid...
            [Cx(C):= Cx(C) + Px(I);
             Cy(C):= Cy(C) + Py(I);
             Count:= Count+1;
            ];
    Cx(C):= Cx(C)/Count;  Cy(C):= Cy(C)/Count;
    if Cx(C)#Cx0(C) or Cy(C)#Cy0(C) then Change:= true;
    ];
return Change;
];


proc Voronoi;   \Group points with their nearest centroid
int  D2, MinD2, I, C;   \distance squared, minimum distance squared
[for I:= 0 to N-1 do    \for each point...
    [MinD2:= -1>>1;     \find closest centroid
    for C:= 0 to K-1 do
        [D2:= sq(Px(I)-Cx(C)) + sq(Py(I)-Cy(C));
        if D2 < MinD2 then
            [MinD2:= D2;  Pc(I):= C];   \update closest centroid
        ];
    ];
];


proc KMeans;    \Group points into K clusters
int  Change, I;
repeat  Voronoi;
        Change:= Centroid;
        SetVid($101);                   \show result on 640x480x8 screen
        for I:= 0 to N-1 do Point(Px(I), Py(I), Pc(I)+1);
        for I:= 0 to K-1 do Point(Cx(I), Cy(I), \bright white\ $F);
until   Change = false;


proc Random(X, Y);      \Return random X,Y biased for polar coordinates
int  X, Y;
real A, D;
[D:= float(Ran(240));                   \distance: 0..239
A:= float(Ran(314159*2)) / 10000.0;     \angle: 0..2pi
X(0):= fix(D*Cos(A)) + 320;             \rectangular coords centered on screen
Y(0):= fix(D*Sin(A)) + 240;
];


int I;
[for I:= 0 to N-1 do Random(@Px(I), @Py(I));    \random set of points
for I:= 0 to K-1 do Random(@Cx(I), @Cy(I));     \random set of cluster centroids
KMeans;
I:= ChIn(1);            \wait for keystroke
SetVid($03);            \restore normal text screen
]