K-means++ clustering

To extend the code to handle dimensions higher than 2, give <code>POINT</code> more coordinates, update the <code>dist2</code> distance function, and update the centroid computation in the <code>lloyd</code> K-Means function. Multidimensional scaling will be needed to visualize the output.
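A minimal sketch of that change (illustrative only; the coordinate name <code>z</code> is an assumption, and the entry below remains strictly 2-D; the centroid sums in <code>lloyd</code> would need a matching <code>z</code> accumulator):

<syntaxhighlight lang="c">typedef struct {
    double x;
    double y;
    double z;      /* added third coordinate */
    int group;
} POINT;

/* squared Euclidean distance in 3-D */
double dist2(POINT * a, POINT * b)
{
    double dx = a->x - b->x;
    double dy = a->y - b->y;
    double dz = a->z - b->z;
    return dx * dx + dy * dy + dz * dz;
}</syntaxhighlight>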
 
This code uses the function <code>kppFaster</code> to find the initial centroids; it is faster than the original function <code>kpp</code>, especially with large data sets. <code>kppFaster</code> keeps an array of each point's shortest distance to the previously selected centroids and selects new centroids with a bisection search over the cumulative distances, so it does not need the function <code>nearestDistance</code>. The original functions <code>kpp</code> and <code>nearestDistance</code> are included here for comparison.
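For reference, k-means++ selects each new centroid from the data points with probability proportional to the squared distance <math>D(x)^2</math> from each point to its nearest already-chosen centroid:

<math>P(x) = \frac{D(x)^2}{\sum_{x'} D(x')^2}</math>

Because <code>dist2</code> already returns squared distances, the array of cumulative distances built by <code>kppFaster</code> is the unnormalized cumulative distribution of this weighting; drawing a uniform value in <math>[0, \textstyle\sum_{x'} D(x')^2)</math> and bisecting locates the selected point in <math>O(\log n)</math> time.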
 
<syntaxhighlight lang="c"># define NUMBER_OF_POINTS 100000
# define NUMBER_OF_CLUSTERS 11
# define MAXIMUM_ITERATIONS 100
# define RADIUS 10.0
 
 
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
 
 
typedef struct {
double x;
double y;
int group;
} POINT;
 
/*------------------------------------------------------
   gen_xy

   This function allocates and returns an array of points
   placed at random within a circle of the given radius.
------------------------------------------------------*/
POINT * gen_xy(int num_pts, double radius)
{
int i;
double ang, r;
POINT * pts;
   pts = (POINT*) malloc(sizeof(POINT) * num_pts);

   /* Note: this is not a uniform 2-d distribution. */
   for (i = 0; i < num_pts; i++) {
      ang = 2.0 * M_PI * rand() / (double) RAND_MAX;
      r = radius * rand() / (double) RAND_MAX;
      pts[i].x = r * cos(ang);
      pts[i].y = r * sin(ang);
      pts[i].group = 0;
   }
   return pts;
} /* end, gen_xy */

/*------------------------------------------------------
   dist2

   This function returns the squared distance between
   two data points.
------------------------------------------------------*/
double dist2(POINT * a, POINT * b)
{
   double x = a->x - b->x;
   double y = a->y - b->y;

return x*x + y*y;
}
 
/*------------------------------------------------------
nearest
 
This function returns the index of the cluster centroid
nearest to the data point passed to this function.
------------------------------------------------------*/
int nearest(POINT * pt, POINT * cent, int n_cluster)
{
   int i, clusterIndex;
   double d, min_d;

   min_d = HUGE_VAL;
   clusterIndex = pt->group;
   for (i = 0; i < n_cluster; i++) {
      d = dist2(&cent[i], pt);
      if (min_d > d) {
         min_d = d;
clusterIndex = i;
}
}
return clusterIndex;
}

/*------------------------------------------------------
nearestDistance
 
This function returns the distance of the cluster centroid
nearest to the data point passed to this function.
------------------------------------------------------*/
double nearestDistance(POINT * pt, POINT * cent, int n_cluster)
{
   int i;
   double d, min_d;

   min_d = HUGE_VAL;
   for (i = 0; i < n_cluster; i++) {
      d = dist2(&cent[i], pt);
      if (min_d > d) {
         min_d = d;
}
}
 
return min_d;
}
 
/*----------------------------------------------------------------------
bisectionSearch
This function performs a bisection search on an array of values
sorted in increasing order, and returns the index of the first
element greater than the search value passed as a parameter.
This code is adapted from code by Andy Allinger given to the public
domain, which was in turn adapted from public domain code for spline
evaluation by Rondall Jones (Sandia National Laboratories).
 
Input:
x A pointer to an array of values in increasing order to be searched.
n The number of elements in the input array x.
v The search value.
Output:
Returns the index of the first element greater than the search value, v.
----------------------------------------------------------------------*/
int bisectionSearch(double *x, int n, double v)
{
int il, ir, i;
 
if (n < 1) {
return 0;
}
/* If v is less than x(0) or greater than x(n-1) */
if (v < x[0]) {
return 0;
}
else if (v > x[n-1]) {
return n - 1;
}
/*bisection search */
il = 0;
ir = n - 1;
 
i = (il + ir) / 2;
while ( i != il ) {
if (x[i] <= v) {
il = i;
} else {
ir = i;
}
i = (il + ir) / 2;
}
 
if (x[i] <= v)
i = ir;
return i;
} /* end of bisectionSearch */
 
/*-------------------------------------------------------
kppFaster
This function uses the K-Means++ method to select
the cluster centroids.
This code is adapted from code by Andy Allinger given to the
public domain.
 
Input:
pts A pointer to an array of data points.
num_pts The number of points in the pts array.
centroids A pointer to an array to receive the centroids.
num_clusters The number of clusters to be found.
Output:
centroids A pointer to the array of centroids found.
-------------------------------------------------------*/
void kppFaster(POINT * pts, int num_pts, POINT * centroids,
int num_clusters)
{
int j;
int selectedIndex;
int cluster;
double sum;
double d;
double random;
double * cumulativeDistances;
double * shortestDistance;
 
cumulativeDistances = (double*) malloc(sizeof(double) * num_pts);
shortestDistance = (double*) malloc(sizeof(double) * num_pts);
 
/* Pick the first cluster centroid at random. */
selectedIndex = rand() % num_pts;
centroids[0] = pts[ selectedIndex ];
for (j = 0; j < num_pts; ++j)
shortestDistance[j] = HUGE_VAL;
/* Select the centroids for the remaining clusters. */
for (cluster = 1; cluster < num_clusters; cluster++) {
/* For each point find its closest distance to any of
the previous cluster centers */
for ( j = 0; j < num_pts; j++ ) {
d = dist2(&pts[j], &centroids[cluster-1] );
if (d < shortestDistance[j])
shortestDistance[j] = d;
}
/* Create an array of the cumulative distances. */
sum = 0.0;
for (j = 0; j < num_pts; j++) {
sum += shortestDistance[j];
cumulativeDistances[j] = sum;
}
 
/* Select a point at random. Those with greater distances
have a greater probability of being selected. */
random = (float) rand() / (float) RAND_MAX * sum;
selectedIndex = bisectionSearch(cumulativeDistances, num_pts, random);
/* assign the selected point as the center */
centroids[cluster] = pts[selectedIndex];
}
 
/* Assign each point the index of its nearest cluster centroid. */
for (j = 0; j < num_pts; j++)
pts[j].group = nearest(&pts[j], centroids, num_clusters);
 
free(shortestDistance);
free(cumulativeDistances);
 
return;
} /* end, kppFaster */
 
/*-------------------------------------------------------
   kpp

   This function uses the K-Means++ method to select
   the cluster centroids (the original, slower version).
-------------------------------------------------------*/
void kpp(POINT * pts, int num_pts, POINT * centroids,
               int num_clusters)
{
   int j;
   int cluster;
   double sum;
double * distances;
 
distances = (double*) malloc(sizeof(double) * num_pts);
 
/* Pick the first cluster centroid at random. */
centroids[0] = pts[ rand() % num_pts ];
/* Select the centroids for the remaining clusters. */
   for (cluster = 1; cluster < num_clusters; cluster++) {
      /* Sum each point's distance to its nearest selected centroid. */
      sum = 0;
      for (j = 0; j < num_pts; j++) {
         distances[j] = nearestDistance(&pts[j], centroids, cluster);
         sum += distances[j];
      }
      /* Draw a random threshold in [0, sum). */
      sum = (double) rand() / (double) RAND_MAX * sum;
      /* Assign the centroids. Points with larger distances
         have a greater probability of being selected. */
for (j = 0; j < num_pts; j++ ) {
sum -= distances[j];
         if (sum > 0) continue;
         /* Assign the selected point as the center. */
         centroids[cluster] = pts[j];
         break;
      }
   }

   /* Assign each point the index of its nearest cluster centroid. */
   for (j = 0; j < num_pts; j++)
      pts[j].group = nearest(&pts[j], centroids, num_clusters);
 
free(distances);
 
return;
} /* end, kpp */
 
 
/*-------------------------------------------------------
   lloyd

   This function uses the K-Means method (Lloyd's algorithm)
   to group the points into clusters, and returns an array
   holding the cluster centroids.
-------------------------------------------------------*/
POINT * lloyd(POINT * pts, int num_pts, int num_clusters, int maxTimes)
{
int i, clusterIndex;
int changes;
int acceptable = num_pts / 1000;	/* The maximum number of point changes acceptable. */
 
 
if (num_clusters == 1 || num_pts <= 0 || num_clusters > num_pts )
return 0;
 
 
POINT * centroids = (POINT *)malloc(sizeof(POINT) * num_clusters);

   /* The centroids could be picked at random:
   for (i = 0; i < num_clusters; i++) {
      centroids[i] = pts[rand() % num_pts];
}
*/
/* or use the k-Means++ method */
 
/* Original version
kpp(pts, num_pts, centroids, num_clusters);
*/
/* Faster version */
kppFaster(pts, num_pts, centroids, num_clusters);
 
do {
      /* Calculate each cluster's centroid; the group element
         doubles as a point counter. */
      for (i = 0; i < num_clusters; i++) {
         centroids[i].x = 0;
         centroids[i].y = 0;
         centroids[i].group = 0;
      }
      for (i = 0; i < num_pts; i++) {
         clusterIndex = pts[i].group;
         centroids[clusterIndex].x += pts[i].x;
         centroids[clusterIndex].y += pts[i].y;
         centroids[clusterIndex].group++;
      }
      for (i = 0; i < num_clusters; i++) {
         centroids[i].x /= centroids[i].group;
         centroids[i].y /= centroids[i].group;
      }
/* Find each data point's nearest centroid */
changes = 0;
for ( i = 0; i < num_pts; i++ ) {
clusterIndex = nearest(&pts[i], centroids, num_clusters);
if (clusterIndex != pts[i].group) {
pts[i].group = clusterIndex;
changes++;
}
}
maxTimes--;
} while ((changes > acceptable) && (maxTimes > 0));
/* Set each centroid's group index */
   for (i = 0; i < num_clusters; i++)
      centroids[i].group = i;
 
return centroids;
} /* end, lloyd */
}
 
/*-------------------------------------------------------
   print_eps

   This function writes an Encapsulated PostScript rendering
   of the points and cluster centroids to standard output.
-------------------------------------------------------*/
void print_eps(POINT * pts, int num_pts, POINT * centroids, int num_clusters)
{
   double * colors;
   /* ... */
 
free(colors);
	return;
} /* end print_eps */
/*-------------------------------------------------------
main
-------------------------------------------------------*/
int main()
{
int num_pts = NUMBER_OF_POINTS;
int num_clusters = NUMBER_OF_CLUSTERS;
int maxTimes = MAXIMUM_ITERATIONS;
double radius = RADIUS;
POINT * pts;
   POINT * centroids;

   pts = gen_xy(num_pts, radius);
   centroids = lloyd(pts, num_pts, num_clusters, maxTimes);
   print_eps(pts, num_pts, centroids, num_clusters);

   free(centroids);
   free(pts);
 
return 0;
}</syntaxhighlight>
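Note that <code>gen_xy</code> samples the radius uniformly, so the distribution over the disc is not uniform: points bunch up near the centre. If a uniform distribution over the disc is wanted, take the square root of a uniform variate before scaling by the radius. A minimal sketch (not part of the original entry; the function name is hypothetical):

<syntaxhighlight lang="c">#include <math.h>
#include <stdlib.h>

/* Uniform sampling over a disc of the given radius:
   P(r <= t) must equal (t/radius)^2, so r = radius * sqrt(u)
   with u uniform in [0,1]. */
void uniform_disc_point(double radius, double *x, double *y)
{
    double u   = rand() / (double) RAND_MAX;
    double ang = 2.0 * M_PI * (rand() / (double) RAND_MAX);
    double r   = radius * sqrt(u);

    *x = r * cos(ang);
    *y = r * sin(ang);
}</syntaxhighlight>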
 
=={{header|Crystal}}==
{{trans|Racket,Python}}
 
<syntaxhighlight lang="crystal">
 
# kmeans++ clustering in crystal lang
#
# Task :: function that takes two arguments
# k : uint - the number of clusters
# points : [[float]] - the dataset to classify
#
# and returns a list of clusters
 
# The algorithm of kmeans with a specific initialization
#
# k : int - number of clusters
# points : [[float]] - the dataset of n-dimensional points
# distance : ([float],[float])->float - the distance between two points
# convergence_threshold : float - ratio of correctly classified points
# rng : (msg)->float - random number generator
#
# {[[float]],[int]} - returns a tuple of the center of the cluster and an array
# with the cluster-id for each point.
#
def kmeans(k, points,
distance = ->euclidean_distance(Array(Float64),Array(Float64)),
convergence_threshold=0.99,
rng = Random::DEFAULT)
 
# ---------------------------------------------------------------------------
# the k++ method for choosing the initial values ('seeds') for the k-means
# clustering.
# ---------------------------------------------------------------------------
 
# arrays of the clusters centers and the number of elements in each cluster
c_means = points.sample(k,rng).clone
c_cnt = Array.new(k,0)
# arrays for each point distance to nearest cluster and the nearest cluster id
p_dist = Array.new(points.size) do 1/0.0 end
p_cluster = Array.new(points.size) do rng.rand(0 ... k) end
 
# choose one center uniformly at random among data points
c_means = [points.sample.clone]
# to select the k-1 remaining centers
(1 ... k).each do |_|
# For each data point compute the distance (d(x)) and the nearest cluster center.
(0 ... points.size).each do |p_index|
(0 ... c_means.size).each do |c_index|
d = distance.call(points[p_index],c_means[c_index])
if d < p_dist[p_index]
p_dist[p_index] = d
p_cluster[p_index] = c_index
end
end
end
# choose one new data point at random as a new center with a weighted
# probability distribution where a point is chosen with probability
# proportional to its squared distance. (d(x)^2)
sum = 0.0
(0 ... p_dist.size).each do |p_index|
p_dist[p_index] = p_dist[p_index]**2
sum += p_dist[p_index]
end
sum *= rng.rand(0.0 .. 1.0)
(0 ... points.size).each do |p_index|
sum -= p_dist[p_index]
next if sum > 0
c_means.push(points[p_index].clone)
break
end
end
# ---------------------------------------------------------------------------
# kmeans clustering
# ---------------------------------------------------------------------------
# with the previous cluster centers, the kmeans naive algorithm is performed
# until the convergence_threshold is achieved
changes_cnt = points.size
while (changes_cnt.to_f/(1.0-convergence_threshold)) >= points.size
 
changes_cnt = 0
 
# assign each point to the nearest cluster
(0 ... points.size).each do |p_index|
nearest_c_index = (0 ... k).min_by do |c_index|
distance.call(c_means[c_index],points[p_index])
end
changes_cnt += (p_cluster[p_index] != nearest_c_index) ? 1 : 0
p_cluster[p_index] = nearest_c_index
end
 
# use the points of each cluster to calculate its center using the mean
# Reset means
p_dim = points[0].size
(0 ... k).each do |c_index|
(0 ... p_dim).each do |x_index|
c_means[c_index][x_index] = 0.0
c_cnt[c_index] = 0
end
end
 
# calculate the mean of the points of each cluster
(0 ... points.size).each do |p_index|
c_index = p_cluster[p_index]
c_cnt[c_index] += 1
(0 ... p_dim).each do |x_index|
c_means[c_index][x_index] += points[p_index][x_index]
end
end
(0 ... k).each do |c_index|
(0 ... p_dim).each do |x_index|
c_means[c_index][x_index] /= c_cnt[c_index].to_f
end
end
end
 
# return the center of each cluster and the membership of each point
return c_means,p_cluster
end
 
# the euclidean distance is used in the kmeans++-algorithm
def euclidean_distance(pa,pb)
return (0 ... pa.size).each.reduce(0.0) do |s,i| s + (pa[i] - pb[i])**2 end
end
 
# alternative distance
def manhattan_distance(pa,pb)
return (0 ... pa.size).each.reduce(0.0) do |s,i| s + (pa[i] - pb[i]).abs end
end
</syntaxhighlight>
 
 
Extra functions
 
 
<syntaxhighlight lang="crystal">
 
# -----------------------------------------------------------------------------
# Function to exercise the code, that generates a list of random points
# -----------------------------------------------------------------------------
 
# Generates a cluster of random points in a unitary circle
#
def uniform_cluster(num_points,radius = 1.0,center = [0.0,0.0],rng=Random::DEFAULT)
return Array.new(num_points) do |_|
r = radius*Math.sqrt(rng.rand(0.0 .. 1.0))
angle = rng.rand(0.0 .. 2*Math::PI)
[center[0] + r*Math.cos(angle), center[1] + r*Math.sin(angle)]
end
end
 
# Generates an n-dimensional cluster with Gaussian distribution
def gaussian_cluster(num_points,stdev=1.0,center=[0.0,0.0],rng=Random::DEFAULT)
dimensions = center.size
return Array.new(num_points) do
Array.new(dimensions) do |d|
sum = 0.0
12.times do sum += rng.rand(0.0 .. 1.0) end
sum -= 6.0
center[d] + (sum * stdev)
end
end
end
 
# -----------------------------------------------------------------------------
# Visualization of the results
# -----------------------------------------------------------------------------
 
# This functions creates an svg file with the points, the cluster centers and
# the classification of each point.
#
def plot(cluster_means,points,points_cluster,fname="kmeans_output.svg")
# Mapping the points to the interval (0 .. 1)
xmin,xmax = points.minmax_by do |p| p[0] end.map(&.[0])
ymin,ymax = points.minmax_by do |p| p[1] end.map(&.[1])
xspan = (xmax-xmin) + 1e-12
yspan = (ymax-ymin) + 1e-12
points = points.map do |p|
x = (p[0] - xmin) / xspan
y = (ymin - p[1]) / yspan
[x,y]
end
cluster_means = cluster_means.map do |p|
x = (p[0] - xmin) / xspan
y = (ymin - p[1]) / yspan
[x,y]
end
 
# Generate the file
File.open(fname,"w+") do |io|
io.puts(%(<svg xmlns="http://www.w3.org/2000/svg" version="1.1" viewBox="-0.05 -1.05 1.1 1.1">))
(0 ... points.size).each do |p_index|
p = points[p_index]
c = points_cluster[p_index] * (330.0 / cluster_means.size)
io.puts(%(<circle cx="#{p[0]}" cy="#{p[1]}" r="0.005" fill="hsl(#{c} 60% 50%)" stroke="none"></circle>))
end
(0 ... cluster_means.size).each do |c_index|
p = cluster_means[c_index]
c = c_index * (330.0 / cluster_means.size)
io.puts(%(<circle cx="#{p[0]}" cy="#{p[1]}" r="0.02" fill="hsl(50 100% 60%)"
stroke-width="0.004" stroke="hsl(52 100% 0%)"></circle>))
end
io.puts(%(</svg>))
end
end
 
</syntaxhighlight>
 
Examples
 
[[File:Clustering-using-k-means-pp-and-euclidean-distance.svg|250x250px|thumb|Clustering of random points in a unitary circle using k-means ++ and euclidean distance]]
 
[[File:Clustering-using-k-means-pp-and-manhattan-distance.svg|250x250px|thumb|Clustering of random points in a unitary circle using k-means ++ and manhattan distance]]
 
[[File:Gaussian-clustering.svg|250x250px|thumb|alt=Kmeans++ algorithm on gaussian clusters|Kmeans++ algorithm on gaussian clusters]]
 
 
<syntaxhighlight lang="crystal">
rngseed = 19
 
# Basic usage
points = uniform_cluster(num_points: 30000,rng: Random.new(rngseed))
cluster_center,point_cluster = kmeans(6, points, rng: Random.new(rngseed))
plot(cluster_center,points,point_cluster,fname: "clustering-using-k-means-pp.svg")
 
# Using another distance
points = uniform_cluster(num_points: 30000,rng: Random.new(rngseed))
cluster_center,point_cluster = kmeans(6, points, rng: Random.new(rngseed),
distance: ->manhattan_distance(Array(Float64),Array(Float64)))
plot(cluster_center,points,point_cluster,fname: "clustering-using-k-means-pp-and-manhattan.svg")
 
# difficult case
points = [] of Array(Float64)
rng = Random.new(rngseed)
points += gaussian_cluster(num_points: 10000,stdev: 0.5,center: [0.0,0.0],rng: rng)
points += gaussian_cluster(num_points: 10000,stdev: 0.5,center: [2.0,3.0],rng: rng)
points += gaussian_cluster(num_points: 10000,stdev: 0.5,center: [2.5,-1.0],rng: rng)
points += gaussian_cluster(num_points: 10000,stdev: 0.5,center: [6.0,0.0],rng: rng)
cluster_center,point_cluster = kmeans(4, points, rng: Random.new(rngseed))
plot(cluster_center,points,point_cluster,fname: "gaussian-clustering.svg")
 
# 5d-data
points = [] of Array(Float64)
rng = Random.new(rngseed)
points += gaussian_cluster(num_points: 5000,stdev: 0.5, center: [2.0,0.0,0.0,0.0,0.0],rng: rng)
points += gaussian_cluster(num_points: 5000,stdev: 0.5, center: [0.0,2.0,0.0,0.0,0.0],rng: rng)
points += gaussian_cluster(num_points: 5000,stdev: 0.5, center: [0.0,0.0,2.0,0.0,0.0],rng: rng)
points += gaussian_cluster(num_points: 5000,stdev: 0.5, center: [0.0,0.0,0.0,2.0,0.0],rng: rng)
points += gaussian_cluster(num_points: 5000,stdev: 0.5, center: [0.0,0.0,0.0,0.0,2.0],rng: rng)
cluster_center,point_cluster = kmeans(5, points, convergence_threshold:0.99999)
puts(cluster_center.map(&.map(&.round(2))).join("\n"))
 
</syntaxhighlight>
 
Output shows that centroids were found correctly.
 
 
<pre>
#output for 5d data
[0.01, -0.0, 2.0, 0.01, 0.0]
[-0.0, 0.0, -0.0, -0.0, 2.0]
[-0.01, 2.01, 0.01, 0.01, -0.01]
[0.0, 0.0, -0.01, 2.01, 0.0]
[2.0, 0.01, -0.0, 0.0, 0.01]
</pre>
 
 
=={{header|D}}==
{{trans|C}}
<langsyntaxhighlight lang="d">import std.stdio, std.math, std.random, std.typecons, std.algorithm;
 
// On Windows this uses the printf from the Microsoft C runtime,
// ...
const clusterCenters = lloyd(points, nClusters, rnd);
printEps(points, clusterCenters);
}</syntaxhighlight>
Compiled with ldc2 it's about as fast as the C entry.
 
=={{header|Delphi}}==
{{works with|Delphi|6.0}}
{{libheader|Types,ExtCtrls,Graphics}}
Translated from XPL0.
 
The XPL0 version was simple, straightforward, and clear. In other words, it was too good to try to develop a different one. For clarity and simplicity, I did convert some separate components into structs.
 
 
<syntaxhighlight lang="Delphi">
 
const N = 30000; {number of points}
const K = 6; {number of clusters}
 
var ScreenSize,Center: TPoint;
var Radius: integer;
 
var Points: array [0..N-1] of TPoint; {coordinates of points and their cluster}
var Pc: array [0..N-1] of TColor;
var Cent: array [0..K-1] of TPoint; {coordinates of centroid of cluster}
 
const Palette: array [0..5] of TColor =(
$00 or ($00 shl 8) or ($AA shl 16),
$00 or ($AA shl 8) or ($00 shl 16),
$00 or ($AA shl 8) or ($AA shl 16),
$AA or ($00 shl 8) or ($00 shl 16),
$AA or ($00 shl 8) or ($AA shl 16),
$AA or ($55 shl 8) or ($00 shl 16));
 
{These would normally be in a separate library}
{Shown here for clarity}
 
 
procedure ClearImage(Image: TImage; Color: TColor);
var R: TRect;
begin
R:=Rect(0,0,Image.Picture.Bitmap.Width,Image.Picture.Bitmap.Height);
Image.Canvas.Brush.Color:=Color;
Image.Canvas.Brush.Style:=bsSolid;
Image.Canvas.Pen.Mode:=pmCopy;
Image.Canvas.Pen.Style:=psSolid;
Image.Canvas.Pen.Color:=Color;
Image.Canvas.Rectangle(R);
Image.Invalidate;
end;
 
 
function PointAdd(V1,V2: TPoint): TPoint;
{Add V1 and V2}
begin
Result.X:= V1.X+V2.X;
Result.Y:= V1.Y+V2.Y;
end;
 
function PointScalarDivide(V: TPoint; S: double): TPoint;
{Divide vector by scalar}
begin
Result.X:=Trunc(V.X/S);
Result.Y:=Trunc(V.Y/S);
end;
 
{--------------- Main Program -------------------------------------------------}
 
function Centroid: boolean;
{Find new centroids of points grouped with current centroids}
var Change: boolean;
var C0: array [0..K-1] of TPoint;
var C, Count, I: integer;
begin
Change:= false;
for C:= 0 to K-1 do {for each centroid...}
begin
C0[C]:= Cent[C]; {save current centroid}
Cent[C]:=Point(0,0); Count:= 0; {find new centroid}
for I:= 0 to N-1 do {for all points}
if Pc[I] = Palette[C] then { grouped with current centroid...}
begin
Cent[C]:=PointAdd(Cent[C],Points[I]);
Count:= Count+1;
end;
Cent[C]:=PointScalarDivide(Cent[C],Count);
if (Cent[C].X<>C0[C].X) or (Cent[C].Y<>C0[C].Y) then Change:= true;
end;
Result:=Change;
end;
 
 
procedure Voronoi;
{Group points with their nearest centroid}
var D2, MinD2, I, C: integer; {distance squared, minimum distance squared}
begin
for I:= 0 to N-1 do {for each point...}
begin
MinD2:= High(Integer); {find closest centroid}
for C:= 0 to K-1 do
begin
D2:= sqr(Points[I].X-Cent[C].X) + sqr(Points[I].Y-Cent[C].Y);
if D2 < MinD2 then
begin
{update closest centroid}
MinD2:= D2;
Pc[I]:= Palette[C];
end;
end;
end;
end;
 
 
procedure KMeans(Image: TImage);
{Group points into K clusters}
var Change: boolean;
var I: integer;
begin
repeat
begin
Voronoi;
Change:= Centroid;
for I:= 0 to N-1 do Image.Canvas.Pixels[Points[I].X, Points[I].Y]:=Pc[I]+1;
Image.Repaint;
for I:= 0 to K-1 do Image.Canvas.Pixels[Cent[I].X, Cent[I].Y]:=clWhite;
Image.Repaint;
end
until Change = false;
end;
 
 
procedure PolarRandom(var P: TPoint);
{Return random X,Y biased for polar coordinates}
var A, D: double;
begin
D:=Random(Radius); {distance: 0..239}
A:=Random(314159*2) / 10000; {angle: 0..2pi}
{rectangular coords centered on screen}
P:=PointAdd(Point(Trunc(D*Cos(A)),Trunc(D*Sin(A))),Center);
end;
 
 
procedure ConfigureScreen(Image: TImage);
{Configure screen constants to match the current state of Image}
begin
ScreenSize:=Point(Image.Width,Image.Height);
Center:=Point(Image.Width div 2,Image.Height div 2);
if Center.X<Center.Y then Radius:=Center.X
else Radius:=Center.Y;
end;
 
 
procedure DoKMeansClustering(Image: TImage);
var I: integer;
begin
ConfigureScreen(Image);
ClearImage(Image,clBlack);
for I:= 0 to N-1 do PolarRandom(Points[I]); {random set of points}
for I:= 0 to K-1 do PolarRandom(Cent[I]); {random set of cluster centroids}
KMeans(Image);
end;
 
 
</syntaxhighlight>
{{out}}
[[File:DelphiKMeanCluster.png|none]]
 
=={{header|Euler Math Toolbox}}==
 
<syntaxhighlight lang="euler math toolbox">
>type kmeanscluster
function kmeanscluster (x: numerical, k: index)
  ...
return j
endfunction
</syntaxhighlight>
 
Let us apply to random data.
 
<syntaxhighlight lang="euler math toolbox">
>load clustering.e
Functions for clustering data.
  ...
> loop 1 to k; plot2d(m[#,1],m[#,2],points=1,style="o#",add=1); end; ...
> insimg;
</syntaxhighlight>
 
[[File:ClusteringEulerMathToolbox.png]]
=={{header|Fortran}}==
{{works with|GNU Fortran |6.1.1 20160802}}
 
<syntaxhighlight lang="fortran">
***********************************************************************
* KMPP - K-Means++ - Traditional data clustering with a special initialization
*     ...
END ! of test program
</syntaxhighlight>
 
 
Uniform random points over the unit circle (compare to the solution in C)
[[File:kmeans_FORTRAN_round_points.gif]]
[http://13olive.net/kmeans_FORTRAN_points.png (External image)]
 
The points with clusters marked by color:
[[File:kmeans_FORTRAN_round_clusters.gif]]
[http://13olive.net/kmeans_FORTRAN_clusters.png (External image)]
 
=={{header|Go}}==
<langsyntaxhighlight lang="go">package main
 
import (
// ...
fmt.Println(err)
}
}</syntaxhighlight>
Text output:
<pre>
Line 1,160 ⟶ 1,746:
Vectors are represented as lists, so the solution could be extended to any space dimension.
 
<langsyntaxhighlight lang="haskell">{-# LANGUAGE Strict,FlexibleInstances #-}
module KMeans where
-- ...
 
mkCluster n s m = take n . transpose <$> mapM randomsAround m
  where randomsAround x0 = map (\x -> x0+s*atanh x) <$> getRandomRs (-1,1)</syntaxhighlight>
 
'''Examples'''
 
<langsyntaxhighlight lang="haskell">module Main where
 
import Graphics.EasyPlot
-- ...
where
listPlot = Data2D [Title "",Style Dots] [] . map (\(x:y:_) -> (x,y))
</syntaxhighlight>
Result: all centroids and clusters are found.
<pre>λ> test
...
</pre>
 
=={{header|Huginn}}==
<langsyntaxhighlight lang="huginn">#! /bin/sh
exec huginn -E "${0}" "${@}"
#! huginn
// ...
}
print( "\n%%%%EOF\n" );
}</syntaxhighlight>
 
=={{header|J}}==
 
'''Solution''':<syntaxhighlight lang="j"> NB. Selection of initial centroids, per K-means++
initialCentroids =: (] , randomCentroid)^:(<:@:]`(,:@:seedCentroid@:[))~
seedCentroid =: {~ ?@#
NB. ...
centroids =: ([ mean/.~ closestCentroid)^:(]`_:`initialCentroids)
closestCentroid =: [: (i.<./)"1 distance/
mean =: +/ % #</syntaxhighlight>
 
'''Extra credit''':<syntaxhighlight lang="j"> randMatrix =: ?@$&0 NB. Extra credit #1
packPoints =: <"1@:|: NB. Extra credit #2: Visualization code due to Max Harms http://j.mp/l8L45V
plotClusters =: dyad define NB. as is the example image in this task
NB. ...
 
NB. Extra credit #4: Polar coordinates are not available in this version
NB. but wouldn't be hard to provide with &.cartToPole .</syntaxhighlight>
 
'''Example''':<syntaxhighlight lang="j"> plotRandomClusters =: 3&$: : (dyad define)
dataset =. randMatrix 2 {. y,2
 
NB. ...
plotRandomClusters 300 NB. 300 points, 3 clusters
6 plotRandomClusters 30000 NB. 3e4 points, 6 clusters
10 plotRandomClusters 17000 5 NB. 17e3 points, 10 clusters, 5 dimensions</syntaxhighlight>
 
=={{header|Java}}==
{{trans|C}}
<langsyntaxhighlight lang="java">
import java.util.Random;
 
// ...
}
</syntaxhighlight>
 
=={{header|JavaScript}}==
{{improve|JavaScript|live demo link broken/404}}
'''Solution'''
 
Live Demo (Extra Credit #2) [https://ezward.github.io/kmeans-javascript/kmeansrandom.html KMeans++ in JavaScript]
 
<langsyntaxhighlight lang="javascript">
/**
* kmeans module
 * ...
 */
 
 
</syntaxhighlight>
 
=={{header|jq}}==
'''Adapted from [[#Wren]]'''
{{works with|jq}}
 
The jq program shown here generates PostScript.
 
Since jq does not currently include a PRNG, we will use /dev/urandom as a source of entropy,
as per the following bash script, which invokes jq to generate 30,000 points
in accordance with the task description. The result of one run of this script is shown at
https://imgur.com/gallery/F0q1yO6
<syntaxhighlight lang="sh">#!/bin/bash
export LC_ALL=C
# Generate $1 pseudo-random integers of width $2.
# The integers may have leading 0s
function prng {
cat /dev/urandom | tr -cd '0-9' | fold -w "$2" | head -n "$1"
}
 
PTS=30000
prng $((4 * PTS)) 3 | jq -nr --argjson PTS $PTS -f k-means++-clustering.jq
</syntaxhighlight>
 
'''k-means++-clustering.jq'''
<syntaxhighlight lang="jq">
# A Point is represented by a JSON array: [x, y, group]
 
def hugeVal: infinite;
def RAND_MAX : 1000;
 
def PTS: $PTS;
def K : 6;
def W : 400;
def H : 400;
def rand: input | tonumber;
def randf(m): m * rand / (RAND_MAX - 1);
 
def pi: 1 | atan * 4;
 
# Pseudo-randomly generate `count` points in a circle with the given radius
def genXY(count; radius):
# note: this is not a uniform 2-d distribution
pi as $pi
| reduce range(0; count) as $i ([];
.[$i] = [0, 0, 0]
| randf(2 * $pi) as $ang
| randf(radius) as $r
| .[$i][0] = $r * ($ang|cos)
| .[$i][1] = $r * ($ang|sin) ) ;
 
def dist2($a; $b):
($a[0] - $b[0]) as $x
| ($a[1] - $b[1]) as $y
| $x * $x + $y * $y;
 
# output: {minD, minI}
def nearest($pt; $cent; $nCluster):
{ minD : hugeVal,
minI : $pt[2] }
| reduce range(0; $nCluster) as $i (.;
dist2($cent[$i]; $pt) as $d
| if .minD > $d
then .minD = $d
| .minI = $i
else .
end ) ;
 
# input: {pts, cent}
# output: ditto
def kpp(len):
(.cent|length) as $nCent
| .cent[0] = .pts[rand % len]
| . + { d: []}
| reduce range(1; $nCent) as $nCluster (.;
.sum = 0
| reduce range(0; len) as $j (.;
.d[$j] = nearest(.pts[$j]; .cent; $nCluster).minD
| .sum += .d[$j] )
| .sum = randf(.sum)
| label $out
| .emit = null
# Be sure to emit something:
| foreach (range(0; len), null) as $j (.;
if $j == null then .emit = .
else .sum -= .d[$j]
| if .sum > 0
then .
else .cent[$nCluster] = .pts[$j]
| .emit = .
end
end;
select(.emit) | (.emit, break $out)
) )
| reduce range(0; len) as $j (.;
.pts[$j][2] = nearest(.pts[$j]; .cent; $nCent).minI ) ;
 
# Input: an array of Point (.pts)
# Output: {pts, cent}
def lloyd(len; nCluster):
{pts: .,
cent: [range(0; nCluster) | [0,0,0]]}
| kpp(len)
 
# stop when > 99.9% of points are good
| until( .changed > (len / 1E4);
 
# group elements of centroids are used as counters
reduce range(0; nCluster) as $i (.; .cent[$i] = [0,0,0])
| reduce range(0; len) as $j (.;
.pts[$j] as $p
| .cent[$p[2]] |= [ .[0]+$p[0], .[1]+$p[1], .[2] + 1] )
| reduce range(0; nCluster) as $i (.;
(.cent[$i][2] | if . == 0 then 0.0001 else . end) as $divisor
| .cent[$i] |= [ (.[0]/$divisor), (.[1]/$divisor), .[2] ] )
| .changed = 0
# find closest centroid of each point
| reduce range(0; len) as $j (.;
.pts[$j] as $p
| nearest($p; .cent; nCluster).minI as $minI
| if $minI != $p[2]
then .changed += 1
| .pts[$j][2] = $minI
else .
end) )
| .cent |= reduce range(0; nCluster) as $i (.; .[$i][2] = $i ) ;
 
def printEps($pts; len; cent; nCluster):
 
def f1($x;$y;$z): "\($x) \($y) \($z) setrgbcolor";
def f2($x;$y) : "\($x) \($y) c";
def f3($x;$y) : "\n0 setgray \($x) \($y) s";
 
def colors:
reduce range(0; nCluster) as $i ([];
.[3 * $i + 0] = (3 * ($i + 1) % 11) / 11
| .[3 * $i + 1] = (7 * $i % 11) / 11
| .[3 * $i + 2] = (9 * $i % 11) / 11 );
 
{colors: colors}
| .minX = hugeVal
| .minY = hugeVal
| .maxX = -hugeVal
| .maxY = -hugeVal
| reduce range(0; len) as $j (.;
$pts[$j] as $p
| if .maxX < $p[0] then .maxX = $p[0] else . end
| if .minX > $p[0] then .minX = $p[0] else . end
| if .maxY < $p[1] then .maxY = $p[1] else . end
| if .minY > $p[1] then .minY = $p[1] else . end )
| ([((W / (.maxX - .minX))), ((H / (.maxY - .minY)))] | min) as $scale
| ( (.maxX + .minX) / 2) as $cx
| ( (.maxY + .minY) / 2) as $cy
| "%!PS-Adobe-3.0\n%%BoundingBox: -5 -5 \(W + 10) \(H + 10)",
"/l {rlineto} def /m {rmoveto} def",
"/c { .25 sub exch .25 sub exch .5 0 360 arc fill } def",
"/s { moveto -2 0 m 2 2 l 2 -2 l -2 -2 l closepath ",
" gsave 1 setgray fill grestore gsave 3 setlinewidth",
" 1 setgray stroke grestore 0 setgray stroke }def",
(range(0; nCluster) as $i
| cent[$i] as $c
| f1(.colors[3 * $i]; .colors[3 * $i + 1]; .colors[3 * $i + 2]),
( range(0; len) as $j
| $pts[$j] as $p
| if $p[2] == $i
then f2( ($p[0] - $cx) * $scale + W / 2; ($p[1] - $cy) * $scale + H / 2)
else empty
end ),
f3( ($c[0] - $cx) * $scale + W / 2; ($c[1] - $cy) * $scale + H / 2)),
"\n%%EOF"
;
 
# The required clustering function with two arguments.
# It returns {pts, cent}
# where .pts and .cent are arrays of Points,
# with the cluster id as the third item.
#
def cluster($nCluster; $points):
$points
| lloyd( length; $nCluster);
 
# The task:
genXY(PTS; 10)
| lloyd(PTS; K) as { pts: $pts, cent: $cent }
| printEps($pts; PTS; $cent; K)</syntaxhighlight>
 
{{out}}
See https://imgur.com/gallery/F0q1yO6
 
=={{header|Julia}}==
<langsyntaxhighlight lang="julia"># run via Julia REPL
using Clustering, MakiePlots, DataFrames, RDatasets
 
const iris = dataset("datasets", "iris")
plt1 = plot(title= "Species Classification", xlabel = "Sepal Width", ylabel = "Sepal length")
const colors = [:red, :green, :blue]
plt2 = plot(title= "Kmeans++ Classification", xlabel = "Sepal Width", ylabel = "Sepal length")
const plt = Vector{Any}(undef,2)
 
scene1 = Scene()
scene2 = Scene()
 
for (i, sp) in enumerate(unique(iris[!, :Species]))
idx = iris[!, :Species] .== sp
selspwidth, splength = iris[idx, [:SepalWidth], iris[idx, :SepalLength]]
plt[1] = scatter!(scene1plt1, sel[1]spwidth, sel[2]splength, color = colors[:red, :green, :blue][i], legend = false)
limits = FRect(1.5, 4.0, 3.0, 4.0))
end
 
features = permutedimscollect(convertMatrix(Arrayiris[!, iris[1:4]), [2, 1]')
 
# K Means ++
result = kmeans(features, 3, init = :kmppKmppAlg()) # set to 3 clusters with kmeans++ :kmpp
 
for center in unique(result.assignments)
idx = result.assignments .== center
selspwidth, splength = iris[idx, [:SepalWidth], iris[idx, :SepalLength]]
plt[2] = scatter!(scene2plt2, sel[1]spwidth, sel[2]splength, color = colors[:green, :red, :blue][center], legend = false)
limits = FRect(1.5, 4.0, 3.0, 4.0))
end
 
plot(plt1, plt2)
scene2[Axis][:names][:axisnames] = scene1[Axis][:names][:axisnames] =
</syntaxhighlight>
("Sepal Width", "Sepal Length")
t1 = text(Theme(), "Species Classification", camera=campixel!)
t2 = text(Theme(), "Kmeans Classification", camera=campixel!)
vbox(hbox(plt[1], t1), hbox(plt[2], t2))
</lang>
 
=={{header|Kotlin}}==
As in the case of the C example, the data is partitioned into 11 clusters, though unlike C (which doesn't use srand) the output will be different each time the program is run.
<langsyntaxhighlight lang="scala">// version 1.2.21
 
import java.util.Random
// ...
val c = lloyd(v, PTS, K)
printEps(v, PTS, c, K)
}</syntaxhighlight>
 
=={{header|Lua}}==
{{works with|lua|5.3}}
 
<langsyntaxhighlight lang="lua">
 
local function load_data(npoints, radius)
-- ...
-- end
print_eps(data, N_CLUSTERS, centers, cluster)
</syntaxhighlight>
 
=={{header|Mathematica}}/{{header|Wolfram Language}}==
'''Solution''': the initial k-means code comes from http://mathematica.stackexchange.com/questions/7441/k-means-clustering, extended here to k-means++ by introducing the function <code>initM</code>. (Pictures of the result could not be uploaded.)
<syntaxhighlight lang="text">initM[list_List, k_Integer, distFunc_Symbol] :=
Module[{m = {RandomChoice[list]}, n, d},
While[Length[m] < k,
(* ... *)
{clusters, m}
]
];</syntaxhighlight>
 
Extra credit:<br>
=={{header|Nim}}==
{{trans|Python, C}}
<langsyntaxhighlight lang="nim">#
# compile:
# nim c -d:release kmeans.nim
# ...
printEps(points, clusterCentrs)
 
main()</syntaxhighlight>
 
=={{header|Phix}}==
{{trans|Go}}
I nicked the initial dataset creation from Go, as an alternative.
<langsyntaxhighlight Phixlang="phix">-- demo\rosetta\K_means_clustering.exw
-- Press F5 to restart
include pGUI.e
-- ...
end procedure
 
main()</syntaxhighlight>
Probably the hardest part of handling more than 2 dimensions would be deleting all
the GUI code, or modifying it to produce an n-dimensional representation.
=={{header|Python}}==
{{trans|D}}
<langsyntaxhighlight lang="python">from math import pi, sin, cos
from collections import namedtuple
from random import random, choice
# ...
 
 
main()</syntaxhighlight>
 
=={{header|Racket}}==
The k-means clustering:
<langsyntaxhighlight lang="racket">
#lang racket
(require racket/dict
;; ...
(for/and ([a (in-list c1)] [b (in-list c2)])
(< ((metric) a b) (* 0.001 min-distance))))
</syntaxhighlight>
 
Initialization methods
 
<langsyntaxhighlight lang="racket">
;; picks k points from a dataset randomly
(define (random-choice data k)
;; ...
(sample (discrete-dist data weights)))
(cons new-centroid centroids)))
</syntaxhighlight>
 
Different metrics
 
<langsyntaxhighlight lang="racket">
(define (euclidean-distance a b)
(for/sum ([x (in-vector a)] [y (in-vector b)])
;; ...
(define metric (make-parameter euclidean-distance))
(define (distanse-to x) (curry (metric) x))
</syntaxhighlight>
 
The fixed point operator
 
<langsyntaxhighlight lang="racket">
(define (fixed-point f x0 #:same-test [same? equal?])
(let loop ([x x0] [fx (f x0)])
(if (same? x fx) fx (loop fx (f fx)))))
</syntaxhighlight>
 
Creating sample clusters
 
 
<langsyntaxhighlight lang="racket">
(define (gaussian-cluster N
#:stdev (σ 1)
;; ...
(define φ (* 2 pi (sample (uniform-dist))))
(vector-map + r0 (vector (* r (cos φ)) (* r (sin φ))))))
</syntaxhighlight>
 
Visualization
 
<langsyntaxhighlight lang="racket">
(require plot)
 
;; ...
#:line-width 3)))
#:title (format "Initializing by ~a" (object-name method)))))
</syntaxhighlight>
 
Testing
 
<langsyntaxhighlight lang="racket">
(module+ test
(define circle (uniform-cluster 30000))
;; ...
(parameterize ([metric manhattan-distance])
(show-clustering circle 6)))
</syntaxhighlight>
 
The difficult case.
 
<langsyntaxhighlight lang="racket">
(module+ test
(define clouds
;; ...
; using standard k-means method
(show-clustering clouds 4 #:method random-choice))
</syntaxhighlight>
 
Multi-dimensional case.
 
<langsyntaxhighlight lang="racket">
(module+ test
(define 5d-data
;; ...
(map (curry vector-map round) centroids))
</syntaxhighlight>
Output shows that centroids were found correctly.
<pre>
...
</pre>

=={{header|Raku}}==
{{works with|rakudo|2015-09-16}}
We use Complex numbers to represent points in the plane. We feed the algorithm three artificially made clouds of points so we can easily see if the output makes sense.
<syntaxhighlight lang="raku" perl6line>sub postfix:«-means++»(Int $K) {
return sub (@data) {
my @means = @data.pick;
Line 3,731 ⟶ 4,499:
my @data = flat @centers.map: { ($_ + .5 - rand + (.5 - rand) * i) xx 100 }
@data.=pick(*);
.say for 3-means++(@data);</syntaxhighlight>
 
{{out}}
=={{header|Rust}}==
{{trans|Python}} (the initial point selection part)
<langsyntaxhighlight lang="rust">extern crate csv;
extern crate getopts;
extern crate gnuplot;
// ...
}
}
</syntaxhighlight>
[Plots exist but file upload is broken at the moment.]
 
=={{header|Scheme}}==

Nothing special is needed to handle multiple dimensions: all points are represented as lists, which the Euclidean distance function works through in a loop.
 
<langsyntaxhighlight lang="scheme">
(import (scheme base) ; headers for R7RS Scheme
(scheme file)
; ...
(tester-2 30000 6 10)
(tester-3 30000 6 5)
</syntaxhighlight>
 
Images in eps files are output for the 2D unit square and unit circle.
=={{header|SequenceL}}==
{{trans|C}}
<langsyntaxhighlight lang="sequencel">
import <Utilities/Sequence.sl>;
import <Utilities/Random.sl>;
// ...
in
printEPS(points.first, result.first, result.second,k,10.0);
</syntaxhighlight>
 
=={{header|Tcl}}==
{{trans|C}}{{tcllib|math::constants}}
<langsyntaxhighlight lang="tcl">package require Tcl 8.5
package require math::constants
math::constants::constants pi
# ...
}
return $cent
}</syntaxhighlight>
Demonstration/visualization code:
{{libheader|Tk}}
<langsyntaxhighlight lang="tcl">package require Tk
image create photo disp -width 400 -height 400
pack [label .l -image disp]
Line 4,535 ⟶ 5,303:
plot $cx $cy black
}
}}</syntaxhighlight>
 
=={{header|Wren}}==
{{trans|Kotlin}}
{{libheader|Wren-dynamic}}
{{libheader|Wren-fmt}}
<syntaxhighlight lang="wren">import "random" for Random
import "./dynamic" for Struct
import "./fmt" for Fmt
 
var Point = Struct.create("Point", ["x", "y", "group"])
 
var r = Random.new()
var hugeVal = Num.infinity
 
var RAND_MAX = Num.maxSafeInteger
var PTS = 100000
var K = 11
var W = 400
var H = 400
 
var rand = Fn.new { r.int(RAND_MAX) }
var randf = Fn.new { |m| m * rand.call() / (RAND_MAX - 1) }
 
var genXY = Fn.new { |count, radius|
var pts = List.filled(count, null)
 
/* note: this is not a uniform 2-d distribution */
for (i in 0...count) {
pts[i] = Point.new(0, 0, 0)
var ang = randf.call(2 * Num.pi)
var r = randf.call(radius)
pts[i].x = r * ang.cos
pts[i].y = r * ang.sin
}
return pts
}
 
var dist2 = Fn.new { |a, b|
var x = a.x - b.x
var y = a.y - b.y
return x * x + y * y
}
 
var nearest = Fn.new { |pt, cent, nCluster|
var minD = hugeVal
var minI = pt.group
for (i in 0...nCluster) {
var d = dist2.call(cent[i], pt)
if (minD > d) {
minD = d
minI = i
}
}
return [minI, minD]
}
 
var copyPoint = Fn.new { |pt| Point.new(pt.x, pt.y, pt.group) }
 
var kpp = Fn.new { |pts, len, cent|
var nCent = cent.count
var d = List.filled(len, 0)
cent[0] = copyPoint.call(pts[rand.call() % len])
for (nCluster in 1...nCent) {
var sum = 0
for (j in 0...len) {
d[j] = nearest.call(pts[j], cent, nCluster)[1]
sum = sum + d[j]
}
sum = randf.call(sum)
for (j in 0...len) {
sum = sum - d[j]
if (sum > 0) continue
cent[nCluster] = copyPoint.call(pts[j])
break
}
}
for (j in 0...len) pts[j].group = nearest.call(pts[j], cent, nCent)[0]
}
 
var lloyd = Fn.new { |pts, len, nCluster|
var cent = List.filled(nCluster, null)
for (i in 0...nCluster) cent[i] = Point.new(0, 0, 0)
kpp.call(pts, len, cent)
while(true) {
/* group element for centroids are used as counters */
for (i in 0...nCluster) {
cent[i].x = 0
cent[i].y = 0
cent[i].group = 0
}
for (j in 0...len) {
var p = pts[j]
var c = cent[p.group]
c.group = c.group + 1
c.x = c.x + p.x
c.y = c.y + p.y
}
for (i in 0...nCluster) {
var c = cent[i]
c.x = c.x / c.group
c.y = c.y / c.group
}
var changed = 0
 
/* find closest centroid of each point */
for (j in 0...len) {
var p = pts[j]
var minI = nearest.call(p, cent, nCluster)[0]
if (minI != p.group) {
changed = changed + 1
p.group = minI
}
}
 
/* stop when 99.9% of points are good */
if (changed <= (len >> 10)) break
}
for (i in 0...nCluster) cent[i].group = i
return cent
}
 
var printEps = Fn.new { |pts, len, cent, nCluster|
var colors = List.filled(nCluster * 3, 0)
for (i in 0...nCluster) {
colors[3 * i + 0] = (3 * (i + 1) % 11) / 11
colors[3 * i + 1] = (7 * i % 11) / 11
colors[3 * i + 2] = (9 * i % 11) / 11
}
var minX = hugeVal
var minY = hugeVal
var maxX = -hugeVal
var maxY = -hugeVal
for (j in 0...len) {
var p = pts[j]
if (maxX < p.x) maxX = p.x
if (minX > p.x) minX = p.x
if (maxY < p.y) maxY = p.y
if (minY > p.y) minY = p.y
}
var scale = (W / (maxX - minX)).min(H / (maxY - minY))
var cx = (maxX + minX) / 2
var cy = (maxY + minY) / 2
 
System.print("\%!PS-Adobe-3.0\n\%\%BoundingBox: -5 -5 %(W + 10) %(H + 10)")
System.print("/l {rlineto} def /m {rmoveto} def")
System.print("/c { .25 sub exch .25 sub exch .5 0 360 arc fill } def")
System.write("/s { moveto -2 0 m 2 2 l 2 -2 l -2 -2 l closepath ")
System.write(" gsave 1 setgray fill grestore gsave 3 setlinewidth")
System.print(" 1 setgray stroke grestore 0 setgray stroke }def")
var f1 = "$g $g $g setrgbcolor"
var f2 = "$.3f $.3f c"
var f3 = "\n0 setgray $g $g s"
for (i in 0...nCluster) {
var c = cent[i]
Fmt.print(f1, colors[3 * i], colors[3 * i + 1], colors[3 * i + 2])
for (j in 0...len) {
var p = pts[j]
if (p.group != i) continue
Fmt.print(f2, (p.x - cx) * scale + W / 2, (p.y - cy) * scale + H / 2)
}
Fmt.print(f3, (c.x - cx) * scale + W / 2, (c.y - cy) * scale + H / 2)
}
System.write("\n\%\%EOF")
}
 
var v = genXY.call(PTS, 10)
var c = lloyd.call(v, PTS, K)
printEps.call(v, PTS, c, K)</syntaxhighlight>
 
=={{header|XPL0}}==
...somewhat for these sins. Alas, image uploads appear to be broken.
 
<langsyntaxhighlight XPL0lang="xpl0">include c:\cxpl\codes; \intrinsic 'code' declarations
 
def N = 30000; \number of points
\ ...
I:= ChIn(1); \wait for keystroke
SetVid($03); \restore normal text screen
]</syntaxhighlight>