打开Eclipse软件,初始化界面如下所示:

进入后选择工作空间,界面如下所示:

进入软件后,初始界面如下所示:(注意:如果已经创建过项目,请直接跳到后面的新建项目步骤)

点击“Workbench”,左边为显示的当前文件夹,右边为命令行窗口和输出窗口。

点击菜单栏“File-New-Java Project”,创建一个新的Java项目。

为项目命名“k-means”,点击Finish完成创建。

右键点击src文件,点击New分别新建三个class文件,分别命名为“Tuple.java”,“KmeansCalc.java”和“KmeansPaint.java”,点击Finish完成创建。
实验目的是实现对100个随机点进行三个聚类的划分,通过多次迭代,直到三个聚类中心不发生变化为止,并将整个聚类过程用窗口可视化出来。
这部分程序实现的主要功能就是读取数据点所在的坐标点。在最终的可视化界面中,可以观察到聚类中心点和其他数据点在二维坐标中的位置显示。具体程序如下:
/*
* data object
* two-dimensional coordinate
*/
public class Tuple {
float x;
float y;
public Tuple(){}
public Tuple(float a,float b){
this.x=a;
this.y=b;
}
public void setXY(float a,float b){
this.x=a;
this.y=b;
}
public void setX(float a){
this.x=a;
}
public void setY(float a){
this.y=a;
}
public float getX(){
return x;
}
public float getY(){
return y;
}
}

这部分程序实现k-means算法聚类,假定默认聚类中心为3个,初始化数据对象为100个,收敛的条件为聚类中心点不再变化。通过计算欧式距离,计算数据对象离哪个中心点更近,从而确认当前对象属于哪个簇。
/*
* KmeansCalc
*/
import java.lang.Math;
import java.util.Random;
import java.util.Vector;
public class KmeansCalc {
static int k=3;
int i=0;
int lable = 0;
static int flag = 0;
static StringBuilder data = new StringBuilder();
static Vector
static Tuple means[]= new Tuple[k];
static Vector
Tuple meansLast[]= new Tuple[k];
Tuple meanFrist[] = new Tuple[k];
public KmeansCalc(){
for(int i=0;i<100;i++){
tuples.addElement(new Tuple(0,0));
}
for(i=0;i
means[i] = new Tuple(0,0);
meanFrist[i] = new Tuple(0,0);
clusters[i] = new Vector
clusters[i].addElement(means[i]);
}
}
//REset Data,bt1Action
public void ReSet(){
data.delete(0, data.length());
tuples.clear();
for(i=0;i
means[i] = new Tuple(0,0);
clusters[i] = new Vector
clusters[i].clear();
clusters[i].addElement(means[i]);
}
flag=0;
}
public double getDistXY(Tuple t1,Tuple t2){
return Math.sqrt(Math.pow((t1.x-t2.x)*(t1.x-t2.x), 2)+Math.pow((t1.y-t2.y)*(t1.y-t2.y),2));
}
public int clusterOfTuple(Tuple means[],Tuple tuple){
double dist=getDistXY(means[0],tuple);
double tmp;
int label=0;
for(int i=1;i
tmp=getDistXY(means[i],tuple);
if(tmp
}
return label;
}
public Tuple getMeans(Vector
float meansX=0,meansY=0;
Tuple t = new Tuple();
for (int i=0;i
{
meansX+=cluster.get(i).getX();
meansY+=cluster.get(i).getY();
}
t.setX(meansX/cluster.size());
t.setY(meansY/cluster.size());
return t;
}
//bt4 Action
public static String getData(){
return data.toString();
}
//init(),bt1Action
public void KMeansInit(){
Random ran = new Random();
tuples.addElement(new Tuple(ran.nextInt(19),ran.nextInt(17)));
data.append("This dataset \n (for viewing replication only, changing the value here does not affect the program):\n("+String.valueOf(tuples.get(0).getX())+","+
String.valueOf(tuples.get(0).getY())+")\n");
for(int i=1;i<100;i++){
Tuple tem = new Tuple(ran.nextInt(19),ran.nextInt(17));
if(!tuples.contains(tem)){
tuples.addElement(tem);
data.append("("+String.valueOf(tem.getX())+","+
String.valueOf(tem.getY())+")\n");
}
else{
i--;
}
}
meanFrist[0] = tuples.get(ran.nextInt(100));
for(i=1;i
meanFrist[i] = tuples.get(ran.nextInt(100));
for(int j=0;j
if(meanFrist[i]==meanFrist[i-1]){
i--;
}
}
}
for(i=0;i
meansLast[i]= new Tuple();
means[i].setX(meanFrist[i].getX());
meansLast[i].x=means[i].x;
means[i].y=meanFrist[i].getY();
meansLast[i].y=means[i].y;
}
for(i=0;i!=tuples.size();++i){
lable=clusterOfTuple(means,tuples.get(i));
clusters[lable].addElement(tuples.get(i));
}
for(lable=0;lable
System.out.print("The cluster of"+(lable+1)+"\n");
Vector
for (i=0;i
{
System.out.print("("+t.get(i).getX()+","+t.get(i).getY()+")"+" ");
}
System.out.print("\n");
}
}
//Later handle,bt2&bt3 Action
public void KMeansHandle(){
for (i=0;i
{
means[i]=getMeans(clusters[i]);
if(means[i].x==meansLast[i].x&&means[i].y==meansLast[i].y)flag++;
else { meansLast[i].x=means[i].x; meansLast[i].y=means[i].y;}
}
if(flag==k)return;
flag=0;
for (i=0;i
{
clusters[i].clear();
}
for(i=0;i!=tuples.size();++i){
lable=clusterOfTuple(means,tuples.get(i));
clusters[lable].add(tuples.get(i));
}
for(lable=0;lable
System.out.print("The cluster of"+(lable+1)+"\n");
System.out.print("central point:("+means[lable].x+","+means[lable].y+") \n");
System.out.print("Tuple group:\n");
Vector
for (i=0;i
{
System.out.print("("+t.get(i).getX()+","+t.get(i).getY()+")"+" ");
}
System.out.print("\n");
}
System.out.print("\n");
try{
Thread.sleep(1000);
}catch(Exception e){
System.out.print("Thread.sleep Exception in Kmeans.java\n");
System.exit(0);
}
}
}


这部分程序实现的是最终的图形界面可视化结果,包括聚类结果的显示,按钮的选取,聚类中心点的坐标点等。
/*
* KmeansPaint
* Implement the graphical interface part
*/
import java.awt.BasicStroke;
import java.awt.BorderLayout;
import java.awt.Color;
import java.awt.Graphics;
import java.awt.Graphics2D;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.text.DecimalFormat;
import javax.swing.JButton;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
public class KmeansPaint extends JFrame {
static int Flag = 0; //End judge
int stepCount = 0,countflag=0;
private final int FREAME_X = 20;
private final int FREAME_Y = 80;
private final int FREAME_WIDTH = 650;
private final int FREAME_HEIGHT = 550;
private final int Origin_X = FREAME_X + 50;
private final int Origin_Y = FREAME_Y + FREAME_HEIGHT - 50;
private final int XAxis_X = FREAME_X + FREAME_WIDTH - 30;
private final int XAxis_Y = Origin_Y;
private final int YAxis_X = Origin_X;
private final int YAxis_Y = FREAME_Y;
private final int INTERVAL = 30;
JLabel cluster1 =new JLabel("No.1Cluster:");
JLabel cluster2 =new JLabel("No.2Cluster:");
JLabel cluster3 =new JLabel("No.3Cluster:");
JLabel countResult = new JLabel();
JLabel tip = new JLabel("Note: 100 coordinate data are generated randomly each time, and 3 initial center coordinates are integers.");
JButton bt1 = new JButton("Load the data");
JButton bt2 = new JButton("Automatic demonstration");
JButton bt3 = new JButton("Single step ");
JButton bt4 = new JButton("see the data");
JTextArea lab = new JTextArea();
JFrame newF = new JFrame("DataSet");
KmeansCalc km=new KmeansCalc();
private MyCanva trendChartCanvas = new MyCanva();
public KmeansPaint(){
super("K-means clustering algorithm demonstration system");
this.setDefaultCloseOperation(EXIT_ON_CLOSE);
this.setBounds(300, 200, 1000, 700);
bt1.setBounds(XAxis_X+60,Origin_Y-110, 100, 40);
bt4.setBounds(XAxis_X+200,Origin_Y-110, 100, 40);
bt2.setBounds(XAxis_X+60,Origin_Y-50, 100, 40);
bt3.setBounds(XAxis_X+200,Origin_Y-50, 100, 40);
cluster1.setBounds(XAxis_X+60, YAxis_Y, 200, 40);
cluster2.setBounds(XAxis_X+60, YAxis_Y+50, 200, 40);
cluster3.setBounds(XAxis_X+60, YAxis_Y+100, 200, 40);
cluster1.setFont(new java.awt.Font("Dialog", 1, 15));
cluster2.setFont(new java.awt.Font("Dialog", 1, 15));
cluster3.setFont(new java.awt.Font("Dialog", 1, 15));
countResult.setBounds(XAxis_X+60, YAxis_Y+150, 400, 40);
countResult.setFont(new java.awt.Font("Dialog", 1, 17));
tip.setBounds(Origin_X, Origin_Y+30, 400, 20);
this.add(bt1);
this.add(bt2);
this.add(bt3);
this.add(bt4);
this.add(cluster1);
this.add(cluster2);
this.add(cluster3);
this.add(countResult);
this.add(tip);
this.add(trendChartCanvas, BorderLayout.CENTER);
this.setVisible(true);
bt1.addActionListener(new ActionListener(){
public void actionPerformed(ActionEvent arg0) {
// TODO Auto-generated method stub
stepCount=0;
countflag=1;
km.ReSet();
km.KMeansInit();
repaint();
}
});
bt2.addActionListener(new ActionListener(){
public void actionPerformed(ActionEvent arg0) {
// TODO Auto-generated method stub
new Thread(new Runnable() {
public void run() {
while (KmeansCalc.flag YAxis_Y; i -= INTERVAL) {
g.drawString(xyString + " ", Origin_X - 30, i + 3);
xyString++;
}
g.drawString("Y", YAxis_X - 5, YAxis_Y - 5);
DecimalFormat df = new DecimalFormat("######0.00");
cluster1.setText("No.1Cluster: ("+df.format(KmeansCalc.means[0].getX())+","+df.format(KmeansCalc.means[0].getX())+")");
cluster2.setText("No.2Cluster: ("+df.format(KmeansCalc.means[1].getX())+","+df.format(KmeansCalc.means[1].getY())+")");
cluster3.setText("No.3Cluster: ("+df.format(KmeansCalc.means[2].getX())+","+df.format(KmeansCalc.means[2].getY())+")");
if(countflag==0){
countResult.setText("Click the load data button to load the data");
}
else if(stepCount==0){
for(int i=0;i

3运行项目
右键点击左侧项目文件列表中的src文件夹,选择“Run As”-“Java Application”,运行之后得到下图中的窗口。

4观察结果

点击查看数据按钮,可以查看100个随机数据点的坐标。

点击自动演示,可以看见多次迭代的过程在二维坐标系里的变化,等聚类计算结束后,在右边会显示迭代的次数和最终的聚类中心坐标。

点击单步演示,可以看到每次聚类的中心坐标和二维坐标系里的数据点变化。
