Clustering update

This commit is contained in:
Marcel Kronfeld 2011-05-04 12:17:50 +00:00
parent c79eb33bca
commit 21dd2fe5c8
3 changed files with 161 additions and 38 deletions

View File

@ -0,0 +1,74 @@
package eva2.server.go.operators.cluster;
import java.io.Serializable;
import java.util.Arrays;
import eva2.server.go.populations.Population;
/**
* Dummy class which assigns all individuals to a single cluster only.
*
* @author mkron
*
*/
public class ClusterAll implements InterfaceClustering, Serializable {
    /**
     * Controls the result of {@link #associateLoners}: if true every loner is mapped
     * to species index 0, otherwise every loner is reported as unassociated (-1).
     */
    private boolean assignLoners = false;

    /**
     * Create a copy of this clustering instance. The loner assignment flag is
     * carried over so that the copy behaves exactly like the original.
     *
     * @return a new ClusterAll instance with the same configuration
     */
    @Override
    public Object clone() {
        ClusterAll copy = new ClusterAll();
        copy.assignLoners = this.assignLoners;
        return copy;
    }

    /**
     * Associate a set of loners with a given set of species. Since this dummy
     * implementation knows only a single cluster, all loners receive the same entry:
     * index 0 if loners are to be assigned, or -1 (no association) otherwise.
     * Neither the species nor the reference set are inspected.
     *
     * @param loners       the individuals to associate
     * @param species      the available species (ignored)
     * @param referenceSet a reference population for dynamic measures (ignored)
     * @return one entry per loner, or null if loners is null or empty
     */
    @Override
    public int[] associateLoners(Population loners, Population[] species,
            Population referenceSet) {
        if (loners == null || loners.size() == 0) {
            // nothing to associate; callers receive null in this case
            return null;
        }
        int[] indices = new int[loners.size()];
        Arrays.fill(indices, assignLoners ? 0 : -1);
        return indices;
    }

    /**
     * Put all individuals of the given population into one single cluster.
     *
     * @param pop          the population to be clustered
     * @param referenceSet a reference population for dynamic measures (ignored)
     * @return an array of two populations: the set of unclustered individuals at
     *         index 0 (created by cloneWithoutInds, intended to be empty since there
     *         are no loners) and the single cluster at index 1 (created by
     *         cloneShallowInds, intended to hold all individuals of pop)
     */
    @Override
    public Population[] cluster(Population pop, Population referenceSet) {
        Population unclustered = pop.cloneWithoutInds();
        Population singleCluster = pop.cloneShallowInds();
        return new Population[]{unclustered, singleCluster};
    }

    /**
     * No initialization is required for this clustering method.
     *
     * @param pop the population the clustering is going to work on (ignored)
     * @return always null
     */
    @Override
    public String initClustering(Population pop) {
        return null;
    }

    /**
     * As everything belongs to the same cluster, two species are always merged.
     *
     * @param species1     the first species (ignored)
     * @param species2     the second species (ignored)
     * @param referenceSet a reference population for dynamic measures (ignored)
     * @return always true
     */
    @Override
    public boolean mergingSpecies(Population species1, Population species2,
            Population referenceSet) {
        return true;
    }

    /**
     * Return a short description of this clustering method.
     *
     * @return the description text
     */
    public static String globalInfo() {
        return "A dummy clustering implementation which assigns all elements to a single cluster.";
    }

    /**
     * Return the display name of this clustering method.
     *
     * @return the name
     */
    public String getName() {
        return "Cluster-all";
    }
}

View File

@ -7,6 +7,8 @@ import eva2.gui.Plot;
import eva2.server.go.individuals.AbstractEAIndividual; import eva2.server.go.individuals.AbstractEAIndividual;
import eva2.server.go.individuals.ESIndividualDoubleData; import eva2.server.go.individuals.ESIndividualDoubleData;
import eva2.server.go.individuals.InterfaceDataTypeDouble; import eva2.server.go.individuals.InterfaceDataTypeDouble;
import eva2.server.go.operators.distancemetric.EuclideanMetric;
import eva2.server.go.operators.distancemetric.InterfaceDistanceMetric;
import eva2.server.go.populations.Population; import eva2.server.go.populations.Population;
import eva2.server.go.problems.F1Problem; import eva2.server.go.problems.F1Problem;
import eva2.tools.chart2d.Chart2DDPointIconCircle; import eva2.tools.chart2d.Chart2DDPointIconCircle;
@ -24,11 +26,15 @@ import eva2.tools.math.RNG;
*/ */
public class ClusteringKMeans implements InterfaceClustering, java.io.Serializable { public class ClusteringKMeans implements InterfaceClustering, java.io.Serializable {
public int m_K = 5; private int m_K = 5;
public double[][] m_C = null; private double[][] m_C = null;
public boolean m_UseSearchSpace = true; private double mergeDist = 0.001;
public boolean m_ReuseC = false; private boolean m_UseSearchSpace = true;
public boolean m_Debug = false; private boolean m_ReuseC = false;
private boolean m_Debug = false;
private int minClustSize = 1;
InterfaceDistanceMetric metric = new EuclideanMetric();
AbstractEAIndividual tmpIndy = null;
public ClusteringKMeans() { public ClusteringKMeans() {
@ -38,6 +44,9 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab
this.m_Debug = a.m_Debug; this.m_Debug = a.m_Debug;
this.m_K = a.m_K; this.m_K = a.m_K;
this.m_UseSearchSpace = a.m_UseSearchSpace; this.m_UseSearchSpace = a.m_UseSearchSpace;
this.metric = a.metric;
this.minClustSize = a.minClustSize;
this.mergeDist = a.mergeDist;
if (a.m_C != null) { if (a.m_C != null) {
this.m_C = new double[a.m_C.length][a.m_C[0].length]; this.m_C = new double[a.m_C.length][a.m_C[0].length];
for (int i = 0; i < this.m_C.length; i++) { for (int i = 0; i < this.m_C.length; i++) {
@ -62,12 +71,23 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab
* @return Population[] * @return Population[]
*/ */
public Population[] cluster(Population pop, Population referencePop) { public Population[] cluster(Population pop, Population referencePop) {
double[][] data = this.extractClusterDataFrom(pop); if (pop.size()<m_K) {
// in this case, there arent enough indies to do anything, so we just return them as "unclustered"
Population[] res = new Population[1];
res[0]=pop.cloneShallowInds();
return res;
}
tmpIndy = (AbstractEAIndividual)pop.getEAIndividual(0).clone();
// double[][] data = this.extractClusterDataFrom(pop);
if (!(this.m_ReuseC) || (this.m_C == null)) { if (!(this.m_ReuseC) || (this.m_C == null)) {
this.m_C = new double[this.m_K][]; this.m_C = new double[this.m_K][];
// now choose random initial Cs // now choose random initial Cs
for (int i = 0; i < this.m_C.length; i++) { Population initialSeeds = pop.getRandNIndividuals(this.m_K);
this.m_C[i] = data[RNG.randomInt(0, data.length-1)]; for (int i = 0; i < this.m_K; i++) {
if (m_UseSearchSpace) this.m_C[i] = initialSeeds.getEAIndividual(i).getDoublePosition().clone();
else this.m_C[i] = initialSeeds.getEAIndividual(i).getFitness().clone();
// this.m_C[i] = data[RNG.randomInt(0, data.length-1)];
//this.m_C[i] = data[i]; // This works!! //this.m_C[i] = data[i]; // This works!!
// we won't check for double instances assuming that double instances // we won't check for double instances assuming that double instances
// will be ironed out during clustering and to prevent infinite loops // will be ironed out during clustering and to prevent infinite loops
@ -79,22 +99,23 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab
boolean finished = false; boolean finished = false;
double[][] newC; double[][] newC;
int[] numbOfAssigned; int[] numbOfAssigned;
int[] assignment = new int[data.length]; int[] assignment = new int[pop.size()];
int assign; int assign;
while (!finished) { while (!finished) {
// first assign the data to the closes C // first assign the data to the closes C
for (int i = 0; i < data.length; i++) { for (int i = 0; i < pop.size(); i++) {
// check which C is closest // check which C is closest
assign = 0; assign = 0;
for (int j = 1; j < this.m_C.length; j++) { for (int j = 1; j < this.m_C.length; j++) {
if (this.distance(this.m_C[assign], data[i]) > this.distance(this.m_C[j], data[i])) if (this.distance(pop.getEAIndividual(i), this.m_C[assign]) > this.distance(pop.getEAIndividual(i), this.m_C[j]))
// if (this.distance(this.m_C[assign], data[i]) > this.distance(this.m_C[j], data[i]))
assign = j; assign = j;
} }
assignment[i] = assign; assignment[i] = assign;
} }
// now calcuate the mean of each cluster and calculate new C // now calcuate the mean of each cluster and calculate new C
newC = new double[this.m_K][data[0].length]; newC = new double[this.m_K][m_C[0].length];
numbOfAssigned = new int[this.m_K]; numbOfAssigned = new int[this.m_K];
for (int i = 0; i < newC.length; i++) { for (int i = 0; i < newC.length; i++) {
numbOfAssigned[i] = 1; numbOfAssigned[i] = 1;
@ -103,7 +124,8 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab
for (int i = 0; i < assignment.length; i++) { for (int i = 0; i < assignment.length; i++) {
numbOfAssigned[assignment[i]]++; numbOfAssigned[assignment[i]]++;
for (int j = 0; j < newC[assignment[i]].length; j++) { for (int j = 0; j < newC[assignment[i]].length; j++) {
newC[assignment[i]][j] += data[i][j]; if (m_UseSearchSpace) newC[assignment[i]][j] += pop.getEAIndividual(i).getDoublePosition()[j];
else newC[assignment[i]][j] += pop.getEAIndividual(i).getFitness(j);
} }
} }
for (int i = 0; i < newC.length; i++) { for (int i = 0; i < newC.length; i++) {
@ -153,10 +175,10 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab
GraphPointSet mySet; GraphPointSet mySet;
DPoint myPoint; DPoint myPoint;
Chart2DDPointIconText tmp; Chart2DDPointIconText tmp;
for (int i = 0; i < data.length; i++) { for (int i = 0; i < pop.size(); i++) {
mySet = new GraphPointSet(10+1, plot.getFunctionArea()); mySet = new GraphPointSet(10+1, plot.getFunctionArea());
mySet.setConnectedMode(false); mySet.setConnectedMode(false);
double[] x = data[i]; double[] x = pop.getEAIndividual(i).getDoublePosition();
myPoint = new DPoint(x[0], x[1]); myPoint = new DPoint(x[0], x[1]);
tmp = new Chart2DDPointIconText(""+assignment[i]); tmp = new Chart2DDPointIconText(""+assignment[i]);
if (assignment[i] % 2 == 0) tmp.setIcon(new Chart2DDPointIconCircle()); if (assignment[i] % 2 == 0) tmp.setIcon(new Chart2DDPointIconCircle());
@ -167,7 +189,7 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab
// finally let's check whether or not the C changed and if i can terminate k_Means // finally let's check whether or not the C changed and if i can terminate k_Means
finished = true; finished = true;
for (int i = 0; i < this.m_C.length; i++) { for (int i = 0; i < this.m_C.length; i++) {
if (this.distance(this.m_C[i], newC[i]) > 0.0001) finished = false; if (EuclideanMetric.euclideanDistance(this.m_C[i], newC[i]) > 0.0001) finished = false;
this.m_C[i] = newC[i]; this.m_C[i] = newC[i];
} }
} // gosh now i'm done } // gosh now i'm done
@ -201,17 +223,34 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab
} }
} }
return result; // now expand to the expected format (unclustered indies at pop of index 0)
int largeEnough = 0;
// count clusters that are large enough
for (int i=0; i<result.length; i++) if (result[i].size()>=getMinClustSize()) largeEnough++;
Population[] resExpanded = new Population[largeEnough+1];
resExpanded[0]=pop.cloneWithoutInds();
int lastIndex = 1;
for (int i=0; i<result.length; i++) {
if (result[i].size()>=getMinClustSize()) {
resExpanded[lastIndex]=result[i];
lastIndex++;
} else resExpanded[0].addPopulation(result[i]);
}
tmpIndy=null;
return resExpanded;
} }
/** This method allows you to cluster a population using m_C /**
* This method allows you to cluster a population using m_C. The minimal cluster
* size is _not_ regarded here.
* @param pop The population * @param pop The population
* @param c The centroids * @param c The centroids
* @return The clusters as populations * @return The clusters as populations
*/ */
public Population[] cluster(Population pop, double[][] c) { public Population[] cluster(Population pop, double[][] c) {
if (tmpIndy==null) tmpIndy=(AbstractEAIndividual)pop.getEAIndividual(0).clone(); // nec. only because the method is public...
Population[] result = new Population[c.length]; Population[] result = new Population[c.length];
double[][] data = this.extractClusterDataFrom(pop); // double[][] data = this.extractClusterDataFrom(pop);
int clusterAssigned; int clusterAssigned;
try { try {
@ -224,16 +263,15 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab
e.printStackTrace(); e.printStackTrace();
} }
// let's assign the elements of the population to a c // let's assign the elements of the population to a c
for (int i = 0; i < data.length; i++) { for (int i = 0; i < pop.size(); i++) {
// find the closest c // find the closest c
clusterAssigned = 0; clusterAssigned = 0;
for (int j = 1; j < c.length; j++) { for (int j = 1; j < c.length; j++) {
if (this.distance(data[i], c[clusterAssigned]) > this.distance(data[i], c[j])) if (this.distance(pop.getEAIndividual(i), c[clusterAssigned]) > this.distance(pop.getEAIndividual(i), c[j]))
clusterAssigned = j; clusterAssigned = j;
} }
result[clusterAssigned].add(pop.get(i)); result[clusterAssigned].add(pop.get(i));
} }
return result; return result;
} }
@ -242,14 +280,11 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab
* @param d2 * @param d2
* @return The scalar distances between d1 and d2 * @return The scalar distances between d1 and d2
*/ */
private double distance(double[] d1, double[] d2) { private double distance(AbstractEAIndividual indy, double[] p) {
double result = 0; if (m_UseSearchSpace) ((InterfaceDataTypeDouble)tmpIndy).SetDoubleGenotype(p);
else tmpIndy.SetFitness(p);
for (int i = 0; i < d1.length; i++) { return metric.distance(indy, tmpIndy);
result += Math.pow(d1[i] - d2[i], 2);
}
result = Math.sqrt(result);
return result;
} }
/** This method extracts the double data to cluster from the /** This method extracts the double data to cluster from the
@ -281,8 +316,9 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab
* @return True if species converge, else False. * @return True if species converge, else False.
*/ */
public boolean mergingSpecies(Population species1, Population species2, Population referencePop) { public boolean mergingSpecies(Population species1, Population species2, Population referencePop) {
// @todo i could use the BIC metric from X-means to calculate this // TODO i could use the BIC metric from X-means to calculate this
return false; if (metric.distance(species1.getBestEAIndividual(), species2.getBestEAIndividual())<mergeDist) return true;
else return false;
} }
// /** This method decides if a unclustered individual belongs to an already established species. // /** This method decides if a unclustered individual belongs to an already established species.
@ -296,6 +332,7 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab
// } // }
public int[] associateLoners(Population loners, Population[] species, Population referencePop) { public int[] associateLoners(Population loners, Population[] species, Population referencePop) {
// tmpIndy = (AbstractEAIndividual)loners.getEAIndividual(0).clone();
int[] res=new int[loners.size()]; int[] res=new int[loners.size()];
System.err.println("Warning, associateLoners not implemented for " + this.getClass()); System.err.println("Warning, associateLoners not implemented for " + this.getClass());
Arrays.fill(res, -1); Arrays.fill(res, -1);
@ -368,7 +405,7 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab
this.m_UseSearchSpace = m; this.m_UseSearchSpace = m;
} }
public String useSearchSpaceTipText() { public String useSearchSpaceTipText() {
return "Toggle between search/objective space distance."; return "Toggel between search/objective space distance.";
} }
/** This method allows you to toggle reuse of c. /** This method allows you to toggle reuse of c.
@ -381,10 +418,20 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab
this.m_ReuseC = m; this.m_ReuseC = m;
} }
public String reuseCTipText() { public String reuseCTipText() {
return "Toggle reuse of previously found cluster centroids."; return "Toggel reuse of previously found cluster centroids.";
} }
public String initClustering(Population pop) { public String initClustering(Population pop) {
return null; return null;
} }
/**
 * Set the minimal cluster size. It is read through getMinClustSize() when
 * cluster(Population, Population) expands its result: clusters with fewer
 * members are added to the unclustered population at index 0.
 *
 * @param minClustSize the minimal number of individuals a cluster must contain
 */
public void setMinClustSize(int minClustSize) {
this.minClustSize = minClustSize;
}
/**
 * Return the minimal cluster size currently configured.
 *
 * @return the minimal number of individuals a cluster must contain
 */
public int getMinClustSize() {
return minClustSize;
}
/**
 * Return the descriptive text for the minimal cluster size property.
 * NOTE(review): presumably shown as a tool tip by the GUI, following the
 * xyzTipText naming used by the other properties of this class - confirm.
 *
 * @return the tip text
 */
public String minClustSizeTipText() {
return "Require a cluster to be at least of this size. Smaller ones are assigned to the unclustered set.";
}
} }

View File

@ -1,6 +1,5 @@
package eva2.server.go.operators.cluster; package eva2.server.go.operators.cluster;
import eva2.server.go.individuals.AbstractEAIndividual;
import eva2.server.go.populations.Population; import eva2.server.go.populations.Population;
/** /**
@ -33,6 +32,9 @@ public interface InterfaceClustering {
* which is the reference population to consider the measures of. This is for cases * which is the reference population to consider the measures of. This is for cases
* where, e.g., subsets of a Population are to be clustered using measures of the * where, e.g., subsets of a Population are to be clustered using measures of the
* original population. * original population.
* Note that the clustered individuals should only be shallow instances of the members
* of the given population pop. The sum of the sizes of all returned populations must be
* equal to pop.size().
* *
* @param pop The population of individuals that is to be clustered. * @param pop The population of individuals that is to be clustered.
* @param referenceSet a reference population for dynamic measures * @param referenceSet a reference population for dynamic measures