diff --git a/src/eva2/server/go/operators/cluster/ClusterAll.java b/src/eva2/server/go/operators/cluster/ClusterAll.java new file mode 100644 index 00000000..a09f44b3 --- /dev/null +++ b/src/eva2/server/go/operators/cluster/ClusterAll.java @@ -0,0 +1,74 @@ +package eva2.server.go.operators.cluster; + +import java.io.Serializable; +import java.util.Arrays; + +import eva2.server.go.populations.Population; + +/** + * Dummy class which assigns all individuals to a single cluster only. + * + * @author mkron + * + */ +public class ClusterAll implements InterfaceClustering, Serializable { + + private boolean assignLoners = false; // should loners be assigned? + + public Object clone() { + return new ClusterAll(); + } + + /** + * Try to associate a set of loners with a given set of species. Return a list + * of indices assigning loner i with species j for all loners. If no species can + * be associated, -1 is returned as individual entry. + * Note that the last cluster threshold is used which may have depended on the last + * generation. + * If the clustering depends on population measures, a reference set may be given + * which is the reference population to consider the measures of. This is for cases + * where, e.g., subsets of a Population are to be clustered using measures of the + * original population. + * + * @param loners + * @param species + * @param referenceSet a reference population for dynamic measures + * @return associative list matching loners to species. + */ + @Override + public int[] associateLoners(Population loners, Population[] species, + Population referenceSet) { + if (loners!=null && (loners.size()>0)) { + int[] indices = new int[loners.size()]; + if (assignLoners) Arrays.fill(indices, 0); + else Arrays.fill(indices, -1); + return indices; + } else return null; + } + + @Override + public Population[] cluster(Population pop, Population referenceSet) { + // first pop is empty (there are no loners), second pop is complete + return new Population[]{pop.cloneWithoutInds(), pop.cloneShallowInds()}; + } + + @Override + public String initClustering(Population pop) { + return null; + } + + @Override + public boolean mergingSpecies(Population species1, Population species2, + Population referenceSet) { + return true; + } + + public static String globalInfo() { + return "A dummy clustering implementation which assigns all elements to a single cluster."; + } + + public String getName() { + return "Cluster-all"; + } + +} diff --git a/src/eva2/server/go/operators/cluster/ClusteringKMeans.java b/src/eva2/server/go/operators/cluster/ClusteringKMeans.java index e919205e..1a694a70 100644 --- a/src/eva2/server/go/operators/cluster/ClusteringKMeans.java +++ b/src/eva2/server/go/operators/cluster/ClusteringKMeans.java @@ -7,6 +7,8 @@ import eva2.gui.Plot; import eva2.server.go.individuals.AbstractEAIndividual; import eva2.server.go.individuals.ESIndividualDoubleData; import eva2.server.go.individuals.InterfaceDataTypeDouble; +import eva2.server.go.operators.distancemetric.EuclideanMetric; +import eva2.server.go.operators.distancemetric.InterfaceDistanceMetric; import eva2.server.go.populations.Population; import eva2.server.go.problems.F1Problem; import eva2.tools.chart2d.Chart2DDPointIconCircle; @@ -24,11 +26,15 @@ import eva2.tools.math.RNG; */ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializable { - public int m_K = 5; - public double[][] m_C = null; - public boolean m_UseSearchSpace = true; - public boolean m_ReuseC = false; - public boolean m_Debug = false; + private int m_K = 5; + private double[][] m_C = null; + private double mergeDist = 0.001; + private boolean m_UseSearchSpace = true; + private boolean m_ReuseC = false; + private boolean m_Debug = false; + private int minClustSize = 1; + InterfaceDistanceMetric metric = new EuclideanMetric(); + AbstractEAIndividual tmpIndy = null; public ClusteringKMeans() { @@ -38,6 +44,9 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab this.m_Debug = a.m_Debug; this.m_K = a.m_K; this.m_UseSearchSpace = a.m_UseSearchSpace; + this.metric = a.metric; + this.minClustSize = a.minClustSize; + this.mergeDist = a.mergeDist; if (a.m_C != null) { this.m_C = new double[a.m_C.length][a.m_C[0].length]; for (int i = 0; i < this.m_C.length; i++) { @@ -62,12 +71,23 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab * @return Population[] */ public Population[] cluster(Population pop, Population referencePop) { - double[][] data = this.extractClusterDataFrom(pop); + if (pop.size() this.distance(this.m_C[j], data[i])) + if (this.distance(pop.getEAIndividual(i), this.m_C[assign]) > this.distance(pop.getEAIndividual(i), this.m_C[j])) +// if (this.distance(this.m_C[assign], data[i]) > this.distance(this.m_C[j], data[i])) assign = j; } assignment[i] = assign; } // now calcuate the mean of each cluster and calculate new C - newC = new double[this.m_K][data[0].length]; + newC = new double[this.m_K][m_C[0].length]; numbOfAssigned = new int[this.m_K]; for (int i = 0; i < newC.length; i++) { numbOfAssigned[i] = 1; @@ -103,7 +124,8 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab for (int i = 0; i < assignment.length; i++) { numbOfAssigned[assignment[i]]++; for (int j = 0; j < newC[assignment[i]].length; j++) { - newC[assignment[i]][j] += data[i][j]; + if (m_UseSearchSpace) newC[assignment[i]][j] += pop.getEAIndividual(i).getDoublePosition()[j]; + else newC[assignment[i]][j] += pop.getEAIndividual(i).getFitness(j); } } for (int i = 0; i < newC.length; i++) { @@ -153,10 +175,10 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab GraphPointSet mySet; DPoint myPoint; Chart2DDPointIconText tmp; - for (int i = 0; i < data.length; i++) { + for (int i = 0; i < pop.size(); i++) { mySet = new GraphPointSet(10+1, plot.getFunctionArea()); mySet.setConnectedMode(false); - double[] x = data[i]; + double[] x = pop.getEAIndividual(i).getDoublePosition(); myPoint = new DPoint(x[0], x[1]); tmp = new Chart2DDPointIconText(""+assignment[i]); if (assignment[i] % 2 == 0) tmp.setIcon(new Chart2DDPointIconCircle()); @@ -167,7 +189,7 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab // finally let's check whether or not the C changed and if i can terminate k_Means finished = true; for (int i = 0; i < this.m_C.length; i++) { - if (this.distance(this.m_C[i], newC[i]) > 0.0001) finished = false; + if (EuclideanMetric.euclideanDistance(this.m_C[i], newC[i]) > 0.0001) finished = false; this.m_C[i] = newC[i]; } } // gosh now i'm done @@ -200,18 +222,35 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab } } } - - return result; + + // now expand to the expected format (unclustered indies at pop of index 0) + int largeEnough = 0; + // count clusters that are large enough + for (int i=0; i=getMinClustSize()) largeEnough++; + Population[] resExpanded = new Population[largeEnough+1]; + resExpanded[0]=pop.cloneWithoutInds(); + int lastIndex = 1; + for (int i=0; i=getMinClustSize()) { + resExpanded[lastIndex]=result[i]; + lastIndex++; + } else resExpanded[0].addPopulation(result[i]); + } + tmpIndy=null; + return resExpanded; } - /** This method allows you to cluster a population using m_C + /** + * This method allows you to cluster a population using m_C. The minimal cluster + * size is _not_ regarded here. * @param pop The population * @param c The centroids * @return The clusters as populations */ public Population[] cluster(Population pop, double[][] c) { + if (tmpIndy==null) tmpIndy=(AbstractEAIndividual)pop.getEAIndividual(0).clone(); // nec. only because the method is public... Population[] result = new Population[c.length]; - double[][] data = this.extractClusterDataFrom(pop); +// double[][] data = this.extractClusterDataFrom(pop); int clusterAssigned; try { @@ -224,16 +263,15 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab e.printStackTrace(); } // let's assign the elements of the population to a c - for (int i = 0; i < data.length; i++) { + for (int i = 0; i < pop.size(); i++) { // find the closest c clusterAssigned = 0; for (int j = 1; j < c.length; j++) { - if (this.distance(data[i], c[clusterAssigned]) > this.distance(data[i], c[j])) + if (this.distance(pop.getEAIndividual(i), c[clusterAssigned]) > this.distance(pop.getEAIndividual(i), c[j])) clusterAssigned = j; } result[clusterAssigned].add(pop.get(i)); } - return result; } @@ -242,16 +280,13 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab * @param d2 * @return The scalar distances between d1 and d2 */ - private double distance(double[] d1, double[] d2) { - double result = 0; - - for (int i = 0; i < d1.length; i++) { - result += Math.pow(d1[i] - d2[i], 2); - } - result = Math.sqrt(result); - return result; + private double distance(AbstractEAIndividual indy, double[] p) { + if (m_UseSearchSpace) ((InterfaceDataTypeDouble)tmpIndy).SetDoubleGenotype(p); + else tmpIndy.SetFitness(p); + + return metric.distance(indy, tmpIndy); } - + /** This method extracts the double data to cluster from the * population * @param pop The population @@ -281,8 +316,9 @@ public class ClusteringKMeans implements InterfaceClustering, java.io.Serializab * @return True if species converge, else False. */ public boolean mergingSpecies(Population species1, Population species2, Population referencePop) { - // @todo i could use the BIC metric from X-means to calculate this - return false; + // TODO i could use the BIC metric from X-means to calculate this + if (metric.distance(species1.getBestEAIndividual(), species2.getBestEAIndividual())