From 315ef66d5470fa4cee970842812efb1577f9ed7b Mon Sep 17 00:00:00 2001 From: Marcel Kronfeld Date: Tue, 3 May 2011 12:30:16 +0000 Subject: [PATCH] Update DPI clustering --- .../cluster/ClusteringDynPeakIdent.java | 113 +++++++++++++++--- 1 file changed, 98 insertions(+), 15 deletions(-) diff --git a/src/eva2/server/go/operators/cluster/ClusteringDynPeakIdent.java b/src/eva2/server/go/operators/cluster/ClusteringDynPeakIdent.java index a7b22eb7..dcc0d046 100644 --- a/src/eva2/server/go/operators/cluster/ClusteringDynPeakIdent.java +++ b/src/eva2/server/go/operators/cluster/ClusteringDynPeakIdent.java @@ -1,8 +1,12 @@ package eva2.server.go.operators.cluster; +import java.util.ArrayList; + import eva2.gui.BeanInspector; import eva2.server.go.individuals.AbstractEAIndividual; import eva2.server.go.individuals.AbstractEAIndividualComparator; +import eva2.server.go.individuals.IndividualDistanceComparator; +import eva2.server.go.operators.distancemetric.EuclideanMetric; import eva2.server.go.operators.distancemetric.InterfaceDistanceMetric; import eva2.server.go.operators.distancemetric.PhenotypeMetric; import eva2.server.go.populations.Population; @@ -12,8 +16,11 @@ import eva2.tools.Pair; * Clustering using the DPI mechanism (dynamic peak identification). * Collect a number of peaks, which are the fittest individuals * which are not dominated by other individuals within a certain distance. - * The remaining individuals are assigned to a peak if they have a distance - * smaller than rho to that peak. + * The remaining individuals are assigned to the closest peak up to a maximum + * niche count. If more individuals would be assigned to one peak, the best ones are + * chosen and remaining ones are assumed unclustered. + * For the strict radius case, individuals are only assigned to a peak if they + * have a distance smaller than the niche radius to that peak. * The number of expected peaks (clusters) must be predefined. * Note that the returned number of clusters may be smaller than q. * @@ -21,21 +28,35 @@ import eva2.tools.Pair; * */ public class ClusteringDynPeakIdent implements InterfaceClustering, java.io.Serializable { - private static final boolean TRACE=true; + private static final boolean TRACE=false; private int numNiches; private double nicheRadius; + private int maxNicheCount; // maximum number of individuals per peak private boolean strictNicheRadius=true; // if false, all individuals are clustered to the closest niche, otherwise some remain unclustered (those which are further than the nicheRadius from any peak) + InterfaceDistanceMetric metric = new PhenotypeMetric(); - public ClusteringDynPeakIdent(int numNs, double nicheRad) { + /** + * Uses the alternative metric if it is non null. In case it is null, the last metric is used or, if + * none was set, the default PhenotypeMetric() is used. + * + * @param numNs + * @param numIndiesPerPeak + * @param nicheRad + * @param strictRad + * @param alternativeMetric + */ + public ClusteringDynPeakIdent(int numNs, int numIndiesPerPeak, double nicheRad, boolean strictRad, InterfaceDistanceMetric alternativeMetric) { this.numNiches = numNs; + this.maxNicheCount = numIndiesPerPeak; this.nicheRadius = nicheRad; + this.strictNicheRadius = strictRad; + if (metric==null && (alternativeMetric==null)) metric=new PhenotypeMetric(); + else if (alternativeMetric!=null) metric=alternativeMetric; } public ClusteringDynPeakIdent(ClusteringDynPeakIdent o) { - this(o.numNiches, o.nicheRadius); - metric = o.metric; - this.strictNicheRadius = o.strictNicheRadius; + this(o.numNiches, o.maxNicheCount, o.nicheRadius, o.strictNicheRadius, o.metric); } public Object clone() { @@ -73,15 +94,32 @@ public class ClusteringDynPeakIdent implements InterfaceClustering, java.io.Seri } public Population[] cluster(Population pop, Population referenceSet) { +// boolean TRACE_METH=false; +// if (TRACE_METH) System.out.println("A1 " + System.currentTimeMillis()); AbstractEAIndividualComparator eaComparator = new AbstractEAIndividualComparator(-1); Population sorted = pop.getSortedBestFirst(eaComparator); +// if (TRACE_METH) System.out.println("A2 " + System.currentTimeMillis()); Population peaks = performDynPeakIdent(metric, sorted, numNiches, nicheRadius); +// System.out.println("peak measures: " + BeanInspector.toString(peaks.getPopulationMeasures())); +// if (TRACE_METH) System.out.println("A3 " + System.currentTimeMillis()); Population[] clusters = new Population[peaks.size()+1]; - for (int i=0; i0) clusters[i].add(peaks.getEAIndividual(i-1)); // add peaks to clusters! + } +// if (TRACE_METH) System.out.println("A4 " + System.currentTimeMillis()); + Population rest = pop.filter(peaks); +// if (TRACE_METH) System.out.println("A4a " + System.currentTimeMillis()); + if (pop.getRedundancyCount()>0) { + // happens e.g. on the bounds of the domain + System.err.println("warning, found redundant indies: " + pop.getRedundancyCount()); + rest.removeRedundantIndies(); + } + if ((rest.size()+peaks.size())+pop.getRedundancyCount()!=pop.size()) { + System.err.println("Warning, inconsistent filtering in ClusteringDynPeakIdent! Redundant: " + pop.getRedundancyCount() ); + } int[] assoc = assignLeaders(rest, peaks); +// if (TRACE_METH) System.out.println("A5 " + System.currentTimeMillis()); for (int i=0; i=0) { // it can be assigned to a peak @@ -90,6 +128,30 @@ public class ClusteringDynPeakIdent implements InterfaceClustering, java.io.Seri clusters[0].add(rest.getEAIndividual(i)); } } +// if (TRACE_METH) System.out.println("A6 " + System.currentTimeMillis()); + int cnt = clusters[0].size(); + for (int i=1; i0) { // check for too large species + for (int i=1; imaxNicheCount) { +// Population overhead = clusters[i].getSortedNIndividuals(clusters[i].size()-maxNicheCount, false); + ArrayList overhd = clusters[i].getSorted(new IndividualDistanceComparator(peaks.getEAIndividual(i-1), new EuclideanMetric(), true)); + Population overhead = new Population(); + overhead.addAll(overhead.toTail(clusters[i].size()-maxNicheCount, overhd)); // add only the front maxNicheCount individuals + clusters[i].removeMembers(overhead, true); + clusters[0].addPopulation(overhead); + } + } + } +// if (TRACE_METH) System.out.println("-- " + System.currentTimeMillis()); return clusters; } @@ -117,18 +179,39 @@ public class ClusteringDynPeakIdent implements InterfaceClustering, java.io.Seri public static Population performDynPeakIdent(InterfaceDistanceMetric metric, Population sortedPop, int q, double rho) { int i=0; Population peaks = new Population(q); + if (TRACE) System.out.print("Adding peaks: "); while (i