493 lines
19 KiB
Java
493 lines
19 KiB
Java
package eva2.optimization.operator.cluster;
|
|
|
|
import eva2.gui.BeanInspector;
|
|
import eva2.gui.GenericObjectEditor;
|
|
import eva2.optimization.individuals.AbstractEAIndividual;
|
|
import eva2.optimization.individuals.AbstractEAIndividualComparator;
|
|
import eva2.optimization.operator.distancemetric.InterfaceDistanceMetric;
|
|
import eva2.optimization.operator.distancemetric.PhenotypeMetric;
|
|
import eva2.optimization.operator.paramcontrol.ParamAdaption;
|
|
import eva2.optimization.operator.paramcontrol.ParameterControlManager;
|
|
import eva2.optimization.population.Population;
|
|
import java.io.Serializable;
|
|
import java.util.ArrayList;
|
|
import java.util.LinkedList;
|
|
import java.util.Vector;
|
|
|
|
/**
 * Hierarchical clustering after Preuss et al., "Counteracting Genetic Drift and Disruptive Recombination
 * in (mu+,lambda)-EA on Multimodal Fitness Landscapes", GECCO '05.
 *
 * A tree is produced by assigning to each individual the closest individual with better fitness.
 * Connections with a distance above a certain threshold are cut. After that, each interconnected
 * subtree forms a cluster.
 * In the paper, the threshold is deduced as 2*d_p, where d_p is the mean distance in the population.
 *
 * @author mkron
 */
|
|
public class ClusteringNearestBetter implements InterfaceClustering, Serializable {
|
|
private static final long serialVersionUID = 1L;
|
|
private InterfaceDistanceMetric metric = new PhenotypeMetric();
|
|
private double absoluteDistThreshold = 0.5;
|
|
private boolean thresholdMultipleOfMeanDist = true;
|
|
private double meanDistFactor = 2.; // recommended setting
|
|
private double currentMeanDistance = -1.;
|
|
private int minimumGroupSize = 3;
|
|
private boolean testConvergingSpeciesOnBestOnly = true; // if two species are tested for convergence, only the best indies may be compared regarding the distance threshold
|
|
protected ParameterControlManager paramControl = new ParameterControlManager();
|
|
|
|
private int[] uplink;
|
|
private double[] uplinkDist;
|
|
private AbstractEAIndividualComparator comparator = new AbstractEAIndividualComparator();
|
|
private Vector<Integer>[] children;
|
|
private static final String initializedForKey = "initializedClustNearestBetterOnHash";
|
|
private static final String initializedRefData = "initializedClustNearestBetterData";
|
|
|
|
private static boolean TRACE = false;
|
|
|
|
/**
 * Create an instance with the default settings given by the field initializers.
 */
public ClusteringNearestBetter() {
    // nothing to do, all defaults are set by the field initializers
}
|
|
|
|
/**
 * Copy constructor. Copies the threshold settings and clones the comparator.
 * The metric instance is shared with the original (reference copy), and the
 * parameter control manager is not copied: the new instance keeps the fresh
 * one from its field initializer.
 *
 * @param o the instance to copy the settings from
 */
public ClusteringNearestBetter(ClusteringNearestBetter o) {
    // threshold configuration
    this.thresholdMultipleOfMeanDist = o.thresholdMultipleOfMeanDist;
    this.absoluteDistThreshold = o.absoluteDistThreshold;
    this.meanDistFactor = o.meanDistFactor;
    this.currentMeanDistance = o.currentMeanDistance;

    // clustering configuration
    this.minimumGroupSize = o.minimumGroupSize;
    this.testConvergingSpeciesOnBestOnly = o.testConvergingSpeciesOnBestOnly;

    // collaborators: the metric is shared, the comparator is cloned
    this.metric = o.metric;
    this.comparator = (AbstractEAIndividualComparator) o.comparator.clone();
}
|
|
|
|
/**
 * Create an instance with either an adaptive or an absolute threshold.
 * The second argument is interpreted as the mean distance factor in the
 * adaptive case and as the absolute distance threshold otherwise.
 *
 * @param adaptive          if true, use a threshold derived from the mean distance
 * @param thresholdOrFactor mean distance factor (adaptive) or absolute threshold (non-adaptive)
 */
public ClusteringNearestBetter(boolean adaptive, double thresholdOrFactor) {
    setAdaptiveThreshold(adaptive);
    if (!adaptive) {
        setDistThreshold(thresholdOrFactor);
    } else {
        setMeanDistFactor(thresholdOrFactor);
    }
}
|
|
|
|
public void hideHideable() {
|
|
setAdaptiveThreshold(isAdaptiveThreshold());
|
|
}
|
|
|
|
public ParameterControlManager getParamControl() {
|
|
return paramControl;
|
|
}
|
|
|
|
public ParamAdaption[] getParameterControl() {
|
|
return paramControl.getSingleAdapters();
|
|
}
|
|
|
|
public void setParameterControl(ParamAdaption[] paramControl) {
|
|
this.paramControl.setSingleAdapters(paramControl);
|
|
}
|
|
|
|
/** This method allows you to make a deep clone of
|
|
* the object
|
|
* @return the deep clone
|
|
*/
|
|
@Override
|
|
public Object clone() {
|
|
return (Object) new ClusteringNearestBetter(this);
|
|
}
|
|
|
|
/**
|
|
* Try to associate a set of loners with a given set of species. Return a list
|
|
* of indices assigning loner i with species j for all loners. If no species can
|
|
* be associated, -1 is returned as individual entry.
|
|
* Note that the last cluster threshold is used which may have depended on the last
|
|
* generation.
|
|
*
|
|
* @param loners
|
|
* @param species
|
|
* @return associative list matching loners to species.
|
|
*/
|
|
@Override
|
|
public int[] associateLoners(Population loners, Population[] species, Population referenceSet) {
|
|
// Pair<Integer,Double>[][] closestPerSpecList = new Pair[loners.size()][species.length];
|
|
int[] res = new int[loners.size()];
|
|
getRefData(referenceSet, loners);
|
|
for (int l=0; l<loners.size(); l++) { // for each loner: search closest better indy for each species.
|
|
int nearestBetterSpeciesID=-1;
|
|
double nearestBetterDist=-1;
|
|
|
|
for (int spI=0; spI<species.length; spI++) { // loop species
|
|
boolean lonerIndyIsBest = (comparator.compare(loners.getEAIndividual(l), species[spI].getBestEAIndividual())<=0);
|
|
if (lonerIndyIsBest) { // if the loner is the best, check the distance to the best indy within the species
|
|
double curDist = metric.distance(loners.getEAIndividual(l), species[spI].getBestEAIndividual());
|
|
//Population.getClosestFarthestIndy(loners.getEAIndividual(l), species[spI], metric, true).tail();
|
|
if (nearestBetterDist<0 || (curDist < nearestBetterDist)) {
|
|
// System.out.println("Loner is better " + loners.getEAIndividual(l) + " than best " + species[spI].getBestEAIndividual() + ", dist is "+curDist);
|
|
nearestBetterSpeciesID=spI;
|
|
nearestBetterDist = curDist;
|
|
}
|
|
} else {
|
|
for (int i=0; i<species[spI].size(); i++) { //loop indies in species
|
|
double curDist = metric.distance(loners.getEAIndividual(l), species[spI].getEAIndividual(i));
|
|
boolean specIndyIsBetter = (comparator.compare(species[spI].getEAIndividual(i), loners.getEAIndividual(l))<0);
|
|
if (specIndyIsBetter && (nearestBetterDist<0 || (curDist < nearestBetterDist))) {
|
|
// if the found indy species is better than the loner, it is a possible cluster.
|
|
// store the closest possible cluster.
|
|
// nearestBetterIndyID = i;
|
|
nearestBetterSpeciesID=spI;
|
|
nearestBetterDist = curDist;
|
|
}
|
|
}
|
|
}
|
|
// if (comparator.compare(species[spI].getEAIndividual(closestID), loners.getEAIndividual(l))<0) {
|
|
//
|
|
// if (closestClustDist<0 || (closestDist < closestClustDist)) {
|
|
// closestClustDist = closestDist;
|
|
// closestClustID = spI;
|
|
// }
|
|
// }
|
|
} // end loop species
|
|
if (nearestBetterDist < currentDistThreshold()) {
|
|
// System.out.println("dist is " + nearestBetterDist + ", assigning spec " + nearestBetterSpeciesID);
|
|
res[l]=nearestBetterSpeciesID;
|
|
} else {
|
|
res[l]=-1;
|
|
}
|
|
} // end for all loners
|
|
return res;
|
|
}
|
|
|
|
// public boolean belongsToSpecies(AbstractEAIndividual indy,
|
|
// Population species, Population pop) {
|
|
// // this sucks since every time the full clustering must be performed...
|
|
// return false;
|
|
//// if (thresholdMultipleOfMeanDist) currentMeanDistance = pop.getPopulationMeasures(metric)[0];
|
|
//// ArrayList<AbstractEAIndividual> sorted = pop.getSorted(comparator);
|
|
//// for (int i=sorted.size()-1; i>=1; i--) { // start with worst indies
|
|
//// if (sorted.get(i).getIndyID()==indy.getIndyID()) { // found the desired indy.
|
|
//// int uplink=-1; double uplinkDist = -1;
|
|
//// for (int j=i-1; j>=0; j--) { // search nearest better indy
|
|
//// double curDist = metric.distance(sorted.get(i), sorted.get(j));
|
|
//// if (uplinkDist<0 || (curDist < uplinkDist)) {
|
|
//// uplink = j;
|
|
//// uplinkDist = curDist;
|
|
//// }
|
|
//// }
|
|
//// // if it belongs to species spec and the distance is below threshold, be happy and return true
|
|
//// if (uplink==-1) { // it is the best individual?
|
|
//// return false;
|
|
//// }
|
|
//// if (uplinkDist > currentDistThreshold()) return false;
|
|
//// else {
|
|
//// return (species.isMemberByID(pop.getEAIndividual(uplink)));
|
|
//// }
|
|
//// }
|
|
//// }
|
|
//// // size <= 1?
|
|
//// return false;
|
|
// }
|
|
|
|
/**
|
|
* Perform one clustering step to measure the mean distance to the
|
|
* nearest better individual (only if used).
|
|
*/
|
|
@Override
|
|
public String initClustering(Population pop) {
|
|
if (this.isAdaptiveThreshold()) {
|
|
ArrayList<AbstractEAIndividual> sorted = pop.getSorted(comparator);
|
|
if (uplink==null || (uplink.length!=pop.size())) {
|
|
uplink = new int[pop.size()];
|
|
} // parent index of all indys
|
|
if (uplinkDist==null || (uplinkDist.length!=pop.size())) {
|
|
uplinkDist = new double[pop.size()];
|
|
} // parent distance for all indys
|
|
if (children==null || (children.length!=pop.size())) {
|
|
children = new Vector[pop.size()];
|
|
} // list of children for all indies
|
|
else if (children.length==pop.size()) {
|
|
for (int i=0; i<pop.size(); i++) {
|
|
children[i]=null;
|
|
} }
|
|
currentMeanDistance = createClusterTreeFromSortedPop(sorted);
|
|
if (TRACE) {
|
|
pop.putData(initializedForKey, pop.hashCode());
|
|
}
|
|
pop.putData(initializedRefData, currentMeanDistance);
|
|
return initializedRefData;
|
|
} else {
|
|
return null;
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public Population[] cluster(Population pop, Population referenceSet) {
|
|
if (pop.isEmpty()) {
|
|
return new Population[]{pop.cloneWithoutInds()};
|
|
}
|
|
ArrayList<AbstractEAIndividual> sorted = pop.getSorted(comparator);
|
|
if (uplink==null || (uplink.length!=pop.size())) {
|
|
uplink = new int[pop.size()];
|
|
} // parent index of all indys
|
|
if (uplinkDist==null || (uplinkDist.length!=pop.size())) {
|
|
uplinkDist = new double[pop.size()];
|
|
} // parent distance for all indys
|
|
if (children==null || (children.length!=pop.size())) {
|
|
children = new Vector[pop.size()];
|
|
} // list of children for all indies
|
|
else if (children.length==pop.size()) {
|
|
for (int i=0; i<pop.size(); i++) {
|
|
children[i]=null;
|
|
} }
|
|
|
|
if (TRACE) {
|
|
System.out.println("Current pop measures: " + BeanInspector.toString(pop.getPopulationMeasures(metric)[0]));
|
|
System.out.println("Current threshold: " + currentDistThreshold());
|
|
}
|
|
if (isAdaptiveThreshold()) { // test if there was a valid initialization step
|
|
if (!getRefData(referenceSet, pop)) {
|
|
currentMeanDistance=createClusterTreeFromSortedPop(sorted);
|
|
}
|
|
else {
|
|
createClusterTreeFromSortedPop(sorted);
|
|
}
|
|
} else {
|
|
createClusterTreeFromSortedPop(sorted);
|
|
}
|
|
|
|
// now go through indies starting with best.
|
|
// Add all children which are closer than threshold and recursively their children to a cluster.
|
|
// Mark them as clustered and start with the next best unclustered.
|
|
int current = 0; // top indy is first
|
|
boolean[] clustered = new boolean[pop.size()];
|
|
LinkedList<Population> allClusters = new LinkedList<Population>();
|
|
while (current<sorted.size()) {
|
|
Population currentClust = pop.cloneWithoutInds();
|
|
currentClust.add(sorted.get(current));
|
|
clustered[current]=true;
|
|
addChildren(current, clustered, sorted, currentClust);
|
|
// currentClust now recursively contains all children - the cluster is complete
|
|
// now jump to the next best unclustered indy
|
|
allClusters.add(currentClust);
|
|
while (current<sorted.size() && (clustered[current])) current++;
|
|
}
|
|
|
|
ArrayList<Population> finalClusts = new ArrayList<Population>(allClusters.size());
|
|
finalClusts.add(pop.cloneWithoutInds());
|
|
for (Population clust : allClusters) {
|
|
if (clust.size()<minimumGroupSize) { // add to loner population
|
|
finalClusts.get(0).addPopulation(clust);
|
|
} else { // add to cluster list
|
|
finalClusts.add(clust);
|
|
}
|
|
}
|
|
Population[] finalArr = new Population[finalClusts.size()];
|
|
return finalClusts.toArray(finalArr);
|
|
}
|
|
|
|
/**
|
|
* Get the reference data from a population instance that should have been initialized.
|
|
* If the reference set is null, the backup is treated as reference set.
|
|
*
|
|
* @param referenceSet
|
|
* @param backup
|
|
*/
|
|
private boolean getRefData(Population referenceSet, Population backup) {
|
|
if (referenceSet==null) {
|
|
referenceSet=backup;
|
|
}
|
|
Double refDat = (Double)referenceSet.getData(initializedRefData);
|
|
if (refDat!=null) {
|
|
if (TRACE) { // check hash
|
|
Integer hash=(Integer)referenceSet.getData(initializedForKey);
|
|
if ((hash==null) || (hash!=referenceSet.hashCode())) {
|
|
System.err.println("Warning, missing initialization before clustering for ClusteringNearestBetter!");
|
|
return false;
|
|
}
|
|
}
|
|
currentMeanDistance = refDat.doubleValue();
|
|
return true;
|
|
} else {
|
|
System.err.println("Warning, missing reference data - forgot reference set initialization? " + this.getClass());
|
|
return false;
|
|
}
|
|
}
|
|
|
|
private double createClusterTreeFromSortedPop(ArrayList<AbstractEAIndividual> sorted) {
|
|
double edgeLengthSum=0; int edgeCnt = 0;
|
|
for (int i=sorted.size()-1; i>=1; i--) { // start with worst indies
|
|
// search for closest indy which is better
|
|
uplink[i]=-1;
|
|
uplinkDist[i] = -1;
|
|
for (int j=i-1; j>=0; j--) { // look at all which are better
|
|
// if the j-th indy is closer, reset the index
|
|
double curDist = metric.distance(sorted.get(i), sorted.get(j));
|
|
if (uplinkDist[i]<0 || (curDist < uplinkDist[i])) {
|
|
uplink[i] = j;
|
|
uplinkDist[i] = curDist;
|
|
}
|
|
}
|
|
// the closest best for indy i is now known. connect them in the graph.
|
|
if (children[uplink[i]]==null) {
|
|
children[uplink[i]]=new Vector<Integer>();
|
|
}
|
|
children[uplink[i]].add(i);
|
|
edgeLengthSum+=uplinkDist[i];
|
|
edgeCnt++;
|
|
}
|
|
// currentMeanDistance = pop.getPopulationMeasures(metric)[0];
|
|
return edgeLengthSum/((double)edgeCnt); // the average edge length
|
|
}
|
|
|
|
/**
|
|
* Add the next layer of children to the clustered population.
|
|
*
|
|
* @param current
|
|
* @param clustered
|
|
* @param sorted
|
|
* @param currentClust
|
|
*/
|
|
private void addChildren(int current, boolean[] clustered, ArrayList<AbstractEAIndividual> sorted, Population currentClust) {
|
|
if (children[current]!=null && (children[current].size()>0)) {
|
|
for (int i=0; i<children[current].size(); i++) {
|
|
if ((!clustered[children[current].get(i)]) && (uplinkDist[children[current].get(i)] < currentDistThreshold())) {
|
|
// the first child is not clustered yet and below distance threshold.
|
|
// so add it to the cluster, mark it, and proceed recursively.
|
|
currentClust.add(sorted.get(children[current].get(i)));
|
|
clustered[children[current].get(i)]=true;
|
|
if (TRACE) {
|
|
System.out.println("Assigned " + current);
|
|
}
|
|
addChildren(children[current].get(i), clustered, sorted, currentClust);
|
|
} else {
|
|
if (TRACE) {
|
|
System.out.println("Not assigned " + current);
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
// nothing more to do
|
|
}
|
|
}
|
|
|
|
private double currentDistThreshold() {
|
|
if (thresholdMultipleOfMeanDist) {
|
|
return meanDistFactor*currentMeanDistance;
|
|
}
|
|
else {
|
|
return absoluteDistThreshold;
|
|
}
|
|
}
|
|
|
|
/** This method allows you to decide if two species converge.
|
|
* @param species1 The first species.
|
|
* @param species2 The second species.
|
|
* @return True if species converge, else False.
|
|
*/
|
|
@Override
|
|
public boolean mergingSpecies(Population species1, Population species2, Population referenceSet) {
|
|
getRefData(referenceSet, species1);
|
|
if (testConvergingSpeciesOnBestOnly) {
|
|
if (this.metric.distance(species1.getBestEAIndividual(), species2.getBestEAIndividual()) < this.currentDistThreshold()) {
|
|
return true;
|
|
}
|
|
else {
|
|
return false;
|
|
}
|
|
} else {
|
|
Population tmpPop = new Population(species1.size()+species2.size());
|
|
tmpPop.addPopulation(species1);
|
|
tmpPop.addPopulation(species2);
|
|
if (this.cluster(tmpPop, referenceSet).length <= 2) {
|
|
return true;
|
|
}
|
|
else {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
public static String globalInfo() {
|
|
return "A tree is produced by assigning each individual the closest individual with better fitness. Connections with a distance above a certain threshold are cut. After that, each interconnected subtree forms a cluster.";
|
|
}
|
|
|
|
public String metricTipText() {
|
|
return "The metric to use during clustering.";
|
|
}
|
|
public InterfaceDistanceMetric getMetric() {
|
|
return metric;
|
|
}
|
|
public void setMetric(InterfaceDistanceMetric metric) {
|
|
this.metric = metric;
|
|
}
|
|
|
|
public String distThresholdTipText() {
|
|
return "In the non-adaptive case the absolute threshold below which clusters are connected.";
|
|
}
|
|
public double getDistThreshold() {
|
|
return absoluteDistThreshold;
|
|
}
|
|
public void setDistThreshold(double distThreshold) {
|
|
this.absoluteDistThreshold = distThreshold;
|
|
}
|
|
|
|
public String minimumGroupSizeTipText() {
|
|
return "Minimum group size that makes an own cluster.";
|
|
}
|
|
public int getMinimumGroupSize() {
|
|
return minimumGroupSize;
|
|
}
|
|
public void setMinimumGroupSize(int minimumGroupSize) {
|
|
this.minimumGroupSize = minimumGroupSize;
|
|
}
|
|
|
|
public String comparatorTipText() {
|
|
return "Define the comparator by which the population is sorted before clustering.";
|
|
}
|
|
public AbstractEAIndividualComparator getComparator() {
|
|
return comparator;
|
|
}
|
|
// public void setComparator(AbstractEAIndividualComparator comparator) {
|
|
// this.comparator = comparator;
|
|
// }
|
|
|
|
public String adaptiveThresholdTipText() {
|
|
return "Activate adaptive threshold which is calculated from mean distance in the population and a constant factor.";
|
|
}
|
|
public boolean isAdaptiveThreshold() {
|
|
return thresholdMultipleOfMeanDist;
|
|
}
|
|
public void setAdaptiveThreshold(boolean thresholdMultipleOfMeanDist) {
|
|
this.thresholdMultipleOfMeanDist = thresholdMultipleOfMeanDist;
|
|
GenericObjectEditor.setHideProperty(this.getClass(), "meanDistFactor", !thresholdMultipleOfMeanDist);
|
|
GenericObjectEditor.setHideProperty(this.getClass(), "distThreshold", thresholdMultipleOfMeanDist);
|
|
}
|
|
|
|
public String meanDistFactorTipText() {
|
|
return "Factor producing the distance threshold from population mean distance.";
|
|
}
|
|
public double getMeanDistFactor() {
|
|
return meanDistFactor;
|
|
}
|
|
public void setMeanDistFactor(double meanDistFactor) {
|
|
this.meanDistFactor = meanDistFactor;
|
|
}
|
|
|
|
public String testConvergingSpeciesOnBestOnlyTipText() {
|
|
return "Only the best individuals may be compared when testing whether to merge two species.";
|
|
}
|
|
public boolean isTestConvergingSpeciesOnBestOnly() {
|
|
return testConvergingSpeciesOnBestOnly;
|
|
}
|
|
public void SetTestConvergingSpeciesOnBestOnly(
|
|
boolean testConvergingSpeciesOnBestOnly) {
|
|
this.testConvergingSpeciesOnBestOnly = testConvergingSpeciesOnBestOnly;
|
|
}
|
|
|
|
}
|