at.tuwien.ifs.somtoolbox.clustering
Class WardClustering<E>

java.lang.Object
  extended by at.tuwien.ifs.somtoolbox.clustering.WardClustering<E>
All Implemented Interfaces:
ClusteringAlgorithm<E>, HierarchicalClusteringAlgorithm<E>

public class WardClustering<E>
extends Object
implements HierarchicalClusteringAlgorithm<E>

Version:
$Id: WardClustering.java 3932 2010-11-09 16:56:38Z mayer $
Author:
Rudolf Mayer

Nested Class Summary
(package private)  class WardClustering.ClusterThread
           
 
Field Summary
protected  double[] clusterLevelMergeCosts
           
protected  ArrayList<HierarchicalCluster<E>>[] clusterLevels
           
protected  List<HierarchicalCluster<E>> clusters
          Stores the clusters; starts from being a list of clusters containing only one element (n elements), up to a final set of clusters; there will be at least one single cluster containing all elements (if the targetSize or threshold permit that), or potentially any other number m <= n.
protected  boolean debug
          Turn on some debugging
private  CountDownLatch doneSignal
           
private  ThreadPoolExecutor e
           
protected  ClusterElementFunctions<E> elementDistance
           
private  int numberOfCPUs
           
protected  int targetSize
           
protected  double threshold
           
 
Constructor Summary
WardClustering(ClusterElementFunctions<E> elementDistance)
          Do full clustering tree.
WardClustering(ClusterElementFunctions<E> elementDistance, double threshold)
          Do clustering with a specified threshold to stop building the tree.
WardClustering(ClusterElementFunctions<E> elementDistance, int targetSize)
          Do clustering with a specified target number of clusters to reach.
 
Method Summary
 HierarchicalCluster<E> clusterStep(List<HierarchicalCluster<E>> clusters)
           
 List<HierarchicalCluster<E>> doCluster(List<E> data)
           
 double ess(Cluster<E> cluster)
           
private  HierarchicalCluster<E> findOptiomalClusterMerger(List<HierarchicalCluster<E>> clusters)
           
private  HierarchicalCluster<E> findOptiomalClusterMerger(List<HierarchicalCluster<E>> clusters, int startX, int startY, int endX, int endY)
           
 List<HierarchicalCluster<E>> getClusters()
           
 HashMap<Integer,ArrayList<HierarchicalCluster<E>>> getClustersAtLevel()
          Returns the clusters at all levels
 ArrayList<HierarchicalCluster<E>> getClustersAtLevel(int num)
          Returns the clustering at a certain level, where the level equals the number of clusters
 ArrayList<HierarchicalCluster<E>> getClustersByRelativeThreshold(double percent)
          Returns the clustering at a certain level indicated by the relative merge cost for that level, compared to the costs of merging all data items
 ArrayList<HierarchicalCluster<E>> getClustersByThreshold(double threshold)
          Returns the clustering at a certain level indicated by the merge cost for that level
private  Indices2D[] getIndices(List<HierarchicalCluster<E>> clusters)
          find matrix indices for starting & ending the symmetrical matrix indices
 double getInitialMinESS(List<HierarchicalCluster<E>> clusters)
           
 prefuse.data.Tree getPrefuseTree()
           
protected  void init(List<E> data)
           
 void setDebug(boolean debug)
           
 void setNumberOfCPUs(int numberOfCPUs)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

numberOfCPUs

private int numberOfCPUs

doneSignal

private CountDownLatch doneSignal

e

private ThreadPoolExecutor e

clusterLevels

protected ArrayList<HierarchicalCluster<E>>[] clusterLevels

clusterLevelMergeCosts

protected double[] clusterLevelMergeCosts

elementDistance

protected ClusterElementFunctions<E> elementDistance

threshold

protected double threshold

targetSize

protected int targetSize

clusters

protected List<HierarchicalCluster<E>> clusters
Stores the clusters; starts from being a list of clusters containing only one element (n elements), up to a final set of clusters; there will be at least one single cluster containing all elements (if the targetSize or threshold permit that), or potentially any other number m <= n. Intermediate results, with numbers of clusters l, m < l < n, are stored in #clusterLevels.


debug

protected boolean debug
Turn on some debugging

Constructor Detail

WardClustering

public WardClustering(ClusterElementFunctions<E> elementDistance)
Do full clustering tree.


WardClustering

public WardClustering(ClusterElementFunctions<E> elementDistance,
                      double threshold)
Do clustering with a specified threshold to stop building the tree.


WardClustering

public WardClustering(ClusterElementFunctions<E> elementDistance,
                      int targetSize)
Do clustering with a specified target number of clusters to reach.

Method Detail

getPrefuseTree

public prefuse.data.Tree getPrefuseTree()
Specified by:
getPrefuseTree in interface HierarchicalClusteringAlgorithm<E>

doCluster

public List<HierarchicalCluster<E>> doCluster(List<E> data)
Specified by:
doCluster in interface ClusteringAlgorithm<E>

init

protected void init(List<E> data)

getInitialMinESS

public double getInitialMinESS(List<HierarchicalCluster<E>> clusters)

ess

public double ess(Cluster<E> cluster)

clusterStep

public HierarchicalCluster<E> clusterStep(List<HierarchicalCluster<E>> clusters)

findOptiomalClusterMerger

private HierarchicalCluster<E> findOptiomalClusterMerger(List<HierarchicalCluster<E>> clusters)

findOptiomalClusterMerger

private HierarchicalCluster<E> findOptiomalClusterMerger(List<HierarchicalCluster<E>> clusters,
                                                         int startX,
                                                         int startY,
                                                         int endX,
                                                         int endY)

getClustersByThreshold

public ArrayList<HierarchicalCluster<E>> getClustersByThreshold(double threshold)
                                                         throws SOMToolboxException
Returns the clustering at a certain level indicated by the merge cost for that level

Throws:
SOMToolboxException

getClustersByRelativeThreshold

public ArrayList<HierarchicalCluster<E>> getClustersByRelativeThreshold(double percent)
                                                                 throws SOMToolboxException
Returns the clustering at a certain level indicated by the relative merge cost for that level, compared to the costs of merging all data items

Throws:
SOMToolboxException

getClustersAtLevel

public ArrayList<HierarchicalCluster<E>> getClustersAtLevel(int num)
                                                     throws SOMToolboxException
Returns the clustering at a certain level, where the level equals the number of clusters

Specified by:
getClustersAtLevel in interface HierarchicalClusteringAlgorithm<E>
Throws:
SOMToolboxException

getClustersAtLevel

public HashMap<Integer,ArrayList<HierarchicalCluster<E>>> getClustersAtLevel()
                                                                      throws SOMToolboxException
Description copied from interface: HierarchicalClusteringAlgorithm
Returns the clusters at all levels

Specified by:
getClustersAtLevel in interface HierarchicalClusteringAlgorithm<E>
Throws:
SOMToolboxException

setDebug

public void setDebug(boolean debug)

setNumberOfCPUs

public void setNumberOfCPUs(int numberOfCPUs)

getIndices

private Indices2D[] getIndices(List<HierarchicalCluster<E>> clusters)
find matrix indices for starting & ending the symmetrical matrix indices


getClusters

public List<HierarchicalCluster<E>> getClusters()