|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
at.tuwien.ifs.somtoolbox.data.AbstractSOMLibSparseInputData
at.tuwien.ifs.somtoolbox.data.SOMLibSparseInputData
public class SOMLibSparseInputData
Implements InputData
based on a SOMLib Input Vector File.
Field Summary | |
---|---|
private boolean |
containsMissingValues
|
protected cern.colt.matrix.DoubleMatrix2D |
data
The actual data. |
static boolean |
DEFAULT_NORMALISED
|
static int |
DEFAULT_NUM_CACHE_BLOCKS
|
static int |
DEFAULT_RANDOM_SEED
|
static boolean |
DEFAULT_SPARSE
|
static String |
INPUT_VECTOR_FILE_FORMAT_CORRUPT_MESSAGE
|
protected int[] |
nonZeros
Counts how many of the feature values are not zero; stores an int value for each vector in the input data. |
protected boolean |
sparse
|
private int |
ydim
|
Fields inherited from class at.tuwien.ifs.somtoolbox.data.AbstractSOMLibSparseInputData |
---|
classInfo, content_subtype, content_type, dataNames, dim, ERROR_MESSAGE_FILE_FORMAT_CORRUPT, featureMatrixCols, featureMatrixRows, isNormalized, meanVector, mqe0, nameCache, numVectors, rand, source, templateVector |
Fields inherited from interface at.tuwien.ifs.somtoolbox.data.InputData |
---|
inputFileNameSuffix, MISSING_VALUE |
Constructor Summary | |
---|---|
protected |
SOMLibSparseInputData()
|
protected |
SOMLibSparseInputData(cern.colt.matrix.DoubleMatrix2D data,
String[] dataNames,
boolean norm,
Random rand,
TemplateVector tv,
SOMLibClassInformation clsInfo)
Constructor intended for subset generation. |
|
SOMLibSparseInputData(InputDatum[] inputData,
SOMLibClassInformation classInfo)
Constructor intended for generated synthetic data. |
|
SOMLibSparseInputData(String vectorFileName)
Uses default values for sparsity (true), normalisation (true), cache blocks (1) and seed (7). |
|
SOMLibSparseInputData(String vectorFileName,
boolean sparse,
boolean norm,
int numCacheBlocks,
long seed)
|
|
SOMLibSparseInputData(String vectorFileName,
String templateFileName)
|
|
SOMLibSparseInputData(String vectorFileName,
String templateFileName,
boolean sparse,
boolean norm,
int numCacheBlocks,
long seed)
|
|
SOMLibSparseInputData(String vectorFileName,
String templateFileName,
String classInfoFileName)
|
|
SOMLibSparseInputData(String vectorFileName,
String templateFileName,
String classInfoFileName,
boolean sparse,
boolean norm,
int numCacheBlocks,
long seed)
|
Method Summary | |
---|---|
protected void |
addInstance(int index,
String label)
|
static long |
getDimensionality(String vectorFileName)
|
InputDatum |
getInputDatum(int index)
Get an input datum with a specified index. |
double[] |
getInputVector(int d)
Get the vector for the input datum of the specified index |
double |
getValue(int x,
int y)
Returns the value of the y-th feature of input vector x. |
void |
init(boolean sparse,
boolean norm,
long seed)
|
protected void |
initDataStructures(boolean sparse)
|
private void |
initFromExistingData(cern.colt.matrix.DoubleMatrix2D data,
String[] dataNames,
boolean norm,
Random rand,
TemplateVector tv,
SOMLibClassInformation clsInfo)
|
protected void |
initMatrix(boolean sparse)
|
static void |
main(String[] args)
Method for stand-alone execution, prints useful information about the input data. |
double |
mqe0(DistanceMetric metric)
Calculates the mean quantisation error of the top-level unit. |
protected static BufferedReader |
openFile(String vectorFileName)
|
protected double |
parseDouble(String s)
|
protected void |
processLine(int index,
String[] lineElements)
Process a single line of the input vector file. |
protected void |
readVectorFile(String vectorFileName,
boolean sparse)
Reads the input data from the given file, which has to follow the Input Vector File specification. |
void |
setLabel(int index,
String name)
|
protected void |
setMatrixValue(int row,
int column,
double value)
|
InputData |
subset(String[] names)
Gets a subset of this input data set. |
Methods inherited from class java.lang.Object |
---|
clone, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static final String INPUT_VECTOR_FILE_FORMAT_CORRUPT_MESSAGE
public static final boolean DEFAULT_NORMALISED
public static final int DEFAULT_NUM_CACHE_BLOCKS
public static final int DEFAULT_RANDOM_SEED
public static final boolean DEFAULT_SPARSE
private boolean containsMissingValues
protected int[] nonZeros
protected boolean sparse
protected cern.colt.matrix.DoubleMatrix2D data
private int ydim
Constructor Detail |
---|
public SOMLibSparseInputData(InputDatum[] inputData, SOMLibClassInformation classInfo)
protected SOMLibSparseInputData(cern.colt.matrix.DoubleMatrix2D data, String[] dataNames, boolean norm, Random rand, TemplateVector tv, SOMLibClassInformation clsInfo)
public SOMLibSparseInputData(String vectorFileName)
Uses default values for sparsity (true), normalisation (true), cache blocks (1) and seed (7).
public SOMLibSparseInputData(String vectorFileName, boolean sparse, boolean norm, int numCacheBlocks, long seed)
public SOMLibSparseInputData(String vectorFileName, String templateFileName)
public SOMLibSparseInputData(String vectorFileName, String templateFileName, boolean sparse, boolean norm, int numCacheBlocks, long seed)
public SOMLibSparseInputData(String vectorFileName, String templateFileName, String classInfoFileName) throws SOMToolboxException
SOMToolboxException
public SOMLibSparseInputData(String vectorFileName, String templateFileName, String classInfoFileName, boolean sparse, boolean norm, int numCacheBlocks, long seed) throws SOMToolboxException
SOMToolboxException
protected SOMLibSparseInputData()
Method Detail |
---|
private void initFromExistingData(cern.colt.matrix.DoubleMatrix2D data, String[] dataNames, boolean norm, Random rand, TemplateVector tv, SOMLibClassInformation clsInfo)
public void init(boolean sparse, boolean norm, long seed)
public InputDatum getInputDatum(int index)
InputData
index
- the index of the input datum.
public double[] getInputVector(int d)
InputData
public double getValue(int x, int y)
InputData
public double mqe0(DistanceMetric metric)
InputData
metric
- the metric to use for distance calculation.
protected void readVectorFile(String vectorFileName, boolean sparse)
AbstractSOMLibSparseInputData.meanVector
and creates the
AbstractSOMLibSparseInputData.nameCache
for faster index search.
vectorFileName
- the name of the input vector file.
protected void initDataStructures(boolean sparse)
protected void initMatrix(boolean sparse)
protected static BufferedReader openFile(String vectorFileName)
protected void processLine(int index, String[] lineElements) throws Exception
index
- the line index
lineElements
- the line elements, split by the delimiters
Exception
protected double parseDouble(String s)
protected void setMatrixValue(int row, int column, double value)
protected void addInstance(int index, String label)
public InputData subset(String[] names)
InputData
names
- the label names of the desired subset data.
public static void main(String[] args) throws Exception
Exception
public static long getDimensionality(String vectorFileName)
public void setLabel(int index, String name)
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |