szte.datamining
Class DataHandler

java.lang.Object
  extended by szte.datamining.DataHandler
All Implemented Interfaces:
java.lang.Cloneable
Direct Known Subclasses:
MalletDataHandler

public abstract class DataHandler
extends java.lang.Object
implements java.lang.Cloneable


Constructor Summary
DataHandler()
           
 
Method Summary
abstract  void addDataHandler(DataHandler dh)
           
abstract  ClassificationResult classifyDataset(Model model)
           
 DataHandler createEmptyDataHandler()
           
abstract  void createNewDataset(java.util.Map<java.lang.String,java.lang.Object> parameters)
          creates a new empty dataset using the underlying native datatype
abstract  DataHandler createSubset(java.util.Set<java.lang.String> instancesSelected, java.util.Set<java.lang.String> featuresSelected)
          creates a subset of the dataset where only the given instances and/or features are present
abstract  java.lang.Boolean getBinaryValue(java.lang.String instanceId, java.lang.String featureName)
           
abstract  int getFeatureCount()
           
abstract  java.util.Set<java.lang.String> getFeatureNames()
           
abstract  java.util.List<java.lang.String> getFeatureValues(java.lang.String featureName)
           
abstract  int getInstanceCount()
           
abstract  java.util.Set<java.lang.String> getInstanceIds()
           
abstract
<T extends java.lang.Comparable<?>>
T
getLabel(java.lang.String instanceId)
           
abstract  java.lang.String getNominalValue(java.lang.String instanceId, java.lang.String featureName)
           
abstract  java.lang.Double getNumericValue(java.lang.String instanceId, java.lang.String featureName)
           
abstract
<T extends java.lang.Comparable<?>>
T
getValue(java.lang.String instanceId, java.lang.String featureName)
           
abstract  void initClassifier(java.util.Map<java.lang.String,java.lang.Object> parameters)
           
abstract  void loadDataset(java.lang.String source)
          loads a native dataset from the given source
abstract  void removeFeature(java.lang.String featureName)
           
abstract  void removeInstance(java.lang.String instanceId)
           
abstract  void saveDataset(java.lang.String target)
          saves the current dataset to the given target
abstract  void setBinaryValue(java.lang.String instanceId, java.lang.String featureName, java.lang.Boolean value)
          Sets the value of a binary feature
abstract  void setBinaryValue(java.lang.String instanceId, java.lang.String featureName, java.lang.Boolean value, boolean ternal)
           
abstract  void setDefaultFeatureValue(java.lang.String featureName, java.lang.String value)
           
abstract
<T extends java.lang.Comparable<?>>
void
setLabel(java.lang.String instanceId, T label)
          sets the class label of the given instance
abstract  void setNominalValue(java.lang.String instanceId, java.lang.String featureName, java.lang.String value)
          Sets the value of a nominal feature; if this is a new nominal value, it is added to the dataset
abstract  void setNumericValue(java.lang.String instanceId, java.lang.String featureName, double value)
          Sets the value of a numeric feature
abstract
<T extends java.lang.Comparable<?>>
void
setValue(java.lang.String instanceId, java.lang.String featureName, T value)
          Sets the value of a feature. The type of the feature is given by the prefix of the feature name: b_ binary feature, n_ numeric feature, m_ nominal feature, t_ ternal feature
abstract  Model trainClassifier()
           
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

DataHandler

public DataHandler()
Method Detail

createNewDataset

public abstract void createNewDataset(java.util.Map<java.lang.String,java.lang.Object> parameters)
creates a new empty dataset using the underlying native datatype


createEmptyDataHandler

public DataHandler createEmptyDataHandler()

loadDataset

public abstract void loadDataset(java.lang.String source)
                          throws DataMiningException
loads a native dataset from the given source

Parameters:
source - a String denoting the source of the native dataset; it contains an implementation-dependent resource string for the native dataset
Throws:
DataMiningException

saveDataset

public abstract void saveDataset(java.lang.String target)
saves the current dataset to the given target

Parameters:
target - a String denoting the target of the native dataset; it contains an implementation-dependent resource string for the native dataset

setLabel

public abstract <T extends java.lang.Comparable<?>> void setLabel(java.lang.String instanceId,
                                                                  T label)
sets the class label of the given instance


getLabel

public abstract <T extends java.lang.Comparable<?>> T getLabel(java.lang.String instanceId)
Returns:
the class label of the given instance

setNumericValue

public abstract void setNumericValue(java.lang.String instanceId,
                                     java.lang.String featureName,
                                     double value)
Sets the value of a numeric feature

Parameters:
instanceId - instance identifier
featureName - name of the feature
value - the numeric value to set for the feature

setNominalValue

public abstract void setNominalValue(java.lang.String instanceId,
                                     java.lang.String featureName,
                                     java.lang.String value)
Sets the value of a nominal feature; if this is a new nominal value, it is added to the dataset

Parameters:
instanceId - instance identifier
featureName - name of the feature
value - the nominal value to set for the feature

setBinaryValue

public abstract void setBinaryValue(java.lang.String instanceId,
                                    java.lang.String featureName,
                                    java.lang.Boolean value)
Sets the value of a binary feature

Parameters:
instanceId - instance identifier
featureName - name of the feature
value - the binary value to set for the feature

setBinaryValue

public abstract void setBinaryValue(java.lang.String instanceId,
                                    java.lang.String featureName,
                                    java.lang.Boolean value,
                                    boolean ternal)

setValue

public abstract <T extends java.lang.Comparable<?>> void setValue(java.lang.String instanceId,
                                                                  java.lang.String featureName,
                                                                  T value)
                       throws DataMiningException
Sets the value of a feature. The type of the feature is given by the prefix of the feature name: b_ binary feature, n_ numeric feature, m_ nominal feature, t_ ternal feature

Parameters:
instanceId - instance identifier
featureName - name of the feature
value - the value to set for the feature
Throws:
DataMiningException

getNumericValue

public abstract java.lang.Double getNumericValue(java.lang.String instanceId,
                                                 java.lang.String featureName)
                                          throws DataMiningException
Throws:
DataMiningException

getNominalValue

public abstract java.lang.String getNominalValue(java.lang.String instanceId,
                                                 java.lang.String featureName)
                                          throws DataMiningException
Throws:
DataMiningException

getBinaryValue

public abstract java.lang.Boolean getBinaryValue(java.lang.String instanceId,
                                                 java.lang.String featureName)
                                          throws DataMiningException
Throws:
DataMiningException

getValue

public abstract <T extends java.lang.Comparable<?>> T getValue(java.lang.String instanceId,
                                                               java.lang.String featureName)
                                                    throws DataMiningException
Throws:
DataMiningException

removeInstance

public abstract void removeInstance(java.lang.String instanceId)
                             throws DataMiningException
Throws:
DataMiningException

getInstanceIds

public abstract java.util.Set<java.lang.String> getInstanceIds()

getInstanceCount

public abstract int getInstanceCount()

removeFeature

public abstract void removeFeature(java.lang.String featureName)
                            throws DataMiningException
Throws:
DataMiningException

getFeatureNames

public abstract java.util.Set<java.lang.String> getFeatureNames()

getFeatureCount

public abstract int getFeatureCount()

getFeatureValues

public abstract java.util.List<java.lang.String> getFeatureValues(java.lang.String featureName)

setDefaultFeatureValue

public abstract void setDefaultFeatureValue(java.lang.String featureName,
                                            java.lang.String value)
                                     throws DataMiningException
Throws:
DataMiningException

initClassifier

public abstract void initClassifier(java.util.Map<java.lang.String,java.lang.Object> parameters)
                             throws DataMiningException
Throws:
DataMiningException

trainClassifier

public abstract Model trainClassifier()
                               throws DataMiningException
Throws:
DataMiningException

classifyDataset

public abstract ClassificationResult classifyDataset(Model model)
                                              throws DataMiningException
Throws:
DataMiningException

createSubset

public abstract DataHandler createSubset(java.util.Set<java.lang.String> instancesSelected,
                                         java.util.Set<java.lang.String> featuresSelected)
                                  throws DataMiningException
creates a subset of the dataset where only the given instances and/or features are present

Parameters:
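instancesSelected - the instances to keep in the subset
featuresSelected - the features to keep in the subset
Returns:
a DataHandler containing only the selected instances and/or features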
Throws:
DataMiningException

addDataHandler

public abstract void addDataHandler(DataHandler dh)
                             throws DataMiningException
Throws:
DataMiningException