I'm currently taking a data mining class, and for one of our projects we're required to predict the class labels for an unknown data set by first building a classifier on a training data set that already provides the class labels.
We're only required to get an accuracy of 80% to get a full mark on the assignment. I have already achieved this using the J48 Decision Tree algorithm (acc=84.08%).
There is also an ongoing competition on who can get the highest accuracy (determined by a Judge system we can't see).
I have two questions:
- How can I use an ensemble method with Weka to do this?
- Is there a way to optimize the parameters for each classifier?
import java.io.*;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.*;
import weka.classifiers.trees.*;
import weka.classifiers.Evaluation;
public class CompClassifier {
// Byte stream that main opens on the predictions output file.
public static FileOutputStream Output;
// Print wrapper around Output; main writes one line per classified test instance to it.
public static PrintStream file;
/**
 * Trains a J48 decision tree on the training ARFF file and prints the predicted
 * class index of every instance in the unlabeled ARFF file, both to standard
 * output and (prefixed with "O") to the predictions file.
 *
 * <p>Both data sets go through the same pre-processing: missing values are
 * replaced and numeric attributes are normalized. Each filter is initialised
 * once on the training data and then reused for the test data, so the test
 * instances are transformed with the statistics learned from the training set.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if a file cannot be read or written, or if filtering,
 *         training or classification fails
 */
public static void main(String[] args) throws Exception {
    // Load training data; the reader is closed as soon as the file is parsed.
    Instances trainingData;
    try (Reader in = new FileReader("/Users//Weka/training.arff")) {
        trainingData = new Instances(in);
    }

    // Load the unlabeled data to classify.
    Instances testData;
    try (Reader in = new FileReader("/Users//Weka/unknown.arff")) {
        testData = new Instances(in);
    }

    // Declare the class attribute (last column) BEFORE filtering so the
    // filters leave it untouched instead of imputing/normalizing labels.
    trainingData.setClassIndex(trainingData.numAttributes() - 1);
    testData.setClassIndex(testData.numAttributes() - 1);

    // Replace missing values. setInputFormat is called only once, with the
    // training data: calling it again would reset the filter and make it
    // recompute its statistics from the test set.
    ReplaceMissingValues replace = new ReplaceMissingValues();
    replace.setInputFormat(trainingData);
    Instances trainingNoMissing = Filter.useFilter(trainingData, replace);
    Instances testNoMissing = Filter.useFilter(testData, replace);

    // Normalize numeric attributes, again reusing the range learned from
    // the training data for the test data.
    Normalize norm = new Normalize();
    norm.setInputFormat(trainingNoMissing);
    Instances processedTraining = Filter.useFilter(trainingNoMissing, norm);
    Instances processedTest = Filter.useFilter(testNoMissing, norm);

    // Build the classifier.
    J48 tree = new J48();
    tree.buildClassifier(processedTraining);

    // Write predictions; try-with-resources flushes and closes the streams
    // even if classification throws.
    try (FileOutputStream out = new FileOutputStream("/Users//Desktop/CLASSIFICATION/test.txt");
         PrintStream printer = new PrintStream(out)) {
        // Keep the public static fields pointing at the active streams.
        Output = out;
        file = printer;

        for (int i = 0; i < processedTest.numInstances(); i++) {
            weka.core.Instance currentInst = processedTest.instance(i);
            // classifyInstance returns the index of the predicted class value.
            int predictedClass = (int) tree.classifyInstance(currentInst);
            System.out.println(predictedClass);
            file.println("O"+ predictedClass);
        }
    }
}