I am currently trying to investigate the classification accuracy of two models on a wide dataset (79 observations × 222 features), with 4 balanced classes.
The models are:
Principal Component Analysis, Linear Discriminant Analysis (For reduction and classification)
Principal Component Analysis, Linear Discriminant Analysis (for reduction), Support Vector Machine (RBF)
This is my current code:
pca.lda.svm <- function(i, data, kernel, plotSVM = TRUE, n.pcs = 15) {
  # Classify held-out rows via PCA -> LDA -> SVM (mlr), returning both the
  # LDA-only and the SVM predictions so the two pipelines can be compared.
  #
  # Requires: MASS (lda), mlr (makeClassifTask, tuneParams, ...), ggplot2.
  #
  # Args:
  #   i:       integer indices of the test row(s) in `data`.
  #   data:    data frame; column 1 is the class label ("Genus"),
  #            remaining columns are numeric features.
  #   kernel:  "linear" or "radial" SVM kernel.
  #   plotSVM: draw the mlr learner-prediction plot? (TRUE/FALSE)
  #   n.pcs:   number of principal components kept before LDA
  #            (default 15, matching the original hard-coded value).
  #
  # Returns: list with observed labels, SVM and LDA predictions, the
  #   intermediate train/test frames, and the plot (NULL when disabled).
  message("Sampling ID(s): ", paste(i, collapse = ", "), " from data")
  if (!(kernel %in% c("linear", "radial"))) {
    # stop() already prefixes "Error:", so don't repeat it in the message
    stop("Kernel argument must be 'linear' or 'radial'", call. = FALSE)
  }
  train <- data[-i, , drop = FALSE]
  test  <- data[i, , drop = FALSE]

  # --- PCA: fit on training features only, then project both sets -------
  pca <- prcomp(train[, -1])
  keep <- seq_len(n.pcs)
  pcaTrain <- cbind(Genus = data[-i, 1],
                    as.data.frame(pca$x[, keep, drop = FALSE]))
  pcaTest <- as.data.frame(predict(pca, newdata = test))[, keep, drop = FALSE]

  # --- LDA: fit on PC scores; its discriminants become the SVM features -
  # `ldaFit` avoids masking the MASS::lda function itself.
  ldaFit <- lda(Genus ~ ., data = pcaTrain)
  ldaTrain <- cbind(Genus = data[-i, 1],
                    as.data.frame(predict(ldaFit, newdata = pcaTrain)$x))
  ldaPredict <- predict(ldaFit, newdata = pcaTest)
  ldaTest <- as.data.frame(ldaPredict$x)

  # --- SVM (mlr): leave-one-out resampled hyperparameter tuning ---------
  svmTask <- makeClassifTask(data = ldaTrain, target = "Genus")
  svmLearner <- makeLearner("classif.svm", predict.type = "response")
  svmResample <- makeResampleDesc("LOO")
  gridValues <- c(seq(0.01, 0.1, by = 0.01),
                  seq(0.2, 1, by = 0.1),
                  seq(2, 10, by = 1), 50, 100)
  if (kernel == "linear") {
    # Cost-only search space: an exhaustive grid is feasible.
    svmControl <- makeTuneControlGrid()
    svmParams <- makeParamSet(
      makeDiscreteParam("kernel", values = "linear"),
      makeDiscreteParam("cost", values = gridValues))
  } else {
    # cost x gamma grid is large; random search caps the budget at 100.
    svmControl <- makeTuneControlRandom(maxit = 100)
    svmParams <- makeParamSet(
      makeDiscreteParam("kernel", values = "radial"),
      makeDiscreteParam("cost", values = gridValues),
      makeDiscreteParam("gamma", values = gridValues))
  }
  tunedParams <- tuneParams("classif.svm", task = svmTask,
                            resampling = svmResample,
                            par.set = svmParams,
                            control = svmControl)
  tunedSVM <- setHyperPars(svmLearner, par.vals = tunedParams$x)
  modelSVM <- train(tunedSVM, svmTask)
  testSVM <- predict(modelSVM, newdata = ldaTest)

  # --- Optional decision-surface plot -----------------------------------
  # Use a distinct name instead of overwriting the `plotSVM` flag, so the
  # returned slot is NULL (not the logical FALSE) when plotting is off.
  svmPlot <- NULL
  if (isTRUE(plotSVM)) {
    svmPlot <- plotLearnerPrediction(
      learner = makeLearner("classif.svm",
                            predict.type = "response",
                            par.vals = tunedParams$x),
      task = svmTask,
      cv = 0) +
      theme_bw()
    # NOTE(review): the original appended scale_fill_manual(values = c("")),
    # which supplies a single empty colour for four classes and errors when
    # the plot is drawn. Add a 4-colour palette here if manual fills are
    # wanted, e.g. scale_fill_manual(values = c("#1b9e77", "#d95f02",
    # "#7570b3", "#e7298a")).
  }
  list("Observed Genus" = data[i, 1],
       "SVM Result" = as.character(testSVM[["data"]][["response"]]),
       "LDA Result" = as.character(ldaPredict[["class"]]),
       "test.train" = list("pcaTrain" = pcaTrain,
                           "ldaTrain" = ldaTrain,
                           "pcaTest" = pcaTest,
                           "ldaTest" = ldaTest),
       "SVM Plot" = svmPlot)
}
However, I am unsure whether I am implementing this correctly, as there doesn't seem to be much information on how to use LDA for dimensionality reduction in R. Is this a fair comparison of the two models' performance, or should I be implementing it differently (perhaps not using the predict function to generate the test-set discriminants)?