0

I am currently trying to investigate the classification accuracy of two models on a wide dataset (79 observations × 222 features), with 4 balanced classes.

The models are:

Principal Component Analysis, Linear Discriminant Analysis (For reduction and classification)

Principal Component Analysis, Linear Discriminant Analysis (for reduction), Support Vector Machine (RBF)

This is my current code:

pca.lda.svm <- function(i, data, kernel, plotSVM = TRUE, nPC = 15){
  # Leave-group-out evaluation of a PCA -> LDA -> SVM pipeline.
  #
  # Args:
  #   i:       integer index/indices of the rows held out as the test set.
  #   data:    data.frame whose first column is the class label ("Genus")
  #            and whose remaining columns are numeric features.
  #   kernel:  "linear" or "radial"; also selects the tuning strategy
  #            (exhaustive grid vs. random search).
  #   plotSVM: draw the tuned learner's decision surface? (default TRUE)
  #   nPC:     number of principal components retained before LDA
  #            (default 15, matching the original hard-coded choice).
  #
  # Returns a list with the observed labels, the SVM and LDA predictions,
  # the intermediate train/test projections, and the plot (NULL when
  # plotSVM is FALSE).
  #
  # Requires: MASS (lda), mlr (task/learner/tuning), ggplot2 (theme_bw).
  
  print(paste("Sampling ID(s):", i, "from data"))
  
  if(!(kernel %in% c("linear", "radial"))){
    stop("Error: Kernel argument must be 'linear' or 'radial'")
  }
  
  train <- data[-i,]
  test <- data[i,]
  
  # PCA is fitted on the training rows only and then applied to the
  # held-out rows, so no test information leaks into the rotation.
  # NOTE(review): prcomp() here uses default centering and NO scaling;
  # consider scale. = TRUE if features are on different scales.
  pca <- prcomp(train[,-1])
  pcaTrain <- data.frame(Genus = train[, 1],
                         as.data.frame(pca$x[, seq_len(nPC)]))
  pcaTest <- as.data.frame(predict(object = pca, newdata = test))[, seq_len(nPC)]
  
  # 'ldaModel' avoids shadowing MASS::lda() (the original reused the name
  # 'lda' for the fitted object, masking the function).
  ldaModel <- lda(Genus ~ ., pcaTrain)
  ldaTrain <- data.frame(Genus = train[, 1],
                         as.data.frame(predict(object = ldaModel,
                                               newdata = pcaTrain)$x))
  ldaPredict <- predict(object = ldaModel, newdata = pcaTest)
  ldaTest <- as.data.frame(ldaPredict$x)
  
  svmTask <- makeClassifTask(data = ldaTrain,
                             target = "Genus")
  
  svmLearner <- makeLearner("classif.svm", predict.type = "response")
  
  # Leave-one-out resampling for hyper-parameter tuning.
  svmResample <- makeResampleDesc("LOO")
  
  gridValues <- c(seq(0.01, 0.1, by = 0.01),
                  seq(0.2, 1, by = 0.1),
                  seq(2, 10, by = 1), 50, 100)
  
  if(kernel == "linear"){
    # Linear kernel: only cost to tune, so a full grid is cheap.
    svmControl <- makeTuneControlGrid()
    
    svmParams <- makeParamSet(
      makeDiscreteParam("kernel", values = "linear"),
      makeDiscreteParam("cost", values = gridValues))
  } else {
    # RBF kernel: cost x gamma would be ~900 grid points, so sample
    # 100 random configurations instead.
    svmControl <- makeTuneControlRandom(maxit = 100)
    
    svmParams <- makeParamSet(
      makeDiscreteParam("kernel", values = "radial"),
      makeDiscreteParam("cost", values = gridValues),
      makeDiscreteParam("gamma", values = gridValues))
  }
  
  # Tune the configured learner object itself; the original passed the
  # string "classif.svm", which builds a fresh default learner and
  # silently ignores svmLearner's settings.
  tunedParams <- tuneParams(svmLearner, task = svmTask,
                            resampling = svmResample,
                            par.set = svmParams,
                            control = svmControl)
  
  tunedSVM <- setHyperPars(svmLearner, par.vals = tunedParams$x)
  
  modelSVM <- train(tunedSVM, svmTask)
  
  testSVM <- predict(object = modelSVM, newdata = ldaTest)
  
  # Use a separate variable for the plot: the original overwrote the
  # 'plotSVM' flag with the plot object (and returned the bare FALSE
  # when plotting was disabled). Also dropped
  # scale_fill_manual(values = c("")) — a single empty-string colour
  # cannot cover 4 classes and errors when the plot is printed.
  svmPlot <- NULL
  if(isTRUE(plotSVM)){
    svmPlot <- plotLearnerPrediction(learner = makeLearner("classif.svm",
                                                           predict.type = "response",
                                                           par.vals = tunedParams$x),
                                     task = svmTask,
                                     cv = 0) +
      theme_bw()
  }
  
  list("Observed Genus" = data[i, 1],
       "SVM Result" = as.character(testSVM[["data"]][["response"]]),
       "LDA Result" = as.character(ldaPredict[["class"]]),
       "test.train" = list("pcaTrain" = pcaTrain,
                           "ldaTrain" = ldaTrain,
                           "pcaTest" = pcaTest,
                           "ldaTest" = ldaTest),
       "SVM Plot" = svmPlot)
}

However, I am unsure whether I am implementing this correctly, as there doesn't seem to be much information on how to use LDA for dimensionality reduction in R. Is this a fair comparison of the two models' performance, or should I be implementing it differently (perhaps not using the predict function to generate the testing discriminants)?

Firebug
  • 15,262
  • 5
  • 60
  • 127
wrace
  • 1

0 Answers