1

I am trying to build a multilabel classification model with XGBoost. I have one that works with random forest (RF), but when I try the code below with XGBoost I get the error:

"Error in predict.xgb.Booster(m, newdata = data.matrix(.newdata), ...) : 
Feature names stored in 'object' and 'newdata' are different!"

Since I did not assign the training data, the test data, or 'newdata' myself (the split is done by makeResampleDesc()), I don't know how to fix this error.

Does anyone have any ideas?

example data

# Simulated example data: 120 observations with two features (age, sex)
# and five binary label columns (l1..l5).
n_obs <- 120L

# Names of the label columns in `data`. These must be strings naming
# columns, not the label vectors themselves, because they are later passed
# as `target` to makeMultilabelTask().
label_bact <- c("l1", "l2", "l3", "l4", "l5")

age <- round(rnorm(n_obs, mean = 50, sd = 10))

# rbinom() yields exactly 0/1, whereas rounding a normal draw can
# occasionally land outside {0, 1}.
sex <- rbinom(n_obs, size = 1L, prob = 0.5)
l1 <- rbinom(n_obs, size = 1L, prob = 0.5) == 1L
l2 <- rbinom(n_obs, size = 1L, prob = 0.5) == 1L
l3 <- rbinom(n_obs, size = 1L, prob = 0.5) == 1L
l4 <- rbinom(n_obs, size = 1L, prob = 0.5) == 1L
l5 <- rbinom(n_obs, size = 1L, prob = 0.5) == 1L

# Build the data frame directly. Going through cbind() first creates a
# numeric matrix, which silently turns the logical label columns into 0/1
# numbers; the multilabel targets have to stay logical.
data <- data.frame(age, sex, l1, l2, l3, l4, l5)

create learner

# Base learner: xgboost classifier wrapped in a classifier chain so that
# each label is predicted in turn, with earlier labels added as features.
learner <- "classif.xgboost"
lrn <- makeLearner(learner, objective = "multi:softprob")

# Use a fixed chain order that matches the order of the label columns in
# the data (l1..l5, placed after the features). With `order = NULL` the
# chain order is random.
# NOTE(review): presumably the random order is what triggers
# "Feature names stored in 'object' and 'newdata' are different!" -- the
# chained label columns would be appended at predict time in chain order,
# while the training data keeps them in data-frame column order, and
# xgboost compares feature names strictly. Confirm against the installed
# mlr version.
chain_order <- c("l1", "l2", "l3", "l4", "l5")
lrn <- makeMultilabelClassifierChainsWrapper(lrn, order = chain_order)

# Predict label probabilities rather than hard labels.
lrn <- setPredictType(lrn, "prob")

create grid

# Search space for tuning: three candidate values each for tree depth and
# learning rate, declared as discrete parameters.
max_depth_param <- makeDiscreteParam("max_depth", values = c(1, 3, 5))
eta_param <- makeDiscreteParam("eta", values = c(0.001, 0.01, 0.1))
ps <- makeParamSet(max_depth_param, eta_param)

set up resampling method and tuning

# Resampling description: 5-fold cross-validation.
rdesc <- makeResampleDesc("CV", iters = 5L)
# Tuning control: grid search over the parameter set.
ctrl <- makeTuneControlGrid()

initiate evaluation vector

# Empty accumulators for the best F1 score and the tuned hyperparameters;
# results are appended to these after tuning. (`c()` evaluates to NULL.)
v_f1 <- v_max_depth <- v_eta <- NULL

actual grid search with different train_lines for each outer fold

# Multilabel task: `label_bact` names the target columns inside `data`.
task <- makeMultilabelTask(data = data, target = label_bact)

# Tune the wrapped learner over `ps` using the resampling in `rdesc`,
# scored by multilabel F1.
res <- tuneParams(
  lrn,
  task = task,
  resampling = rdesc,
  par.set = ps,
  control = ctrl,
  measures = multilabel.f1
)

# Record the best score and the hyperparameters that achieved it.
# `res$x` is a list holding only the tuned parameters, so look them up by
# name: positional `[4]` is past the end of that list, and single-bracket
# `[1]` returns a sub-list instead of the value.
v_f1 <- c(v_f1, unname(res$y[[1L]]))
v_max_depth <- c(v_max_depth, res$x[["max_depth"]])
v_eta <- c(v_eta, res$x[["eta"]])
sebp
  • 1,787
  • 13
  • 24
agnesg2g
  • 11
  • 1

0 Answers0