I am trying to build a multilabel classification model with XGBoost. I have one that works for random forest (RF), but when I try the code below for XGBoost I get the error:
"Error in predict.xgb.Booster(m, newdata = data.matrix(.newdata), ...) :
Feature names stored in 'object' and 'newdata' are different!"
Since I never assign the training data, the test data, or 'newdata' myself (the split is done internally through makeResampleDesc()), I don't know how to fix this error.
Does anyone have any ideas?
example data
# Simulated example data: two numeric features (age, sex) and five logical
# label columns (l1..l5), one row per observation.
n_obs <- 120L

# Names of the target columns. makeMultilabelTask() takes column *names*,
# so this must be a character vector, not the (undefined) symbols label1..5.
label_bact <- c("l1", "l2", "l3", "l4", "l5")

age <- round(rnorm(n_obs, mean = 50, sd = 10))
sex <- round(rnorm(n_obs, mean = 0.5, sd = 0.2))
l1 <- as.logical(round(rnorm(n_obs, mean = 0.5, sd = 0.2)))
l2 <- as.logical(round(rnorm(n_obs, mean = 0.5, sd = 0.2)))
l3 <- as.logical(round(rnorm(n_obs, mean = 0.5, sd = 0.2)))
l4 <- as.logical(round(rnorm(n_obs, mean = 0.5, sd = 0.2)))
l5 <- as.logical(round(rnorm(n_obs, mean = 0.5, sd = 0.2)))

# Build the data frame directly: cbind() would first create a numeric matrix
# and silently turn the logical label columns into 0/1 numbers.
data <- data.frame(age, sex, l1, l2, l3, l4, l5)
create learner
# Base learner id; kept in its own variable so it can be swapped easily.
learner <- "classif.xgboost"

# XGBoost base learner, wrapped in a classifier chain (default label order)
# and switched to probability predictions.
lrn <- setPredictType(
  makeMultilabelClassifierChainsWrapper(
    makeLearner(learner, objective = "multi:softprob"),
    order = NULL
  ),
  "prob"
)
create grid
# Search space for tuning: a 3 x 3 grid over tree depth and learning rate.
ps <- makeParamSet(
  makeDiscreteParam(id = "max_depth", values = c(1, 3, 5)),
  makeDiscreteParam(id = "eta", values = c(0.001, 0.01, 0.1))
)
set up resampling method and tuning
# 5-fold cross-validation is used to score every candidate setting ...
rdesc <- makeResampleDesc("CV", iters = 5L)
# ... and the candidates are enumerated by a grid search over the param set.
ctrl <- makeTuneControlGrid()
initiate evaluation vector
# Empty accumulators for the best F1 score and the hyperparameters that
# achieved it; results are appended to these after each tuning run.
v_f1 <- v_max_depth <- v_eta <- NULL
actual grid search with different train_lines for each outer fold
# Build the multilabel task from the example data; `label_bact` names the
# target columns.
task <- makeMultilabelTask(data = data, target = label_bact)

# Grid-search the parameter set with cross-validation, scored by multilabel F1.
res <- tuneParams(
  lrn,
  task = task,
  resampling = rdesc,
  par.set = ps,
  control = ctrl,
  measures = multilabel.f1
)

# Record the best score and the winning hyperparameters.
# res$y holds the performance for the best setting; res$x is a named list
# with one entry per tuned parameter. Extract by name with [[ / $ so that
# plain values (not list elements) are appended. The previous res$x[4]
# indexed past the two tuned parameters, so eta was never stored.
v_f1 <- c(v_f1, res$y[[1L]])
v_max_depth <- c(v_max_depth, res$x$max_depth)
v_eta <- c(v_eta, res$x$eta)