Here's some code to illustrate the usage, and here's a link to a thread (itself linking to more threads) discussing how to handle unbalanced RF. In short, you can implement your prior expectation by changing the voting rule (cutoff), by stratified sampling (strata + sampsize), or by class weights (classwt). I usually use strata. I was a little surprised in the code example below that the classwt values had to be skewed that much.
library(randomForest)
library(AUC)
make.data = function(N=1000) {
  X = data.frame(replicate(6,rnorm(N))) #six features
  y = X[,1]^2 + sin(X[,2]) + rnorm(N) #some hidden data structure to learn
  rare.class.prevalence = 0.1
  y.class = factor(y<quantile(y,rare.class.prevalence)) #10% TRUE, 90% FALSE
  return(data.frame(X,y=y.class))
}
#make some data structure
train.data = make.data()
#1 - Balancing by the voting rule (cutoff); the ROC AUC will be unchanged...
rare.class.prevalence = 0.1
rf.cutoff = randomForest(y~.,data=train.data,
                         cutoff=c(1-rare.class.prevalence,rare.class.prevalence))
print(rf.cutoff)
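#Aside: cutoff only changes the voting rule, not the trees themselves, so the
#same effect can be had after training via the cutoff argument of
#predict.randomForest. A sketch, shown on the training data just to
#illustrate the API (in-bag predictions are optimistic):
rf.default = randomForest(y~.,data=train.data)
table(predict(rf.default,newdata=train.data,
              cutoff=c(1-rare.class.prevalence,rare.class.prevalence)))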
#2 - Balancing by sampling stratification
nRareSamples = 1000 * rare.class.prevalence
rf.strata = randomForest(y~.,data=train.data,strata=train.data$y,
sampsize=c(nRareSamples,nRareSamples))
print(rf.strata)
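#the OOB confusion matrix makes the sensitivity/specificity trade-off of
#stratification explicit (see the class.error column):
rf.strata$confusion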
#3 - Balancing by class weights during training.
rf.classwt = randomForest(y~.,data=train.data,classwt=c(0.0005,1000))
print(rf.classwt)
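#classwt is matched to the order of the class levels ("FALSE" then "TRUE"
#here), so the large weight above falls on the rare TRUE class; a quick
#sanity check of the level order:
levels(train.data$y)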
#view OOB-CV specificity and sensitivity
plot(roc(rf.cutoff$votes[,2],train.data$y),main="black default, red strata, green classwt")
plot(roc(rf.strata$votes[,2],train.data$y),col=2,add=T)
plot(roc(rf.classwt$votes[,2],train.data$y),col=3,add=T)
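#numeric AUCs to back the claim above: the cutoff model's OOB votes (and hence
#its AUC) should match a default forest, while strata and classwt can shift the
#vote distribution. A sketch using auc() from the AUC package:
sapply(list(cutoff=rf.cutoff,strata=rf.strata,classwt=rf.classwt),
       function(a.model) auc(roc(a.model$votes[,2],train.data$y)))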
#make test.data and remove a random sample of the majority class until both classes are equally prevalent
test.data = make.data(N=50000)
test.data.balanced = test.data[-sample(which(test.data$y=="FALSE"),40000),]
#print prediction performance: fraction predicted correct
sapply(c("rf.cutoff","rf.strata","rf.classwt"),function(a.model) {
  mean(test.data.balanced$y == predict(get(a.model), newdata=test.data.balanced))
})
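#per-class breakdown on the balanced test set (a sketch), to see where each
#balancing strategy makes its errors:
lapply(c("rf.cutoff","rf.strata","rf.classwt"),function(a.model) {
  table(predicted=predict(get(a.model),newdata=test.data.balanced),
        actual=test.data.balanced$y)
})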