I'm only getting an accuracy of 59% with the following implementation, where accuracy is calculated from the confusion matrix as sum(diag(cm)) / sum(cm).
How can I increase this accuracy?
The dataset is the UCI heart-disease data: 303 patients (rows) and 14 attributes (columns).
The script I've implemented is:
# Load the heart-disease data; 'data.csv' is resolved against the working directory.
dataset <- read.csv("data.csv")

# Encoding categorical data
# Each coded column is converted to a factor with an explicit level set.
# Any value that is not listed in `levels` becomes NA.
dataset$Sex <- factor(
  dataset$Sex,
  levels = c("1", "0"),
  labels = c(1, 0)
)
# Response variable: five classes (0-4).
dataset$num <- factor(
  dataset$num,
  levels = c("0", "1", "2", "3", "4"),
  labels = c(0, 1, 2, 3, 4)
)
dataset$chesp.pain.type <- factor(
  dataset$chesp.pain.type,
  levels = c("1", "2", "3", "4"),
  labels = c(1, 2, 3, 4)
)
dataset$fasting.blood.sugar <- factor(
  dataset$fasting.blood.sugar,
  levels = c("0", "1"),
  labels = c(0, 1)
)
dataset$exercise.induced.angina <- factor(
  dataset$exercise.induced.angina,
  levels = c("0", "1"),
  labels = c(0, 1)
)
dataset$electrocardiographic <- factor(
  dataset$electrocardiographic,
  levels = c("0", "1", "2"),
  labels = c(0, 1, 2)
)
dataset$slope.of.peak.exercise <- factor(
  dataset$slope.of.peak.exercise,
  levels = c("1", "2", "3"),
  labels = c(1, 2, 3)
)
# Encode thal from its OWN column. The previous version built this factor from
# dataset$electrocardiographic, which overwrote thal with a copy of that
# feature and threw away the real thal information.
# Levels are inferred from the data because thal's coding is not shown in this
# script -- NOTE(review): check levels(dataset$thal) for placeholder values
# (e.g. a missing-value marker) and recode them to NA if present.
dataset$thal <- factor(dataset$thal)
# Split the data into a training set and a test set
# install.packages("caTools")
library(caTools)

# Fix the RNG seed so the split is reproducible
set.seed(123)

# `split` is a logical mask over the rows, built from the response `num`
# with a split ratio of 0.8: TRUE marks training rows, FALSE marks test rows.
split <- sample.split(dataset$num, SplitRatio = 0.8)
training_set <- subset(dataset, split)
test_set <- subset(dataset, !split)
# Feature scaling
# Standardise the columns at these positions (assumed to be the continuous
# predictors -- TODO confirm against the column order of data.csv).
scaled_cols <- c(1, 4, 5, 8, 10, 12)

# Learn the centring/scaling parameters from the TRAINING data only.
train_scaled <- scale(training_set[, scaled_cols])

# Apply the training-set mean and sd to the test set. Scaling the test set
# with its own statistics (as before) puts the two sets on different scales
# and leaks test-set information into preprocessing.
test_set[, scaled_cols] <- scale(
  test_set[, scaled_cols],
  center = attr(train_scaled, "scaled:center"),
  scale = attr(train_scaled, "scaled:scale")
)
training_set[, scaled_cols] <- train_scaled
# Train a decision-tree classifier: `num` is modelled on every other column
# install.packages("rpart")
library(rpart)
classifier <- rpart(formula = num ~ ., data = training_set)

# Predict a class label for each test row; column 14 is dropped from newdata
# (presumably the response `num` -- confirm against the column order).
y_pred <- predict(classifier, newdata = test_set[, -14], type = "class")

# Confusion matrix: rows are the values in test_set column 14,
# columns are the predicted classes.
cm <- table(test_set[[14]], y_pred)