I would greatly appreciate your help. I used the cluster.stats function from the fpc
package to compare the similarity of two cluster solutions using a variety of validation criteria, as you can see in the code below. However, I have one question:
Is it possible to tell which solution is more viable: 2 clusters or 5 clusters? If so, could you explain how I can determine this?
Thank you so much!
Best Regards.
library(rdist)
library(geosphere)
library(fpc)
# Example data: six industries, each with a latitude/longitude pair and a
# waste figure.
df <- data.frame(
  Industries = c(1, 2, 3, 4, 5, 6),
  Latitude = c(-23.8, -23.8, -23.9, -23.7, -23.7, -23.7),
  Longitude = c(-49.5, -49.6, -49.7, -49.8, -49.6, -49.9),
  Waste = c(526, 350, 526, 469, 534, 346)
)

# Independent copy of the data; it receives the second cluster assignment
# further down in the script.
df1 <- df
# Clusters ----
# Pairwise geographic distances between the industries. distm() takes points
# as (longitude, latitude), so the columns are passed in that order.
coordinates <- df[, c("Latitude", "Longitude")]
d <- as.dist(distm(coordinates[c("Longitude", "Latitude")]))

# Average-linkage hierarchical clustering on the distance matrix.
fit.average <- hclust(d, method = "average")

# Cut the same tree at two resolutions: k = 2 is stored in df, k = 5 in df1.
clusters <- cutree(fit.average, k = 2)
clusters1 <- cutree(fit.average, k = 5)
df[["cluster"]] <- clusters
df1[["cluster"]] <- clusters1

# Validation statistics for the 2-cluster solution; the 5-cluster solution is
# supplied as the alternative clustering it is compared against.
cluster.stats(d = d, clustering = df$cluster, alt.clustering = df1$cluster)
> cluster.stats(d,df$cluster,df1$cluster)
$n
[1] 6
$cluster.number
[1] 2
$cluster.size
[1] 4 2
$min.cluster.size
[1] 2
$noisen
[1] 0
$diameter
[1] 24382.84 10198.63
$average.distance
[1] 16490.01 10198.63
$median.distance
[1] 15050.60 10198.63
$separation
[1] 20397.25 20397.25
$average.toother
[1] 29499.05 29499.05
$separation.matrix
[,1] [,2]
[1,] 0.00 20397.25
[2,] 20397.25 0.00
$ave.between.matrix
[,1] [,2]
[1,] 0.00 29499.05
[2,] 29499.05 0.00
$average.between
[1] 29499.05
$average.within
[1] 14392.88
$n.between
[1] 8
$n.within
[1] 7
$max.diameter
[1] 24382.84
$min.separation
[1] 20397.25
$within.cluster.ss
[1] 504967651
$clus.avg.silwidths
1 2
0.4268101 0.6465108
$avg.silwidth
[1] 0.5000437
$g2
NULL
$g3
NULL
$pearsongamma
[1] 0.7547454
$dunn
[1] 0.8365412
$dunn2
[1] 1.788904
$entropy
[1] 0.6365142
$wb.ratio
[1] 0.4879101
$ch
[1] 8.157505
$cwidegap
[1] 15048.01 10198.63
$widestgap
[1] 15048.01
$sindex
[1] 20397.25
$corrected.rand
[1] 0.1509434
$vi
[1] 0.9241962