# Determining number of clusters (k)
#
# Rationale: plot the within-cluster sum of squares (WSS) against the number
# of clusters k = 1, 2, ..., 15 and look for the bend in the curve.
#
# wss: within-group sum of squares; wss[k] is the value for k clusters.
# For k = 1 every observation is in one group, so WSS is the total SS:
#   apply(nd, 2, var)       variance of each variable
#                           (MARGIN 1 = rows, 2 = columns; here: columns)
#   sum(apply(nd, 2, var))  sum of those variances
#   DF * var = SS, with DF = nrow(nd) - 1
wss <- numeric(15)  # preallocate instead of growing the vector in the loop
wss[1] <- (nrow(nd) - 1) * sum(apply(nd, 2, var))

# k-means clustering with 2, 3, ..., 15 clusters; compute WSS for each.
# NOTE: with a numeric `centers`, kmeans() chooses random rows as starting
# centres, so the curve can differ slightly from run to run.
for (i in 2:15) {
  wss[i] <- sum(kmeans(nd, centers = i)$withinss)
}

plot(1:15, wss,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within groups sum of squares"
)

# K-Means Cluster Analysis
fit <- kmeans(nd, 5)  # 5 cluster solution

# Get cluster means (one row per cluster, one column per variable).
aggregate(nd, by = list(fit$cluster), FUN = mean)

# Append cluster assignment as an extra column of nd.
nd <- data.frame(nd, fit$cluster)

# Xuhua Xia    Slide 5
# Number of clusters: read k off the bending (elbow) of the curve.    Xuhua Xia    Slide 6