sna_and_clustering
This is an old revision of the document!
SNA (social network analysis) and clustering
in R
# Social network analysis with igraph ----
#
# Reads an edge list (columns `first` and `second`), builds a directed graph,
# inspects vertices/edges/degree, draws the network with several layouts,
# computes centrality and hub/authority scores, and finally detects
# communities on an undirected copy of the graph.
#
# Objects left in the workspace for later sections: `net` (undirected graph),
# `cnet` (edge-betweenness communities) and `infomap` (Infomap communities).
library(igraph)

data <- read.csv(
  "http://commres.net/wiki/_media/r/socialnetworkdata.csv",
  header = TRUE
)
head(data)
str(data)

# Keep only the two endpoint columns; each row is one directed tie.
y <- data.frame(data$first, data$second)
head(y)

# net <- graph.data.frame(y, directed = TRUE)  # pre-1.0 igraph name
net <- graph_from_data_frame(y, directed = TRUE)
head(net)
net

V(net) # vertex in net data
who.net <- V(net) # 52/52 vertices
data.frame(who.net)

E(net) # edge info in net data
rel.net <- E(net)
rel.net # output 290/290 edges
# 52 by 290 data set

# Store the degree as a vertex attribute so it can drive vertex sizes below.
V(net)$degree <- degree(net)
degree.net <- data.frame(degree(net))
V(net)$degree
str(who.net)
who.net$name

hist(V(net)$degree)

# One colour per vertex, derived from the graph instead of a hard-coded 52.
vertex_colours <- rainbow(vcount(net))

set.seed(222)
plot(
  net,
  vertex.color = "lightblue",
  vertex.size = 2, # was misspelled `vertext.size`, so the size was never applied
  edge.arrow.size = 0.1,
  vertex.label.cex = 0.8
)

# Same graph under three layouts, with vertex size proportional to degree.
# The dotted layout names (layout.fruchterman.reingold, layout.graphopt,
# layout.kamada.kawai) are the deprecated spellings of the functions below.
plot(
  net,
  vertex.color = vertex_colours,
  vertex.size = V(net)$degree * 0.3,
  edge.arrow.size = 0.1,
  layout = layout_with_fr
)
plot(
  net,
  vertex.color = vertex_colours,
  vertex.size = V(net)$degree * 0.8,
  edge.arrow.size = 0.1,
  layout = layout_with_graphopt
)
plot(
  net,
  vertex.color = vertex_colours,
  vertex.size = V(net)$degree * 0.4,
  edge.arrow.size = 0.1,
  layout = layout_with_kk
)

# Centrality measures.
betweenness(net)
degree(net)
closeness(net)

# Hub and authority scores; computed once and reused.
hits <- hits_scores(net)
hits$hub # outlinks
hits$authority # inlinks
hs <- hits$hub # outlinks
as <- hits$authority # inlinks

# Re-seeding with the same value before each plot gives both figures the
# same Kamada-Kawai layout, so hubs and authorities can be compared.
set.seed(123)
plot(
  net,
  vertex.size = hs * 30,
  main = "Hubs",
  vertex.color = vertex_colours,
  edge.arrow.size = 0.1,
  layout = layout_with_kk
)
set.seed(123)
plot(
  net,
  vertex.size = as * 30,
  main = "Authorities",
  vertex.color = vertex_colours,
  edge.arrow.size = 0.1,
  layout = layout_with_kk
)

# Community detection on an undirected version of the same edge list.
net <- graph_from_data_frame(y, directed = FALSE)
cnet <- cluster_edge_betweenness(net)
# Alternatives:
# cluster_fast_greedy(net)
# cluster_fluid_communities(net, 2)
# cluster_infomap(net)
# cluster_label_prop()
# cluster_spinglass()
# cluster.distribution()
# components(net, "strong")
infomap <- cluster_infomap(net)
# c.infomap <- cluster_infomap(net)

plot(cnet, net, vertex.size = 10, vertex.label.cex = 0.8)
# Last figure of the network section: Infomap communities drawn on the
# undirected graph. Needs `infomap` and `net` from the preceding section.
plot(infomap, net, vertex.size = 10, vertex.label.cex = 0.8)

############################################################
# clustering in R ----
# clusterdata.csv
#
# Standardises the numeric columns of the data set, then clusters the rows
# with hierarchical clustering, k-means and PAM, plotting each result.
orgs <- read.csv(
  "http://commres.net/wiki/_media/r/clusterdata.csv",
  header = TRUE
)
str(orgs)
head(orgs)

pairs(orgs[2:9])

plot(Fuel_Cost ~ Sales, data = orgs)
with(orgs, text(Fuel_Cost ~ Sales, labels = orgs$Company, pos = 4))

# normalization (standardization)
z <- orgs[, -1] # remove the first column
means <- vapply(z, mean, numeric(1))
sds <- vapply(z, sd, numeric(1))
means
sds
nor <- scale(z, center = means, scale = sds)
nor

distance <- dist(nor)
distance

# Hierarchical clustering with hclust's default linkage.
orgs.hclust <- hclust(distance)
plot(orgs.hclust)
plot(orgs.hclust, labels = orgs$Company, main = "Default from hclust")
# hang = -1 aligns all leaf labels at the bottom of the dendrogram.
plot(
  orgs.hclust,
  hang = -1,
  labels = orgs$Company,
  main = "Default from hclust"
)

# Average linkage, drawn once bare and once with a 4-cluster overlay.
orgs.hclust.average <- hclust(distance, method = "average")
plot(
  orgs.hclust.average,
  hang = -1,
  labels = orgs$Company,
  main = "hclust average"
)
plot(
  orgs.hclust.average,
  hang = -1,
  labels = orgs$Company,
  main = "hclust average"
)
rect.hclust(orgs.hclust.average, k = 4, border = "red")

# Cut the default-linkage tree into 3 and 5 groups.
member.by.3 <- cutree(orgs.hclust, 3)
table(member.by.3)
member.by.3

member.by.5 <- cutree(orgs.hclust, 5)
table(member.by.5)
member.by.5

# Per-cluster mean and sd of the standardised variables.
aggregate(nor, list(member.by.3), mean)
aggregate(nor, list(member.by.3), sd)

# Elbow plot: total within-cluster sum of squares for k = 1, 2, ...
# k = 1 is the total sum of squares. The upper bound is capped below the
# number of rows so kmeans() is never asked for as many centres as points,
# and the seed makes the random starts reproducible.
max_k <- min(20, nrow(nor) - 1)
set.seed(123)
wss <- numeric(max_k)
wss[1] <- (nrow(nor) - 1) * sum(apply(nor, 2, var))
for (i in seq_len(max_k)[-1]) {
  wss[i] <- kmeans(nor, centers = i)$tot.withinss
}
plot(
  seq_len(max_k),
  wss,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within groups sum of squares"
)

# kmeans clustering
set.seed(123)
kc <- kmeans(nor, 3)
kc

# install.packages("cluster")
library(cluster)
ot <- nor
datadistshortset <- dist(ot, method = "euclidean")
hc1 <- hclust(datadistshortset, method = "complete")
# pam() is given the data matrix itself. The earlier call passed the dist
# object together with diss = FALSE, which makes pam() expand it to a full
# matrix and cluster its rows as if they were observations.
pamvshortset <- pam(ot, k = 4, metric = "euclidean")
clusplot(
  pamvshortset,
  shade = FALSE,
  labels = 2,
  col.clus = "blue",
  col.p = "red",
  span = FALSE,
  main = "Cluster Mapping",
  cex = 1.2
)

# kmeans
# install.packages("factoextra")
library(factoextra)
k2 <- kmeans(nor, centers = 3, nstart = 25)
str(k2)
fviz_cluster(k2, data = nor)
# Choosing the number of clusters ----
#
# Three diagnostics on the standardised matrix `nor`: the elbow (wss) plot,
# the average silhouette width, and the gap statistic. Uses fviz_nbclust()
# and fviz_gap_stat() from factoextra and clusGap() from cluster, both of
# which are attached earlier in the script.
#
# kmeans() starts from random centres and clusGap() draws bootstrap
# reference sets, so a seed is fixed to make the figures reproducible.
set.seed(123)

fviz_nbclust(nor, kmeans, method = "wss")
fviz_nbclust(nor, kmeans, method = "silhouette")

# nstart = 25 is forwarded to kmeans(); B = 50 bootstrap samples, k up to 10.
gap_stat <- clusGap(nor, FUN = kmeans, nstart = 25, K.max = 10, B = 50)
fviz_gap_stat(gap_stat)
sna_and_clustering.1732491571.txt.gz · Last modified: 2024/11/25 08:39 by hkimscil