User Tools

Site Tools


sna_and_clustering

sna (social network analysis) and clustering

in R

# 
library(igraph)
data <- read.csv("http://commres.net/wiki/_media/r/socialnetworkdata.csv", header=T)
head(data)
str(data)

y <- data.frame(data$first, data$second)
head(y)

# net <- graph.data.frame(y, directed=T)
net <- graph_from_data_frame(y, directed=T)
head(net)
net

V(net) # vertex in net data
who.net <- V(net) # 52/52 vertices
data.frame(who.net)

E(net) # edge info in net data
rel.net <- E(net)
rel.net # output 290/290 edges 

# 52 by 290 data set

V(net)$degree <- degree(net)
degree.net <- data.frame(degree(net))
V(net)$degree


str(who.net)
who.net$name

hist(V(net)$degree)

set.seed(222)
plot(net,
     vertex.color = 'lightblue',
     vertext.size = 2,
     edge.arrow.size = 0.1,
     vertex.label.cex = 0.8)

plot(net,
     vertex.color = rainbow(52),
     vertex.size = V(net)$degree*0.3,
     edge.arrow.size = 0.1,
     layout=layout.fruchterman.reingold)
# layout layout.fruchterman.reingold
# layout.graphopt
plot(net,
     vertex.color = rainbow(52),
     vertex.size = V(net)$degree*0.8,
     edge.arrow.size = 0.1,
     layout=layout.graphopt)

plot(net,
     vertex.color = rainbow(52),
     vertex.size = V(net)$degree*0.4,
     edge.arrow.size = 0.1,
     layout=layout.kamada.kawai)

betweenness(net)
degree(net)
closeness(net)
hits_scores(net)$hub          # outlinks
hits_scores(net)$authority    # inlinks

hs <- hits_scores(net)$hub          # outlinks
as <- hits_scores(net)$authority    # inlinks



set.seed(123)
plot(net,
     vertex.size=hs*30,
     main = 'Hubs',
     vertex.color = rainbow(52),
     edge.arrow.size=0.1,
     layout = layout.kamada.kawai)

set.seed(123)
plot(net,
     vertex.size=as*30,
     main = 'Authorities',
     vertex.color = rainbow(52),
     edge.arrow.size=0.1,
     layout = layout.kamada.kawai)


net <- graph_from_data_frame(y, directed = F)
cnet <- cluster_edge_betweenness(net)

# cluster_fast_greedy(net)
# cluster_fluid_communities(net, 2)
# cluster_infomap(net)
# cluster_label_prop()
# cluster_spinglass()
# cluster.distribution()
# components(net, "strong")

infomap <- cluster_infomap(net)
# c.infomap <- cluster_infomap(net)

plot(cnet,
     net,
     vertex.size = 10,
     vertex.label.cex = 0.8)

plot(infomap,
     net,
     vertex.size = 10,
     vertex.label.cex = 0.8)

############################################################
# clustering in R
# clusterdata.csv
orgs <- read.csv("http://commres.net/wiki/_media/r/clusterdata.csv", header=T)
str(orgs)
head(orgs)

pairs(orgs[2:9])

plot(Fuel_Cost ~ Sales, data = orgs)
with(orgs, text(Fuel_Cost ~ Sales, 
                labels=orgs$Company,
                pos=4))

# normalization (standardization)
z <- orgs[,-c(1,1)] # remove the first column
means <- apply(z, 2, mean)
sds <- apply(z, 2, sd)
means
sds

nor <- scale(z, center=means, scale=sds)
nor 

distance <- dist(nor)
distance

orgs.hclust = hclust(distance)

plot(orgs.hclust)
plot(orgs.hclust,
     labels = orgs$Company,
     main='Default from hclust')
plot(orgs.hclust,
     hang=-1, 
     labels=orgs$Company, 
     main='Default from hclust')


orgs.hclust.average<-hclust(distance, method="average") 
plot(orgs.hclust.average, hang=-1,
     labels=orgs$Company, 
     main='hclust average')

plot(orgs.hclust.average, hang=-1,
     labels=orgs$Company, 
     main='hclust average')
rect.hclust(orgs.hclust.average, k=4, border='red')

member.by.3 <- cutree(orgs.hclust, 3)
table(member.by.3)
member.by.3

member.by.5 <- cutree(orgs.hclust, 5)
table(member.by.5)
member.by.5


aggregate(nor,list(member.by.3),mean)
aggregate(nor,list(member.by.3),sd)

wss <- (nrow(nor)-1)*sum(apply(nor,2,var))
for (i in 2:20) 
  wss[i] <- sum(kmeans(nor, centers=i)$withinss)
plot(1:20, wss, 
     type="b", 
     xlab="Number of Clusters", 
     ylab="Within groups sum of squares")


# kmeans clustering 
set.seed(123)
kc<-kmeans(nor, 3)
kc

# install.packages("cluster")
library(cluster)

ot<-nor
datadistshortset <- dist(ot, method = "euclidean")
hc1 <- hclust(datadistshortset, method = "complete" )
pamvshortset <- pam(datadistshortset, 4, diss = FALSE)
clusplot(pamvshortset, shade = FALSE, 
         labels=2, 
         col.clus="blue",
         col.p="red", 
         span=FALSE,
         main="Cluster Mapping",
         cex=1.2)

# kmeans
# install.packages("factoextra")
library(factoextra) 
k2 <- kmeans(nor, centers = 3, nstart = 25)
str(k2)

fviz_cluster(k2, data = nor)
fviz_nbclust(nor, kmeans, method = "wss")
fviz_nbclust(nor, kmeans, method = "silhouette")

gap_stat <- clusGap(nor, FUN = kmeans, 
                    nstart = 25, K.max = 10, B = 50)
fviz_gap_stat(gap_stat)

output

> 
> 
> # 
> library(igraph)
> data <- read.csv("http://commres.net/wiki/_media/r/socialnetworkdata.csv", header=T)
> head(data)
  first second grade spec
1    AA     DD     6    Y
2    AB     DD     6    R
3    AF     BA     6    Q
4    DD     DA     6    Q
5    CD     EC     6    X
6    DD     CE     6    Y
> str(data)
'data.frame':	290 obs. of  4 variables:
 $ first : chr  "AA" "AB" "AF" "DD" ...
 $ second: chr  "DD" "DD" "BA" "DA" ...
 $ grade : int  6 6 6 6 6 6 6 6 6 6 ...
 $ spec  : chr  "Y" "R" "Q" "Q" ...
> 
> y <- data.frame(data$first, data$second)
> head(y)
  data.first data.second
1         AA          DD
2         AB          DD
3         AF          BA
4         DD          DA
5         CD          EC
6         DD          CE
> 
> # net <- graph.data.frame(y, directed=T)
> net <- graph_from_data_frame(y, directed=T)
> head(net)
6 x 52 sparse Matrix of class "dgCMatrix"
  [[ suppressing 52 column names ‘AA’, ‘AB’, ‘AF’ ... ]]
                                                                 
AA . . . 10 . . . . . . . . . . . . . . . . . . . . . . . . . . .
AB . . .  1 . 4 . . . . . . . . . . . . . . . . . . . . . . . . .
AF . . .  . . 3 . . 2 . . . . 1 . . . . . . . . . . . . . . . . .
DD . . .  . 2 . . . . . . . . . . . . . . . . . . 2 . . . . . . .
CD . . . 10 . . . 4 . 2 . 2 . . 1 . . . . . . . . . . . . . . . .
BA . 2 5  . . . . . 1 1 . . . . . 1 . . . . . . . . 1 . . . . . .
                                            
AA . . . . . . . . . 1 1 . . . . . . . . . .
AB . . 1 . . . . . . . . . . . . . . . . . .
AF . . . . . . . . . . . . . . . . . . . 1 .
DD . . . . 4 . . . . . . . . . . . . . . . .
CD . . . . . 3 1 . . . . . . . . 1 . 1 . . .
BA . . . . . . . . . . . . . . . . . . 1 . 1
> net
IGRAPH 612e768 DN-- 52 290 -- 
+ attr: name (v/c)
+ edges from 612e768 (vertex names):
 [1] AA->DD AB->DD AF->BA DD->DA CD->EC DD->CE CD->FA CD->CC BA->AF
[10] CB->CA CC->CA CD->CA BC->CA DD->DA ED->AD AE->AC AB->BA CD->EC
[19] CA->CC EB->CC BF->CE BB->CD AC->AE CC->FB DC->BB BD->CF DB->DA
[28] DD->DA DB->DD BC->AF CF->DE DF->BF CB->CA BE->CA EA->CA CB->CA
[37] CB->CA CC->CA CD->CA BC->CA BF->CA CE->CA AC->AD BD->BE AE->DF
[46] CB->DF AC->DF AA->DD AA->DD AA->DD CD->DD AA->DD EE->DD CD->DD
[55] DB->AA AA->FC BE->CC EF->FD CF->FE BB->DD CD->DD BA->AB CD->EC
[64] BE->EE CE->CC CD->CC ED->CC BB->CC BE->CE DD->CE AC->CD ED->CD
+ ... omitted several edges
> 
> V(net) # vertex in net data
+ 52/52 vertices, named, from 612e768:
 [1] AA AB AF DD CD BA CB CC BC ED AE CA EB BF BB AC DC BD DB CF DF
[22] BE EA CE EE EF FF FD GB GC GD AD KA KF LC DA EC FA FB DE FC FE
[43] GA GE KB KC KD KE LB LA LD LE
> who.net <- V(net) # 52/52 vertices
> data.frame(who.net)
   who.net
AA       1
AB       2
AF       3
DD       4
CD       5
BA       6
CB       7
CC       8
BC       9
ED      10
AE      11
CA      12
EB      13
BF      14
BB      15
AC      16
DC      17
BD      18
DB      19
CF      20
DF      21
BE      22
EA      23
CE      24
EE      25
EF      26
FF      27
FD      28
GB      29
GC      30
GD      31
AD      32
KA      33
KF      34
LC      35
DA      36
EC      37
FA      38
FB      39
DE      40
FC      41
FE      42
GA      43
GE      44
KB      45
KC      46
KD      47
KE      48
LB      49
LA      50
LD      51
LE      52
> 
> E(net) # edge info in net data
+ 290/290 edges from 612e768 (vertex names):
 [1] AA->DD AB->DD AF->BA DD->DA CD->EC DD->CE CD->FA CD->CC BA->AF
[10] CB->CA CC->CA CD->CA BC->CA DD->DA ED->AD AE->AC AB->BA CD->EC
[19] CA->CC EB->CC BF->CE BB->CD AC->AE CC->FB DC->BB BD->CF DB->DA
[28] DD->DA DB->DD BC->AF CF->DE DF->BF CB->CA BE->CA EA->CA CB->CA
[37] CB->CA CC->CA CD->CA BC->CA BF->CA CE->CA AC->AD BD->BE AE->DF
[46] CB->DF AC->DF AA->DD AA->DD AA->DD CD->DD AA->DD EE->DD CD->DD
[55] DB->AA AA->FC BE->CC EF->FD CF->FE BB->DD CD->DD BA->AB CD->EC
[64] BE->EE CE->CC CD->CC ED->CC BB->CC BE->CE DD->CE AC->CD ED->CD
[73] FF->CD AC->CD DD->CD DD->CD AE->GA AE->GA AE->GA AE->GA BA->ED
[82] BE->ED EB->ED CD->ED FD->EF FD->EF CD->BB BF->BB BC->BB BB->CF
+ ... omitted several edges
> rel.net <- E(net)
> rel.net # output 290/290 edges 
+ 290/290 edges from 612e768 (vertex names):
 [1] AA->DD AB->DD AF->BA DD->DA CD->EC DD->CE CD->FA CD->CC BA->AF
[10] CB->CA CC->CA CD->CA BC->CA DD->DA ED->AD AE->AC AB->BA CD->EC
[19] CA->CC EB->CC BF->CE BB->CD AC->AE CC->FB DC->BB BD->CF DB->DA
[28] DD->DA DB->DD BC->AF CF->DE DF->BF CB->CA BE->CA EA->CA CB->CA
[37] CB->CA CC->CA CD->CA BC->CA BF->CA CE->CA AC->AD BD->BE AE->DF
[46] CB->DF AC->DF AA->DD AA->DD AA->DD CD->DD AA->DD EE->DD CD->DD
[55] DB->AA AA->FC BE->CC EF->FD CF->FE BB->DD CD->DD BA->AB CD->EC
[64] BE->EE CE->CC CD->CC ED->CC BB->CC BE->CE DD->CE AC->CD ED->CD
[73] FF->CD AC->CD DD->CD DD->CD AE->GA AE->GA AE->GA AE->GA BA->ED
[82] BE->ED EB->ED CD->ED FD->EF FD->EF CD->BB BF->BB BC->BB BB->CF
+ ... omitted several edges
> 
> # 52 by 290 data set
> 
> V(net)$degree <- degree(net)
> degree.net <- data.frame(degree(net))
> V(net)$degree
 [1] 18  9 23 36 40 26 24 50 21 27 15 62  7 12 23 27  2  4  8 12 23
[22] 20  8 10  6  8  1  8  1  1  1  9  3  3  1  7  3  1  1  2  1  2
[43]  5  1  1  1  1  1  1  1  1  1
> 
> 
> str(who.net)
 'igraph.vs' Named int [1:52] 1 2 3 4 5 6 7 8 9 10 ...
 - attr(*, "names")= chr [1:52] "AA" "AB" "AF" "DD" ...
 - attr(*, "is_all")= logi TRUE
 - attr(*, "env")=<weakref> 
 - attr(*, "graph")= chr "612e7682-aabd-11ef-9ea2-7d7f9645968c"
> who.net$name
 [1] "AA" "AB" "AF" "DD" "CD" "BA" "CB" "CC" "BC" "ED" "AE" "CA"
[13] "EB" "BF" "BB" "AC" "DC" "BD" "DB" "CF" "DF" "BE" "EA" "CE"
[25] "EE" "EF" "FF" "FD" "GB" "GC" "GD" "AD" "KA" "KF" "LC" "DA"
[37] "EC" "FA" "FB" "DE" "FC" "FE" "GA" "GE" "KB" "KC" "KD" "KE"
[49] "LB" "LA" "LD" "LE"
> 
> hist(V(net)$degree)
> 
> set.seed(222)
> plot(net,
+      vertex.color = 'lightblue',
+      vertext.size = 2,
+      edge.arrow.size = 0.1,
+      vertex.label.cex = 0.8)
> 
> plot(net,
+      vertex.color = rainbow(52),
+      vertex.size = V(net)$degree*0.3,
+      edge.arrow.size = 0.1,
+      layout=layout.fruchterman.reingold)
> # layout layout.fruchterman.reingold
> # layout.graphopt
> plot(net,
+      vertex.color = rainbow(52),
+      vertex.size = V(net)$degree*0.8,
+      edge.arrow.size = 0.1,
+      layout=layout.graphopt)
> 
> plot(net,
+      vertex.color = rainbow(52),
+      vertex.size = V(net)$degree*0.4,
+      edge.arrow.size = 0.1,
+      layout=layout.kamada.kawai)
> 
> betweenness(net)
        AA         AB         AF         DD         CD         BA 
 89.196239   4.564734 109.762225 162.990640 375.992047 184.496219 
        CB         CC         BC         ED         AE         CA 
 36.237779 225.269107  94.821412 343.016093 105.368424 292.157605 
        EB         BF         BB         AC         DC         BD 
  0.000000  28.446981 244.963181 172.427422   0.000000   5.966115 
        DB         CF         DF         BE         EA         CE 
  0.000000 181.814414  42.768995  55.964872 183.604022  11.943347 
        EE         EF         FF         FD         GB         GC 
  4.002137   0.000000   0.000000  33.000000   0.000000   0.000000 
        GD         AD         KA         KF         LC         DA 
  0.000000  48.996857   0.000000  17.229134   0.000000   0.000000 
        EC         FA         FB         DE         FC         FE 
  0.000000   0.000000   0.000000   0.000000   0.000000   0.000000 
        GA         GE         KB         KC         KD         KE 
  0.000000   0.000000   0.000000   0.000000   0.000000   0.000000 
        LB         LA         LD         LE 
  0.000000   0.000000   0.000000   0.000000 
> degree(net)
AA AB AF DD CD BA CB CC BC ED AE CA EB BF BB AC DC BD DB CF DF BE 
18  9 23 36 40 26 24 50 21 27 15 62  7 12 23 27  2  4  8 12 23 20 
EA CE EE EF FF FD GB GC GD AD KA KF LC DA EC FA FB DE FC FE GA GE 
 8 10  6  8  1  8  1  1  1  9  3  3  1  7  3  1  1  2  1  2  5  1 
KB KC KD KE LB LA LD LE 
 1  1  1  1  1  1  1  1 
> closeness(net)
         AA          AB          AF          DD          CD 
0.005988024 0.007874016 0.008196721 0.007352941 0.010101010 
         BA          CB          CC          BC          ED 
0.009900990 0.010101010 0.008771930 0.008695652 0.012048193 
         AE          CA          EB          BF          BB 
0.010000000 0.008264463 0.009009009 0.009090909 0.010869565 
         AC          DC          BD          DB          CF 
0.011111111 0.008547009 0.008771930 0.005917160 0.009433962 
         DF          BE          EA          CE          EE 
0.010869565 0.009433962 0.006711409 0.006849315 0.007751938 
         EF          FF          FD          GB          GC 
0.005319149 0.007042254 0.005319149 0.006329114 0.004761905 
         GD          AD          KA          KF          LC 
0.006369427 0.009174312 0.008620690 0.005347594 0.006097561 
         DA          EC          FA          FB          DE 
        NaN         NaN         NaN         NaN         NaN 
         FC          FE          GA          GE          KB 
        NaN         NaN         NaN         NaN         NaN 
         KC          KD          KE          LB          LA 
        NaN         NaN         NaN         NaN         NaN 
         LD          LE 
        NaN         NaN 
> hits_scores(net)$hub          # outlinks
          AA           AB           AF           DD           CD 
8.042807e-02 1.300950e-02 4.731440e-03 1.255496e-02 2.350165e-01 
          BA           CB           CC           BC           ED 
3.239013e-02 7.145888e-01 1.000000e+00 2.321828e-01 4.858357e-02 
          AE           CA           EB           BF           BB 
7.949708e-02 4.494956e-02 1.870058e-01 1.272314e-01 1.150808e-01 
          AC           DC           BD           DB           CF 
2.093518e-01 7.455515e-03 1.305960e-02 1.738176e-02 7.102816e-03 
          DF           BE           EA           CE           EE 
2.505787e-02 2.195271e-01 5.693409e-02 6.319085e-02 1.273824e-02 
          EF           FF           FD           GB           GC 
1.119098e-03 4.485458e-03 4.563724e-04 3.594258e-04 2.507046e-04 
          GD           AD           KA           KF           LC 
6.374463e-03 9.643170e-03 6.276463e-02 4.418478e-04 5.681639e-02 
          DA           EC           FA           FB           DE 
1.458925e-19 1.458925e-19 1.458925e-19 1.458925e-19 1.458925e-19 
          FC           FE           GA           GE           KB 
1.458925e-19 1.458925e-19 1.458925e-19 1.458925e-19 1.458925e-19 
          KC           KD           KE           LB           LA 
1.458925e-19 1.458925e-19 1.458925e-19 1.458925e-19 1.458925e-19 
          LD           LE 
1.458925e-19 1.458925e-19 
> hits_scores(net)$authority    # inlinks
          AA           AB           AF           DD           CD 
4.412541e-03 3.968841e-03 8.375004e-02 1.409698e-01 7.894656e-02 
          BA           CB           CC           BC           ED 
2.146204e-02 3.660444e-03 1.121941e-01 6.326094e-03 4.799095e-02 
          AE           CA           EB           BF           BB 
1.524499e-02 1.000000e+00 0.000000e+00 6.072030e-03 8.323026e-02 
          AC           DC           BD           DB           CF 
6.262068e-02 0.000000e+00 2.486679e-04 0.000000e+00 8.146610e-03 
          DF           BE           EA           CE           EE 
1.925650e-01 1.095155e-01 7.776767e-03 2.437300e-02 2.419075e-02 
          EF           FF           FD           GB           GC 
6.390996e-05 0.000000e+00 2.071608e-03 0.000000e+00 0.000000e+00 
          GD           AD           KA           KF           LC 
0.000000e+00 1.806051e-02 0.000000e+00 2.156359e-03 0.000000e+00 
          DA           EC           FA           FB           DE 
3.583779e-03 2.468362e-02 8.227873e-03 3.500976e-02 4.973358e-04 
          FC           FE           GA           GE           KB 
2.815768e-03 3.064436e-03 1.391587e-02 3.500976e-02 3.500976e-02 
          KC           KD           KE           LB           LA 
2.486679e-04 8.227873e-03 2.501759e-02 8.227873e-03 1.133971e-03 
          LD           LE 
1.656466e-04 1.133971e-03 
> 
> hs <- hits_scores(net)$hub          # outlinks
> as <- hits_scores(net)$authority    # inlinks
> 
> 
> 
> set.seed(123)
> plot(net,
+      vertex.size=hs*30,
+      main = 'Hubs',
+      vertex.color = rainbow(52),
+      edge.arrow.size=0.1,
+      layout = layout.kamada.kawai)
> 
> set.seed(123)
> plot(net,
+      vertex.size=as*30,
+      main = 'Authorities',
+      vertex.color = rainbow(52),
+      edge.arrow.size=0.1,
+      layout = layout.kamada.kawai)
> 
> 
> net <- graph_from_data_frame(y, directed = F)
> cnet <- cluster_edge_betweenness(net)
> 
> # cluster_fast_greedy(net)
> # cluster_fluid_communities(net, 2)
> # cluster_infomap(net)
> # cluster_label_prop()
> # cluster_spinglass()
> # cluster.distribution()
> # components(net, "strong")
> 
> infomap <- cluster_infomap(net)
> # c.infomap <- cluster_infomap(net)
> 
> plot(cnet,
+      net,
+      vertex.size = 10,
+      vertex.label.cex = 0.8)
> 
> plot(infomap,
+      net,
+      vertex.size = 10,
+      vertex.label.cex = 0.8)
> 
> ############################################################
> # clustering in R
> # clusterdata.csv
> orgs <- read.csv("http://commres.net/wiki/_media/r/clusterdata.csv", header=T)
> str(orgs)
'data.frame':	22 obs. of  9 variables:
 $ Company     : chr  "Arizona " "Boston " "Central " "Commonwealth" ...
 $ Fixed_charge: num  1.06 0.89 1.43 1.02 1.49 1.32 1.22 1.1 1.34 1.12 ...
 $ RoR         : num  9.2 10.3 15.4 11.2 8.8 13.5 12.2 9.2 13 12.4 ...
 $ Cost        : int  151 202 113 168 192 111 175 245 168 197 ...
 $ Load        : num  54.4 57.9 53 56 51.2 60 67.6 57 60.4 53 ...
 $ D.Demand    : num  1.6 2.2 3.4 0.3 1 -2.2 2.2 3.3 7.2 2.7 ...
 $ Sales       : int  9077 5088 9212 6423 3300 11127 7642 13082 8406 6455 ...
 $ Nuclear     : num  0 25.3 0 34.3 15.6 22.5 0 0 0 39.2 ...
 $ Fuel_Cost   : num  0.628 1.555 1.058 0.7 2.044 ...
> head(orgs)
       Company Fixed_charge  RoR Cost Load D.Demand Sales Nuclear
1     Arizona          1.06  9.2  151 54.4      1.6  9077     0.0
2      Boston          0.89 10.3  202 57.9      2.2  5088    25.3
3     Central          1.43 15.4  113 53.0      3.4  9212     0.0
4 Commonwealth         1.02 11.2  168 56.0      0.3  6423    34.3
5    Con Ed NY         1.49  8.8  192 51.2      1.0  3300    15.6
6     Florida          1.32 13.5  111 60.0     -2.2 11127    22.5
  Fuel_Cost
1     0.628
2     1.555
3     1.058
4     0.700
5     2.044
6     1.241
> 
> pairs(orgs[2:9])
> 
> plot(Fuel_Cost ~ Sales, data = orgs)
> with(orgs, text(Fuel_Cost ~ Sales, 
+                 labels=orgs$Company,
+                 pos=4))
> 
> # normalization (standardization)
> z <- orgs[,-c(1,1)] # remove the first column
> means <- apply(z, 2, mean)
> sds <- apply(z, 2, sd)
> means
Fixed_charge          RoR         Cost         Load     D.Demand 
    1.114091    10.736364   168.181818    56.977273     3.240909 
       Sales      Nuclear    Fuel_Cost 
 8914.045455    12.000000     1.102727 
> sds
Fixed_charge          RoR         Cost         Load     D.Demand 
   0.1845112    2.2440494   41.1913495    4.4611478    3.1182503 
       Sales      Nuclear    Fuel_Cost 
3549.9840305   16.7919198    0.5560981 
> 
> nor <- scale(z, center=means, scale=sds)
> nor 
      Fixed_charge         RoR         Cost        Load
 [1,]  -0.29315791 -0.68463896 -0.417122002 -0.57771516
 [2,]  -1.21451134 -0.19445367  0.821002037  0.20683629
 [3,]   1.71214073  2.07822360 -1.339645796 -0.89153574
 [4,]  -0.50994695  0.20660702 -0.004413989 -0.21906307
 [5,]   2.03732429 -0.86288816  0.578232617 -1.29501935
 [6,]   1.11597086  1.23153991 -1.388199680  0.67756716
 [7,]   0.57399826  0.65223002  0.165524604  2.38116460
 [8,]  -0.07636887 -0.68463896  1.864910540  0.00509449
 [9,]   1.22436538  1.00872841 -0.004413989  0.76723019
[10,]   0.03202565  0.74135462  0.699617327 -0.89153574
[11,]  -1.97327298 -1.44219805  0.116970720 -1.22777208
[12,]   0.08622291  0.07292013  0.238355430  1.12588228
[13,]   0.19461744  0.87504152  0.748171211 -0.73462545
[14,]  -0.13056613  0.56310542 -1.752353809 -1.60883993
[15,]  -0.83513051 -1.39763576 -0.101521757  1.17071379
[16,]   0.24881470 -0.37270287  2.034849134 -0.21906307
[17,]  -1.91907572 -1.93238335 -0.781276132  1.10346652
[18,]  -0.34735517  0.83047922 -0.441398944 -0.06215278
[19,]   0.24881470  0.42941852 -1.558138274 -0.66737818
[20,]   0.46560374  0.47398082 -0.489952828  0.65515141
[21,]  -0.40155243 -0.95201276  0.869555920  0.90172472
[22,]  -0.23896065 -0.64007666  0.141247662 -0.60013092
         D.Demand       Sales    Nuclear   Fuel_Cost
 [1,] -0.52622751  0.04590290 -0.7146294 -0.85367545
 [2,] -0.33381191 -1.07776413  0.7920476  0.81329670
 [3,]  0.05101929  0.08393124 -0.7146294 -0.08043055
 [4,] -0.94312798 -0.70170610  1.3280197 -0.72420189
 [5,] -0.71864311 -1.58142837  0.2143888  1.69263800
 [6,] -1.74485965  0.62337028  0.6253007  0.24864810
 [7,] -0.33381191 -0.35832428 -0.7146294  0.98772637
 [8,]  0.01895002  1.17407698 -0.7146294 -1.42731528
 [9,]  1.26965142 -0.14311204 -0.7146294 -0.43288637
[10,] -0.17346558 -0.69269198  1.6198267 -0.86266667
[11,]  1.04516655  2.40196983 -0.7146294 -0.60192130
[12,]  0.14722709 -0.77748109 -0.7146294  1.42829614
[13,]  1.01309729 -0.48874740  2.2749037 -1.03529809
[14,] -0.59036605  0.21379097 -0.7146294 -0.92560521
[15,] -1.07140505 -0.68902999 -0.6610322  0.53456889
[16,]  1.91103676  1.99351729 -0.7146294 -0.86806140
[17,]  1.84689822 -0.90142531 -0.2203441  1.46965575
[18,] -0.17346558  0.34534086 -0.7146294  0.00948165
[19,] -1.71279038  1.29379583 -0.7146294 -0.83928950
[20,]  0.08308855 -0.45832473  1.7329764 -0.72060540
[21,]  0.08308855 -0.63776215 -0.7146294  1.82211157
[22,]  0.85275095  0.33210137  0.8694658  0.36553395
attr(,"scaled:center")
Fixed_charge          RoR         Cost         Load     D.Demand 
    1.114091    10.736364   168.181818    56.977273     3.240909 
       Sales      Nuclear    Fuel_Cost 
 8914.045455    12.000000     1.102727 
attr(,"scaled:scale")
Fixed_charge          RoR         Cost         Load     D.Demand 
   0.1845112    2.2440494   41.1913495    4.4611478    3.1182503 
       Sales      Nuclear    Fuel_Cost 
3549.9840305   16.7919198    0.5560981 
> 
> distance <- dist(nor)
> distance
          1        2        3        4        5        6        7
2  3.096154                                                      
3  3.679230 4.916465                                             
4  2.462149 2.164213 4.107079                                    
5  4.123129 3.852850 4.468735 4.127368                           
6  3.606269 4.218804 2.992760 3.201836 4.600183                  
7  3.901898 3.448346 4.217769 3.969367 4.596261 3.352919         
8  2.737407 3.892524 4.990876 3.692949 5.155516 4.913953 4.364509
9  3.253851 3.957125 2.752623 3.753627 4.489900 3.730814 2.796298
10 3.099116 2.705330 3.934935 1.491427 4.045276 3.829058 4.506512
11 3.491163 4.792640 5.902882 4.864730 6.460986 6.004557 5.995814
12 3.223138 2.432568 4.031434 3.498769 3.603863 3.738824 1.660047
13 3.959637 3.434878 4.385973 2.577003 4.758059 4.554909 5.010221
14 2.113490 4.323825 2.742000 3.230069 4.818803 3.469268 4.914949
15 2.593481 2.501195 5.156977 3.190250 4.255251 4.065764 2.930142
16 4.033051 4.837051 5.264442 4.967244 5.816715 5.842268 5.042444
17 4.396680 3.623588 6.356548 4.893679 5.628591 6.099456 4.577294
18 1.877248 2.904409 2.723954 2.651532 4.338150 2.853942 2.949006
19 2.410434 4.634878 3.179392 3.464171 5.133791 2.581208 4.515428
20 3.174488 2.997481 3.733274 1.816465 4.385852 2.912401 3.541931
21 3.453407 2.318451 5.088018 3.884260 3.644137 4.628341 2.675404
22 2.509287 2.421916 4.109321 2.578463 3.771757 4.026935 4.000096
          8        9       10       11       12       13       14
2                                                                
3                                                                
4                                                                
5                                                                
6                                                                
7                                                                
8                                                                
9  3.594824                                                      
10 3.673884 3.572023                                             
11 3.462587 5.175240 5.081469                                    
12 4.059770 2.735861 3.942171 5.208504                           
13 4.140607 3.658647 1.407032 5.309741 4.496249                  
14 4.335241 3.816443 3.610272 4.315584 4.335484 4.385649         
15 3.849872 4.113606 4.264133 4.735659 2.328833 5.103646 4.239522
16 2.201457 3.627307 4.531420 3.429962 4.617791 4.406173 5.169314
17 5.426511 4.901037 5.484537 4.751387 3.497555 5.606577 5.558002
18 3.237409 2.428533 3.070750 3.945595 2.451935 3.780942 2.301050
19 4.107966 4.109049 4.130120 4.522319 4.414578 5.010864 1.876051
20 4.094283 2.948021 2.054393 5.352136 3.430937 2.226493 3.744430
21 3.977130 3.742680 4.361961 4.883977 1.384124 4.937119 4.926966
22 3.239374 3.208932 2.559945 3.436927 2.995066 2.739910 3.512207
         15       16       17       18       19       20       21
2                                                                
3                                                                
4                                                                
5                                                                
6                                                                
7                                                                
8                                                                
9                                                                
10                                                               
11                                                               
12                                                               
13                                                               
14                                                               
15                                                               
16 5.175157                                                      
17 3.399659 5.559320                                             
18 2.998784 3.973815 4.426129                                    
19 4.030721 5.232256 6.089597 2.473696                           
20 3.782111 4.823711 4.866540 2.922392 3.903723                  
21 2.097150 4.568885 3.095002 3.185250 4.972551 4.145222         
22 3.352644 3.457129 3.628061 2.548060 3.967618 2.618050 3.012264
> 
> orgs.hclust = hclust(distance)
> 
> plot(orgs.hclust)
> plot(orgs.hclust,
+      labels = orgs$Company,
+      main='Default from hclust')
> plot(orgs.hclust,
+      hang=-1, 
+      labels=orgs$Company, 
+      main='Default from hclust')
> 
> 
> orgs.hclust.average<-hclust(distance, method="average") 
> plot(orgs.hclust.average, hang=-1,
+      labels=orgs$Company, 
+      main='hclust average')
> 
> plot(orgs.hclust.average, hang=-1,
+      labels=orgs$Company, 
+      main='hclust average')
> rect.hclust(orgs.hclust.average, k=4, border='red')
> 
> member.by.3 <- cutree(orgs.hclust, 3)
> table(member.by.3)
member.by.3
 1  2  3 
14  5  3 
> member.by.3
 [1] 1 1 1 1 1 1 2 3 1 1 3 2 1 1 2 3 2 1 1 1 2 1
> 
> member.by.5 <- cutree(orgs.hclust, 5)
> table(member.by.5)
member.by.5
1 2 3 4 5 
7 6 1 5 3 
> member.by.5
 [1] 1 2 1 2 3 1 4 5 1 2 5 4 2 1 4 5 4 1 1 2 4 2
> 
> 
> aggregate(nor,list(member.by.3),mean)
  Group.1 Fixed_charge        RoR        Cost       Load
1       1    0.3068832  0.4326015 -0.31481203 -0.3743722
2       2   -0.4991075 -0.7113763  0.07812761  1.3365904
3       3   -0.6002757 -0.8331800  1.33891013 -0.4805802
    D.Demand      Sales    Nuclear  Fuel_Cost
1 -0.2605107 -0.1575387  0.3692252 -0.2389329
2  0.1343994 -0.6728046 -0.6050529  1.2484717
3  0.9917178  1.8565214 -0.7146294 -0.9657660
> aggregate(nor,list(member.by.3),sd)
  Group.1 Fixed_charge       RoR      Cost      Load  D.Demand
1       1    0.9132159 0.8165532 0.8995420 0.7412840 0.9032477
2       2    0.9530092 1.0599834 0.5980215 0.5929804 1.0733867
3       3    1.2001155 0.5500030 1.0616363 0.6567217 0.9471751
      Sales   Nuclear Fuel_Cost
1 0.7392081 1.0933974 0.7974146
2 0.2022460 0.2163074 0.4969827
3 0.6253048 0.0000000 0.4212819
> 
> wss <- (nrow(nor)-1)*sum(apply(nor,2,var))
> for (i in 2:20) 
+   wss[i] <- sum(kmeans(nor, centers=i)$withinss)
> plot(1:20, wss, 
+      type="b", 
+      xlab="Number of Clusters", 
+      ylab="Within groups sum of squares")
> 
> 
> # kmeans clustering 
> set.seed(123)
> kc<-kmeans(nor, 3)
> kc
K-means clustering with 3 clusters of sizes 7, 5, 10

Cluster means:
  Fixed_charge         RoR       Cost       Load    D.Demand
1  -0.23896065 -0.65917479  0.2556961  0.7992527 -0.05435116
2   0.51980100  1.02655333 -1.2959473 -0.5104679 -0.83409247
3  -0.09262805 -0.05185431  0.4689864 -0.3042429  0.45509205
       Sales    Nuclear  Fuel_Cost
1 -0.8604593 -0.2884040  1.2497562
2  0.5120458 -0.4466434 -0.3174391
3  0.3462986  0.4252045 -0.7161098

Clustering vector:
 [1] 3 1 2 3 1 2 1 3 3 3 3 1 3 2 1 3 1 2 2 3 1 3

Within cluster sum of squares by cluster:
[1] 34.16481 15.15613 57.53424
 (between_SS / total_SS =  36.4 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"    
[5] "tot.withinss" "betweenss"    "size"         "iter"        
[9] "ifault"      
> 
> # install.packages("cluster")
> library(cluster)
> 
> ot<-nor
> datadistshortset <- dist(ot, method = "euclidean")
> hc1 <- hclust(datadistshortset, method = "complete" )
> pamvshortset <- pam(datadistshortset, 4, diss = FALSE)
> clusplot(pamvshortset, shade = FALSE, 
+          labels=2, 
+          col.clus="blue",
+          col.p="red", 
+          span=FALSE,
+          main="Cluster Mapping",
+          cex=1.2)
> 
> # kmeans
> # install.packages("factoextra")
> library(factoextra) 
> k2 <- kmeans(nor, centers = 3, nstart = 25)
> str(k2)
List of 9
 $ cluster     : int [1:22] 3 2 3 3 2 3 2 1 3 3 ...
 $ centers     : num [1:3, 1:8] -0.6 -0.239 0.289 -0.833 -0.659 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:3] "1" "2" "3"
  .. ..$ : chr [1:8] "Fixed_charge" "RoR" "Cost" "Load" ...
 $ totss       : num 168
 $ withinss    : num [1:3] 9.53 34.16 58.01
 $ tot.withinss: num 102
 $ betweenss   : num 66.3
 $ size        : int [1:3] 3 7 12
 $ iter        : int 2
 $ ifault      : int 0
 - attr(*, "class")= chr "kmeans"
> 
> fviz_cluster(k2, data = nor)
> fviz_nbclust(nor, kmeans, method = "wss")
> fviz_nbclust(nor, kmeans, method = "silhouette")
> 
> gap_stat <- clusGap(nor, FUN = kmeans, 
+                     nstart = 25, K.max = 10, B = 50)
Clustering k = 1,2,..., K.max (= 10): .. done
Bootstrapping, b = 1,2,..., B (= 50)  [one "." per sample]:
.................................................. 50 
> fviz_gap_stat(gap_stat)
> 



















sna_and_clustering.txt · Last modified: 2024/11/25 08:45 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki