====== sna (social network analysis) and clustering ======
in R
#
library(igraph)
data <- read.csv("http://commres.net/wiki/_media/r/socialnetworkdata.csv", header=T)
head(data)
str(data)
y <- data.frame(data$first, data$second)
head(y)
# net <- graph.data.frame(y, directed=T)
net <- graph_from_data_frame(y, directed=T)
head(net)
net
V(net) # vertex in net data
who.net <- V(net) # 52/52 vertices
data.frame(who.net)
E(net) # edge info in net data
rel.net <- E(net)
rel.net # output 290/290 edges
# 52 by 290 data set
V(net)$degree <- degree(net)
degree.net <- data.frame(degree(net))
V(net)$degree
str(who.net)
who.net$name
hist(V(net)$degree)
set.seed(222)
plot(net,
vertex.color = 'lightblue',
vertext.size = 2,
edge.arrow.size = 0.1,
vertex.label.cex = 0.8)
plot(net,
vertex.color = rainbow(52),
vertex.size = V(net)$degree*0.3,
edge.arrow.size = 0.1,
layout=layout.fruchterman.reingold)
# layout layout.fruchterman.reingold
# layout.graphopt
plot(net,
vertex.color = rainbow(52),
vertex.size = V(net)$degree*0.8,
edge.arrow.size = 0.1,
layout=layout.graphopt)
plot(net,
vertex.color = rainbow(52),
vertex.size = V(net)$degree*0.4,
edge.arrow.size = 0.1,
layout=layout.kamada.kawai)
betweenness(net)
degree(net)
closeness(net)
hits_scores(net)$hub # outlinks
hits_scores(net)$authority # inlinks
hs <- hits_scores(net)$hub # outlinks
as <- hits_scores(net)$authority # inlinks
set.seed(123)
plot(net,
vertex.size=hs*30,
main = 'Hubs',
vertex.color = rainbow(52),
edge.arrow.size=0.1,
layout = layout.kamada.kawai)
set.seed(123)
plot(net,
vertex.size=as*30,
main = 'Authorities',
vertex.color = rainbow(52),
edge.arrow.size=0.1,
layout = layout.kamada.kawai)
net <- graph_from_data_frame(y, directed = F)
cnet <- cluster_edge_betweenness(net)
# cluster_fast_greedy(net)
# cluster_fluid_communities(net, 2)
# cluster_infomap(net)
# cluster_label_prop()
# cluster_spinglass()
# cluster.distribution()
# components(net, "strong")
infomap <- cluster_infomap(net)
# c.infomap <- cluster_infomap(net)
plot(cnet,
net,
vertex.size = 10,
vertex.label.cex = 0.8)
plot(infomap,
net,
vertex.size = 10,
vertex.label.cex = 0.8)
############################################################
# clustering in R
# clusterdata.csv
orgs <- read.csv("http://commres.net/wiki/_media/r/clusterdata.csv", header=T)
str(orgs)
head(orgs)
pairs(orgs[2:9])
plot(Fuel_Cost ~ Sales, data = orgs)
with(orgs, text(Fuel_Cost ~ Sales,
labels=orgs$Company,
pos=4))
# normalization (standardization)
z <- orgs[,-c(1,1)] # remove the first column
means <- apply(z, 2, mean)
sds <- apply(z, 2, sd)
means
sds
nor <- scale(z, center=means, scale=sds)
nor
distance <- dist(nor)
distance
orgs.hclust = hclust(distance)
plot(orgs.hclust)
plot(orgs.hclust,
labels = orgs$Company,
main='Default from hclust')
plot(orgs.hclust,
hang=-1,
labels=orgs$Company,
main='Default from hclust')
orgs.hclust.average<-hclust(distance, method="average")
plot(orgs.hclust.average, hang=-1,
labels=orgs$Company,
main='hclust average')
plot(orgs.hclust.average, hang=-1,
labels=orgs$Company,
main='hclust average')
rect.hclust(orgs.hclust.average, k=4, border='red')
member.by.3 <- cutree(orgs.hclust, 3)
table(member.by.3)
member.by.3
member.by.5 <- cutree(orgs.hclust, 5)
table(member.by.5)
member.by.5
aggregate(nor,list(member.by.3),mean)
aggregate(nor,list(member.by.3),sd)
wss <- (nrow(nor)-1)*sum(apply(nor,2,var))
for (i in 2:20)
wss[i] <- sum(kmeans(nor, centers=i)$withinss)
plot(1:20, wss,
type="b",
xlab="Number of Clusters",
ylab="Within groups sum of squares")
# kmeans clustering
set.seed(123)
kc<-kmeans(nor, 3)
kc
# install.packages("cluster")
library(cluster)
ot<-nor
datadistshortset <- dist(ot, method = "euclidean")
hc1 <- hclust(datadistshortset, method = "complete" )
pamvshortset <- pam(datadistshortset, 4, diss = FALSE)
clusplot(pamvshortset, shade = FALSE,
labels=2,
col.clus="blue",
col.p="red",
span=FALSE,
main="Cluster Mapping",
cex=1.2)
# kmeans
# install.packages("factoextra")
library(factoextra)
k2 <- kmeans(nor, centers = 3, nstart = 25)
str(k2)
fviz_cluster(k2, data = nor)
fviz_nbclust(nor, kmeans, method = "wss")
fviz_nbclust(nor, kmeans, method = "silhouette")
gap_stat <- clusGap(nor, FUN = kmeans,
nstart = 25, K.max = 10, B = 50)
fviz_gap_stat(gap_stat)
====== output ======
>
>
> #
> library(igraph)
> data <- read.csv("http://commres.net/wiki/_media/r/socialnetworkdata.csv", header=T)
> head(data)
first second grade spec
1 AA DD 6 Y
2 AB DD 6 R
3 AF BA 6 Q
4 DD DA 6 Q
5 CD EC 6 X
6 DD CE 6 Y
> str(data)
'data.frame': 290 obs. of 4 variables:
$ first : chr "AA" "AB" "AF" "DD" ...
$ second: chr "DD" "DD" "BA" "DA" ...
$ grade : int 6 6 6 6 6 6 6 6 6 6 ...
$ spec : chr "Y" "R" "Q" "Q" ...
>
> y <- data.frame(data$first, data$second)
> head(y)
data.first data.second
1 AA DD
2 AB DD
3 AF BA
4 DD DA
5 CD EC
6 DD CE
>
> # net <- graph.data.frame(y, directed=T)
> net <- graph_from_data_frame(y, directed=T)
> head(net)
6 x 52 sparse Matrix of class "dgCMatrix"
[[ suppressing 52 column names ‘AA’, ‘AB’, ‘AF’ ... ]]
AA . . . 10 . . . . . . . . . . . . . . . . . . . . . . . . . . .
AB . . . 1 . 4 . . . . . . . . . . . . . . . . . . . . . . . . .
AF . . . . . 3 . . 2 . . . . 1 . . . . . . . . . . . . . . . . .
DD . . . . 2 . . . . . . . . . . . . . . . . . . 2 . . . . . . .
CD . . . 10 . . . 4 . 2 . 2 . . 1 . . . . . . . . . . . . . . . .
BA . 2 5 . . . . . 1 1 . . . . . 1 . . . . . . . . 1 . . . . . .
AA . . . . . . . . . 1 1 . . . . . . . . . .
AB . . 1 . . . . . . . . . . . . . . . . . .
AF . . . . . . . . . . . . . . . . . . . 1 .
DD . . . . 4 . . . . . . . . . . . . . . . .
CD . . . . . 3 1 . . . . . . . . 1 . 1 . . .
BA . . . . . . . . . . . . . . . . . . 1 . 1
> net
IGRAPH 612e768 DN-- 52 290 --
+ attr: name (v/c)
+ edges from 612e768 (vertex names):
[1] AA->DD AB->DD AF->BA DD->DA CD->EC DD->CE CD->FA CD->CC BA->AF
[10] CB->CA CC->CA CD->CA BC->CA DD->DA ED->AD AE->AC AB->BA CD->EC
[19] CA->CC EB->CC BF->CE BB->CD AC->AE CC->FB DC->BB BD->CF DB->DA
[28] DD->DA DB->DD BC->AF CF->DE DF->BF CB->CA BE->CA EA->CA CB->CA
[37] CB->CA CC->CA CD->CA BC->CA BF->CA CE->CA AC->AD BD->BE AE->DF
[46] CB->DF AC->DF AA->DD AA->DD AA->DD CD->DD AA->DD EE->DD CD->DD
[55] DB->AA AA->FC BE->CC EF->FD CF->FE BB->DD CD->DD BA->AB CD->EC
[64] BE->EE CE->CC CD->CC ED->CC BB->CC BE->CE DD->CE AC->CD ED->CD
+ ... omitted several edges
>
> V(net) # vertex in net data
+ 52/52 vertices, named, from 612e768:
[1] AA AB AF DD CD BA CB CC BC ED AE CA EB BF BB AC DC BD DB CF DF
[22] BE EA CE EE EF FF FD GB GC GD AD KA KF LC DA EC FA FB DE FC FE
[43] GA GE KB KC KD KE LB LA LD LE
> who.net <- V(net) # 52/52 vertices
> data.frame(who.net)
who.net
AA 1
AB 2
AF 3
DD 4
CD 5
BA 6
CB 7
CC 8
BC 9
ED 10
AE 11
CA 12
EB 13
BF 14
BB 15
AC 16
DC 17
BD 18
DB 19
CF 20
DF 21
BE 22
EA 23
CE 24
EE 25
EF 26
FF 27
FD 28
GB 29
GC 30
GD 31
AD 32
KA 33
KF 34
LC 35
DA 36
EC 37
FA 38
FB 39
DE 40
FC 41
FE 42
GA 43
GE 44
KB 45
KC 46
KD 47
KE 48
LB 49
LA 50
LD 51
LE 52
>
> E(net) # edge info in net data
+ 290/290 edges from 612e768 (vertex names):
[1] AA->DD AB->DD AF->BA DD->DA CD->EC DD->CE CD->FA CD->CC BA->AF
[10] CB->CA CC->CA CD->CA BC->CA DD->DA ED->AD AE->AC AB->BA CD->EC
[19] CA->CC EB->CC BF->CE BB->CD AC->AE CC->FB DC->BB BD->CF DB->DA
[28] DD->DA DB->DD BC->AF CF->DE DF->BF CB->CA BE->CA EA->CA CB->CA
[37] CB->CA CC->CA CD->CA BC->CA BF->CA CE->CA AC->AD BD->BE AE->DF
[46] CB->DF AC->DF AA->DD AA->DD AA->DD CD->DD AA->DD EE->DD CD->DD
[55] DB->AA AA->FC BE->CC EF->FD CF->FE BB->DD CD->DD BA->AB CD->EC
[64] BE->EE CE->CC CD->CC ED->CC BB->CC BE->CE DD->CE AC->CD ED->CD
[73] FF->CD AC->CD DD->CD DD->CD AE->GA AE->GA AE->GA AE->GA BA->ED
[82] BE->ED EB->ED CD->ED FD->EF FD->EF CD->BB BF->BB BC->BB BB->CF
+ ... omitted several edges
> rel.net <- E(net)
> rel.net # output 290/290 edges
+ 290/290 edges from 612e768 (vertex names):
[1] AA->DD AB->DD AF->BA DD->DA CD->EC DD->CE CD->FA CD->CC BA->AF
[10] CB->CA CC->CA CD->CA BC->CA DD->DA ED->AD AE->AC AB->BA CD->EC
[19] CA->CC EB->CC BF->CE BB->CD AC->AE CC->FB DC->BB BD->CF DB->DA
[28] DD->DA DB->DD BC->AF CF->DE DF->BF CB->CA BE->CA EA->CA CB->CA
[37] CB->CA CC->CA CD->CA BC->CA BF->CA CE->CA AC->AD BD->BE AE->DF
[46] CB->DF AC->DF AA->DD AA->DD AA->DD CD->DD AA->DD EE->DD CD->DD
[55] DB->AA AA->FC BE->CC EF->FD CF->FE BB->DD CD->DD BA->AB CD->EC
[64] BE->EE CE->CC CD->CC ED->CC BB->CC BE->CE DD->CE AC->CD ED->CD
[73] FF->CD AC->CD DD->CD DD->CD AE->GA AE->GA AE->GA AE->GA BA->ED
[82] BE->ED EB->ED CD->ED FD->EF FD->EF CD->BB BF->BB BC->BB BB->CF
+ ... omitted several edges
>
> # 52 by 290 data set
>
> V(net)$degree <- degree(net)
> degree.net <- data.frame(degree(net))
> V(net)$degree
[1] 18 9 23 36 40 26 24 50 21 27 15 62 7 12 23 27 2 4 8 12 23
[22] 20 8 10 6 8 1 8 1 1 1 9 3 3 1 7 3 1 1 2 1 2
[43] 5 1 1 1 1 1 1 1 1 1
>
>
> str(who.net)
'igraph.vs' Named int [1:52] 1 2 3 4 5 6 7 8 9 10 ...
- attr(*, "names")= chr [1:52] "AA" "AB" "AF" "DD" ...
- attr(*, "is_all")= logi TRUE
- attr(*, "env")=
- attr(*, "graph")= chr "612e7682-aabd-11ef-9ea2-7d7f9645968c"
> who.net$name
[1] "AA" "AB" "AF" "DD" "CD" "BA" "CB" "CC" "BC" "ED" "AE" "CA"
[13] "EB" "BF" "BB" "AC" "DC" "BD" "DB" "CF" "DF" "BE" "EA" "CE"
[25] "EE" "EF" "FF" "FD" "GB" "GC" "GD" "AD" "KA" "KF" "LC" "DA"
[37] "EC" "FA" "FB" "DE" "FC" "FE" "GA" "GE" "KB" "KC" "KD" "KE"
[49] "LB" "LA" "LD" "LE"
>
> hist(V(net)$degree)
>
> set.seed(222)
> plot(net,
+ vertex.color = 'lightblue',
+ vertext.size = 2,
+ edge.arrow.size = 0.1,
+ vertex.label.cex = 0.8)
>
> plot(net,
+ vertex.color = rainbow(52),
+ vertex.size = V(net)$degree*0.3,
+ edge.arrow.size = 0.1,
+ layout=layout.fruchterman.reingold)
> # layout layout.fruchterman.reingold
> # layout.graphopt
> plot(net,
+ vertex.color = rainbow(52),
+ vertex.size = V(net)$degree*0.8,
+ edge.arrow.size = 0.1,
+ layout=layout.graphopt)
>
> plot(net,
+ vertex.color = rainbow(52),
+ vertex.size = V(net)$degree*0.4,
+ edge.arrow.size = 0.1,
+ layout=layout.kamada.kawai)
>
> betweenness(net)
AA AB AF DD CD BA
89.196239 4.564734 109.762225 162.990640 375.992047 184.496219
CB CC BC ED AE CA
36.237779 225.269107 94.821412 343.016093 105.368424 292.157605
EB BF BB AC DC BD
0.000000 28.446981 244.963181 172.427422 0.000000 5.966115
DB CF DF BE EA CE
0.000000 181.814414 42.768995 55.964872 183.604022 11.943347
EE EF FF FD GB GC
4.002137 0.000000 0.000000 33.000000 0.000000 0.000000
GD AD KA KF LC DA
0.000000 48.996857 0.000000 17.229134 0.000000 0.000000
EC FA FB DE FC FE
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
GA GE KB KC KD KE
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
LB LA LD LE
0.000000 0.000000 0.000000 0.000000
> degree(net)
AA AB AF DD CD BA CB CC BC ED AE CA EB BF BB AC DC BD DB CF DF BE
18 9 23 36 40 26 24 50 21 27 15 62 7 12 23 27 2 4 8 12 23 20
EA CE EE EF FF FD GB GC GD AD KA KF LC DA EC FA FB DE FC FE GA GE
8 10 6 8 1 8 1 1 1 9 3 3 1 7 3 1 1 2 1 2 5 1
KB KC KD KE LB LA LD LE
1 1 1 1 1 1 1 1
> closeness(net)
AA AB AF DD CD
0.005988024 0.007874016 0.008196721 0.007352941 0.010101010
BA CB CC BC ED
0.009900990 0.010101010 0.008771930 0.008695652 0.012048193
AE CA EB BF BB
0.010000000 0.008264463 0.009009009 0.009090909 0.010869565
AC DC BD DB CF
0.011111111 0.008547009 0.008771930 0.005917160 0.009433962
DF BE EA CE EE
0.010869565 0.009433962 0.006711409 0.006849315 0.007751938
EF FF FD GB GC
0.005319149 0.007042254 0.005319149 0.006329114 0.004761905
GD AD KA KF LC
0.006369427 0.009174312 0.008620690 0.005347594 0.006097561
DA EC FA FB DE
NaN NaN NaN NaN NaN
FC FE GA GE KB
NaN NaN NaN NaN NaN
KC KD KE LB LA
NaN NaN NaN NaN NaN
LD LE
NaN NaN
> hits_scores(net)$hub # outlinks
AA AB AF DD CD
8.042807e-02 1.300950e-02 4.731440e-03 1.255496e-02 2.350165e-01
BA CB CC BC ED
3.239013e-02 7.145888e-01 1.000000e+00 2.321828e-01 4.858357e-02
AE CA EB BF BB
7.949708e-02 4.494956e-02 1.870058e-01 1.272314e-01 1.150808e-01
AC DC BD DB CF
2.093518e-01 7.455515e-03 1.305960e-02 1.738176e-02 7.102816e-03
DF BE EA CE EE
2.505787e-02 2.195271e-01 5.693409e-02 6.319085e-02 1.273824e-02
EF FF FD GB GC
1.119098e-03 4.485458e-03 4.563724e-04 3.594258e-04 2.507046e-04
GD AD KA KF LC
6.374463e-03 9.643170e-03 6.276463e-02 4.418478e-04 5.681639e-02
DA EC FA FB DE
1.458925e-19 1.458925e-19 1.458925e-19 1.458925e-19 1.458925e-19
FC FE GA GE KB
1.458925e-19 1.458925e-19 1.458925e-19 1.458925e-19 1.458925e-19
KC KD KE LB LA
1.458925e-19 1.458925e-19 1.458925e-19 1.458925e-19 1.458925e-19
LD LE
1.458925e-19 1.458925e-19
> hits_scores(net)$authority # inlinks
AA AB AF DD CD
4.412541e-03 3.968841e-03 8.375004e-02 1.409698e-01 7.894656e-02
BA CB CC BC ED
2.146204e-02 3.660444e-03 1.121941e-01 6.326094e-03 4.799095e-02
AE CA EB BF BB
1.524499e-02 1.000000e+00 0.000000e+00 6.072030e-03 8.323026e-02
AC DC BD DB CF
6.262068e-02 0.000000e+00 2.486679e-04 0.000000e+00 8.146610e-03
DF BE EA CE EE
1.925650e-01 1.095155e-01 7.776767e-03 2.437300e-02 2.419075e-02
EF FF FD GB GC
6.390996e-05 0.000000e+00 2.071608e-03 0.000000e+00 0.000000e+00
GD AD KA KF LC
0.000000e+00 1.806051e-02 0.000000e+00 2.156359e-03 0.000000e+00
DA EC FA FB DE
3.583779e-03 2.468362e-02 8.227873e-03 3.500976e-02 4.973358e-04
FC FE GA GE KB
2.815768e-03 3.064436e-03 1.391587e-02 3.500976e-02 3.500976e-02
KC KD KE LB LA
2.486679e-04 8.227873e-03 2.501759e-02 8.227873e-03 1.133971e-03
LD LE
1.656466e-04 1.133971e-03
>
> hs <- hits_scores(net)$hub # outlinks
> as <- hits_scores(net)$authority # inlinks
>
>
>
> set.seed(123)
> plot(net,
+ vertex.size=hs*30,
+ main = 'Hubs',
+ vertex.color = rainbow(52),
+ edge.arrow.size=0.1,
+ layout = layout.kamada.kawai)
>
> set.seed(123)
> plot(net,
+ vertex.size=as*30,
+ main = 'Authorities',
+ vertex.color = rainbow(52),
+ edge.arrow.size=0.1,
+ layout = layout.kamada.kawai)
>
>
> net <- graph_from_data_frame(y, directed = F)
> cnet <- cluster_edge_betweenness(net)
>
> # cluster_fast_greedy(net)
> # cluster_fluid_communities(net, 2)
> # cluster_infomap(net)
> # cluster_label_prop()
> # cluster_spinglass()
> # cluster.distribution()
> # components(net, "strong")
>
> infomap <- cluster_infomap(net)
> # c.infomap <- cluster_infomap(net)
>
> plot(cnet,
+ net,
+ vertex.size = 10,
+ vertex.label.cex = 0.8)
>
> plot(infomap,
+ net,
+ vertex.size = 10,
+ vertex.label.cex = 0.8)
>
> ############################################################
> # clustering in R
> # clusterdata.csv
> orgs <- read.csv("http://commres.net/wiki/_media/r/clusterdata.csv", header=T)
> str(orgs)
'data.frame': 22 obs. of 9 variables:
$ Company : chr "Arizona " "Boston " "Central " "Commonwealth" ...
$ Fixed_charge: num 1.06 0.89 1.43 1.02 1.49 1.32 1.22 1.1 1.34 1.12 ...
$ RoR : num 9.2 10.3 15.4 11.2 8.8 13.5 12.2 9.2 13 12.4 ...
$ Cost : int 151 202 113 168 192 111 175 245 168 197 ...
$ Load : num 54.4 57.9 53 56 51.2 60 67.6 57 60.4 53 ...
$ D.Demand : num 1.6 2.2 3.4 0.3 1 -2.2 2.2 3.3 7.2 2.7 ...
$ Sales : int 9077 5088 9212 6423 3300 11127 7642 13082 8406 6455 ...
$ Nuclear : num 0 25.3 0 34.3 15.6 22.5 0 0 0 39.2 ...
$ Fuel_Cost : num 0.628 1.555 1.058 0.7 2.044 ...
> head(orgs)
Company Fixed_charge RoR Cost Load D.Demand Sales Nuclear
1 Arizona 1.06 9.2 151 54.4 1.6 9077 0.0
2 Boston 0.89 10.3 202 57.9 2.2 5088 25.3
3 Central 1.43 15.4 113 53.0 3.4 9212 0.0
4 Commonwealth 1.02 11.2 168 56.0 0.3 6423 34.3
5 Con Ed NY 1.49 8.8 192 51.2 1.0 3300 15.6
6 Florida 1.32 13.5 111 60.0 -2.2 11127 22.5
Fuel_Cost
1 0.628
2 1.555
3 1.058
4 0.700
5 2.044
6 1.241
>
> pairs(orgs[2:9])
>
> plot(Fuel_Cost ~ Sales, data = orgs)
> with(orgs, text(Fuel_Cost ~ Sales,
+ labels=orgs$Company,
+ pos=4))
>
> # normalization (standardization)
> z <- orgs[,-c(1,1)] # remove the first column
> means <- apply(z, 2, mean)
> sds <- apply(z, 2, sd)
> means
Fixed_charge RoR Cost Load D.Demand
1.114091 10.736364 168.181818 56.977273 3.240909
Sales Nuclear Fuel_Cost
8914.045455 12.000000 1.102727
> sds
Fixed_charge RoR Cost Load D.Demand
0.1845112 2.2440494 41.1913495 4.4611478 3.1182503
Sales Nuclear Fuel_Cost
3549.9840305 16.7919198 0.5560981
>
> nor <- scale(z, center=means, scale=sds)
> nor
Fixed_charge RoR Cost Load
[1,] -0.29315791 -0.68463896 -0.417122002 -0.57771516
[2,] -1.21451134 -0.19445367 0.821002037 0.20683629
[3,] 1.71214073 2.07822360 -1.339645796 -0.89153574
[4,] -0.50994695 0.20660702 -0.004413989 -0.21906307
[5,] 2.03732429 -0.86288816 0.578232617 -1.29501935
[6,] 1.11597086 1.23153991 -1.388199680 0.67756716
[7,] 0.57399826 0.65223002 0.165524604 2.38116460
[8,] -0.07636887 -0.68463896 1.864910540 0.00509449
[9,] 1.22436538 1.00872841 -0.004413989 0.76723019
[10,] 0.03202565 0.74135462 0.699617327 -0.89153574
[11,] -1.97327298 -1.44219805 0.116970720 -1.22777208
[12,] 0.08622291 0.07292013 0.238355430 1.12588228
[13,] 0.19461744 0.87504152 0.748171211 -0.73462545
[14,] -0.13056613 0.56310542 -1.752353809 -1.60883993
[15,] -0.83513051 -1.39763576 -0.101521757 1.17071379
[16,] 0.24881470 -0.37270287 2.034849134 -0.21906307
[17,] -1.91907572 -1.93238335 -0.781276132 1.10346652
[18,] -0.34735517 0.83047922 -0.441398944 -0.06215278
[19,] 0.24881470 0.42941852 -1.558138274 -0.66737818
[20,] 0.46560374 0.47398082 -0.489952828 0.65515141
[21,] -0.40155243 -0.95201276 0.869555920 0.90172472
[22,] -0.23896065 -0.64007666 0.141247662 -0.60013092
D.Demand Sales Nuclear Fuel_Cost
[1,] -0.52622751 0.04590290 -0.7146294 -0.85367545
[2,] -0.33381191 -1.07776413 0.7920476 0.81329670
[3,] 0.05101929 0.08393124 -0.7146294 -0.08043055
[4,] -0.94312798 -0.70170610 1.3280197 -0.72420189
[5,] -0.71864311 -1.58142837 0.2143888 1.69263800
[6,] -1.74485965 0.62337028 0.6253007 0.24864810
[7,] -0.33381191 -0.35832428 -0.7146294 0.98772637
[8,] 0.01895002 1.17407698 -0.7146294 -1.42731528
[9,] 1.26965142 -0.14311204 -0.7146294 -0.43288637
[10,] -0.17346558 -0.69269198 1.6198267 -0.86266667
[11,] 1.04516655 2.40196983 -0.7146294 -0.60192130
[12,] 0.14722709 -0.77748109 -0.7146294 1.42829614
[13,] 1.01309729 -0.48874740 2.2749037 -1.03529809
[14,] -0.59036605 0.21379097 -0.7146294 -0.92560521
[15,] -1.07140505 -0.68902999 -0.6610322 0.53456889
[16,] 1.91103676 1.99351729 -0.7146294 -0.86806140
[17,] 1.84689822 -0.90142531 -0.2203441 1.46965575
[18,] -0.17346558 0.34534086 -0.7146294 0.00948165
[19,] -1.71279038 1.29379583 -0.7146294 -0.83928950
[20,] 0.08308855 -0.45832473 1.7329764 -0.72060540
[21,] 0.08308855 -0.63776215 -0.7146294 1.82211157
[22,] 0.85275095 0.33210137 0.8694658 0.36553395
attr(,"scaled:center")
Fixed_charge RoR Cost Load D.Demand
1.114091 10.736364 168.181818 56.977273 3.240909
Sales Nuclear Fuel_Cost
8914.045455 12.000000 1.102727
attr(,"scaled:scale")
Fixed_charge RoR Cost Load D.Demand
0.1845112 2.2440494 41.1913495 4.4611478 3.1182503
Sales Nuclear Fuel_Cost
3549.9840305 16.7919198 0.5560981
>
> distance <- dist(nor)
> distance
1 2 3 4 5 6 7
2 3.096154
3 3.679230 4.916465
4 2.462149 2.164213 4.107079
5 4.123129 3.852850 4.468735 4.127368
6 3.606269 4.218804 2.992760 3.201836 4.600183
7 3.901898 3.448346 4.217769 3.969367 4.596261 3.352919
8 2.737407 3.892524 4.990876 3.692949 5.155516 4.913953 4.364509
9 3.253851 3.957125 2.752623 3.753627 4.489900 3.730814 2.796298
10 3.099116 2.705330 3.934935 1.491427 4.045276 3.829058 4.506512
11 3.491163 4.792640 5.902882 4.864730 6.460986 6.004557 5.995814
12 3.223138 2.432568 4.031434 3.498769 3.603863 3.738824 1.660047
13 3.959637 3.434878 4.385973 2.577003 4.758059 4.554909 5.010221
14 2.113490 4.323825 2.742000 3.230069 4.818803 3.469268 4.914949
15 2.593481 2.501195 5.156977 3.190250 4.255251 4.065764 2.930142
16 4.033051 4.837051 5.264442 4.967244 5.816715 5.842268 5.042444
17 4.396680 3.623588 6.356548 4.893679 5.628591 6.099456 4.577294
18 1.877248 2.904409 2.723954 2.651532 4.338150 2.853942 2.949006
19 2.410434 4.634878 3.179392 3.464171 5.133791 2.581208 4.515428
20 3.174488 2.997481 3.733274 1.816465 4.385852 2.912401 3.541931
21 3.453407 2.318451 5.088018 3.884260 3.644137 4.628341 2.675404
22 2.509287 2.421916 4.109321 2.578463 3.771757 4.026935 4.000096
8 9 10 11 12 13 14
2
3
4
5
6
7
8
9 3.594824
10 3.673884 3.572023
11 3.462587 5.175240 5.081469
12 4.059770 2.735861 3.942171 5.208504
13 4.140607 3.658647 1.407032 5.309741 4.496249
14 4.335241 3.816443 3.610272 4.315584 4.335484 4.385649
15 3.849872 4.113606 4.264133 4.735659 2.328833 5.103646 4.239522
16 2.201457 3.627307 4.531420 3.429962 4.617791 4.406173 5.169314
17 5.426511 4.901037 5.484537 4.751387 3.497555 5.606577 5.558002
18 3.237409 2.428533 3.070750 3.945595 2.451935 3.780942 2.301050
19 4.107966 4.109049 4.130120 4.522319 4.414578 5.010864 1.876051
20 4.094283 2.948021 2.054393 5.352136 3.430937 2.226493 3.744430
21 3.977130 3.742680 4.361961 4.883977 1.384124 4.937119 4.926966
22 3.239374 3.208932 2.559945 3.436927 2.995066 2.739910 3.512207
15 16 17 18 19 20 21
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 5.175157
17 3.399659 5.559320
18 2.998784 3.973815 4.426129
19 4.030721 5.232256 6.089597 2.473696
20 3.782111 4.823711 4.866540 2.922392 3.903723
21 2.097150 4.568885 3.095002 3.185250 4.972551 4.145222
22 3.352644 3.457129 3.628061 2.548060 3.967618 2.618050 3.012264
>
> orgs.hclust = hclust(distance)
>
> plot(orgs.hclust)
> plot(orgs.hclust,
+ labels = orgs$Company,
+ main='Default from hclust')
> plot(orgs.hclust,
+ hang=-1,
+ labels=orgs$Company,
+ main='Default from hclust')
>
>
> orgs.hclust.average<-hclust(distance, method="average")
> plot(orgs.hclust.average, hang=-1,
+ labels=orgs$Company,
+ main='hclust average')
>
> plot(orgs.hclust.average, hang=-1,
+ labels=orgs$Company,
+ main='hclust average')
> rect.hclust(orgs.hclust.average, k=4, border='red')
>
> member.by.3 <- cutree(orgs.hclust, 3)
> table(member.by.3)
member.by.3
1 2 3
14 5 3
> member.by.3
[1] 1 1 1 1 1 1 2 3 1 1 3 2 1 1 2 3 2 1 1 1 2 1
>
> member.by.5 <- cutree(orgs.hclust, 5)
> table(member.by.5)
member.by.5
1 2 3 4 5
7 6 1 5 3
> member.by.5
[1] 1 2 1 2 3 1 4 5 1 2 5 4 2 1 4 5 4 1 1 2 4 2
>
>
> aggregate(nor,list(member.by.3),mean)
Group.1 Fixed_charge RoR Cost Load
1 1 0.3068832 0.4326015 -0.31481203 -0.3743722
2 2 -0.4991075 -0.7113763 0.07812761 1.3365904
3 3 -0.6002757 -0.8331800 1.33891013 -0.4805802
D.Demand Sales Nuclear Fuel_Cost
1 -0.2605107 -0.1575387 0.3692252 -0.2389329
2 0.1343994 -0.6728046 -0.6050529 1.2484717
3 0.9917178 1.8565214 -0.7146294 -0.9657660
> aggregate(nor,list(member.by.3),sd)
Group.1 Fixed_charge RoR Cost Load D.Demand
1 1 0.9132159 0.8165532 0.8995420 0.7412840 0.9032477
2 2 0.9530092 1.0599834 0.5980215 0.5929804 1.0733867
3 3 1.2001155 0.5500030 1.0616363 0.6567217 0.9471751
Sales Nuclear Fuel_Cost
1 0.7392081 1.0933974 0.7974146
2 0.2022460 0.2163074 0.4969827
3 0.6253048 0.0000000 0.4212819
>
> wss <- (nrow(nor)-1)*sum(apply(nor,2,var))
> for (i in 2:20)
+ wss[i] <- sum(kmeans(nor, centers=i)$withinss)
> plot(1:20, wss,
+ type="b",
+ xlab="Number of Clusters",
+ ylab="Within groups sum of squares")
>
>
> # kmeans clustering
> set.seed(123)
> kc<-kmeans(nor, 3)
> kc
K-means clustering with 3 clusters of sizes 7, 5, 10
Cluster means:
Fixed_charge RoR Cost Load D.Demand
1 -0.23896065 -0.65917479 0.2556961 0.7992527 -0.05435116
2 0.51980100 1.02655333 -1.2959473 -0.5104679 -0.83409247
3 -0.09262805 -0.05185431 0.4689864 -0.3042429 0.45509205
Sales Nuclear Fuel_Cost
1 -0.8604593 -0.2884040 1.2497562
2 0.5120458 -0.4466434 -0.3174391
3 0.3462986 0.4252045 -0.7161098
Clustering vector:
[1] 3 1 2 3 1 2 1 3 3 3 3 1 3 2 1 3 1 2 2 3 1 3
Within cluster sum of squares by cluster:
[1] 34.16481 15.15613 57.53424
(between_SS / total_SS = 36.4 %)
Available components:
[1] "cluster" "centers" "totss" "withinss"
[5] "tot.withinss" "betweenss" "size" "iter"
[9] "ifault"
>
> # install.packages("cluster")
> library(cluster)
>
> ot<-nor
> datadistshortset <- dist(ot, method = "euclidean")
> hc1 <- hclust(datadistshortset, method = "complete" )
> pamvshortset <- pam(datadistshortset, 4, diss = FALSE)
> clusplot(pamvshortset, shade = FALSE,
+ labels=2,
+ col.clus="blue",
+ col.p="red",
+ span=FALSE,
+ main="Cluster Mapping",
+ cex=1.2)
>
> # kmeans
> # install.packages("factoextra")
> library(factoextra)
> k2 <- kmeans(nor, centers = 3, nstart = 25)
> str(k2)
List of 9
$ cluster : int [1:22] 3 2 3 3 2 3 2 1 3 3 ...
$ centers : num [1:3, 1:8] -0.6 -0.239 0.289 -0.833 -0.659 ...
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:3] "1" "2" "3"
.. ..$ : chr [1:8] "Fixed_charge" "RoR" "Cost" "Load" ...
$ totss : num 168
$ withinss : num [1:3] 9.53 34.16 58.01
$ tot.withinss: num 102
$ betweenss : num 66.3
$ size : int [1:3] 3 7 12
$ iter : int 2
$ ifault : int 0
- attr(*, "class")= chr "kmeans"
>
> fviz_cluster(k2, data = nor)
> fviz_nbclust(nor, kmeans, method = "wss")
> fviz_nbclust(nor, kmeans, method = "silhouette")
>
> gap_stat <- clusGap(nor, FUN = kmeans,
+ nstart = 25, K.max = 10, B = 50)
Clustering k = 1,2,..., K.max (= 10): .. done
Bootstrapping, b = 1,2,..., B (= 50) [one "." per sample]:
.................................................. 50
> fviz_gap_stat(gap_stat)
>
{{:pasted:20241125-084208.png}}
{{:pasted:20241125-084217.png}}
{{:pasted:20241125-084238.png}}
{{:pasted:20241125-084251.png}}
{{:pasted:20241125-084259.png}}
{{:pasted:20241125-084307.png}}
{{:pasted:20241125-084325.png}}
{{:pasted:20241125-084337.png}}
{{:pasted:20241125-084346.png}}
----
{{:pasted:20241125-084358.png}}
{{:pasted:20241125-084405.png}}
{{:pasted:20241125-084413.png}}
{{:pasted:20241125-084421.png}}
{{:pasted:20241125-084429.png}}
{{:pasted:20241125-084440.png}}
{{:pasted:20241125-084450.png}}
{{:pasted:20241125-084501.png}}
{{:pasted:20241125-084511.png}}
{{:pasted:20241125-084521.png}}
{{:pasted:20241125-084533.png}}
{{:pasted:20241125-084541.png}}
{{:pasted:20241125-084552.png}}