Differences

This shows you the differences between two versions of the page.

--- r:neural_network [2016/12/14 08:10] – [E.G. 2] hkimscil
+++ r:neural_network [2016/12/14 09:02] (current) – [E.G 5] hkimscil
@@ Line 212: / Line 212: @@
 ====== E.G. 2 ======
 {{:r:wine.csv}}
+for the description of the data: https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.names
 <code>wine <- read.csv("wine.csv")
 head(wine)
 summary(wine)
-</code>
-<code>wine.scale <- cbind(wine[1], scale(wine[-1]))
+wine.scale <- cbind(wine[1], scale(wine[-1]))
 summary(wine.scale)
 apply(wine.scale[-1], 2, sd)
-</code>
+# Partitioning the data into training and test data
-<code># Partitioning the data into training and test data
 data.size <- nrow(wine.scale)
 set.seed(1111)
@@ Line 232: / Line 230: @@
 summary(data.tr)
 summary(data.test)
-</code>
-<code># Fitting the neural network for the training data
+# Fitting the neural network for the training data
 library(nnet)
-model.nnet <- nnet(Type ~ ., data = data.tr, size = 2, decay = 5e-04, maxit = 200)</code>
+model.nnet <- nnet(Type ~ ., data = data.tr, size = 2, decay = 5e-04, maxit = 200)
+names(model.nnet)
+# Creating the confusion matrix for the model
+predicted <- predict(model.nnet, data.test, type = "class")
+predicted
+actual <- data.test$Type
+model.confusion.matrix <- table(actual, predicted)
+model.confusion.matrix
+confusion.matrix.rate = prop.table(model.confusion.matrix) * 100
+round(confusion.matrix.rate, digit = 2)
+diag.index <- cbind(1:3, 1:3)
+error.overall = sum(confusion.matrix.rate) - sum(confusion.matrix.rate[diag.index])
+paste("오차율 =", round(error.overall, digit = 2), "%")
+</code>
+<code>> wine <- read.csv("wine.csv")
+> head(wine)
+  Type Alcohol Malic  Ash Alcalinity Magnesium Phenols Flavanoids
+   t1   14.23  1.71 2.43       15.6       127    2.80       3.06
+   t1   13.20  1.78 2.14       11.2       100    2.65       2.76
+   t1   13.16  2.36 2.67       18.6       101    2.80       3.24
+   t1   14.37  1.95 2.50       16.8       113    3.85       3.49
+   t1   13.24  2.59 2.87       21.0       118    2.80       2.69
+   t1   14.20  1.76 2.45       15.2       112    3.27       3.39
+  Nonflavanoids Proanthocyanins Color  Hue Dilution Proline
+          0.28            2.29  5.64 1.04     3.92    1065
+          0.26            1.28  4.38 1.05     3.40    1050
+          0.30            2.81  5.68 1.03     3.17    1185
+          0.24            2.18  7.80 0.86     3.45    1480
+          0.39            1.82  4.32 1.04     2.93     735
+          0.34            1.97  6.75 1.05     2.85    1450
+> summary(wine)
+ Type       Alcohol          Malic            Ash
+ t1:59   Min.   :11.03   Min.   :0.740   Min.   :1.360
+ t2:71   1st Qu.:12.36   1st Qu.:1.603   1st Qu.:2.210
+ t3:48   Median :13.05   Median :1.865   Median :2.360
+         Mean   :13.00   Mean   :2.336   Mean   :2.367
+rd Qu.:13.68   3rd Qu.:3.083   3rd Qu.:2.558
+         Max.   :14.83   Max.   :5.800   Max.   :3.230
+   Alcalinity      Magnesium         Phenols        Flavanoids
+ Min.   :10.60   Min.   : 70.00   Min.   :0.980   Min.   :0.340
+st Qu.:17.20   1st Qu.: 88.00   1st Qu.:1.742   1st Qu.:1.205
+ Median :19.50   Median : 98.00   Median :2.355   Median :2.135
+ Mean   :19.49   Mean   : 99.74   Mean   :2.295   Mean   :2.029
+rd Qu.:21.50   3rd Qu.:107.00   3rd Qu.:2.800   3rd Qu.:2.875
+ Max.   :30.00   Max.   :162.00   Max.   :3.880   Max.   :5.080
+ Nonflavanoids    Proanthocyanins     Color
+ Min.   :0.1300   Min.   :0.410   Min.   : 1.280
+st Qu.:0.2700   1st Qu.:1.250   1st Qu.: 3.220
+ Median :0.3400   Median :1.555   Median : 4.690
+ Mean   :0.3619   Mean   :1.591   Mean   : 5.058
+rd Qu.:0.4375   3rd Qu.:1.950   3rd Qu.: 6.200
+ Max.   :0.6600   Max.   :3.580   Max.   :13.000
+      Hue            Dilution        Proline
+ Min.   :0.4800   Min.   :1.270   Min.   : 278.0
+st Qu.:0.7825   1st Qu.:1.938   1st Qu.: 500.5
+ Median :0.9650   Median :2.780   Median : 673.5
+ Mean   :0.9574   Mean   :2.612   Mean   : 746.9
+rd Qu.:1.1200   3rd Qu.:3.170   3rd Qu.: 985.0
+ Max.   :1.7100   Max.   :4.000   Max.   :1680.0
+> </code>
+<code>> wine.scale <- cbind(wine[1], scale(wine[-1]))
+> summary(wine.scale)
+ Type       Alcohol             Malic              Ash
+ t1:59   Min.   :-2.42739   Min.   :-1.4290   Min.   :-3.66881
+ t2:71   1st Qu.:-0.78603   1st Qu.:-0.6569   1st Qu.:-0.57051
+ t3:48   Median : 0.06083   Median :-0.4219   Median :-0.02375
+         Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.00000
+rd Qu.: 0.83378   3rd Qu.: 0.6679   3rd Qu.: 0.69615
+         Max.   : 2.25341   Max.   : 3.1004   Max.   : 3.14745
+   Alcalinity          Magnesium          Phenols
+ Min.   :-2.663505   Min.   :-2.0824   Min.   :-2.10132
+st Qu.:-0.687199   1st Qu.:-0.8221   1st Qu.:-0.88298
+ Median : 0.001514   Median :-0.1219   Median : 0.09569
+ Mean   : 0.000000   Mean   : 0.0000   Mean   : 0.00000
+rd Qu.: 0.600395   3rd Qu.: 0.5082   3rd Qu.: 0.80672
+ Max.   : 3.145637   Max.   : 4.3591   Max.   : 2.53237
+   Flavanoids      Nonflavanoids     Proanthocyanins
+ Min.   :-1.6912   Min.   :-1.8630   Min.   :-2.06321
+st Qu.:-0.8252   1st Qu.:-0.7381   1st Qu.:-0.59560
+ Median : 0.1059   Median :-0.1756   Median :-0.06272
+ Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.00000
+rd Qu.: 0.8467   3rd Qu.: 0.6078   3rd Qu.: 0.62741
+ Max.   : 3.0542   Max.   : 2.3956   Max.   : 3.47527
+     Color              Hue              Dilution
+ Min.   :-1.6297   Min.   :-2.08884   Min.   :-1.8897
+st Qu.:-0.7929   1st Qu.:-0.76540   1st Qu.:-0.9496
+ Median :-0.1588   Median : 0.03303   Median : 0.2371
+ Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000
+rd Qu.: 0.4926   3rd Qu.: 0.71116   3rd Qu.: 0.7864
+ Max.   : 3.4258   Max.   : 3.29241   Max.   : 1.9554
+    Proline
+ Min.   :-1.4890
+st Qu.:-0.7824
+ Median :-0.2331
+ Mean   : 0.0000
+rd Qu.: 0.7561
+ Max.   : 2.9631
+> apply(wine.scale[-1], 2, sd)
+        Alcohol           Malic             Ash      Alcalinity
+               1               1               1
+      Magnesium         Phenols      Flavanoids   Nonflavanoids
+               1               1               1
+Proanthocyanins           Color             Hue        Dilution
+               1               1               1
+        Proline
+
+></code>
+<code>> data.size <- nrow(wine.scale)
+> set.seed(1111)
+> samp <- c(sample(1:data.size, data.size * 0.7))
+> data.tr <- wine.scale[samp, ]
+> data.test <- wine.scale[-samp, ]
+> summary(data.tr)
+ Type       Alcohol              Malic              Ash
+ t1:41   Min.   :-1.897718   Min.   :-1.2947   Min.   :-3.66881
+ t2:50   1st Qu.:-0.816822   1st Qu.:-0.6815   1st Qu.:-0.57051
+ t3:33   Median : 0.023875   Median :-0.4712   Median :-0.02375
+         Mean   : 0.004802   Mean   :-0.0207   Mean   :-0.01053
+rd Qu.: 0.778346   3rd Qu.: 0.6992   3rd Qu.: 0.56857
+         Max.   : 2.253415   Max.   : 2.9662   Max.   : 3.14745
+   Alcalinity         Magnesium           Phenols
+ Min.   :-2.66350   Min.   :-2.08238   Min.   :-2.10132
+st Qu.:-0.74709   1st Qu.:-0.62955   1st Qu.:-0.79110
+ Median :-0.14821   Median :-0.08693   Median : 0.16759
+ Mean   :-0.04002   Mean   : 0.08867   Mean   : 0.04969
+rd Qu.: 0.60039   3rd Qu.: 0.71825   3rd Qu.: 0.80672
+ Max.   : 3.14564   Max.   : 4.35908   Max.   : 2.53237
+   Flavanoids       Nonflavanoids      Proanthocyanins
+ Min.   :-1.69120   Min.   :-1.86298   Min.   :-2.06321
+st Qu.:-0.79267   1st Qu.:-0.81841   1st Qu.:-0.59560
+ Median : 0.13589   Median :-0.17560   Median : 0.05958
+ Mean   : 0.02124   Mean   :-0.05118   Mean   : 0.08015
+rd Qu.: 0.88424   3rd Qu.: 0.54756   3rd Qu.: 0.66672
+ Max.   : 3.05422   Max.   : 2.15459   Max.   : 3.47527
+     Color               Hue              Dilution
+ Min.   :-1.62969   Min.   :-2.08884   Min.   :-1.88972
+st Qu.:-0.82737   1st Qu.:-0.69978   1st Qu.:-0.96365
+ Median :-0.17603   Median : 0.05491   Median : 0.28636
+ Mean   : 0.00959   Mean   :-0.02603   Mean   : 0.01557
+rd Qu.: 0.51953   3rd Qu.: 0.59085   3rd Qu.: 0.80398
+ Max.   : 3.42577   Max.   : 2.15491   Max.   : 1.95540
+    Proline
+ Min.   :-1.38102
+st Qu.:-0.77926
+ Median :-0.21560
+ Mean   : 0.01084
+rd Qu.: 0.72039
+ Max.   : 2.96311
+> summary(data.test)
+ Type       Alcohol             Malic               Ash
+ t1:18   Min.   :-2.42739   Min.   :-1.42895   Min.   :-2.42949
+ t2:21   1st Qu.:-0.76139   1st Qu.:-0.62109   1st Qu.:-0.58874
+ t3:15   Median : 0.06083   Median :-0.21156   Median :-0.18778
+         Mean   :-0.01103   Mean   : 0.04753   Mean   : 0.02417
+rd Qu.: 0.90768   3rd Qu.: 0.61196   3rd Qu.: 0.92396
+         Max.   : 1.69911   Max.   : 3.10045   Max.   : 2.01748
+   Alcalinity        Magnesium          Phenols
+ Min.   :-2.4838   Min.   :-1.5223   Min.   :-1.6220
+st Qu.:-0.5974   1st Qu.:-0.8221   1st Qu.:-1.0188
+ Median : 0.1512   Median :-0.4370   Median :-0.1839
+ Mean   : 0.0919   Mean   :-0.2036   Mean   :-0.1141
+rd Qu.: 0.6004   3rd Qu.: 0.1581   3rd Qu.: 0.7468
+ Max.   : 2.6965   Max.   : 2.3986   Max.   : 1.6056
+   Flavanoids         Nonflavanoids     Proanthocyanins
+ Min.   :-1.5610513   Min.   :-1.7826   Min.   :-1.6614
+st Qu.:-0.9828915   1st Qu.:-0.7381   1st Qu.:-0.7834
+ Median : 0.0007312   Median :-0.2158   Median :-0.2724
+ Mean   :-0.0487698   Mean   : 0.1175   Mean   :-0.1841
+rd Qu.: 0.7315653   3rd Qu.: 1.0498   3rd Qu.: 0.3610
+ Max.   : 1.6125708   Max.   : 2.3956   Max.   : 2.3920
+     Color               Hue              Dilution
+ Min.   :-1.36225   Min.   :-1.82634   Min.   :-1.84747
+st Qu.:-0.74973   1st Qu.:-0.88572   1st Qu.:-0.80872
+ Median :-0.13721   Median : 0.01116   Median : 0.18073
+ Mean   :-0.02202   Mean   : 0.05977   Mean   :-0.03576
+rd Qu.: 0.42247   3rd Qu.: 0.82053   3rd Qu.: 0.75820
+ Max.   : 2.24386   Max.   : 3.29241   Max.   : 1.33567
+    Proline
+ Min.   :-1.48899
+st Qu.:-0.77608
+ Median :-0.38708
+ Mean   :-0.02489
+rd Qu.: 0.91092
+ Max.   : 2.54077  </code>
+<code>> library(nnet)
+> model.nnet <- nnet(Type ~ ., data = data.tr, size = 2, decay = 5e-04, maxit = 200)
+# weights:  37
+initial  value 164.152084
+iter  10 value 7.066118
+iter  20 value 1.286566
+iter  30 value 0.542334
+iter  40 value 0.444873
+iter  50 value 0.365821
+iter  60 value 0.335803
+iter  70 value 0.311584
+iter  80 value 0.298015
+iter  90 value 0.288199
+iter 100 value 0.281759
+iter 110 value 0.276935
+iter 120 value 0.270048
+iter 130 value 0.261452
+iter 140 value 0.258495
+iter 150 value 0.257073
+iter 160 value 0.256035
+iter 170 value 0.255792
+iter 180 value 0.255743
+iter 190 value 0.255725
+iter 200 value 0.255714
+final  value 0.255714
+stopped after 200 iterations
+>
+> names(model.nnet)
+ [1] "n"             "nunits"        "nconn"
+ [4] "conn"          "nsunits"       "decay"
+ [7] "entropy"       "softmax"       "censored"
+[10] "value"         "wts"           "convergence"
+[13] "fitted.values" "residuals"     "lev"
+[16] "call"          "terms"         "coefnames"
+[19] "xlevels"
+> </code>
+<code>> # Creating the confusion matrix for the model
+> predicted <- predict(model.nnet, data.test, type = "class")
+> predicted
+ [1] "t1" "t1" "t1" "t1" "t1" "t1" "t1" "t1" "t1" "t1" "t1" "t1"
+[13] "t1" "t1" "t1" "t1" "t1" "t1" "t2" "t2" "t2" "t2" "t2" "t2"
+[25] "t2" "t2" "t2" "t2" "t2" "t2" "t2" "t2" "t2" "t2" "t2" "t2"
+[37] "t2" "t2" "t2" "t3" "t2" "t3" "t3" "t3" "t3" "t3" "t3" "t3"
+[49] "t3" "t3" "t3" "t3" "t3" "t3"
+> </code>
+<code>> actual <- data.test$Type
+> model.confusion.matrix <- table(actual, predicted)
+> model.confusion.matrix
+      predicted
+actual t1 t2 t3
+    t1 18  0  0
+    t2  0 21  0
+    t3  0  1 14
+</code>
+<code>> confusion.matrix.rate = prop.table(model.confusion.matrix) * 100
+> round(confusion.matrix.rate, digit = 2)
+      predicted
+actual    t1    t2    t3
+    t1 33.33  0.00  0.00
+    t2  0.00 38.89  0.00
+    t3  0.00  1.85 25.93
+</code>
+<code>> diag.index <- cbind(1:3, 1:3)
+>
+> error.overall = sum(confusion.matrix.rate) - sum(confusion.matrix.rate[diag.index])
+> paste("오차율 =", round(error.overall, digit = 2), "%")
+[1] "오차율 = 1.85 %"</code>
+====== E.G. 3 ======
+{{:r:creditset.csv}}
+<WRAP info 70%>The dataset contains information on different clients who received a loan at least 10 years ago. The variables income (yearly), age, loan (size in euros) and LTI (the loan to yearly income ratio) are available. Our goal is to devise a model which predicts, based on the input variables LTI and age, whether or not a default will occur within 10 years.
+</WRAP>
+<code>set.seed(1234567890)
+library("neuralnet")
+dataset <- read.csv("creditset.csv")
+head(dataset)
+# extract a set to train the NN
+trainset <- dataset[1:800, ]
+# select the test set
+testset <- dataset[801:2000, ]
+## build the neural network (NN)
+creditnet <- neuralnet(default10yr ~ LTI + age, trainset, hidden = 4, lifesign = "minimal",
+    linear.output = FALSE, threshold = 0.1)
+## plot the NN
+plot(creditnet, rep = "best")
+</code>
+__test the resulting output__
+<code>temp_test <- subset(testset, select = c("LTI", "age"))
+creditnet.results <- compute(creditnet, temp_test)
+head(temp_test)
+results <- data.frame(actual = testset$default10yr, prediction = creditnet.results$net.result)
+results[100:115, ]
+results$prediction <- round(results$prediction)
+results[100:115, ]
+pred.table <- table(testset$default10yr, results$prediction)
+pred.table
+library(plyr)
+count(testset, 'default10yr')
+</code>
+with outputs
+<code>> set.seed(1234567890)
+> library("neuralnet")
+>
+> dataset <- read.csv("creditset.csv")
+> head(dataset)
+  clientid      income         age          loan             LTI
+        1 66155.92510 59.01701507 8106.53213129 0.1225367511623
+        2 34415.15397 48.11715310 6564.74501768 0.1907515806612
+        3 57317.17006 63.10804949 8020.95329639 0.1399397996720
+        4 42709.53420 45.75197235 6103.64226014 0.1429105321411
+        5 66952.68885 18.58433593 8770.09923520 0.1309894999955
+        6 24904.06414 57.47160710   15.49859844 0.0006223320961
+  default10yr
+           0
+           0
+           0
+           0
+           1
+           0
+>
+> # extract a set to train the NN
+> trainset <- dataset[1:800, ]
+>
+> # select the test set
+> testset <- dataset[801:2000, ]
+>
+> ## build the neural network (NN)
+> creditnet <- neuralnet(default10yr ~ LTI + age, trainset, hidden = 4, lifesign = "minimal",
++                        linear.output = FALSE, threshold = 0.1)
+hidden: 4    thresh: 0.1    rep: 1/1    steps:    7266	error: 0.79202	time: 2.94 secs
+>
+> ## plot the NN
+> plot(creditnet, rep = "best")
+>
+</code>
+<code>> temp_test <- subset(testset, select = c("LTI", "age"))
+>
+> creditnet.results <- compute(creditnet, temp_test)
+> head(temp_test)
+              LTI         age
+0.02306808811 25.90644520
+0.13729704954 40.77430558
+0.10456984914 32.47350580
+0.15985046411 53.22813215
+0.11161429579 46.47915325
+0.11489364221 47.12736998
+>
+> results <- data.frame(actual = testset$default10yr, prediction = creditnet.results$net.result)
+> results[100:115, ]
+    actual                                 prediction
+      0 0.0000000000000000000000000015964854322398
+      0 0.0000000000000000000000000065162871249459
+      0 0.0000000000164043993271687692878796349660
+      1 0.9999999999219191249011373656685464084148
+      0 0.0000000000000000013810778585990655628959
+      0 0.0000000000000000539636283549268839978413
+      0 0.0000000000000000000234592312583964807452
+      1 0.9581419934268182725389806364546529948711
+      0 0.2499229633059938393557786184828728437424
+      0 0.0000000000000007044361454974903653282470
+      0 0.0006082559674722681341413332845036165963
+      1 0.9999999878713862200285689141310285776854
+      0 0.0000000000000000000000000015562211243506
+      1 0.9999999993455563895849991240538656711578
+      0 0.0000000000000000000000000000003082538282
+      0 0.0000000019359618836434052080615331181690
+>
+> results$prediction <- round(results$prediction)
+> results[100:115, ]
+    actual prediction
+      0          0
+      0          0
+      0          0
+      1          1
+      0          0
+      0          0
+      0          0
+      1          1
+      0          0
+      0          0
+      0          0
+      1          1
+      0          0
+      1          1
+      0          0
+      0          0
+</code>
+<code>> pred.table <- table(testset$default10yr, results$prediction)
+> pred.table
+    1
+1035    1
+    3  161
+>
+> library(plyr)
+> count(testset, 'default10yr')
+  default10yr freq
+           0 1036
+           1  164
+</code>
+====== E.G 5 ======
-<code>names(model.nnet)</code>