r:neural_network
Differences
This shows you the differences between two versions of the page.
Both sides previous revision / Previous revision / Next revision | Previous revision | ||
r:neural_network [2016/12/14 08:04] – [E.G. 2] hkimscil | r:neural_network [2016/12/14 09:02] (current) – [E.G 5] hkimscil | ||
---|---|---|---|
Line 212: | Line 212: | ||
====== E.G. 2 ====== | ====== E.G. 2 ====== | ||
{{: | {{: | ||
+ | for the description of the data: https:// | ||
+ | |||
< | < | ||
head(wine) | head(wine) | ||
summary(wine) | summary(wine) | ||
+ | |||
+ | wine.scale <- cbind(wine[1], | ||
+ | summary(wine.scale) | ||
+ | apply(wine.scale[-1], | ||
+ | |||
+ | # Partitioning the data into training and test data | ||
+ | data.size <- nrow(wine.scale) | ||
+ | set.seed(1111) | ||
+ | samp <- c(sample(1: | ||
+ | data.tr <- wine.scale[samp, | ||
+ | data.test <- wine.scale[-samp, | ||
+ | summary(data.tr) | ||
+ | summary(data.test) | ||
+ | |||
+ | # Fitting the neural network for the training data | ||
+ | library(nnet) | ||
+ | model.nnet <- nnet(Type ~ ., data = data.tr, size = 2, decay = 5e-04, maxit = 200) | ||
+ | |||
+ | names(model.nnet) | ||
+ | |||
+ | |||
+ | # Creating the confusion matrix for the model | ||
+ | predicted <- predict(model.nnet, | ||
+ | predicted | ||
+ | |||
+ | |||
+ | actual <- data.test$Type | ||
+ | model.confusion.matrix <- table(actual, | ||
+ | model.confusion.matrix | ||
+ | |||
+ | confusion.matrix.rate = prop.table(model.confusion.matrix) * 100 | ||
+ | round(confusion.matrix.rate, | ||
+ | |||
+ | diag.index <- cbind(1:3, 1:3) | ||
+ | |||
+ | error.overall = sum(confusion.matrix.rate) - sum(confusion.matrix.rate[diag.index]) | ||
+ | paste(" | ||
</ | </ | ||
+ | < | ||
+ | > head(wine) | ||
+ | Type Alcohol Malic Ash Alcalinity Magnesium Phenols Flavanoids | ||
+ | 1 | ||
+ | 2 | ||
+ | 3 | ||
+ | 4 | ||
+ | 5 | ||
+ | 6 | ||
+ | Nonflavanoids Proanthocyanins Color Hue Dilution Proline | ||
+ | 1 0.28 2.29 5.64 1.04 | ||
+ | 2 0.26 1.28 4.38 1.05 | ||
+ | 3 0.30 2.81 5.68 1.03 | ||
+ | 4 0.24 2.18 7.80 0.86 | ||
+ | 5 0.39 1.82 4.32 1.04 | ||
+ | 6 0.34 1.97 6.75 1.05 | ||
+ | > summary(wine) | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: | ||
+ | | ||
+ | | ||
+ | | ||
+ | 1st Qu.: | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: | ||
+ | | ||
+ | | ||
+ | | ||
+ | 1st Qu.: | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: | ||
+ | | ||
+ | Hue Dilution | ||
+ | | ||
+ | 1st Qu.: | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: | ||
+ | | ||
+ | > </ | ||
+ | |||
+ | < | ||
+ | > summary(wine.scale) | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: 0.83378 | ||
+ | | ||
+ | | ||
+ | | ||
+ | 1st Qu.: | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: 0.600395 | ||
+ | | ||
+ | | ||
+ | | ||
+ | 1st Qu.: | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: 0.8467 | ||
+ | | ||
+ | | ||
+ | | ||
+ | 1st Qu.: | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: 0.4926 | ||
+ | | ||
+ | Proline | ||
+ | | ||
+ | 1st Qu.: | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: 0.7561 | ||
+ | | ||
+ | > apply(wine.scale[-1], | ||
+ | Alcohol | ||
+ | 1 | ||
+ | Magnesium | ||
+ | 1 | ||
+ | Proanthocyanins | ||
+ | 1 | ||
+ | Proline | ||
+ | 1 | ||
+ | ></ | ||
+ | |||
+ | < | ||
+ | > set.seed(1111) | ||
+ | > samp <- c(sample(1: | ||
+ | > data.tr <- wine.scale[samp, | ||
+ | > data.test <- wine.scale[-samp, | ||
+ | > summary(data.tr) | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: 0.778346 | ||
+ | | ||
+ | | ||
+ | | ||
+ | 1st Qu.: | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: 0.60039 | ||
+ | | ||
+ | | ||
+ | | ||
+ | 1st Qu.: | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: 0.88424 | ||
+ | | ||
+ | | ||
+ | | ||
+ | 1st Qu.: | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: 0.51953 | ||
+ | | ||
+ | Proline | ||
+ | | ||
+ | 1st Qu.: | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: 0.72039 | ||
+ | | ||
+ | > summary(data.test) | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: 0.90768 | ||
+ | | ||
+ | | ||
+ | | ||
+ | 1st Qu.: | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: 0.6004 | ||
+ | | ||
+ | | ||
+ | | ||
+ | 1st Qu.: | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: 0.7315653 | ||
+ | | ||
+ | | ||
+ | | ||
+ | 1st Qu.: | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: 0.42247 | ||
+ | | ||
+ | Proline | ||
+ | | ||
+ | 1st Qu.: | ||
+ | | ||
+ | | ||
+ | 3rd Qu.: 0.91092 | ||
+ | | ||
+ | |||
+ | < | ||
+ | > model.nnet <- nnet(Type ~ ., data = data.tr, size = 2, decay = 5e-04, maxit = 200) | ||
+ | # weights: | ||
+ | initial | ||
+ | iter 10 value 7.066118 | ||
+ | iter 20 value 1.286566 | ||
+ | iter 30 value 0.542334 | ||
+ | iter 40 value 0.444873 | ||
+ | iter 50 value 0.365821 | ||
+ | iter 60 value 0.335803 | ||
+ | iter 70 value 0.311584 | ||
+ | iter 80 value 0.298015 | ||
+ | iter 90 value 0.288199 | ||
+ | iter 100 value 0.281759 | ||
+ | iter 110 value 0.276935 | ||
+ | iter 120 value 0.270048 | ||
+ | iter 130 value 0.261452 | ||
+ | iter 140 value 0.258495 | ||
+ | iter 150 value 0.257073 | ||
+ | iter 160 value 0.256035 | ||
+ | iter 170 value 0.255792 | ||
+ | iter 180 value 0.255743 | ||
+ | iter 190 value 0.255725 | ||
+ | iter 200 value 0.255714 | ||
+ | final value 0.255714 | ||
+ | stopped after 200 iterations | ||
+ | > | ||
+ | > names(model.nnet) | ||
+ | [1] " | ||
+ | [4] " | ||
+ | [7] " | ||
+ | [10] " | ||
+ | [13] " | ||
+ | [16] " | ||
+ | [19] " | ||
+ | > </ | ||
+ | |||
+ | |||
+ | < | ||
+ | > predicted <- predict(model.nnet, | ||
+ | > predicted | ||
+ | [1] " | ||
+ | [13] " | ||
+ | [25] " | ||
+ | [37] " | ||
+ | [49] " | ||
+ | > </ | ||
+ | |||
+ | < | ||
+ | > model.confusion.matrix <- table(actual, | ||
+ | > model.confusion.matrix | ||
+ | predicted | ||
+ | actual t1 t2 t3 | ||
+ | t1 18 0 0 | ||
+ | t2 0 21 0 | ||
+ | t3 0 1 14 | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > round(confusion.matrix.rate, | ||
+ | predicted | ||
+ | actual | ||
+ | t1 33.33 0.00 0.00 | ||
+ | t2 0.00 38.89 0.00 | ||
+ | t3 0.00 1.85 25.93 | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > | ||
+ | > error.overall = sum(confusion.matrix.rate) - sum(confusion.matrix.rate[diag.index]) | ||
+ | > paste(" | ||
+ | [1] " | ||
+ | |||
+ | ====== E.G. 3 ====== | ||
+ | {{: | ||
+ | <WRAP info 70%>The dataset contains information on different clients who received a loan at least 10 years ago. The variables income (yearly), age, loan (size in euros) and LTI (the loan to yearly income ratio) are available. Our goal is to devise a model which predicts, based on the input variables LTI and age, whether or not a default will occur within 10 years. | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | library(" | ||
+ | |||
+ | dataset <- read.csv(" | ||
+ | head(dataset) | ||
+ | |||
+ | # extract a set to train the NN | ||
+ | trainset <- dataset[1: | ||
+ | |||
+ | # select the test set | ||
+ | testset <- dataset[801: | ||
+ | |||
+ | ## build the neural network (NN) | ||
+ | creditnet <- neuralnet(default10yr ~ LTI + age, trainset, hidden = 4, lifesign = " | ||
+ | linear.output = FALSE, threshold = 0.1) | ||
+ | |||
+ | ## plot the NN | ||
+ | plot(creditnet, | ||
+ | </ | ||
+ | |||
+ | __Test the resulting output__ | ||
+ | < | ||
+ | |||
+ | creditnet.results <- compute(creditnet, | ||
+ | head(temp_test) | ||
+ | |||
+ | results <- data.frame(actual = testset$default10yr, | ||
+ | results[100: | ||
+ | |||
+ | results$prediction <- round(results$prediction) | ||
+ | results[100: | ||
+ | |||
+ | pred.table <- table(testset$default10yr, | ||
+ | pred.table | ||
+ | |||
+ | library(plyr) | ||
+ | count(testset, | ||
+ | </ | ||
+ | |||
+ | With outputs: | ||
+ | < | ||
+ | > library(" | ||
+ | > | ||
+ | > dataset <- read.csv(" | ||
+ | > head(dataset) | ||
+ | clientid | ||
+ | 1 1 66155.92510 59.01701507 8106.53213129 0.1225367511623 | ||
+ | 2 2 34415.15397 48.11715310 6564.74501768 0.1907515806612 | ||
+ | 3 3 57317.17006 63.10804949 8020.95329639 0.1399397996720 | ||
+ | 4 4 42709.53420 45.75197235 6103.64226014 0.1429105321411 | ||
+ | 5 5 66952.68885 18.58433593 8770.09923520 0.1309894999955 | ||
+ | 6 6 24904.06414 57.47160710 | ||
+ | default10yr | ||
+ | 1 0 | ||
+ | 2 0 | ||
+ | 3 0 | ||
+ | 4 0 | ||
+ | 5 1 | ||
+ | 6 0 | ||
+ | > | ||
+ | > # extract a set to train the NN | ||
+ | > trainset <- dataset[1: | ||
+ | > | ||
+ | > # select the test set | ||
+ | > testset <- dataset[801: | ||
+ | > | ||
+ | > ## build the neural network (NN) | ||
+ | > creditnet <- neuralnet(default10yr ~ LTI + age, trainset, hidden = 4, lifesign = " | ||
+ | + linear.output = FALSE, threshold = 0.1) | ||
+ | hidden: 4 thresh: 0.1 rep: 1/1 steps: | ||
+ | > | ||
+ | > ## plot the NN | ||
+ | > plot(creditnet, | ||
+ | > | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > | ||
+ | > creditnet.results <- compute(creditnet, | ||
+ | > head(temp_test) | ||
+ | LTI age | ||
+ | 801 0.02306808811 25.90644520 | ||
+ | 802 0.13729704954 40.77430558 | ||
+ | 803 0.10456984914 32.47350580 | ||
+ | 804 0.15985046411 53.22813215 | ||
+ | 805 0.11161429579 46.47915325 | ||
+ | 806 0.11489364221 47.12736998 | ||
+ | > | ||
+ | > results <- data.frame(actual = testset$default10yr, | ||
+ | > results[100: | ||
+ | actual | ||
+ | 900 0 0.0000000000000000000000000015964854322398 | ||
+ | 901 0 0.0000000000000000000000000065162871249459 | ||
+ | 902 0 0.0000000000164043993271687692878796349660 | ||
+ | 903 1 0.9999999999219191249011373656685464084148 | ||
+ | 904 0 0.0000000000000000013810778585990655628959 | ||
+ | 905 0 0.0000000000000000539636283549268839978413 | ||
+ | 906 0 0.0000000000000000000234592312583964807452 | ||
+ | 907 1 0.9581419934268182725389806364546529948711 | ||
+ | 908 0 0.2499229633059938393557786184828728437424 | ||
+ | 909 0 0.0000000000000007044361454974903653282470 | ||
+ | 910 0 0.0006082559674722681341413332845036165963 | ||
+ | 911 1 0.9999999878713862200285689141310285776854 | ||
+ | 912 0 0.0000000000000000000000000015562211243506 | ||
+ | 913 1 0.9999999993455563895849991240538656711578 | ||
+ | 914 0 0.0000000000000000000000000000003082538282 | ||
+ | 915 0 0.0000000019359618836434052080615331181690 | ||
+ | > | ||
+ | > results$prediction <- round(results$prediction) | ||
+ | > results[100: | ||
+ | actual prediction | ||
+ | 900 0 0 | ||
+ | 901 0 0 | ||
+ | 902 0 0 | ||
+ | 903 1 1 | ||
+ | 904 0 0 | ||
+ | 905 0 0 | ||
+ | 906 0 0 | ||
+ | 907 1 1 | ||
+ | 908 0 0 | ||
+ | 909 0 0 | ||
+ | 910 0 0 | ||
+ | 911 1 1 | ||
+ | 912 0 0 | ||
+ | 913 1 1 | ||
+ | 914 0 0 | ||
+ | 915 0 0 | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > pred.table | ||
+ | |||
+ | | ||
+ | 0 1035 1 | ||
+ | 1 3 161 | ||
+ | > | ||
+ | |||
+ | > library(plyr) | ||
+ | > count(testset, | ||
+ | default10yr freq | ||
+ | 1 0 1036 | ||
+ | 2 | ||
+ | |||
+ | </ | ||
+ | |||
+ | ====== E.G. 5 ====== | ||
r/neural_network.1481672098.txt.gz · Last modified: 2016/12/14 08:04 by hkimscil