c:ms:2023:schedule:w10.lecture.note
This is an old revision of the document!
R code
# Week 10 lecture script.
# Simulates y = 1.4 * x + 2 + noise twice (a large low-noise sample and a
# small high-noise sample), plots the distribution of y, and compares the
# mean of y with the least-squares line fitted by lm().

# ggplot() and every geom_*/stat_*/coord_* layer below come from ggplot2;
# attach it here so the script also runs in a fresh R session.
library(ggplot2)

# ---- Sample 1: n = 400, noise sd = 4 ----
set.seed(101)  # fixed seed so the simulated values are reproducible
x <- rnorm(400, 100, 10)
x
y <- 1.4 * x + 2 + rnorm(400, 0, 4)
y
df <- data.frame(x, y)

# density graph
# Histogram of y with a dashed red line at mean(y). coord_flip() swaps the
# axes so that y runs along the vertical axis, as in the scatter plots.
# bins = 30 is the value stat_bin() falls back to by default; stating it
# keeps the same plot and avoids the "Pick better value" message.
ggplot(data = df, aes(y)) +
  geom_histogram(bins = 30) +
  geom_vline(
    aes(xintercept = mean(y)),
    color = "red", linetype = "dashed", size = 1
  ) +
  coord_flip()

# Same view as a smoothed density curve.
ggplot(data = df, aes(y)) +
  geom_density(color = "blue", size = 1.5) +
  geom_vline(
    aes(xintercept = mean(y)),
    color = "red", linetype = "dashed", size = 1
  ) +
  coord_flip()

# Scatter plot of y against x with the fitted linear-model line.
# The argument was previously misspelled `siz`, which ggplot2 ignored with
# a warning, so the requested point size was never applied.
# NOTE(review): pch = 1.5 is not an integer shape code; it is presumably
# truncated to shape 1 when drawn -- confirm and replace with pch = 1.
ggplot(data = df, aes(x, y)) +
  geom_point(color = "blue", size = 1.5, pch = 1.5) +
  stat_smooth(
    method = "lm",
    formula = y ~ x,
    geom = "smooth", color = "red", size = 1
  )

# ---- Sample 2: n = 25, noise sd = 10 ----
set.seed(401)
sn <- 25  # sample size for the small-sample example
x <- rnorm(sn, 100, 10)
x
y <- 1.4 * x + 2 + rnorm(sn, 0, 10)
y
df <- data.frame(x, y)

# density graph
ggplot(data = df, aes(y)) +
  geom_histogram(bins = 30) +
  geom_vline(
    aes(xintercept = mean(y)),
    color = "red", linetype = "dashed", size = 1
  ) +
  coord_flip()

ggplot(data = df, aes(y)) +
  geom_density(color = "blue", size = 1.5) +
  geom_vline(
    aes(xintercept = mean(y)),
    color = "red", linetype = "dashed", size = 1
  ) +
  coord_flip()

# Fit the simple regression of y on x and inspect the fitted object.
lm.mod <- lm(y ~ x, data = df)
summary(lm.mod)
str(lm.mod)

# Pull out the estimates: element 1 is the intercept, element 2 the slope.
inc.y <- lm.mod$coefficients[1]
slope.x <- lm.mod$coefficients[2]
inc.y
slope.x

# Scatter plot with two candidate summaries of y: the horizontal line at
# mean(y) and the fitted regression line.
ggplot(data = df, aes(x, y)) +
  geom_point(color = "blue", size = 1.5, pch = 1.5) +
  geom_hline(aes(yintercept = mean(y))) +
  geom_abline(intercept = inc.y, slope = slope.x)

# Same plot with heavier, colour-coded lines: red = mean of y,
# dark green = regression line.
ggplot(data = df, aes(x, y)) +
  geom_point(color = "blue", size = 2.5, pch = 2) +
  geom_hline(aes(yintercept = mean(y)), size = 1.5, color = "red") +
  geom_abline(
    intercept = inc.y, slope = slope.x,
    size = 1.5, color = "darkgreen"
  )
> set.seed(101) > x <- rnorm(400, 100, 10) > x [1] 96.73964 105.52462 93.25056 102.14359 103.10769 111.73966 106.18790 [8] 98.87266 109.17028 97.76741 105.26448 92.05156 114.27756 85.33180 [15] 97.63317 98.06662 91.50245 100.58465 91.82330 79.49692 98.36244 [22] 107.08522 97.32019 85.36078 107.44436 85.89610 104.67068 98.80680 [29] 104.67239 104.98136 108.94937 102.79152 110.07866 79.26894 111.89853 [36] 92.75626 101.67984 109.20335 83.28395 104.48469 104.82459 107.58214 [43] 76.80673 95.40495 88.94616 104.02928 105.68935 92.93917 97.09909 [50] 85.16122 88.49745 97.25529 105.77901 86.03097 107.49058 89.48813 [57] 101.65381 111.29809 111.73722 95.72137 97.40198 85.88827 93.58642 [64] 101.12458 104.22604 103.86835 93.12202 101.48902 99.42350 99.25177 [71] 115.09897 116.19937 111.53158 99.22396 81.81065 89.62555 103.02492 [78] 87.22054 101.38339 99.49016 118.52148 111.11675 94.88625 94.56119 [85] 82.71073 104.70750 100.05387 113.48046 107.24097 115.52549 113.25470 [92] 99.65735 96.38987 92.79835 102.82015 92.09474 95.55095 113.64993 [99] 104.97454 91.85604 102.68066 94.07792 121.33486 111.72749 107.46761 [106] 97.69491 100.87772 78.16260 95.33368 116.85960 94.32079 99.53257 [113] 98.43019 116.02242 107.68654 92.28371 93.69318 91.69719 94.08887 [120] 109.81085 93.38395 92.27582 79.81527 94.66415 104.34728 92.28833 [127] 92.46059 97.00642 116.63966 87.55670 92.16866 102.44831 98.56113 [134] 83.91369 109.51580 81.80868 117.83672 118.87139 114.90719 96.19400 [141] 90.90625 96.61906 85.88116 102.17543 106.70126 97.12141 104.69303 [148] 95.29929 97.60734 95.52538 93.81170 102.52963 92.46632 107.32277 [155] 95.97413 71.77000 104.62974 121.32870 97.29513 102.48525 100.38116 [162] 103.94069 84.95915 84.13109 90.72882 107.76197 92.19316 87.21433 [169] 99.98572 81.49022 104.51505 95.67053 107.13603 109.60695 103.81535 [176] 112.18073 99.82863 99.61791 112.43734 90.44141 109.15425 90.60662 [183] 101.12125 105.53013 105.31742 91.26238 98.13151 97.86290 97.95989 [190] 117.19709 
102.02033 105.12656 114.52400 103.63865 91.24151 99.85439 [197] 92.75507 119.69370 94.63598 99.73768 98.35968 86.16725 104.23511 [204] 92.09511 112.09925 108.94517 98.98801 102.97123 101.97298 98.43016 [211] 115.36571 78.32330 105.98448 100.43112 112.95027 107.06303 103.45545 [218] 99.20103 104.54808 112.76252 112.64838 102.69254 98.79456 107.95271 [225] 94.85972 95.93407 112.19719 100.83711 105.89902 94.82581 107.69463 [232] 108.01970 93.03140 111.77853 105.85845 95.33106 103.85650 94.65394 [239] 110.56668 97.93907 106.07012 94.51936 79.00024 102.50813 99.45055 [246] 93.40272 85.44143 100.23729 105.47908 91.91099 97.60977 96.46881 [253] 108.19599 96.54720 88.17116 89.68679 99.24911 108.28585 89.64015 [260] 98.52844 97.18546 86.26314 115.58450 94.25055 121.87335 107.94292 [267] 102.02560 99.77950 103.04531 88.90751 107.65473 99.77974 90.96002 [274] 104.00002 88.50712 101.88168 102.17290 89.50914 99.24877 84.32011 [281] 88.26203 78.51694 103.41715 109.04971 110.96490 85.28739 97.19403 [288] 108.46140 87.14086 96.87564 96.37157 114.12454 97.44798 103.87018 [295] 105.24941 106.15145 96.67184 107.45329 96.88735 97.65749 88.34143 [302] 97.60201 111.84383 102.07508 99.56367 90.28922 105.74223 87.40135 [309] 110.61679 95.66006 102.39177 94.97671 82.96123 105.23772 115.64172 [316] 75.33747 98.51179 121.10837 96.53805 109.82183 103.91431 94.90006 [323] 89.50354 100.46007 89.91172 93.43750 100.94018 100.17924 83.96476 [330] 104.83076 97.75577 95.62602 113.27768 95.31522 86.60042 116.65433 [337] 105.38950 109.99385 103.41489 89.41373 99.58401 87.07172 91.06136 [344] 105.41909 109.69636 97.55011 108.60180 93.24156 94.99956 118.32033 [351] 96.70728 102.61308 88.87839 110.62210 102.05000 79.75672 112.27375 [358] 111.95344 107.72516 85.90462 95.67570 95.57712 113.48992 89.25368 [365] 96.64918 107.50832 119.97207 117.91908 87.67024 68.22790 89.15158 [372] 100.60763 98.18234 103.41311 111.06792 106.88816 104.70992 83.59933 [379] 93.20977 104.86308 102.26175 93.53319 98.86714 
105.20524 125.86743 [386] 105.42192 110.65204 92.49852 107.93791 92.50237 103.06302 113.18370 [393] 97.33460 102.65806 111.39838 90.13455 93.26333 80.95256 92.82078 [400] 96.16458 > y <- 1.4*x + 2 + rnorm(400, 0, 4) > y [1] 136.15334 141.36678 125.14346 146.75969 146.91839 155.13529 145.17834 [8] 140.52125 161.16237 135.83550 147.21609 129.83101 159.41017 126.32554 [15] 145.81444 139.03712 132.60136 139.06414 123.22188 109.68975 135.90381 [22] 148.57810 140.43266 127.95524 149.80964 122.10795 148.09642 145.29754 [29] 145.77165 144.31213 150.30475 148.80207 157.55103 114.00139 161.66950 [36] 127.88826 150.29708 151.97808 118.98413 149.13802 150.82833 151.42978 [43] 108.12141 136.10181 125.61718 151.09330 156.38194 130.33149 133.18045 [50] 122.21219 122.85046 136.58148 143.17756 123.49512 146.02445 126.81431 [57] 145.61904 150.21359 156.55289 136.50530 133.85461 120.63638 133.04161 [64] 146.26457 142.13102 142.84121 131.16752 138.52898 133.08298 142.97334 [71] 166.22456 158.22759 162.71390 149.30672 115.86546 126.40807 149.41418 [78] 122.28137 140.04994 139.39608 171.72069 161.62761 141.84821 138.57647 [85] 116.28359 150.62291 137.79468 160.29574 148.09053 168.70778 160.24102 [92] 138.29111 132.96101 128.48436 142.14526 126.44547 135.23103 157.61361 [99] 149.99908 123.61788 140.66349 131.05680 170.68264 156.12262 152.34996 [106] 139.81179 145.36247 109.25937 136.81783 164.65179 138.55190 142.16435 [113] 139.73879 165.17218 153.19408 133.89316 129.36258 128.38218 131.11035 [120] 155.23088 130.18606 132.53642 113.62406 139.44472 154.42732 129.94617 [127] 135.34784 140.77735 174.19736 118.59002 135.46041 143.31440 145.48961 [134] 115.86949 154.57100 113.05077 167.94167 173.29327 158.94914 138.90997 [141] 132.15036 130.16013 123.12985 140.48566 156.88936 145.09040 149.74647 [148] 131.69242 141.19274 132.39223 129.25680 146.18736 133.97415 152.13388 [155] 136.41302 101.31183 155.30386 165.98746 139.15260 143.44305 134.06869 [162] 149.77271 128.09894 119.99632 132.67192 
152.28072 130.36562 119.87791 [169] 146.93628 120.59761 145.57902 142.95496 153.13412 155.06478 149.48563 [176] 156.21369 142.86568 142.69522 156.10727 130.39935 152.61788 123.07734 [183] 142.67060 150.94834 150.14259 127.43596 138.46810 142.98959 141.95404 [190] 161.79946 144.36598 151.94688 155.64581 147.73510 126.63936 142.25827 [197] 127.70118 170.28144 141.24582 146.34713 138.14630 118.34356 146.34777 [204] 127.55320 156.31519 144.49527 141.25458 151.02216 145.78275 146.27848 [211] 163.49551 112.91190 151.03768 139.75291 155.02415 156.16073 143.62611 [218] 137.50858 154.10782 155.95803 160.84298 147.02123 146.39854 149.88526 [225] 135.00785 134.35517 153.71808 139.42892 146.75898 131.42651 151.92026 [232] 152.63363 131.94064 156.00171 152.01589 140.00259 142.24212 135.06532 [239] 156.13705 139.03987 149.63894 138.52558 105.47473 144.58488 143.57210 [246] 131.25376 120.69584 138.64322 147.25182 130.93258 136.01900 132.83907 [253] 147.34248 138.79248 126.84719 120.63842 140.19360 148.00099 128.20340 [260] 141.75121 134.64641 126.47734 163.93086 135.37476 177.71675 151.46994 [267] 143.59227 144.39371 149.66929 127.55865 152.88301 141.01573 135.69735 [274] 153.18730 121.72656 145.10166 145.46457 127.04068 139.68628 121.98121 [281] 127.61569 108.13119 149.86375 155.90616 158.57453 120.39602 138.54738 [288] 147.72705 126.09845 141.08382 136.63450 162.44393 140.00548 145.29970 [295] 145.84471 145.87345 134.98375 153.15825 138.22498 141.74321 125.78910 [302] 139.22164 157.63110 141.93373 142.43277 130.12262 156.29907 130.35248 [309] 153.92230 136.93778 142.08328 139.87360 120.68367 151.90523 160.57232 [316] 108.41862 139.70771 175.21783 135.67468 156.81052 145.72460 130.22365 [323] 126.24873 140.64648 131.17491 130.68261 141.79391 137.54841 108.48632 [330] 147.75584 145.52284 135.52419 155.60041 137.11896 131.39066 159.55025 [337] 157.32631 150.35471 146.15982 125.19119 145.43292 121.85669 129.99533 [344] 150.54293 156.22357 140.02952 151.83912 130.16063 131.44195 
162.99746 [351] 135.54709 146.14491 132.91331 156.74646 143.75338 108.51161 164.03761 [358] 157.74286 150.92070 117.53851 131.10239 141.45090 158.11971 130.93894 [365] 140.37843 154.53988 169.47647 170.81682 118.81946 92.03441 128.56961 [372] 144.66285 138.32993 141.59815 153.62912 149.58162 153.25996 123.22005 [379] 129.33102 148.02073 147.81094 120.44112 135.01371 143.00462 185.69520 [386] 143.88558 159.06145 131.69241 152.04899 125.41866 143.80242 158.80394 [393] 138.00040 144.97010 157.22821 127.05684 131.59783 110.14579 133.96685 [400] 138.35520 > df <- data.frame(x,y) > # density graph > ggplot(data=df, aes(y)) + + geom_histogram() + + geom_vline(aes(xintercept=mean(y)), + color="red", linetype="dashed", size=1) + + coord_flip() `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. > > ggplot(data=df, aes(y)) + + geom_density(color="blue", size=1.5) + + geom_vline(aes(xintercept=mean(y)), + color="red", linetype="dashed", size=1) + + coord_flip() > > ggplot(data=df, aes(x,y)) + + geom_point(color="blue", siz=1.5, pch=1.5) + + stat_smooth(method = "lm", + formula = y ~ x, + geom = "smooth", color="red", size=1) Warning message: In geom_point(color = "blue", siz = 1.5, pch = 1.5) : Ignoring unknown parameters: `siz` > > > set.seed(401) > sn <- 25 > x <- rnorm(sn, 100, 10) > x [1] 99.04030 112.53423 111.25717 95.37048 106.51630 110.03586 99.37429 [8] 83.40702 91.38017 80.14344 95.16165 105.55799 100.47560 95.35164 [15] 103.18120 101.21572 115.59812 104.79399 89.67882 86.01922 114.26808 [22] 113.21215 110.42156 104.10994 107.89136 > y <- 1.4 * x + 2 + rnorm(sn, 0, 10) > y [1] 147.7866 178.1177 167.8750 124.8276 147.9924 133.5853 144.6882 102.0537 [9] 140.3838 112.9193 125.8841 135.8684 137.4363 129.0042 159.6048 137.0136 [17] 161.4669 147.8364 127.3562 122.0032 168.4221 138.2663 147.7574 135.0859 [25] 153.9057 > df <- data.frame(x, y) > # density graph > ggplot(data=df, aes(y)) + + geom_histogram() + + geom_vline(aes(xintercept=mean(y)), + 
color="red", linetype="dashed", size=1) + + coord_flip() `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. > > ggplot(data=df, aes(y)) + + geom_density(color="blue", size=1.5) + + geom_vline(aes(xintercept=mean(y)), + color="red", linetype="dashed", size=1) + + coord_flip() > > lm.mod <- lm(y~x, data=df) > summary(lm.mod) Call: lm(formula = y ~ x, data = df) Residuals: Min 1Q Median 3Q Max -19.958 -6.345 -0.137 6.596 20.954 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -5.9221 22.5365 -0.263 0.795 x 1.4492 0.2212 6.553 1.1e-06 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 10.75 on 23 degrees of freedom Multiple R-squared: 0.6512, Adjusted R-squared: 0.636 F-statistic: 42.94 on 1 and 23 DF, p-value: 1.097e-06 > str(lm.mod) List of 12 $ coefficients : Named num [1:2] -5.92 1.45 ..- attr(*, "names")= chr [1:2] "(Intercept)" "x" $ residuals : Named num [1:25] 10.18 20.95 12.56 -7.46 -0.45 ... ..- attr(*, "names")= chr [1:25] "1" "2" "3" "4" ... $ effects : Named num [1:25] -705.43 -70.46 7.59 -7.34 -3.9 ... ..- attr(*, "names")= chr [1:25] "(Intercept)" "x" "" "" ... $ rank : int 2 $ fitted.values: Named num [1:25] 138 157 155 132 148 ... ..- attr(*, "names")= chr [1:25] "1" "2" "3" "4" ... $ assign : int [1:2] 0 1 $ qr :List of 5 ..$ qr : num [1:25, 1:2] -5 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 ... .. ..- attr(*, "dimnames")=List of 2 .. .. ..$ : chr [1:25] "1" "2" "3" "4" ... .. .. ..$ : chr [1:2] "(Intercept)" "x" .. ..- attr(*, "assign")= int [1:2] 0 1 ..$ qraux: num [1:2] 1.2 1.24 ..$ pivot: int [1:2] 1 2 ..$ tol : num 1e-07 ..$ rank : int 2 ..- attr(*, "class")= chr "qr" $ df.residual : int 23 $ xlevels : Named list() $ call : language lm(formula = y ~ x, data = df) $ terms :Classes 'terms', 'formula' language y ~ x .. ..- attr(*, "variables")= language list(y, x) .. ..- attr(*, "factors")= int [1:2, 1] 0 1 .. .. ..- attr(*, "dimnames")=List of 2 .. .. .. ..$ : chr [1:2] "y" "x" .. 
.. .. ..$ : chr "x" .. ..- attr(*, "term.labels")= chr "x" .. ..- attr(*, "order")= int 1 .. ..- attr(*, "intercept")= int 1 .. ..- attr(*, "response")= int 1 .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> .. ..- attr(*, "predvars")= language list(y, x) .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric" .. .. ..- attr(*, "names")= chr [1:2] "y" "x" $ model :'data.frame': 25 obs. of 2 variables: ..$ y: num [1:25] 148 178 168 125 148 ... ..$ x: num [1:25] 99 112.5 111.3 95.4 106.5 ... ..- attr(*, "terms")=Classes 'terms', 'formula' language y ~ x .. .. ..- attr(*, "variables")= language list(y, x) .. .. ..- attr(*, "factors")= int [1:2, 1] 0 1 .. .. .. ..- attr(*, "dimnames")=List of 2 .. .. .. .. ..$ : chr [1:2] "y" "x" .. .. .. .. ..$ : chr "x" .. .. ..- attr(*, "term.labels")= chr "x" .. .. ..- attr(*, "order")= int 1 .. .. ..- attr(*, "intercept")= int 1 .. .. ..- attr(*, "response")= int 1 .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> .. .. ..- attr(*, "predvars")= language list(y, x) .. .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric" .. .. .. ..- attr(*, "names")= chr [1:2] "y" "x" - attr(*, "class")= chr "lm" > inc.y <- lm.mod$coefficients[1] > slope.x <- lm.mod$coefficients[2] > inc.y (Intercept) -5.92206 > slope.x x 1.449211 > > ggplot(data=df, aes(x,y)) + + geom_point(color="blue", size=1.5, pch=1.5) + + geom_hline(aes(yintercept=mean(y))) + + geom_abline(intercept=inc.y, slope=slope.x) > > > ggplot(data=df, aes(x,y)) + + geom_point(color="blue", size=2.5, pch=2) + + geom_hline(aes(yintercept=mean(y)), size=1.5, color="red") + + geom_abline(intercept=inc.y, slope=slope.x, size=1.5, color="darkgreen") >
c/ms/2023/schedule/w10.lecture.note.1683476124.txt.gz · Last modified: 2023/05/08 01:15 by hkimscil