====== Multiple regression lecture note for R ======
====== R code ======
<code>
# ...
colnames(d) <- c("y", "x1", "x2")
d
# attach(d)
lm.y.x1 <- lm(y ~ x1, data=d)
summary(lm.y.x1)
# ...
beta2

# install.packages("lm.beta")
library(lm.beta)
lm.beta(lm.y.x1x2)
# ...
lm.tmp.2 <- lm(y~x1, data=d)
res.y.x1 <- lm.tmp.2$residuals
lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d)
summary(lm.tmp.3)

# install.packages("ppcor")
library(ppcor)
pcor(d)
spcor(d)

partial.r <- pcor.test(y, x2, x1)
partial.r
str(partial.r)
partial.r$estimate^2
# ...
lm.tmp.5 <- lm(y~x2, data=d)
res.y.x2 <- lm.tmp.5$residuals
lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d)
summary(lm.tmp.6)

partial.r2 <- pcor.test(y, x1, x2)
str(partial.r2)
partial.r2$estimate^2
# ...
spr.2$estimate^2

lm.tmp.7 <- lm(y ~ res.x2.x1, data = d)
summary(lm.tmp.7)
#######################################################
# ...
cma.2
cma.3
# Note that sorting out the unique and common
# portions of explained variance is possible
# only with squared semi-partial correlations,
# NOT squared partial correlations, because
# only semi-partial correlations share the
# same denominator (the total variance of y).
#############################################

</code>
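As a cross-check on the standardized coefficients above (beta1, beta2, and ''lm.beta()''), the beta weights can also be reproduced by z-scoring every variable before fitting. A minimal sketch, assuming the data frame ''d'' (columns y, x1, x2) from the script above; the names ''d.z'' and ''lm.z'' are only for illustration:

<code>
# scale() turns each column into z-scores;
# the slopes of the standardized model are the beta weights
d.z <- data.frame(scale(d))
lm.z <- lm(y ~ x1 + x2, data = d.z)
coef(lm.z)   # should match lm.beta(lm.y.x1x2)
</code>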
====== Output ======
===== Multiple regression =====
<code>
> 
> lm.y.x1x2 <- lm(y ~ x1+x2, data=d)
...
> y.pred <- a + (b1 * x1) + (b2 * x2)
> y.pred
...
> y.real <- y
> y.real
...
-0.4458785 
> 
> # install.packages("lm.beta")
> library(lm.beta)
> lm.beta(lm.y.x1x2)
...
> 
> lm.tmp.2 <- lm(y~x1, data=d)
> res.y.x1 <- lm.tmp.2$residuals
> 
> lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d)
> summary(lm.tmp.3)

Call:
lm(formula = res.y.x1 ~ res.x2.x1, data = d)

Residuals:
...
> 
> # install.packages("ppcor")
> library(ppcor)
> pcor(d)
...
[1] "pearson"
> partial.r <- pcor.test(y, x2, x1)
> partial.r
   estimate    p.value statistic  n gp  Method
1 -0.672856 0.04702022 -2.406425 10  1 pearson
> str(partial.r)
'data.frame':   1 obs. of  6 variables:
 $ estimate : num -0.673
 $ p.value  : num 0.047
 $ statistic: num -2.41
 $ n        : int 10
 $ gp       : num 1
 $ Method   : chr "pearson"
> partial.r$estimate^2
[1] 0.4527352
...
> 
> lm.tmp.5 <- lm(y~x2, data=d)
> res.y.x2 <- lm.tmp.5$residuals
> 
> lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d)
> summary(lm.tmp.6)

Call:
lm(formula = res.y.x2 ~ res.x1.x2, data = d)

Residuals:
...
> 
> partial.r2 <- pcor.test(y, x1, x2)
> str(partial.r2)
'data.frame':   1 obs. of  6 variables:
...
> spr.2$estimate^2
[1] 0.3188552
> 
> lm.tmp.7 <- lm(y ~ res.x2.x1, data = d)
> summary(lm.tmp.7)
...
> cma.3
[1] 0.3122658
> # Note that sorting out the unique and common
> # portions of explained variance is possible
> # only with squared semi-partial correlations,
> # NOT squared partial correlations, because
> # only semi-partial correlations share the
> # same denominator (the total variance of y).
> #############################################
> 
</code>
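The comment above (the unique and common portions of explained variance can be sorted out only with squared semi-partial correlations) can be verified numerically. A minimal sketch, assuming ''d'' and the ''ppcor'' package loaded above; ''spcor()'' returns the semi-partial correlation matrix in ''$estimate'', and the names ''r2.all'', ''u.x1'', ''u.x2'' are only for illustration:

<code>
library(ppcor)
r2.all <- summary(lm(y ~ x1 + x2, data = d))$r.squared  # total R^2
sp <- spcor(d)$estimate
u.x1 <- sp["y", "x1"]^2           # variance in y explained uniquely by x1
u.x2 <- sp["y", "x2"]^2           # variance in y explained uniquely by x2
common <- r2.all - (u.x1 + u.x2)  # common (shared) portion
c(u.x1, u.x2, common)
</code>

Squared partial correlations cannot be added up this way, because each of them is taken out of a different leftover variance rather than out of the total variance of y.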
====== Simple regression ======
<code>
> # multiple regression: a simple e.g.
> #
> #
> rm(list=ls())
> d <- read.csv("
> d
   ...
1   6 220  5
2   5 190  6
3   7 260  3
4   7 200  4
5   8 330  2
6  10 490  4
7   8 210  3
8  11 380  2
9   9 320  1
10  9 270  3
> 
> colnames(d) <- c("y", "x1", "x2")
> d
    y  x1 x2
1   6 220  5
2   5 190  6
3   7 260  3
4   7 200  4
5   8 330  2
6  10 490  4
7   8 210  3
8  11 380  2
9   9 320  1
10  9 270  3
> # attach(d)
> lm.y.x1 <- lm(y ~ x1, data=d)
> summary(lm.y.x1)

Call:
lm(formula = y ~ x1, data = d)

Residuals:
    Min      1Q  Median 
-1.5189 -0.8969 -0.1297 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept) 3.617781 
x1          0.015269 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.176 on 8 degrees of freedom
Multiple R-squared:  0.6311,    Adjusted R-squared:  0.585 
F-statistic: 13.69 on 1 and 8 DF,  p-value: 0.006046
</code>
| + | |||
| + | 단순회귀분석에서 (simple regression) F-test와 t-test는 (slope test) 기본적으로 똑 같은 테스트를 말한다. 왜냐하면 F-test에 기여하는 독립변인이 오직하나이고 그 하나가 slope test에 (t-test) 사용되기 때문이다. 이것은 t-test의 t값과 F-test의 F값의 관계에서도 나타난다. | ||
| + | |||
| + | $$ t^2 = F $$ | ||
| + | |||
| + | < | ||
| + | > t.cal <- 3.7 | ||
| + | > t.cal^2 | ||
| + | [1] 13.69 | ||
| + | > F.cal <- 13.69 | ||
| + | > F.cal | ||
| + | [1] 13.69 | ||
| + | </ | ||
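The same identity can be read directly off the fitted model object. A minimal sketch, assuming the ''lm.y.x1'' model fitted above is still in the workspace; ''summary.lm'' stores the slope's t value in its coefficient table and the model F statistic in ''$fstatistic'':

<code>
s <- summary(lm.y.x1)
t.x1 <- s$coefficients["x1", "t value"]  # slope t value
F.mod <- s$fstatistic["value"]           # model F statistic
t.x1^2   # 13.68733
F.mod    # 13.68733, equal to t^2
</code>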
| + | |||
| + | Simple regression에서 설명한 것처럼 기울기에 (slope) 대한 t-test는 기울기가 y 변인의 variability를 (평균을 중심으로 흔들림을) 설명하는 데 기여했는가를 테스트 하기 위한 것이다. 기울기가 0 이라면 이는 평균을 (평균선이 기울기가 0이다) 사용하는 것과 같으므로 기울기의 효과가 없음을 의미한다. 따라서 b와 b zero의 차이가 통계학적으로 의미있었는가를 t-test한다. | ||
| + | $$ \text{t calculated value} = \frac {b - 0}{se} $$ | ||
| + | 위에서 $se$는 아래처럼 구한다고 언급하였다. | ||
| + | |||
| + | \begin{eqnarray*} | ||
| + | se & = & \sqrt{\frac{1}{n-2} * \frac{\text{SSE}}{\text{SSx}}} \\ | ||
| + | & = & \sqrt{\frac {\text{MSE}} {\text{SSx}}} \\ | ||
| + | \text{note that MSE } & = & \text{mean square error } \\ | ||
| + | & = & \text{ms.res } | ||
| + | \end{eqnarray*} | ||
| + | |||
| + | 위에서 구한 t값의 p value는 R에서 | ||
| + | < | ||
| + | summary(lm.y.x1) | ||
| + | n <- length(y) | ||
| + | k <- 1 # num of predictor variables | ||
| + | sse <- sum(lm.y.x1$residuals^2) # ss.res | ||
| + | ssx1 <- sum((x1-mean(x1))^2) | ||
| + | b <- lm.y.x1$coefficient[2] | ||
| + | se <- sqrt((1/ | ||
| + | t.b.cal <- (b - 0) / se | ||
| + | t.b.cal | ||
| + | p.value <- 2 * pt(t.b.cal, n-k-1, lower.tail=F) | ||
| + | p.value | ||
| + | # checck | ||
| + | t.b.cal | ||
| + | f.cal <- t.b.cal^2 | ||
| + | f.cal | ||
| + | p.value | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | > summary(lm.y.x1) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ x1, data = d) | ||
| + | |||
| + | Residuals: | ||
| + | Min 1Q Median | ||
| + | -1.5189 -0.8969 -0.1297 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) 3.617781 | ||
| + | x1 0.015269 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 1.176 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > n <- length(y) | ||
| + | > k <- 1 # num of predictor variables | ||
| + | > sse <- sum(lm.y.x1$residuals^2) | ||
| + | > ssx1 <- sum((x1-mean(x1))^2) | ||
| + | > b <- lm.y.x1$coefficient[2] | ||
| + | > se < | ||
| + | > se < | ||
| + | > t.b.cal <- (b - 0) / se | ||
| + | > t.b.cal | ||
| + | x1 | ||
| + | 3.699639 | ||
| + | > p.value <- 2 * pt(t.b.cal, n-k-1, lower.tail=F) | ||
| + | > | ||
| + | > # checck | ||
| + | > t.b.cal | ||
| + | x1 | ||
| + | 3.699639 | ||
| + | > t.b.cal^2 | ||
| + | x1 | ||
| + | 13.68733 | ||
| + | > p.value | ||
| + | | ||
| + | 0.006045749 | ||
| + | > | ||
| + | > | ||
| + | </ | ||
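Since $t^2 = F$, the two-sided p-value from the t distribution and the right-tail p-value from the F distribution must agree. A quick sketch with the values just computed, using base R ''pt()'' and ''pf()'':

<code>
2 * pt(3.699639, df = 8, lower.tail = FALSE)          # t-test: 0.006045749
pf(3.699639^2, df1 = 1, df2 = 8, lower.tail = FALSE)  # F-test: 0.006045749
</code>

The two probabilities are identical, which is why summary() reports the same p-value for the slope t-test and for the model F-test in simple regression.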
| + | |||
| + | < | ||
| + | > | ||
| + | > lm.y.x2 <- lm(y ~ x2, data=d) | ||
| + | > summary(lm.y.x2) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ x2, data = d) | ||
| + | |||
| + | Residuals: | ||
| + | Min 1Q Median | ||
| + | -1.2537 -0.8881 -0.4851 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | x2 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 1.397 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | > | ||
| + | > | ||
| + | </ | ||
| + | |||