Differences

This shows you the differences between two versions of the page.

--- c:ms:multiple_regression_lecture_note_for_r [2024/09/30 08:55] – [Multiple regression] hkimscil
+++ c:ms:multiple_regression_lecture_note_for_r [2024/09/30 08:56] (current) – [Simple regression] hkimscil
@@ Line 152: / Line 152: @@
 ====== Output ======
-===== Simple regression =====
+===== Multiple regression =====
+<code>
+>
+> lm.y.x1x2 <- lm(y ~ x1+x2, data=d)
+> summary(lm.y.x1x2)
+Call:
+lm(formula = y ~ x1 + x2, data = d)
+Residuals:
+    Min      1Q  Median      3Q     Max
+-1.2173 -0.5779 -0.1515  0.6642  1.1906
+Coefficients:
+             Estimate Std. Error t value Pr(>|t|)
+(Intercept)  6.399103   1.516539   4.220  0.00394 **
+x1           0.011841   0.003561   3.325  0.01268 *
+x2          -0.544727   0.226364  -2.406  0.04702 *
+---
+Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+Residual standard error: 0.9301 on 7 degrees of freedom
+Multiple R-squared:  0.7981,	Adjusted R-squared:  0.7404
+F-statistic: 13.84 on 2 and 7 DF,  p-value: 0.003696
+>
+>
+> lm.y.x1x2$coefficient
+(Intercept)          x1          x2
+.39910298  0.01184145 -0.54472725
+> # y.hat = 6.399103 + (0.011841)*x1 + (−0.544727)*x2
+> a <- lm.y.x1x2$coefficient[1]
+> b1 <- lm.y.x1x2$coefficient[2]
+> b2 <- lm.y.x1x2$coefficient[3]
+>
+> y.pred <- a + (b1 * x1) + (b2 * x2)
+> y.pred
+ [1]  6.280586  5.380616  7.843699  6.588485  9.217328 10.022506
+ [7]  7.251626  9.809401  9.643641  7.962113
+> y.real <- y
+> y.real
+ [1]  6  5  7  7  8 10  8 11  9  9
+> y.mean <- mean(y)
+> y.mean
+[1] 8
+>
+> res <- y.real - y.pred
+> reg <- y.pred - y.mean
+> ss.res <- sum(res^2)
+> ss.reg <- sum(reg^2)
+>
+> ss.tot <- var(y) * (length(y)-1)
+> ss.tot
+[1] 30
+> ss.res
+[1] 6.056235
+> ss.reg
+[1] 23.94376
+> ss.res+ss.reg
+[1] 30
+>
+> # slope test
+> summary(lm.y.x1x2)
+Call:
+lm(formula = y ~ x1 + x2, data = d)
+Residuals:
+    Min      1Q  Median      3Q     Max
+-1.2173 -0.5779 -0.1515  0.6642  1.1906
+Coefficients:
+             Estimate Std. Error t value Pr(>|t|)
+(Intercept)  6.399103   1.516539   4.220  0.00394 **
+x1           0.011841   0.003561   3.325  0.01268 *
+x2          -0.544727   0.226364  -2.406  0.04702 *
+---
+Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+Residual standard error: 0.9301 on 7 degrees of freedom
+Multiple R-squared:  0.7981,	Adjusted R-squared:  0.7404
+F-statistic: 13.84 on 2 and 7 DF,  p-value: 0.003696
+> # note on 2 t-tests
+>
+> # beta coefficient (standardized b)
+> # beta <- b * (sd(x)/sd(y))
+> beta1 <- b1 * (sd(x1)/sd(y))
+> beta2 <- b2 * (sd(x2)/sd(y))
+> beta1
+      x1
+.616097
+> beta2
+        x2
+-0.4458785
+>
+> # install.packages("lm.beta")
+> library(lm.beta)
+> lm.beta(lm.y.x1x2)
+Call:
+lm(formula = y ~ x1 + x2, data = d)
+Standardized Coefficients::
+(Intercept)          x1          x2
+         NA   0.6160970  -0.4458785
+>
+> #######################################################
+> # partial correlation coefficient and pr2
+> # x2's explanation?
+> lm.tmp.1 <- lm(x2~x1, data=d)
+> res.x2.x1 <- lm.tmp.1$residuals
+>
+> lm.tmp.2 <- lm(y~x1, data=d)
+> res.y.x1 <- lm.tmp.2$residuals
+>
+> lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d)
+> summary(lm.tmp.3)
+Call:
+lm(formula = res.y.x1 ~ res.x2.x1, data = d)
+Residuals:
+    Min      1Q  Median      3Q     Max
+-1.2173 -0.5779 -0.1515  0.6642  1.1906
+Coefficients:
+              Estimate Std. Error t value Pr(>|t|)
+(Intercept)  6.281e-18  2.751e-01   0.000    1.000
+res.x2.x1   -5.447e-01  2.117e-01  -2.573    0.033 *
+---
+Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+Residual standard error: 0.8701 on 8 degrees of freedom
+Multiple R-squared:  0.4527,	Adjusted R-squared:  0.3843
+F-statistic: 6.618 on 1 and 8 DF,  p-value: 0.033
+>
+> # install.packages("ppcor")
+> library(ppcor)
+> pcor(d)
+$estimate
+            y        x1         x2
+y   1.0000000 0.7825112 -0.6728560
+x1  0.7825112 1.0000000  0.3422911
+x2 -0.6728560 0.3422911  1.0000000
+$p.value
+            y         x1         x2
+y  0.00000000 0.01267595 0.04702022
+x1 0.01267595 0.00000000 0.36723388
+x2 0.04702022 0.36723388 0.00000000
+$statistic
+           y        x1         x2
+y   0.000000 3.3251023 -2.4064253
+x1  3.325102 0.0000000  0.9638389
+x2 -2.406425 0.9638389  0.0000000
+$n
+[1] 10
+$gp
+[1] 1
+$method
+[1] "pearson"
+> spcor(d)
+$estimate
+            y        x1         x2
+y   1.0000000 0.5646726 -0.4086619
+x1  0.7171965 1.0000000  0.2078919
+x2 -0.6166940 0.2470028  1.0000000
+$p.value
+            y       x1        x2
+y  0.00000000 0.113182 0.2748117
+x1 0.02964029 0.000000 0.5914441
+x2 0.07691195 0.521696 0.0000000
+$statistic
+           y        x1         x2
+y   0.000000 1.8101977 -1.1846548
+x1  2.722920 0.0000000  0.5623159
+x2 -2.072679 0.6744045  0.0000000
+$n
+[1] 10
+$gp
+[1] 1
+$method
+[1] "pearson"
+> partial.r <- pcor.test(y, x2, x1)
+> partial.r
+   estimate    p.value statistic  n gp  Method
+-0.672856 0.04702022 -2.406425 10  1 pearson
+> str(partial.r)
+'data.frame':	1 obs. of  6 variables:
+ $ estimate : num -0.673
+ $ p.value  : num 0.047
+ $ statistic: num -2.41
+ $ n        : int 10
+ $ gp       : num 1
+ $ Method   : chr "pearson"
+> partial.r$estimate^2
+[1] 0.4527352
+>
+> # x1's own explanation?
+> lm.tmp.4 <- lm(x1~x2, data=d)
+> res.x1.x2 <- lm.tmp.4$residuals
+>
+> lm.tmp.5 <- lm(y~x2, data=d)
+> res.y.x2 <- lm.tmp.5$residuals
+>
+> lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d)
+> summary(lm.tmp.6)
+Call:
+lm(formula = res.y.x2 ~ res.x1.x2, data = d)
+Residuals:
+    Min      1Q  Median      3Q     Max
+-1.2173 -0.5779 -0.1515  0.6642  1.1906
+Coefficients:
+             Estimate Std. Error t value Pr(>|t|)
+(Intercept) 1.330e-17  2.751e-01   0.000  1.00000
+res.x1.x2   1.184e-02  3.331e-03   3.555  0.00746 **
+---
+Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+Residual standard error: 0.8701 on 8 degrees of freedom
+Multiple R-squared:  0.6123,	Adjusted R-squared:  0.5639
+F-statistic: 12.64 on 1 and 8 DF,  p-value: 0.007458
+>
+> partial.r2 <- pcor.test(y, x1, x2)
+> str(partial.r2)
+'data.frame':	1 obs. of  6 variables:
+ $ estimate : num 0.783
+ $ p.value  : num 0.0127
+ $ statistic: num 3.33
+ $ n        : int 10
+ $ gp       : num 1
+ $ Method   : chr "pearson"
+> partial.r2$estimate^2
+[1] 0.6123238
+> #######################################################
+>
+> # semipartial correlation coefficient and spr2
+> #
+> spr.1 <- spcor.test(y,x2,x1)
+> spr.2 <- spcor.test(y,x1,x2)
+> spr.1
+    estimate   p.value statistic  n gp  Method
+-0.4086619 0.2748117 -1.184655 10  1 pearson
+> spr.2
+   estimate  p.value statistic  n gp  Method
+0.5646726 0.113182  1.810198 10  1 pearson
+> spr.1$estimate^2
+[1] 0.1670045
+> spr.2$estimate^2
+[1] 0.3188552
+>
+> lm.tmp.7 <- lm(y ~ res.x2.x1, data = d)
+> summary(lm.tmp.7)
+Call:
+lm(formula = y ~ res.x2.x1, data = d)
+Residuals:
+    Min      1Q  Median      3Q     Max
+-1.8617 -1.1712 -0.4940  0.5488  3.0771
+Coefficients:
+            Estimate Std. Error t value Pr(>|t|)
+(Intercept)   8.0000     0.5589  14.314 5.54e-07 ***
+res.x2.x1    -0.5447     0.4301  -1.266    0.241
+---
+Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+Residual standard error: 1.767 on 8 degrees of freedom
+Multiple R-squared:  0.167,	Adjusted R-squared:  0.06288
+F-statistic: 1.604 on 1 and 8 DF,  p-value: 0.241
+> #######################################################
+>
+> # get the common area that explain the y variable
+> # 1.
+> summary(lm.y.x2)
+Call:
+lm(formula = y ~ x2, data = d)
+Residuals:
+    Min      1Q  Median      3Q     Max
+-1.2537 -0.8881 -0.4851  0.4963  2.5920
+Coefficients:
+            Estimate Std. Error t value Pr(>|t|)
+(Intercept)  10.7910     1.1195   9.639 1.12e-05 ***
+x2           -0.8458     0.3117  -2.713   0.0265 *
+---
+Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+Residual standard error: 1.397 on 8 degrees of freedom
+Multiple R-squared:  0.4793,	Adjusted R-squared:  0.4142
+F-statistic: 7.363 on 1 and 8 DF,  p-value: 0.02651
+> all.x2 <- summary(lm.y.x2)$r.squared
+> sp.x2 <- spr.1$estimate^2
+> all.x2
+[1] 0.4792703
+> sp.x2
+[1] 0.1670045
+> cma.1 <- all.x2 - sp.x2
+> cma.1
+[1] 0.3122658
+>
+> # 2.
+> summary(lm.y.x1)
+Call:
+lm(formula = y ~ x1, data = d)
+Residuals:
+    Min      1Q  Median      3Q     Max
+-1.5189 -0.8969 -0.1297  1.0058  1.5800
+Coefficients:
+            Estimate Std. Error t value Pr(>|t|)
+(Intercept) 3.617781   1.241518   2.914  0.01947 *
+x1          0.015269   0.004127   3.700  0.00605 **
+---
+Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+Residual standard error: 1.176 on 8 degrees of freedom
+Multiple R-squared:  0.6311,	Adjusted R-squared:  0.585
+F-statistic: 13.69 on 1 and 8 DF,  p-value: 0.006046
+> all.x1 <- summary(lm.y.x1)$r.squared
+> sp.x1 <- spr.2$estimate^2
+> all.x1
+[1] 0.631121
+> sp.x1
+[1] 0.3188552
+> cma.2 <- all.x1 - sp.x1
+> cma.2
+[1] 0.3122658
+>
+> # OR 3.
+> summary(lm.y.x1x2)
+Call:
+lm(formula = y ~ x1 + x2, data = d)
+Residuals:
+    Min      1Q  Median      3Q     Max
+-1.2173 -0.5779 -0.1515  0.6642  1.1906
+Coefficients:
+             Estimate Std. Error t value Pr(>|t|)
+(Intercept)  6.399103   1.516539   4.220  0.00394 **
+x1           0.011841   0.003561   3.325  0.01268 *
+x2          -0.544727   0.226364  -2.406  0.04702 *
+---
+Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+Residual standard error: 0.9301 on 7 degrees of freedom
+Multiple R-squared:  0.7981,	Adjusted R-squared:  0.7404
+F-statistic: 13.84 on 2 and 7 DF,  p-value: 0.003696
+> r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
+> r2.y.x1x2
+[1] 0.7981255
+> sp.x1
+[1] 0.3188552
+> sp.x2
+[1] 0.1670045
+> cma.3 <- r2.y.x1x2 - (sp.x1 + sp.x2)
+> cma.3
+[1] 0.3122658
+>
+> cma.1
+[1] 0.3122658
+> cma.2
+[1] 0.3122658
+> cma.3
+[1] 0.3122658
+> # Note that sorting out unique and common
+> # explanation area is only possible with
+> # semi-partial correlation determinant
+> # NOT partial correlation determinant
+> # because only semi-partial correlation
+> # shares the same denominator (as total
+> # y).
+> #############################################
+>
+>
+>
+</code>
+====== Simple regression ======
 <code>
 > # multiple regression: a simple e.g.