====== Multiple regression with pr, spr, zero-order r ======
<code>
# multiple regression: a simple e.g.
rm(list=ls())
d <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")
d
colnames(d) <- c("y", "x1", "x2")
d
attach(d)

lm.y.x1 <- lm(y ~ x1, data=d)
summary(lm.y.x1)

lm.y.x2 <- lm(y ~ x2, data=d)
summary(lm.y.x2)

lm.y.x1x2 <- lm(y ~ x1+x2, data=d)
summary(lm.y.x1x2)

lm.y.x1x2$coefficient
# y.hat = a + (b1 * x1) + (b2 * x2)
a <- lm.y.x1x2$coefficient[1]
b1 <- lm.y.x1x2$coefficient[2]
b2 <- lm.y.x1x2$coefficient[3]

y.pred <- a + (b1 * x1) + (b2 * x2)
y.pred
y.real <- y
y.real
y.mean <- mean(y)
y.mean

res <- y.real - y.pred
reg <- y.pred - y.mean
ss.res <- sum(res^2)
ss.reg <- sum(reg^2)
ss.tot <- var(y) * (length(y)-1)
ss.tot
ss.res
ss.reg
ss.res + ss.reg

# slope test
summary(lm.y.x1x2)

# beta coefficient (standardized b)
# beta <- b * (sd(x)/sd(y))
beta1 <- b1 * (sd(x1)/sd(y))
beta2 <- b2 * (sd(x2)/sd(y))
beta1
beta2

# install.packages("lm.beta")
library(lm.beta)
lm.beta(lm.y.x1x2)

#######################################################
# partial correlation coefficient and pr2
# x2's explanation?
lm.tmp.1 <- lm(x2~x1, data=d)
res.x2.x1 <- lm.tmp.1$residuals

lm.tmp.2 <- lm(y~x1, data=d)
res.y.x1 <- lm.tmp.2$residuals

lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d)
summary(lm.tmp.3)

# install.packages("ppcor")
library(ppcor)
pcor(d)
spcor(d)
partial.r <- pcor.test(y, x2, x1)
partial.r
str(partial.r)
partial.r$estimate^2

# x1's own explanation?
lm.tmp.4 <- lm(x1~x2, data=d)
res.x1.x2 <- lm.tmp.4$residuals

lm.tmp.5 <- lm(y~x2, data=d)
res.y.x2 <- lm.tmp.5$residuals

lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d)
summary(lm.tmp.6)

partial.r2 <- pcor.test(y, x1, x2)
str(partial.r2)
partial.r2$estimate^2
#######################################################

# semipartial correlation coefficient and spr2
spr.1 <- spcor.test(y, x2, x1)
spr.2 <- spcor.test(y, x1, x2)
spr.1
spr.2
spr.1$estimate^2
spr.2$estimate^2

lm.tmp.7 <- lm(y ~ res.x2.x1, data = d)
summary(lm.tmp.7)
#######################################################

# get the common area that explains the y variable
# 1.
summary(lm.y.x2)
all.x2 <- summary(lm.y.x2)$r.squared
sp.x2 <- spr.1$estimate^2
all.x2
sp.x2
cma.1 <- all.x2 - sp.x2
cma.1

# 2.
summary(lm.y.x1)
all.x1 <- summary(lm.y.x1)$r.squared
sp.x1 <- spr.2$estimate^2
all.x1
sp.x1
cma.2 <- all.x1 - sp.x1
cma.2

# OR 3.
summary(lm.y.x1x2)
r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
r2.y.x1x2
sp.x1
sp.x2
cma.3 <- r2.y.x1x2 - (sp.x1 + sp.x2)
cma.3

cma.1
cma.2
cma.3
# Note that sorting out unique and common
# explained areas is possible only with the
# squared semi-partial correlation,
# NOT the squared partial correlation,
# because only the semi-partial correlation
# shares the same denominator (the total
# variance of y).
#############################################
</code>
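
As a cross-check, the partial and semi-partial correlations computed above with pcor.test() and spcor.test() can be recovered with plain cor() on the residual vectors. A minimal sketch, assuming the script above has just been run (so y is attached and the res.* vectors exist):

<code>
# cross-check pr and spr with plain cor() on the residuals (sketch)
cor(res.y.x1, res.x2.x1)  # partial r of y and x2, controlling for x1 (~ -0.673)
cor(y, res.x2.x1)         # semi-partial r: x1 removed from x2 only (~ -0.409)
cor(res.y.x2, res.x1.x2)  # partial r of y and x1, controlling for x2 (~ 0.783)
cor(y, res.x1.x2)         # semi-partial r: x2 removed from x1 only (~ 0.565)
</code>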

====== Output ======
===== Multiple regression =====
<code>

> lm.y.x1x2 <- lm(y ~ x1+x2, data=d)
> summary(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = d)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.2173 -0.5779 -0.1515  0.6642  1.1906 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)   
(Intercept)  6.399103   1.516539   4.220  0.00394 **
x1           0.011841   0.003561   3.325  0.01268 * 
x2          -0.544727   0.226364  -2.406  0.04702 * 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9301 on 7 degrees of freedom
Multiple R-squared:  0.7981, Adjusted R-squared:  0.7404 
F-statistic: 13.84 on 2 and 7 DF,  p-value: 0.003696



> lm.y.x1x2$coefficient
(Intercept)          x1          x2 
 6.39910298  0.01184145 -0.54472725 
> # y.hat = 6.399103 + (0.011841)*x1 + (-0.544727)*x2 
> a <- lm.y.x1x2$coefficient[1]
> b1 <- lm.y.x1x2$coefficient[2]
> b2 <- lm.y.x1x2$coefficient[3]

> y.pred <- a + (b1 * x1) + (b2 * x2)
> y.pred
 [1]  6.280586  5.380616  7.843699  6.588485  9.217328 10.022506
 [7]  7.251626  9.809401  9.643641  7.962113
> y.real <- y
> y.real
 [1]  6  5  7  7  8 10  8 11  9  9
> y.mean <- mean(y)
> y.mean 
[1] 8

> res <- y.real - y.pred
> reg <- y.pred - y.mean
> ss.res <- sum(res^2)
> ss.reg <- sum(reg^2)

> ss.tot <- var(y) * (length(y)-1)
> ss.tot
[1] 30
> ss.res
[1] 6.056235
> ss.reg
[1] 23.94376
> ss.res+ss.reg
[1] 30

> # slope test
> summary(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = d)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.2173 -0.5779 -0.1515  0.6642  1.1906 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)   
(Intercept)  6.399103   1.516539   4.220  0.00394 **
x1           0.011841   0.003561   3.325  0.01268 * 
x2          -0.544727   0.226364  -2.406  0.04702 * 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9301 on 7 degrees of freedom
Multiple R-squared:  0.7981, Adjusted R-squared:  0.7404 
F-statistic: 13.84 on 2 and 7 DF,  p-value: 0.003696

> # note on the 2 t-tests (one for each slope)

> # beta coefficient (standardized b)
> # beta <- b * (sd(x)/sd(y))
> beta1 <- b1 * (sd(x1)/sd(y))
> beta2 <- b2 * (sd(x2)/sd(y))
> beta1
      x1 
0.616097 
> beta2
        x2 
-0.4458785 

> # install.packages("lm.beta")
> library(lm.beta)
> lm.beta(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = d)

Standardized Coefficients::
(Intercept)          x1          x2 
         NA   0.6160970  -0.4458785 

> #######################################################
> # partial correlation coefficient and pr2
> # x2's explanation? 
> lm.tmp.1 <- lm(x2~x1, data=d)
> res.x2.x1 <- lm.tmp.1$residuals

> lm.tmp.2 <- lm(y~x1, data=d)
> res.y.x1 <- lm.tmp.2$residuals

> lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d)
> summary(lm.tmp.3)

Call:
lm(formula = res.y.x1 ~ res.x2.x1, data = d)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.2173 -0.5779 -0.1515  0.6642  1.1906 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)  
(Intercept)  6.281e-18  2.751e-01   0.000    1.000  
res.x2.x1   -5.447e-01  2.117e-01  -2.573    0.033 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8701 on 8 degrees of freedom
Multiple R-squared:  0.4527, Adjusted R-squared:  0.3843 
F-statistic: 6.618 on 1 and 8 DF,  p-value: 0.033


> # install.packages("ppcor")
> library(ppcor)
> pcor(d)
$estimate
            y        x1         x2
y   1.0000000 0.7825112 -0.6728560
x1  0.7825112 1.0000000  0.3422911
x2 -0.6728560 0.3422911  1.0000000

$p.value
            y         x1         x2
y  0.00000000 0.01267595 0.04702022
x1 0.01267595 0.00000000 0.36723388
x2 0.04702022 0.36723388 0.00000000

$statistic
           y        x1         x2
y   0.000000 3.3251023 -2.4064253
x1  3.325102 0.0000000  0.9638389
x2 -2.406425 0.9638389  0.0000000

$n
[1] 10

$gp
[1] 1

$method
[1] "pearson"

> spcor(d)
$estimate
            y        x1         x2
y   1.0000000 0.5646726 -0.4086619
x1  0.7171965 1.0000000  0.2078919
x2 -0.6166940 0.2470028  1.0000000

$p.value
            y       x1        x2
y  0.00000000 0.113182 0.2748117
x1 0.02964029 0.000000 0.5914441
x2 0.07691195 0.521696 0.0000000

$statistic
           y        x1         x2
y   0.000000 1.8101977 -1.1846548
x1  2.722920 0.0000000  0.5623159
x2 -2.072679 0.6744045  0.0000000

$n
[1] 10

$gp
[1] 1

$method
[1] "pearson"

> partial.r <- pcor.test(y, x2, x1)
> partial.r
   estimate    p.value statistic  n gp  Method
1 -0.672856 0.04702022 -2.406425 10  1 pearson
> str(partial.r)
'data.frame': 1 obs. of  6 variables:
 $ estimate : num -0.673
 $ p.value  : num 0.047
 $ statistic: num -2.41
 $ n        : int 10
 $ gp       : num 1
 $ Method   : chr "pearson"
> partial.r$estimate^2
[1] 0.4527352

> # x1's own explanation?
> lm.tmp.4 <- lm(x1~x2, data=d)
> res.x1.x2 <- lm.tmp.4$residuals

> lm.tmp.5 <- lm(y~x2, data=d)
> res.y.x2 <- lm.tmp.5$residuals

> lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d)
> summary(lm.tmp.6)

Call:
lm(formula = res.y.x2 ~ res.x1.x2, data = d)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.2173 -0.5779 -0.1515  0.6642  1.1906 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)   
(Intercept) 1.330e-17  2.751e-01   0.000  1.00000   
res.x1.x2   1.184e-02  3.331e-03   3.555  0.00746 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8701 on 8 degrees of freedom
Multiple R-squared:  0.6123, Adjusted R-squared:  0.5639 
F-statistic: 12.64 on 1 and 8 DF,  p-value: 0.007458


> partial.r2 <- pcor.test(y, x1, x2)
> str(partial.r2)
'data.frame': 1 obs. of  6 variables:
 $ estimate : num 0.783
 $ p.value  : num 0.0127
 $ statistic: num 3.33
 $ n        : int 10
 $ gp       : num 1
 $ Method   : chr "pearson"
> partial.r2$estimate^2
[1] 0.6123238
> #######################################################

> # semipartial correlation coefficient and spr2
> #
> spr.1 <- spcor.test(y, x2, x1)
> spr.2 <- spcor.test(y, x1, x2)
> spr.1
    estimate   p.value statistic  n gp  Method
1 -0.4086619 0.2748117 -1.184655 10  1 pearson
> spr.2
   estimate  p.value statistic  n gp  Method
1 0.5646726 0.113182  1.810198 10  1 pearson
> spr.1$estimate^2
[1] 0.1670045
> spr.2$estimate^2
[1] 0.3188552

> lm.tmp.7 <- lm(y ~ res.x2.x1, data = d)
> summary(lm.tmp.7)

Call:
lm(formula = y ~ res.x2.x1, data = d)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8617 -1.1712 -0.4940  0.5488  3.0771 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   8.0000     0.5589  14.314 5.54e-07 ***
res.x2.x1    -0.5447     0.4301  -1.266    0.241    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.767 on 8 degrees of freedom
Multiple R-squared:  0.167, Adjusted R-squared:  0.06288 
F-statistic: 1.604 on 1 and 8 DF,  p-value: 0.241

> #######################################################

> # get the common area that explains the y variable
> # 1.
> summary(lm.y.x2)

Call:
lm(formula = y ~ x2, data = d)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.2537 -0.8881 -0.4851  0.4963  2.5920 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  10.7910     1.1195   9.639 1.12e-05 ***
x2           -0.8458     0.3117  -2.713   0.0265 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.397 on 8 degrees of freedom
Multiple R-squared:  0.4793, Adjusted R-squared:  0.4142 
F-statistic: 7.363 on 1 and 8 DF,  p-value: 0.02651

> all.x2 <- summary(lm.y.x2)$r.squared
> sp.x2 <- spr.1$estimate^2
> all.x2
[1] 0.4792703
> sp.x2
[1] 0.1670045
> cma.1 <- all.x2 - sp.x2
> cma.1
[1] 0.3122658

> # 2.
> summary(lm.y.x1)

Call:
lm(formula = y ~ x1, data = d)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.5189 -0.8969 -0.1297  1.0058  1.5800 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept) 3.617781   1.241518   2.914  0.01947 * 
x1          0.015269   0.004127   3.700  0.00605 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.176 on 8 degrees of freedom
Multiple R-squared:  0.6311, Adjusted R-squared:  0.585 
F-statistic: 13.69 on 1 and 8 DF,  p-value: 0.006046

> all.x1 <- summary(lm.y.x1)$r.squared
> sp.x1 <- spr.2$estimate^2
> all.x1
[1] 0.631121
> sp.x1
[1] 0.3188552
> cma.2 <- all.x1 - sp.x1
> cma.2
[1] 0.3122658

> # OR 3.
> summary(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = d)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.2173 -0.5779 -0.1515  0.6642  1.1906 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)   
(Intercept)  6.399103   1.516539   4.220  0.00394 **
x1           0.011841   0.003561   3.325  0.01268 * 
x2          -0.544727   0.226364  -2.406  0.04702 * 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9301 on 7 degrees of freedom
Multiple R-squared:  0.7981, Adjusted R-squared:  0.7404 
F-statistic: 13.84 on 2 and 7 DF,  p-value: 0.003696

> r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
> r2.y.x1x2
[1] 0.7981255
> sp.x1
[1] 0.3188552
> sp.x2
[1] 0.1670045
> cma.3 <- r2.y.x1x2 - (sp.x1 + sp.x2)
> cma.3
[1] 0.3122658

> cma.1
[1] 0.3122658
> cma.2
[1] 0.3122658
> cma.3
[1] 0.3122658
> # Note that sorting out unique and common
> # explained areas is possible only with the
> # squared semi-partial correlation,
> # NOT the squared partial correlation,
> # because only the semi-partial correlation
> # shares the same denominator (the total
> # variance of y).
> #############################################

</code>
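
The standardized coefficients that lm.beta() reports can also be recovered by refitting the model on z-scored variables. A minimal sketch (the name lm.std is ours, for illustration only):

<code>
# refit on standardized variables; slopes should match lm.beta(lm.y.x1x2)
lm.std <- lm(scale(y) ~ scale(x1) + scale(x2), data=d)
lm.std$coefficient   # intercept ~ 0, slopes ~ 0.616 and -0.446
</code>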
====== Simple regression ======
<code>
> # multiple regression: a simple e.g.
> #
> #
> rm(list=ls())
> d <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")
> d
   bankaccount income famnum
1            6    220      5
2            5    190      6
3            7    260      3
4            7    200      4
5            8    330      2
6           10    490      4
7            8    210      3
8           11    380      2
9            9    320      1
10           9    270      3

> colnames(d) <- c("y", "x1", "x2")
> d
    y  x1 x2
1   6 220  5
2   5 190  6
3   7 260  3
4   7 200  4
5   8 330  2
6  10 490  4
7   8 210  3
8  11 380  2
9   9 320  1
10  9 270  3
> attach(d)
> lm.y.x1 <- lm(y ~ x1, data=d)
> summary(lm.y.x1)

Call:
lm(formula = y ~ x1, data = d)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.5189 -0.8969 -0.1297  1.0058  1.5800 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept) 3.617781   1.241518   2.914  0.01947 * 
x1          0.015269   0.004127   3.700  0.00605 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.176 on 8 degrees of freedom
Multiple R-squared:  0.6311, Adjusted R-squared:  0.585 
F-statistic: 13.69 on 1 and 8 DF,  p-value: 0.006046
</code>

In simple regression, the F-test and the t-test (the slope test) are essentially the same test, because only one independent variable contributes to the F-test, and that same variable is the one examined by the slope test (t-test). This also shows in the relationship between the t value of the t-test and the F value of the F-test:

$$ t^2 = F $$

<code>
> t.cal <- 3.7 
> t.cal^2 
[1] 13.69
> F.cal <- 13.69
> F.cal
[1] 13.69
</code>

As explained in Simple regression, the t-test on the slope checks whether the slope contributes to explaining the variability of y (its fluctuation around the mean). If the slope were 0, the regression line would be identical to the mean line (whose slope is 0), meaning the slope has no effect. The t-test therefore asks whether the difference between b and zero is statistically significant:
$$ \text{t calculated value} = \frac {b - 0}{se} $$
where, as mentioned earlier, $se$ is obtained as follows:

\begin{eqnarray*}
se & = & \sqrt{\frac{1}{n-2} * \frac{\text{SSE}}{\text{SSx}}} \\
& = & \sqrt{\frac {\text{MSE}} {\text{SSx}}} \\
\text{note that MSE } & = & \text{mean square error } \\
& = & \text{ms.res }
\end{eqnarray*}

The p value for the t value obtained above can be computed in R:
<code>
summary(lm.y.x1)
n <- length(y)
k <- 1 # num of predictor variables
sse <- sum(lm.y.x1$residuals^2) # ss.res
ssx1 <- sum((x1-mean(x1))^2)
b <- lm.y.x1$coefficient[2]
se <- sqrt((1/(n-2))*(sse/ssx1))
t.b.cal <- (b - 0) / se
t.b.cal
p.value <- 2 * pt(t.b.cal, n-k-1, lower.tail=F)
p.value 
# check
t.b.cal
f.cal <- t.b.cal^2
f.cal
p.value 
</code>

<code>
> summary(lm.y.x1)

Call:
lm(formula = y ~ x1, data = d)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.5189 -0.8969 -0.1297  1.0058  1.5800 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept) 3.617781   1.241518   2.914  0.01947 * 
x1          0.015269   0.004127   3.700  0.00605 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.176 on 8 degrees of freedom
Multiple R-squared:  0.6311, Adjusted R-squared:  0.585 
F-statistic: 13.69 on 1 and 8 DF,  p-value: 0.006046

> n <- length(y)
> k <- 1 # num of predictor variables
> sse <- sum(lm.y.x1$residuals^2)
> ssx1 <- sum((x1-mean(x1))^2)
> b <- lm.y.x1$coefficient[2]
> se <- sqrt((1/(n-2))*(sse/ssx1))
> mse <- sse/(n-2)
> se <- sqrt(mse/ssx1) # identical, since MSE = SSE/(n-2)
> t.b.cal <- (b - 0) / se
> t.b.cal
      x1 
3.699639 
> p.value <- 2 * pt(t.b.cal, n-k-1, lower.tail=F)

> # check
> t.b.cal
      x1 
3.699639 
> t.b.cal^2
      x1 
13.68733 
> p.value 
         x1 
0.006045749 
</code>
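
Since $t^2 = F$ in simple regression, the same p value can also be taken from the F distribution with 1 and n-k-1 degrees of freedom. A short check, reusing t.b.cal, n, and k from the script above:

<code>
# p value via the F distribution (sketch); should match p.value above
f.cal <- t.b.cal^2
pf(f.cal, 1, n-k-1, lower.tail=F)  # ~ 0.006046
</code>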

<code>
> lm.y.x2 <- lm(y ~ x2, data=d)
> summary(lm.y.x2)

Call:
lm(formula = y ~ x2, data = d)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.2537 -0.8881 -0.4851  0.4963  2.5920 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  10.7910     1.1195   9.639 1.12e-05 ***
x2           -0.8458     0.3117  -2.713   0.0265 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.397 on 8 degrees of freedom
Multiple R-squared:  0.4793, Adjusted R-squared:  0.4142 
F-statistic: 7.363 on 1 and 8 DF,  p-value: 0.02651
</code>
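
The same $t^2 = F$ identity holds for this x2-only model as well; a quick sketch, pulling both values from the summary object:

<code>
# verify t^2 = F for the y ~ x2 model (sketch)
t.x2 <- summary(lm.y.x2)$coefficients["x2", "t value"]
f.x2 <- summary(lm.y.x2)$fstatistic[1]
t.x2^2  # ~ 7.363
f.x2    # ~ 7.363
</code>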