User Tools

Site Tools


c:ms:2023:schedule:w11.lecture.note

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Next revision
Previous revision
c:ms:2023:schedule:w11.lecture.note [2023/05/15 15:08] – created hkimscilc:ms:2023:schedule:w11.lecture.note [2023/05/24 08:37] (current) hkimscil
Line 1: Line 1:
 +====== 하나의 독립변인이 갖는 고유의 영향력 혹은 설명력 파악하기 ======
 +[[:multiple regression]] 참조
 +[[:partial and semipartial correlation]] 참조
 <code> <code>
 datavar <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv" datavar <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv"
Line 6: Line 9:
 attach(datavar) attach(datavar)
  
 +library(ggplot2) # ggplot 사용을 위해서 
 +# install.packages("ggplot2")
  
 lm.y.x1 <- lm(y~x1, data=datavar) lm.y.x1 <- lm(y~x1, data=datavar)
 summary(lm.y.x1) summary(lm.y.x1)
 +str(lm.y.x1)
 intercept.y <- lm.y.x1$coefficients[1] intercept.y <- lm.y.x1$coefficients[1]
 slope.x <- lm.y.x1$coefficients[2] slope.x <- lm.y.x1$coefficients[2]
 intercept.y intercept.y
 slope.x slope.x
-ggplot(data=datavar, aes(income,bankaccount)) ++ggplot(data=datavar, aes(x1,y)) +
   geom_point(color="blue", size=1.5, pch=2) +   geom_point(color="blue", size=1.5, pch=2) +
-  geom_hline(aes(yintercept=mean(bankaccount)),size=1.5, color="red") ++  geom_hline(aes(yintercept=mean(y)), size=1.5, color="red") +
   geom_abline(intercept=intercept.y, slope=slope.x, size=1.5, color="darkgreen")   geom_abline(intercept=intercept.y, slope=slope.x, size=1.5, color="darkgreen")
  
 +# recapping
 +ss.tot <- var(y)*(length(y)-1)
 +ss.tot
 +ss.res <- sum(lm.y.x1$residuals^2)
 +ss.res
 +ss.reg <- sum((lm.y.x1$fitted.values-mean(y))^2)
 +ss.reg
 +ss.tot
 +ss.res + ss.reg
 +
 +summary(lm(lm.y.x1$residuals~x1))
 +summary(lm((lm.y.x1$fitted.values~x1)))
 +summary(lm((lm.y.x1$fitted.values-mean(y))~x1))
 +# recapping ends
  
 lm.y.x2 <- lm(y~x2, data=datavar) lm.y.x2 <- lm(y~x2, data=datavar)
Line 27: Line 46:
 intercept.y intercept.y
 slope.x slope.x
-ggplot(data=datavar, aes(famnum,bankaccount)) ++ggplot(data=datavar, aes(x2,y)) +
   geom_point(color="blue", size=1.5, pch=2) +   geom_point(color="blue", size=1.5, pch=2) +
-  geom_hline(aes(yintercept=mean(bankaccount)),size=1.5, color="red") ++  geom_hline(aes(yintercept=mean(y)),size=1.5, color="red") +
   geom_abline(intercept=intercept.y, slope=slope.x, size=1.5, color="darkgreen")   geom_abline(intercept=intercept.y, slope=slope.x, size=1.5, color="darkgreen")
 +
  
 lm.y.x1x2 <- lm(y~x1+x2, data=datavar) lm.y.x1x2 <- lm(y~x1+x2, data=datavar)
 summary(lm.y.x1x2) summary(lm.y.x1x2)
  
-res.y.minus.x2 <- resid(lm.y.x2) # y - x2 +################################## 
 +res.lm.y.x2 <- resid(lm.y.x2) # y - x2 
  
 +# x2를 제외하기 위한 계산
 lm.x1.x2 <- lm(x1~x2, data=datavar) lm.x1.x2 <- lm(x1~x2, data=datavar)
-res.x1.minus.x2 <- resid(lm.x1.x2) # x1 - x2+res.lm.x1.x2 <- resid(lm.x1.x2) # x1 - x2
  
-lm(res.y.minus.x2~res.x1.minus.x2) +lm(res.lm.y.x2~res.lm.x1.x2) 
-summary(lm(res.y.minus.x2~res.x1.minus.x2)) # x2를 y, x1에서 모두 제외한 분석+summary(lm(res.lm.y.x2~res.lm.x1.x2)) # x2를 y, x1에서 모두 제외한 분석
 # or  # or 
-cor(res.y.minus.x2, res.x1.minus.x2) +cor(res.lm.y.x2, res.lm.x1.x2) 
-cor(res.y.minus.x2, res.x1.minus.x2)^2 +# cor제곱 값은 x2의 설명력을 제어 혹은 제외한 후에 보는  
-summary(lm(res.y.minus.x2~res.x1.minus.x2))$r.squared+# y와 x1간의 r 제곱값이다  
 +cor(res.lm.y.x2, res.lm.x1.x2)^2 
 +summary(lm(res.lm.y.x2~res.lm.x1.x2))$r.squared
  
-################## +############################ 
-install.packages("ppcor")+# http://commres.net/wiki/partial_and_semipartial_correlation 문서 확인할 것 
 +install.packages("ppcor")
 library(ppcor) library(ppcor)
 pcor(datavar) pcor(datavar)
  
-+datavar # data 확인 
-x1 +pcor.test(y, x1, x2) # x2제어 하고, y와 x1과의 correlation값을 묻는 펑션
-x2 +
-datavar +
-pcor.test(y, x1, x2)+
 summary(lm(res.y.minus.x2~res.x1.minus.x2)) summary(lm(res.y.minus.x2~res.x1.minus.x2))
-r.sq <- (summary(lm(res.lm.ba.fn~res.lm.inc.fn)))$r.squared+r.sq <- (summary(lm(res.lm.y.x2~res.lm.x1.x2)))$r.squared 
 +r.sq 
 sqrt(r.sq) sqrt(r.sq)
  
Line 77: Line 100:
 common.area common.area
  
-y.x1 <- lm(y~x1, data=datavar) +# common.area가 맞는지 확인  
-y.x2 <- lm(y~x2, data=datavar) +# b + common.area는 lm(y~x1,data=datavar)에서의 r.square 값이어야 한다 
-bd <- summary(y.x1)$r.squared +summary(lm.y.x1)$r.squared 
-cd <- summary(y.x2)$r.squared +b + common.area 
-bd +# 마찬가지로 c + common.area should be the same as 
-cd +# summary(lm.y.x2)$r.squared 
-bd+cd +summary(lm.y.x2)$r.squared 
-bd+cd-common.area +c + common.area 
-summary(lm.y.x1x2)$r.squared+</code> 
 + 
 +====== 기울기 b에 대한 통계학적인 테스트 ====== 
 +[[:multiple regression]]에서 앞 쪽에 기울기에 대한 테스트 부분 
 +regression 문서에서 [[:regression#eg_3_simple_regressionadjusted_r_squared_slope_test]] 부분 
 + 
 +<code> 
 +################################################ 
 +# standard error of coefficient의 의미  
 +# b의 사선을 중심으로 에러의 분포가 정규분포를  
 +# 이룬다고 할 때, 그 때의 standard error 값. 
 +# 따라서, fitted.value +2 * se 값이 에러의 
 +# 범위를 (range) 알려주는 방법. 
 +# 단, 2 = t critical value로 대체해야  
 +################################################ 
 + 
 +summary(lm.y.x1x2) 
 +mean(y
 +mean(x1) 
 +mean(x2
 +intercept <- lm.y.x1x2$coefficients[1] 
 +b1 <- lm.y.x1x2$coefficients[2] 
 +b2 <- lm.y.x1x2$coefficients[3] 
 +intercept 
 +b1 
 +b2 
 + 
 +ggplot(datavar,aes(x x1, y = y)) + geom_point() + geom_smooth(method = "lm"
 +ggplot(datavar,aes(x = x2, y = y)) + geom_point() + geom_smooth(method = "lm"
 + 
 +summary(lm.y.x1) 
 +<- lm.y.x1$coefficients[1] 
 +b <- lm.y.x1$coefficients[2] 
 +# y hat = a + b x1 
 +# x1 = 200 ? 
 +p <- as.data.frame(200) 
 +
 +colnames(p) <- "x1" 
 +
 + 
 +predict(lm.y.x1, newdata=p) 
 + 
 + 
 +lm.y.x1$coefficients 
 +pred <- predict(lm.y.x1,newdata=p) 
 +summary(lm.y.x1
 +t.critical <- qt(.975,9) 
 +t.critical 
 + 
 +(summary(lm.y.x1))$coefficients  
 +# coefficient 값 중 독립변인의 se값은 $coefficient[4] 
 +# 절편의 se는 [3] 
 + 
 +se.x1 <- (summary(lm.y.x1))$coefficients[4] 
 +se.x1 
 +pred.range.down <- pred - (t.critical*se.x1) 
 +pred.range.up <- pred + (t.critical*se.x1) 
 +pred.range.down 
 +pred.range.up 
 + 
 +summary(lm.y.x2) 
 +a <- lm.y.x2$coefficients[1] 
 +b <- lm.y.x2$coefficients[2] 
 +a 
 +b 
 + 
 +# y hat = a b x2 
 +# x2 = 5 명일때? 
 +p <- as.data.frame(5) 
 +colnames(p) <- "x2" 
 + 
 +lm.y.x2$coefficients 
 +pred <- predict(lm.y.x2,newdata=p) 
 +pred 
 + 
 +t.critical <- qt(.975,9) 
 +t.critical 
 +se.x2 <- (summary(lm.y.x2))$coefficients[4] 
 +se.x2 
 + 
 +lim <- t.critical*se.x2 
 + 
 +pred.range.down <- pred-lim  
 +pred.range.up <- pred+lim 
 +pred.range.down 
 +pred.range.up 
 +# or  
 +pred.range <pred + c(-lim, lim) 
 +pred.range 
 + 
 + 
 +# Note that the range depends upon the se value 
 +summary(lm.y.x1) 
 +summary(lm.y.x2) 
 +summary(lm.y.x1x2) 
 + 
 + 
 +dvar <- read.csv("http://commres.net/wiki/_media/elemapi2_.csv", sep = "\t", fileEncoding="UTF-8-BOM")
  
 +mod1 <- lm(api00 ~ ell + acs_k3 + avg_ed + meals, data=dvar)
 +mod2 <- lm(api00 ~ ell + avg_ed + meals + acs_k3, data=dvar)
 +mod3 <- lm(api00 ~ meals + acs_k3 + avg_ed + ell, data=dvar)
 +summary(mod1)
 +summary(mod2)
 +summary(mod3)
  
 +anova(mod1)
 +anova(mod2)
 +anova(mod3)
 </code> </code>
c/ms/2023/schedule/w11.lecture.note.1684130887.txt.gz · Last modified: 2023/05/15 15:08 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki