partial_and_semipartial_correlation
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
partial_and_semipartial_correlation [2024/06/12 07:56] – [e.g., 독립변인 들이 서로 독립적일 때의 각각의 설명력] hkimscil | partial_and_semipartial_correlation [2025/06/04 08:37] (current) – [X1과 X2 간의 상관관계가 심할 때 Regression 결과의 오류] hkimscil | ||
---|---|---|---|
Line 414: | Line 414: | ||
====== e.g. Using ppcor.test with 4 var ====== | ====== e.g. Using ppcor.test with 4 var ====== | ||
- | |||
< | < | ||
- | options(digits | + | rm(list=ls()) |
- | HSGPA <- c(3.0, 3.2, 2.8, 2.5, 3.2, 3.8, 3.9, 3.8, 3.5, 3.1) | + | library(ggplot2) |
- | FGPA <- c(2.8, 3.0, 2.8, 2.2, 3.3, 3.3, 3.5, 3.7, 3.4, 2.9) | + | library(dplyr) |
- | SATV <- c(500, 550, 450, 400, 600, 650, 700, 550, 650, 550) | + | library(tidyr) |
- | GREV <- c(600, 670, 540, 800, 750, 820, 830, 670, 690, 600) | + | library(faux) |
- | ##GREV <- c(510, 670, 440, 800, 750, 420, 830, 470, 690, 600) | + | |
+ | set.seed(101) | ||
+ | scholar <- rnorm_multi(n = 50, | ||
+ | mu = c(3.12, 3.3, 540, 650), | ||
+ | sd = c(.25, .34, 12, 13), | ||
+ | r = c(0.15, 0.44, 0.47, 0.55, 0.45, 0.88), | ||
+ | | ||
+ | | ||
+ | attach(scholar) | ||
- | scholar <- data.frame(HSGPA, | + | # library(psych) |
- | # install.packages(" | + | |
- | library(psych) | + | |
describe(scholar) # provides descriptive information about each variable | describe(scholar) # provides descriptive information about each variable | ||
Line 437: | Line 441: | ||
# install.packages(" | # install.packages(" | ||
library(ppcor) | library(ppcor) | ||
- | pcor.test(scholar$GREV, | ||
- | reg3 <- lm(GREV ~ SATV + HSGPA) | + | reg.g.sh |
- | resid3 | + | res.g.sh |
- | reg4 <- lm(FGPA ~ SATV + HSGPA) | + | reg.g.fh <- lm(GREV ~ FGPA + HSGPA) |
- | resid4 | + | res.g.fh <- resid(reg.g.fh) |
+ | |||
+ | reg.g.sf <- lm(GREV ~ SATV + FGPA) | ||
+ | res.g.sf <- resid(reg.g.sf) | ||
+ | |||
+ | reg.f.sh | ||
+ | res.f <- resid(reg.f.sh) # second set of residuals - FGPA free of SATV and HSGPA | ||
+ | |||
+ | reg.s.fh <- lm(SATV ~ FGPA + HSGPA) | ||
+ | res.s <- resid(reg.s.fh) | ||
+ | |||
+ | reg.h.sf <- lm(HSGPA ~ FGPA + SATV) | ||
+ | res.h <- resid(reg.h.sf) | ||
+ | |||
+ | reg.all <- lm(GREV ~ HSGPA + FGPA + SATV) | ||
+ | reg.1 <- lm(GREV ~ res.f) | ||
+ | reg.2 <- lm(GREV ~ res.s) | ||
+ | reg.3 <- lm(GREV ~ res.h) | ||
+ | |||
+ | summary(reg.all) | ||
+ | summary(reg.1) | ||
+ | summary(reg.2) | ||
+ | summary(reg.3) | ||
+ | |||
+ | reg.1a <- lm(res.g.sh~res.f) | ||
+ | reg.2a <- lm(res.g.fh~res.s) | ||
+ | reg.3a <- lm(res.g.sf~res.h) | ||
+ | |||
+ | reg.1$coefficient[2] | ||
+ | reg.2$coefficient[2] | ||
+ | reg.3$coefficient[2] | ||
+ | |||
+ | reg.1a$coefficient[2] | ||
+ | reg.2a$coefficient[2] | ||
+ | reg.3a$coefficient[2] | ||
+ | |||
+ | spr.y.f <- spcor.test(GREV, | ||
+ | spr.y.s <- spcor.test(GREV, | ||
+ | spr.y.h <- spcor.test(GREV, | ||
+ | |||
+ | spr.y.f$estimate | ||
+ | spr.y.s$estimate | ||
+ | spr.y.h$estimate | ||
+ | |||
+ | spr.y.f$estimate^2 | ||
+ | spr.y.s$estimate^2 | ||
+ | spr.y.h$estimate^2 | ||
+ | |||
+ | summary(reg.1)$r.square | ||
+ | summary(reg.2)$r.square | ||
+ | summary(reg.3)$r.square | ||
+ | |||
+ | ca <- summary(reg.1)$r.square + | ||
+ | summary(reg.2)$r.square + | ||
+ | summary(reg.3)$r.square | ||
+ | # so common explanation area should be | ||
+ | summary(reg.all)$r.square - ca | ||
- | cor(resid3, resid4) | ||
</ | </ | ||
< | < | ||
- | > pcor.test(scholar$GREV, | ||
- | estimate p.value statistic | ||
- | 1 | ||
> | > | ||
- | > reg3 <- lm(GREV ~ SATV + HSGPA) | + | > rm(list=ls()) |
- | > resid3 <- resid(reg3) # find the residuals - GREV free of SATV and HSGPA | + | |
> | > | ||
- | > reg4 <- lm(FGPA ~ SATV + HSGPA) # second regression | + | > library(ggplot2) |
- | > resid4 <- resid(reg4) # second set of residuals - FGPA free of SATV and HSGPA | + | > library(dplyr) |
+ | > library(tidyr) | ||
+ | > library(faux) | ||
> | > | ||
- | > cor(resid3, resid4) # correlation of residuals - partial correlation | + | > set.seed(101) |
- | [1] -0.535 | + | > scholar <- rnorm_multi(n = 50, |
+ | + mu = c(3.12, 3.3, 540, 650), | ||
+ | + sd = c(.25, .34, 12, 13), | ||
+ | + r = c(0.15, 0.44, 0.47, 0.55, 0.45, 0.88), | ||
+ | + varnames = c(" | ||
+ | + empirical = FALSE) | ||
+ | > attach(scholar) | ||
+ | The following objects are masked from scholar (pos = 3): | ||
+ | FGPA, GREV, HSGPA, SATV | ||
- | </code> | + | > |
- | ---- | + | > # library(psych) |
- | ---- | + | > describe(scholar) # provides descrptive information about each variable |
- | 학자인 A는 GRE점수는 | + | vars n |
- | <code> | + | HSGPA 1 50 |
- | pcor.test(scholar$GREV, | + | FGPA 2 50 |
+ | SATV 3 50 541.28 11.43 538.45 | ||
+ | GREV 4 50 651.72 11.90 649.70 | ||
+ | kurtosis | ||
+ | HSGPA 1.21 0.03 | ||
+ | FGPA -0.01 0.05 | ||
+ | SATV -0.60 1.62 | ||
+ | GREV -0.54 1.68 | ||
+ | > | ||
+ | > corrs <- cor(scholar) # find the correlations and set them into an object called ' | ||
+ | > corrs # print corrs | ||
+ | | ||
+ | HSGPA 1.0000 0.3404 0.4627 0.5406 | ||
+ | FGPA 0.3404 1.0000 0.5266 0.5096 | ||
+ | SATV 0.4627 0.5266 1.0000 0.8802 | ||
+ | GREV 0.5406 0.5096 0.8802 1.0000 | ||
+ | > | ||
+ | > pairs(scholar) # pairwise scatterplots | ||
+ | > | ||
+ | > # install.packages(" | ||
+ | > library(ppcor) | ||
+ | > | ||
+ | > reg.f.sh <- lm(FGPA ~ SATV + HSGPA) | ||
+ | > res.f <- resid(reg.f.sh) | ||
+ | > | ||
+ | > reg.s.fh <- lm(SATV ~ FGPA + HSGPA) | ||
+ | > res.s <- resid(reg.s.fh) | ||
+ | > | ||
+ | > reg.h.sf <- lm(HSGPA ~ FGPA + SATV) | ||
+ | > res.h <- resid(reg.h.sf) | ||
+ | > | ||
+ | > reg.all <- lm(GREV ~ HSGPA + FGPA + SATV) | ||
+ | > reg.1 <- lm(GREV ~ res.f) | ||
+ | > reg.2 <- lm(GREV ~ res.s) | ||
+ | > reg.3 <- lm(GREV ~ res.h) | ||
+ | > | ||
+ | > summary(reg.all) | ||
- | reg7 <- lm(GREV ~ HSGPA + FGPA) # run linear regression | + | Call: |
- | resid7 <- resid(reg7) | + | lm(formula = GREV ~ HSGPA + FGPA + SATV) |
- | reg8 <- lm(SATV ~ HSGPA+ FGPA) # second regression | + | Residuals: |
- | resid8 <- resid(reg8) | + | |
+ | -13.541 | ||
- | cor(resid7, resid8) # correlation of residuals | + | Coefficients: |
+ | Estimate Std. Error t value Pr(>|t|) | ||
+ | (Intercept) 180.2560 | ||
+ | HSGPA | ||
+ | FGPA 1.3994 | ||
+ | SATV 0.8143 | ||
+ | --- | ||
+ | Signif. codes: | ||
- | </code> | + | Residual standard error: 5.51 on 46 degrees of freedom |
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > summary(reg.1) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = GREV ~ res.f) | ||
+ | |||
+ | Residuals: | ||
+ | | ||
+ | -21.76 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) | ||
+ | res.f | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 12 on 48 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > summary(reg.2) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = GREV ~ res.s) | ||
+ | |||
+ | Residuals: | ||
+ | | ||
+ | -22.54 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) | ||
+ | res.s 0.814 0.148 | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 9.42 on 48 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > summary(reg.3) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = GREV ~ res.h) | ||
+ | |||
+ | Residuals: | ||
+ | | ||
+ | -22.71 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) | ||
+ | res.h | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 11.9 on 48 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
- | < | ||
- | > pcor.test(scholar$GREV, | ||
- | estimate p.value statistic | ||
- | 1 | ||
> | > | ||
- | > reg7 <- lm(GREV ~ HSGPA + FGPA) # run linear regression | + | > reg.1$coefficient[2] |
- | > resid7 <- resid(reg7) | + | res.f |
+ | 1.399 | ||
+ | > reg.2$coefficient[2] | ||
+ | res.s | ||
+ | 0.8143 | ||
+ | > reg.3$coefficient[2] | ||
+ | res.h | ||
+ | 8.321 | ||
> | > | ||
- | > reg8 <- lm(SATV ~ HSGPA+ FGPA) # second regression | + | > spr.y.f |
- | > resid8 | + | > spr.y.s |
+ | > spr.y.h <- spcor.test(GREV, | ||
> | > | ||
- | > cor(resid7, resid8) | + | > spr.y.f$estimate |
- | [1] 0.3179 | + | [1] 0.03519 |
+ | > spr.y.s$estimate | ||
+ | [1] 0.6217 | ||
+ | > spr.y.h$estimate | ||
+ | [1] 0.1447 | ||
> | > | ||
+ | > spr.y.f$estimate^2 | ||
+ | [1] 0.001238 | ||
+ | > spr.y.s$estimate^2 | ||
+ | [1] 0.3865 | ||
+ | > spr.y.h$estimate^2 | ||
+ | [1] 0.02094 | ||
+ | > | ||
+ | > summary(reg.1)$r.square | ||
+ | [1] 0.001238 | ||
+ | > summary(reg.2)$r.square | ||
+ | [1] 0.3865 | ||
+ | > summary(reg.3)$r.square | ||
+ | [1] 0.02094 | ||
+ | > | ||
+ | > ca <- summary(reg.1)$r.square + | ||
+ | + | ||
+ | + | ||
+ | > # so common explanation area should be | ||
+ | > summary(reg.all)$r.square - ca | ||
+ | [1] 0.39 | ||
> | > | ||
</ | </ | ||
+ | ---- | ||
+ | {{: | ||
+ | |||
+ | multiple regression 분석을 보면 독립변인의 coefficient 값은 각각 | ||
+ | * HSGPA 8.321 | ||
+ | * FGPA 1.3994 | ||
+ | * SATV 0.8143 | ||
+ | 이 기울기에 대해서 t-test를 각각 하여 HSGPA와 FGPA의 설명력이 significant 한지를 확인하였다. 그리고 이 때의 R<sup>2</sup> 값은 | ||
+ | * 0.799 이었다. | ||
+ | 그런데 이 coefficient값은 독립변인 각각의 고유의 설명력을 가지고 (spcor.test(GREV, | ||
+ | |||
+ | reg.all | ||
+ | {{: | ||
+ | reg.1 | ||
+ | {{: | ||
+ | reg.2 | ||
+ | {{: | ||
+ | reg.3 | ||
+ | {{: | ||
+ | 또한 세 독립변인이 공통적으로 설명하는 부분은 | ||
+ | * 0.39 | ||
+ | 임을 알 수 있다. | ||
====== e.g., 독립변인 들이 서로 독립적일 때의 각각의 설명력 ====== | ====== e.g., 독립변인 들이 서로 독립적일 때의 각각의 설명력 ====== | ||
In this example, the two IVs are orthogonal to each other (i.e., not correlated with each other). Hence, regressing res.y.x2 against x1 would not cause any problem. | In this example, the two IVs are orthogonal to each other (i.e., not correlated with each other). Hence, regressing res.y.x2 against x1 would not cause any problem. | ||
Line 645: | Line 865: | ||
x2의 영향력을 control한 후에 x1영향력을 보면 64.54%에 달하게 된다. | x2의 영향력을 control한 후에 x1영향력을 보면 64.54%에 달하게 된다. | ||
- | ====== | + | ====== |
see https:// | see https:// | ||
Line 658: | Line 878: | ||
m <- lm(weights ~ LSS + RSS) | m <- lm(weights ~ LSS + RSS) | ||
- | ## F-value is very small, but neither LSS or RSS are significant | + | ## F-value is very large, and significant. |
+ | # but neither LSS nor RSS is significant
summary(m) | summary(m) | ||
partial_and_semipartial_correlation.1718146598.txt.gz · Last modified: by hkimscil