partial_and_semipartial_correlation
====== e.g. Using ppcor.test with 4 var ======
<code>
rm(list=ls())
library(ggplot2)
library(dplyr)
library(tidyr)
library(faux)

set.seed(101)
scholar <- rnorm_multi(n = 50, 
                       mu = c(3.12, 3.3, 540, 650), 
                       sd = c(.25, .34, 12, 13), 
                       r = c(0.15, 0.44, 0.47, 0.55, 0.45, 0.88), 
                       varnames = c("HSGPA", "FGPA", "SATV", "GREV"),
                       empirical = FALSE)
attach(scholar)

# library(psych)    # describe() below comes from the psych package
describe(scholar)   # provides descriptive information about each variable
corrs <- cor(scholar)   # find the correlations and set them into an object called 'corrs'
corrs                   # print corrs
pairs(scholar)          # pairwise scatterplots
# install.packages("ppcor")
library(ppcor)

reg.g.sh <- lm(GREV ~ SATV + HSGPA)
res.g.sh <- resid(reg.g.sh)      # GREV free of SATV and HSGPA

reg.g.fh <- lm(GREV ~ FGPA + HSGPA)
res.g.fh <- resid(reg.g.fh)      # GREV free of FGPA and HSGPA

reg.g.sf <- lm(GREV ~ SATV + FGPA)
res.g.sf <- resid(reg.g.sf)      # GREV free of SATV and FGPA

reg.f.sh <- lm(FGPA ~ SATV + HSGPA)
res.f <- resid(reg.f.sh)         # second set of residuals - FGPA free of SATV and HSGPA

reg.s.fh <- lm(SATV ~ FGPA + HSGPA)
res.s <- resid(reg.s.fh)         # SATV free of FGPA and HSGPA

reg.h.sf <- lm(HSGPA ~ FGPA + SATV)
res.h <- resid(reg.h.sf)         # HSGPA free of FGPA and SATV

reg.all <- lm(GREV ~ HSGPA + FGPA + SATV)
reg.1 <- lm(GREV ~ res.f)
reg.2 <- lm(GREV ~ res.s)
reg.3 <- lm(GREV ~ res.h)

summary(reg.all)
summary(reg.1)
summary(reg.2)
summary(reg.3)

reg.1a <- lm(res.g.sh ~ res.f)   # both GREV and FGPA residualized: partial correlation view
reg.2a <- lm(res.g.fh ~ res.s)
reg.3a <- lm(res.g.sf ~ res.h)

reg.1$coefficient[2]
reg.2$coefficient[2]
reg.3$coefficient[2]

reg.1a$coefficient[2]
reg.2a$coefficient[2]
reg.3a$coefficient[2]

spr.y.f <- spcor.test(GREV, FGPA, data.frame(SATV, HSGPA))   # controls removed from FGPA only
spr.y.s <- spcor.test(GREV, SATV, data.frame(FGPA, HSGPA))   # controls removed from SATV only
spr.y.h <- spcor.test(GREV, HSGPA, data.frame(FGPA, SATV))   # controls removed from HSGPA only

spr.y.f$estimate
spr.y.s$estimate
spr.y.h$estimate

spr.y.f$estimate^2
spr.y.s$estimate^2
spr.y.h$estimate^2

summary(reg.1)$r.square
summary(reg.2)$r.square
summary(reg.3)$r.square

ca <- summary(reg.1)$r.square +
  summary(reg.2)$r.square +
  summary(reg.3)$r.square       # sum of the three unique contributions
# so the common explanation area should be
summary(reg.all)$r.square - ca
</code>
<code>
> rm(list=ls())
> 
> library(ggplot2)
> library(dplyr)
> library(tidyr)
> library(faux)
> 
> set.seed(101)
> scholar <- rnorm_multi(n = 50, 
+                   mu = c(3.12, 3.3, 540, 650), 
+                   sd = c(.25, .34, 12, 13), 
+                   r = c(0.15, 0.44, 0.47, 0.55, 0.45, 0.88), 
+                   varnames = c("HSGPA", "FGPA", "SATV", "GREV"),
+                   empirical = FALSE)
> attach(scholar)
The following objects are masked from scholar (pos = 3):
    FGPA, GREV, HSGPA, SATV
> 
> # library(psych)
> describe(scholar) # provides descriptive information about each variable
      vars  n   mean    sd median
HSGPA    1 50
FGPA     2 50
SATV     3 50 541.28 11.43 538.45
GREV     4 50 651.72 11.90 649.70
      kurtosis   se
HSGPA     1.21 0.03
FGPA     -0.01 0.05
SATV     -0.60 1.62
GREV     -0.54 1.68
> 
> corrs <- cor(scholar) # find the correlations and set them into an object called 'corrs'
> corrs # print corrs
       HSGPA   FGPA   SATV   GREV
HSGPA 1.0000 0.3404 0.4627 0.5406
FGPA  0.3404 1.0000 0.5266 0.5096
SATV  0.4627 0.5266 1.0000 0.8802
GREV  0.5406 0.5096 0.8802 1.0000
> 
> pairs(scholar) # pairwise scatterplots
> 
> # install.packages("ppcor")
> library(ppcor)
> 
> reg.f.sh <- lm(FGPA ~ SATV + HSGPA)
> res.f <- resid(reg.f.sh)
> 
> reg.s.fh <- lm(SATV ~ FGPA + HSGPA)
> res.s <- resid(reg.s.fh)
> 
> reg.h.sf <- lm(HSGPA ~ FGPA + SATV)
> res.h <- resid(reg.h.sf)
> 
> reg.all <- lm(GREV ~ HSGPA + FGPA + SATV)
> reg.1 <- lm(GREV ~ res.f)
> reg.2 <- lm(GREV ~ res.s)
> reg.3 <- lm(GREV ~ res.h)
> 
> summary(reg.all)

Call:
lm(formula = GREV ~ HSGPA + FGPA + SATV)

Residuals:
    Min      1Q  Median      3Q     Max 
-13.541 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 180.2560 
HSGPA 
FGPA          1.3994 
SATV          0.8143 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5.51 on 46 degrees of freedom
Multiple R-squared:  0.799 
F-statistic: 

> summary(reg.1)

Call:
lm(formula = GREV ~ res.f)

Residuals:
   Min     1Q Median     3Q    Max 
-21.76 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 
res.f 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 12 on 48 degrees of freedom
Multiple R-squared:  0.001238 
F-statistic: 

> summary(reg.2)

Call:
lm(formula = GREV ~ res.s)

Residuals:
   Min     1Q Median     3Q    Max 
-22.54 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 
res.s          0.814      0.148 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 9.42 on 48 degrees of freedom
Multiple R-squared:  0.3865 
F-statistic: 

> summary(reg.3)

Call:
lm(formula = GREV ~ res.h)

Residuals:
   Min     1Q Median     3Q    Max 
-22.71 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 
res.h 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 11.9 on 48 degrees of freedom
Multiple R-squared:  0.02094 
F-statistic: 

> 
> reg.1$coefficient[2]
res.f 
1.399 
> reg.2$coefficient[2]
 res.s 
0.8143 
> reg.3$coefficient[2]
res.h 
8.321 
> 
> spr.y.f <- spcor.test(GREV, FGPA, data.frame(SATV, HSGPA))
> spr.y.s <- spcor.test(GREV, SATV, data.frame(FGPA, HSGPA))
> spr.y.h <- spcor.test(GREV, HSGPA, data.frame(FGPA, SATV))
> 
> spr.y.f$estimate
[1] 0.03519
> spr.y.s$estimate
[1] 0.6217
> spr.y.h$estimate
[1] 0.1447
> 
> spr.y.f$estimate^2
[1] 0.001238
> spr.y.s$estimate^2
[1] 0.3865
> spr.y.h$estimate^2
[1] 0.02094
> 
> summary(reg.1)$r.square
[1] 0.001238
> summary(reg.2)$r.square
[1] 0.3865
> summary(reg.3)$r.square
[1] 0.02094
> 
> ca <- summary(reg.1)$r.square +
+   summary(reg.2)$r.square +
+   summary(reg.3)$r.square
> # so common explanation area should be
> summary(reg.all)$r.square - ca
[1] 0.39
> 
</code>
| + | ---- | ||
| + | {{: | ||
| + | |||
Looking at the multiple regression output (reg.all), the coefficients of the independent variables are
  * HSGPA 8.321
  * FGPA 1.3994
  * SATV 0.8143
A t-test on each of these slopes was used to check whether the explanatory power of HSGPA and FGPA is significant, and the R<sup>2</sup> of this model was
  * 0.799
However, each of these coefficients reflects only that independent variable's unique explanatory power: the regressions on the residualized predictors (reg.1, reg.2, reg.3) reproduce exactly the same slopes, and the squared spcor.test(GREV, ...) estimates equal their R<sup>2</sup> values (see the quick check below).
(Figures for reg.all, reg.1, reg.2, and reg.3.)
We can also see that the portion explained in common by the three independent variables is
  * 0.39
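This common area can also be written out directly from the residual objects. The sketch below assumes GREV, res.f, res.s, res.h and reg.all from the code above are still in the workspace; the name unique.sum is introduced only for this illustration.

<code>
# Sum of the three unique contributions (squared semipartial correlations)
unique.sum <- cor(GREV, res.f)^2 +    # unique part of FGPA
              cor(GREV, res.s)^2 +    # unique part of SATV
              cor(GREV, res.h)^2      # unique part of HSGPA
# Whatever the full model explains beyond the unique parts is shared
summary(reg.all)$r.squared - unique.sum   # common explanation area, about 0.39
</code>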
====== e.g., Explanatory power of each IV when the IVs are independent of each other ======
In this example, the two IVs are orthogonal to each other (not correlated with each other). Hence, regressing res.y.x2 against x1 does not cause any problem.
After controlling for the influence of x2, the explanatory power of x1 reaches 64.54%.
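The sketch below illustrates this orthogonal case in a self-contained way. The data frame d and the variables x1, x2 and y are invented here for illustration (they are not the example used on this page), and faux::rnorm_multi with empirical = TRUE makes the two IVs exactly uncorrelated in the sample.

<code>
# Hypothetical data: two exactly uncorrelated IVs
library(faux)
set.seed(1)
d <- rnorm_multi(n = 200, mu = c(0, 0), sd = c(1, 1), r = 0,
                 varnames = c("x1", "x2"), empirical = TRUE)
d$y <- 2 * d$x1 + 3 * d$x2 + rnorm(200)

r2.x1   <- summary(lm(y ~ x1, data = d))$r.squared
r2.x2   <- summary(lm(y ~ x2, data = d))$r.squared
r2.full <- summary(lm(y ~ x1 + x2, data = d))$r.squared

# With orthogonal IVs the simple R^2 values add up to the full R^2:
# there is no common (overlapping) explanation area.
r2.x1 + r2.x2
r2.full
</code>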
| - | ====== | + | ====== |
see https://
m <- lm(weights ~ LSS + RSS)
## The F-value for the model as a whole is very large and significant,
## but neither LSS nor RSS is individually significant
summary(m)
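For a self-contained illustration of this situation, the sketch below simulates two nearly identical predictors. The names LSS, RSS and weights are stand-ins invented here; they are not the data used on the linked page.

<code>
# Hypothetical data: two predictors that are almost perfectly correlated
set.seed(7)
n   <- 50
LSS <- rnorm(n, mean = 25, sd = 2)          # stand-in for the first predictor
RSS <- LSS + rnorm(n, mean = 0, sd = 0.1)   # nearly identical to LSS
weights <- 30 + LSS + RSS + rnorm(n, mean = 0, sd = 3)

cor(LSS, RSS)                  # close to 1: severe multicollinearity
m <- lm(weights ~ LSS + RSS)
summary(m)                     # the overall F is large and significant,
                               # yet the individual t-tests for LSS and RSS
                               # typically come out non-significant
</code>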
