User Tools

Site Tools


partial_and_semipartial_correlation

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
Next revision
Previous revision
partial_and_semipartial_correlation [2024/10/15 13:23] – [e.g. Using ppcor.test with 4 var] hkimscilpartial_and_semipartial_correlation [2024/10/17 10:28] (current) – [e.g. Using ppcor.test with 4 var] hkimscil
Line 414: Line 414:
  
 ====== e.g. Using ppcor.test with 4 var ====== ====== e.g. Using ppcor.test with 4 var ======
- 
 <code> <code>
 rm(list=ls()) rm(list=ls())
-options(digits = 4) 
  
-HSGPA <- c(3.0, 3.2, 2.8, 2.5, 3.2, 3.8, 3.9, 3.8, 3.5, 3.1+library(ggplot2
-FGPA <-  c(2.8, 3.0, 2.8, 2.2, 3.3, 3.3, 3.5, 3.7, 3.4, 2.9+library(dplyr
-SATV <-  c(500, 550, 450, 400, 600, 650, 700, 550, 650, 550+library(tidyr
-GREV <-  c(600, 670, 540, 800, 750, 820, 830, 670, 690, 600) +library(faux)
-##GREV <- c(510, 670, 440, 800, 750, 420, 830, 470, 690, 600)+
  
 +set.seed(101)
 +scholar <- rnorm_multi(n = 50, 
 +                       mu = c(3.12, 3.3, 540, 650),
 +                       sd = c(.25, .34, 12, 13),
 +                       r = c(0.15, 0.44, 0.47, 0.55, 0.45, 0.88), 
 +                       varnames = c("HSGPA", "FGPA", "SATV", "GREV"),
 +                       empirical = FALSE)
 +attach(scholar)
  
-scholar <- data.frame(HSGPA, FGPA, SATV, GREV) collect into a data frame +# library(psych)
-# install.packages("psych"+
-library(psych)+
 describe(scholar) # provides descrptive information about each variable describe(scholar) # provides descrptive information about each variable
  
Line 436: Line 439:
 pairs(scholar)        # pairwise scatterplots pairs(scholar)        # pairwise scatterplots
  
 +# install.packages("ppcor")
 +library(ppcor)
 +
 +reg.g.sh <- lm(GREV ~ SATV + HSGPA)
 +res.g.sh <- resid(reg.g.sh)
 +
 +reg.g.fh <- lm(GREV ~ FGPA + HSGPA)
 +res.g.fh <- resid(reg.g.fh)
 +
 +reg.g.sf <- lm(GREV ~ SATV + FGPA)
 +res.g.sf <- resid(reg.g.sf)
 +
 +reg.f.sh <- lm(FGPA ~ SATV + HSGPA)   # second regression
 +res.f <- resid(reg.f.sh)     # second set of residuals - FGPA free of SATV and HSGPA
 +
 +reg.s.fh <- lm(SATV ~ FGPA + HSGPA)   
 +res.s <- resid(reg.s.fh)    
 +
 +reg.h.sf <- lm(HSGPA ~ FGPA + SATV)   
 +res.h <- resid(reg.h.sf)    
 +
 +reg.all <- lm(GREV ~ HSGPA + FGPA + SATV)
 +reg.1 <- lm(GREV ~ res.f)
 +reg.2 <- lm(GREV ~ res.s)
 +reg.3 <- lm(GREV ~ res.h)
 +
 +summary(reg.all)
 +summary(reg.1)
 +summary(reg.2)
 +summary(reg.3)
 +
 +reg.1a <- lm(res.g.sh~res.f)
 +reg.2a <- lm(res.g.fh~res.s)
 +reg.3a <- lm(res.g.sf~res.h)
 +
 +reg.1$coefficient[2]
 +reg.2$coefficient[2]
 +reg.3$coefficient[2]
 +
 +reg.1a$coefficient[2]
 +reg.2a$coefficient[2]
 +reg.3a$coefficient[2]
 +
 +spr.y.f <- spcor.test(GREV, FGPA, scholar[,c("SATV", "HSGPA")])
 +spr.y.s <- spcor.test(GREV, SATV, scholar[,c("HSGPA", "FGPA")])
 +spr.y.h <- spcor.test(GREV, HSGPA, scholar[,c("SATV", "FGPA")])
 +
 +spr.y.f$estimate
 +spr.y.s$estimate
 +spr.y.h$estimate
 +
 +spr.y.f$estimate^2
 +spr.y.s$estimate^2
 +spr.y.h$estimate^2
 +
 +summary(reg.1)$r.square
 +summary(reg.2)$r.square
 +summary(reg.3)$r.square
 +
 +ca <- summary(reg.1)$r.square + 
 +  summary(reg.2)$r.square + 
 +  summary(reg.3)$r.square
 +# so common explanation area should be
 +summary(reg.all)$r.square - carm(list=ls())
 +
 +library(ggplot2)
 +library(dplyr)
 +library(tidyr)
 +library(faux)
 +
 +set.seed(101)
 +scholar <- rnorm_multi(n = 50, 
 +                       mu = c(3.12, 3.3, 540, 650),
 +                       sd = c(.25, .34, 12, 13),
 +                       r = c(0.15, 0.44, 0.47, 0.55, 0.45, 0.88), 
 +                       varnames = c("HSGPA", "FGPA", "SATV", "GREV"),
 +                       empirical = FALSE)
 attach(scholar) attach(scholar)
 +
 +# library(psych)
 +describe(scholar) # provides descrptive information about each variable
 +
 +corrs <- cor(scholar) # find the correlations and set them into an object called 'corrs'
 +corrs                 # print corrs
 +
 +pairs(scholar)        # pairwise scatterplots
 +
 # install.packages("ppcor") # install.packages("ppcor")
 library(ppcor) library(ppcor)
-pcor.test(GREV, FGPA, scholar[,c("SATV", "HSGPA")]) # working 
  
 reg.f.sh <- lm(FGPA ~ SATV + HSGPA)   # second regression reg.f.sh <- lm(FGPA ~ SATV + HSGPA)   # second regression
Line 460: Line 548:
 summary(reg.3) summary(reg.3)
  
 +reg.1$coefficient[2]
 +reg.2$coefficient[2]
 +reg.3$coefficient[2]
 +
 +spr.y.f <- spcor.test(GREV, FGPA, scholar[,c("SATV", "HSGPA")])
 +spr.y.s <- spcor.test(GREV, SATV, scholar[,c("HSGPA", "FGPA")])
 +spr.y.h <- spcor.test(GREV, HSGPA, scholar[,c("SATV", "FGPA")])
 +
 +spr.y.f$estimate
 +spr.y.s$estimate
 +spr.y.h$estimate
 +
 +spr.y.f$estimate^2
 +spr.y.s$estimate^2
 +spr.y.h$estimate^2
 +
 +summary(reg.1)$r.square
 +summary(reg.2)$r.square
 +summary(reg.3)$r.square
 +
 +ca <- summary(reg.1)$r.square + 
 +  summary(reg.2)$r.square + 
 +  summary(reg.3)$r.square
 +# so common explanation area should be
 +summary(reg.all)$r.square - ca
 </code> </code>
  
Line 465: Line 578:
  
 > rm(list=ls()) > rm(list=ls())
-> options(digits = 4) 
  
-HSGPA <- c(3.0, 3.2, 2.8, 2.5, 3.2, 3.8, 3.9, 3.8, 3.5, 3.1+library(ggplot2
-FGPA <-  c(2.8, 3.0, 2.8, 2.2, 3.3, 3.3, 3.5, 3.7, 3.4, 2.9+library(dplyr
-SATV <-  c(500, 550, 450, 400, 600, 650, 700, 550, 650, 550+library(tidyr
-GREV <-  c(600, 670, 540, 800, 750, 820, 830, 670, 690, 600) +library(faux)
-> ##GREV <- c(510, 670, 440, 800, 750, 420, 830, 470, 690, 600)+
  
 +> set.seed(101)
 +> scholar <- rnorm_multi(n = 50, 
 ++                        mu = c(3.12, 3.3, 540, 650),
 ++                        sd = c(.25, .34, 12, 13),
 ++                        r = c(0.15, 0.44, 0.47, 0.55, 0.45, 0.88), 
 ++                        varnames = c("HSGPA", "FGPA", "SATV", "GREV"),
 ++                        empirical = FALSE)
 +> attach(scholar)
 +The following objects are masked from scholar (pos = 3):
 +
 +    FGPA, GREV, HSGPA, SATV
 +
  
-scholar <- data.frame(HSGPA, FGPA, SATV, GREV) collect into a data frame +> # library(psych)
-> # install.packages("psych"+
-library(psych)+
 > describe(scholar) # provides descrptive information about each variable > describe(scholar) # provides descrptive information about each variable
-      vars  n   mean     sd median trimmed    mad   min   max range  skew +      vars  n   mean    sd median trimmed   mad    min    max range  skew 
-HSGPA    1 10   3.28   0.46   3.20    3.30   0.52   2.  3.9   1.-0.08 +HSGPA    1 50   3.13  0.24   3.11    3.13  0.16   2.35   3.62  1.26 -0.42 
-FGPA     10   3.09   0.44   3.15    3.12   0.44   2.  3.7   1.5 -0.50 +FGPA     50   3.34  0.35   3.32    3.33  0.33   2.50   4.19  1.68  0.27 
-SATV     10 560.00  93.69 550.00  562.50 111.19 400.0 700.0 300.0 -0.17 +SATV     50 541.28 11.43 538.45  540.50 10.85 523.74 567.97 44.24  0.58 
-GREV     10 697.00 100.67 680.00  700.00 118.61 540.0 830.0 290.0 -0.03 +GREV     50 651.72 11.90 649.70  651.29 10.55 629.89 678.33 48.45  0.35 
-      kurtosis    se +      kurtosis   se 
-HSGPA    -1.43  0.15 +HSGPA     1.21 0.03 
-FGPA     -0.77  0.14 +FGPA     -0.01 0.05 
-SATV     -1.27 29.63 +SATV     -0.60 1.62 
-GREV     -1.59 31.83+GREV     -0.54 1.68
  
 > corrs <- cor(scholar) # find the correlations and set them into an object called 'corrs' > corrs <- cor(scholar) # find the correlations and set them into an object called 'corrs'
 > corrs                 # print corrs > corrs                 # print corrs
        HSGPA   FGPA   SATV   GREV        HSGPA   FGPA   SATV   GREV
-HSGPA 1.0000 0.9226 0.8745 0.4007 +HSGPA 1.0000 0.3404 0.4627 0.5406 
-FGPA  0.9226 1.0000 0.8144 0.1906 +FGPA  0.3404 1.0000 0.5266 0.5096 
-SATV  0.8745 0.8144 1.0000 0.4630 +SATV  0.4627 0.5266 1.0000 0.8802 
-GREV  0.4007 0.1906 0.4630 1.0000+GREV  0.5406 0.5096 0.8802 1.0000
  
 > pairs(scholar)        # pairwise scatterplots > pairs(scholar)        # pairwise scatterplots
  
-> attach(scholar) 
-The following objects are masked _by_ .GlobalEnv: 
- 
-    FGPA, GREV, HSGPA, SATV 
- 
-The following objects are masked from scholar (pos = 3): 
- 
-    FGPA, GREV, HSGPA, SATV 
- 
-The following objects are masked from scholar (pos = 4): 
- 
-    FGPA, GREV, HSGPA, SATV 
- 
 > # install.packages("ppcor") > # install.packages("ppcor")
 > library(ppcor) > library(ppcor)
-> pcor.test(GREV, FGPA, scholar[,c("SATV", "HSGPA")]) # working 
-  estimate p.value statistic  n gp  Method 
-1   -0.535  0.1719    -1.551 10  2 pearson 
  
 > reg.f.sh <- lm(FGPA ~ SATV + HSGPA)   # second regression > reg.f.sh <- lm(FGPA ~ SATV + HSGPA)   # second regression
Line 539: Line 644:
 Residuals: Residuals:
     Min      1Q  Median      3Q     Max      Min      1Q  Median      3Q     Max 
--102.82  -55.69   -5.59   27.85  123.89 +-13.541  -3.441   0.148   4.823   7.796 
  
 Coefficients: Coefficients:
-            Estimate Std. Error t value Pr(>|t|)   +            Estimate Std. Error t value Pr(>|t|)     
-(Intercept)  487.549    224.048    2.18    0.072 +(Intercept) 180.2560    40.3988    4.46  5.2e-05 *** 
-HSGPA        235.039    205.978    1.14    0.297   +HSGPA         8.3214     3.8050    2.19    0.034 *   
-FGPA        -282.595    182.188   -1.55    0.172   +FGPA          1.3994     2.6311    0.53    0.597     
-SATV           0.557      0.678    0.82    0.443  +SATV          0.8143     0.0867    9.40  2.8e-12 ***
 --- ---
 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
  
-Residual standard error: 92.on degrees of freedom +Residual standard error: 5.51 on 46 degrees of freedom 
-Multiple R-squared:  0.439, Adjusted R-squared:  0.159  +Multiple R-squared:  0.799, Adjusted R-squared:  0.786  
-F-statistic: 1.57 on 3 and DF,  p-value: 0.292+F-statistic: 60.on 3 and 46 DF,  p-value: 4.84e-16
  
 > summary(reg.1) > summary(reg.1)
Line 561: Line 666:
 Residuals: Residuals:
    Min     1Q Median     3Q    Max     Min     1Q Median     3Q    Max 
--119. -88.7   22.  51. 129.+-21.76  -8.65  -2.08   7.83  26.10 
  
 Coefficients: Coefficients:
             Estimate Std. Error t value Pr(>|t|)                 Estimate Std. Error t value Pr(>|t|)    
-(Intercept)    697.      29.7   23.45  1.2e-08 *** +(Intercept)   651.72       1.70  383.59   <2e-16 *** 
-res.f         -282.6      185.5   -1.52     0.17    +res.f           1.40       5.74    0.24     0.81    
 --- ---
 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
  
-Residual standard error: 94 on degrees of freedom +Residual standard error: 12 on 48 degrees of freedom 
-Multiple R-squared:  0.225, Adjusted R-squared:  0.128  +Multiple R-squared:  0.00124, Adjusted R-squared:  -0.0196  
-F-statistic: 2.32 on 1 and DF,  p-value: 0.166+F-statistic: 0.0595 on 1 and 48 DF,  p-value: 0.808
  
 > summary(reg.2) > summary(reg.2)
Line 580: Line 685:
  
 Residuals: Residuals:
-    Min      1Q  Median      3Q     Max  +   Min     1Q Median     3Q    Max  
--142.19  -77.25   -2.43   93.23  122.56 +-22.54  -4.94  -1.24   6.08  20.35 
  
 Coefficients: Coefficients:
             Estimate Std. Error t value Pr(>|t|)                 Estimate Std. Error t value Pr(>|t|)    
-(Intercept)  697.000     32.684   21.33  2.5e-08 *** +(Intercept)  651.715      1.332   489. < 2e-16 *** 
-res.s          0.557      0.759    0.73     0.48    +res.s          0.814      0.148     5.5  1.4e-06 ***
 --- ---
 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
  
-Residual standard error: 103 on degrees of freedom +Residual standard error: 9.42 on 48 degrees of freedom 
-Multiple R-squared:  0.063, Adjusted R-squared:  -0.0541  +Multiple R-squared:  0.386, Adjusted R-squared:  0.374  
-F-statistic: 0.538 on 1 and DF,  p-value: 0.484+F-statistic: 30.on 1 and 48 DF,  p-value: 1.45e-06
  
 > summary(reg.3) > summary(reg.3)
Line 600: Line 705:
  
 Residuals: Residuals:
-    Min      1Q  Median      3Q     Max  +   Min     1Q Median     3Q    Max  
--136.04  -81.02    0.66   77.89  121.45 +-22.71  -9.32  -1.30   7.92  26.43 
  
 Coefficients: Coefficients:
             Estimate Std. Error t value Pr(>|t|)                 Estimate Std. Error t value Pr(>|t|)    
-(Intercept)    697.      31.6   22.03  1.9e-08 *** +(Intercept)   651.72       1.68  387.43   <2e-16 *** 
-res.h          235.0      223.   1.05     0.32    +res.h           8.32       8.21    1.01     0.32    
 --- ---
 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
  
-Residual standard error: 100 on degrees of freedom +Residual standard error: 11.9 on 48 degrees of freedom 
-Multiple R-squared:  0.122, Adjusted R-squared:  0.0119  +Multiple R-squared:  0.0209, Adjusted R-squared:  0.000538  
-F-statistic: 1.11 on 1 and DF,  p-value: 0.323 +F-statistic: 1.03 on 1 and 48 DF,  p-value: 0.316
->  +
-+
-</code> +
-{{:pasted:20241015-132339.png}} +
----- +
----- +
-학자인 A는 GRE점수는 (GREV) 학점에 신경을 쓰는 활동보다는 지능지수와 관련된다고 믿는 SATV의 영향력이 더 클것으로 생각된다. 그래서 SATV만의 영향력을 다른 변인을 콘트롤하여 살펴보고 싶다.  +
-<code> +
-pcor.test(scholar$GREV, scholar$SATV, scholar[, c("HSGPA", "FGPA")])+
  
-reg7 <- lm(GREV ~ HSGPA + FGPA)   # run linear regression 
-resid7 <- resid(reg7)     # find the residuals - HSGPA free of SATV 
- 
-reg8 <- lm(SATV ~ HSGPA+ FGPA)   # second regression 
-resid8 <- resid(reg8)     # second set of residuals - FGPA free of SATV 
- 
-cor(resid7, resid8)       # correlation of residuals - partial correlation 
- 
-</code> 
- 
-<code> 
-> pcor.test(scholar$GREV, scholar$SATV, scholar[, c("HSGPA", "FGPA")]) 
-  estimate p.value statistic  n gp  Method 
-1   0.3179  0.4429    0.8213 10  2 pearson 
  
-reg7 <- lm(GREV ~ HSGPA + FGPA)   # run linear regression +reg.1$coefficient[2] 
-resid7 <- resid(reg7)     # find the residuals - HSGPA free of SATV+res.f  
 +1.399  
 +reg.2$coefficient[2] 
 + res.s  
 +0.8143  
 +> reg.3$coefficient[2] 
 +res.h  
 +8.321 
  
-reg8 <- lm(SATV HSGPA+ FGPA  # second regression +spr.y.f <- spcor.test(GREV, FGPA, scholar[,c("SATV", "HSGPA")]
-resid8 <- resid(reg8    # second set of residuals FGPA free of SATV+spr.y.s <- spcor.test(GREV, SATV, scholar[,c("HSGPA", "FGPA")]) 
 +> spr.y.h <spcor.test(GREV, HSGPA, scholar[,c("SATV", "FGPA")])
  
-cor(resid7, resid8)       # correlation of residuals - partial correlation +spr.y.f$estimate 
-[1] 0.3179+[1] 0.03519 
 +> spr.y.s$estimate 
 +[1] 0.6217 
 +> spr.y.h$estimate 
 +[1] 0.1447
  
 +> spr.y.f$estimate^2
 +[1] 0.001238
 +> spr.y.s$estimate^2
 +[1] 0.3865
 +> spr.y.h$estimate^2
 +[1] 0.02094
 +
 +> summary(reg.1)$r.square
 +[1] 0.001238
 +> summary(reg.2)$r.square
 +[1] 0.3865
 +> summary(reg.3)$r.square
 +[1] 0.02094
 +
 +> ca <- summary(reg.1)$r.square + 
 ++   summary(reg.2)$r.square + 
 ++   summary(reg.3)$r.square
 +> # so common explanation area should be
 +> summary(reg.all)$r.square - ca
 +[1] 0.39
  
 </code> </code>
 +{{:pasted:20241016-080226.png}}
 +
 +multiple regression 분석을 보면 독립변인의 coefficient 값은 각각 
 +  * HSGPA         8.3214 
 +  * FGPA          1.3994
 +  * SATV          0.8143
 +이 기울기에 대해서 t-test를 각각 하여 HSGPA와 FGPA의 설명력이 significant 한지를 확인하였다. 그리고 이 때의 R<sup>2</sup> 값은 
 +  * 0.799 이었다. 
 +그런데 이 coefficient값은 독립변인 각각의 고유의 설명력을 가지고 (spcor.test(GREV, x1, 나머지제어)로 얻은 부분) 종속변인에 대해서 regression을 하여 얻은 coefficient값과 같음을 알 수 있다. 즉, <fc #ff0000>multiple regression의 독립변인의 b coefficient 값들은 고유의 설명부분을 (spr) 추출해서 y에 (GREV) regression한 결과와 같음을</fc> 알 수 있다. 
  
 +또한 세 독립변인이 공통적으로 설명하는 부분은 
 +  * 0.39 
 +임을 알 수 있다. 
 ====== e.g., 독립변인 들이 서로 독립적일 때의 각각의 설명력 ====== ====== e.g., 독립변인 들이 서로 독립적일 때의 각각의 설명력 ======
 In this example, the two IVs are orthogonal to each other (not correlated with each other). Hence, regress res.y.x2 against x1 would not result in any problem.  In this example, the two IVs are orthogonal to each other (not correlated with each other). Hence, regress res.y.x2 against x1 would not result in any problem. 
partial_and_semipartial_correlation.1728966220.txt.gz · Last modified: 2024/10/15 13:23 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki