sequential_regression
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
sequential_regression [2020/11/26 08:35] – [r] hkimscil | sequential_regression [2024/06/12 08:30] (current) – [r] hkimscil | ||
---|---|---|---|
Line 1: | Line 1: | ||
+ | ====== Sequential or Hierarchical regression ====== | ||
+ | 연구자가 판단하여 독립변인들 중 필요한 것들을 묶어서 스테이지 별로 (단계 별) 넣고 분석하는 것을 말한다. Stepwise regression은 이를 컴퓨터나 계산방법을 통하여 수행하게 된다. | ||
====== 데이터 ====== | ====== 데이터 ====== | ||
^ DATA for regression analysis | ^ DATA for regression analysis | ||
Line 44: | Line 46: | ||
The below is just an exercise for figuring out the unique part of r< | The below is just an exercise for figuring out the unique part of r< | ||
- | | zero-order | + | | zero-order |
| x1 | x2 | x1p | x2p | | | x1 | x2 | x1p | x2p | | ||
| .794 | -.692 | .565 | -.409 | | | .794 | -.692 | .565 | -.409 | | ||
| zero-order square | | zero-order square | ||
- | | x1 sq (x1sq) | x2 sq (x1sq) | x1 part sq (x1psq) | x2 part sq (x1psq) | + | | x1 zsq (x1zsq) | x2 zsq (x2zsq) | x1 semi-partial (or part) sq (x1spsq) | x2 semi-partial (or part) sq (x2spsq) | |
| .630436 | | .630436 | ||
| a+b / a+b+c+d | | a+b / a+b+c+d | ||
- | x1sq - x1psq | + | x1zsq - x1spsq |
0.311211 ~= 0.311583 | 0.311211 ~= 0.311583 | ||
+ | |||
+ | 아래는 R에서 계산한 것 |
+ | < | ||
+ | > .794^2 - .565^2 | ||
+ | [1] 0.3112 | ||
+ | > .692^2 - .409^2 | ||
+ | [1] 0.3116 | ||
+ | </ | ||
R에서 보는 예는 아래를 참조 | R에서 보는 예는 아래를 참조 | ||
Line 230: | Line 240: | ||
| famnum | | famnum | ||
+ | < | ||
+ | sp.b.i <- 0.5646726 ## (1) | ||
+ | c.b.i <- 0.7944312 ## (3) | ||
- | sp.b.i <- 0.5646726 | + | sp.b.f <- -0.4086619 |
- | c.b.i <- 0.7944312 | + | c.b.f <- -0.6922935 |
- | + | ||
- | sp.b.f <- -0.4086619 | + | |
- | c.b.f <- -0.6922935 | + | |
c.b.i.sq <- c.b.i^2 ## (3)^2 | c.b.i.sq <- c.b.i^2 ## (3)^2 | ||
- | sp.b.i.sq <- sp.b.i^2 | + | sp.b.i.sq <- sp.b.i^2 |
c.b.i.sq - sp.b.i.sq | c.b.i.sq - sp.b.i.sq | ||
c.b.f.sq <- c.b.f^2 ## (4)^2 | c.b.f.sq <- c.b.f^2 ## (4)^2 | ||
- | sp.b.f.sq <- sp.b.f^2 | + | sp.b.f.sq <- sp.b.f^2 |
c.b.f.sq - sp.b.f.sq | c.b.f.sq - sp.b.f.sq | ||
+ | </ | ||
< | < | ||
- | pcor.test(datavar$bankaccount, | + | > sp.b.i <- 0.5646726 |
- | pcor.test(datavar$bankaccount, | + | > c.b.i <- 0.7944312 |
+ | > | ||
+ | > sp.b.f <- -0.4086619 | ||
+ | > c.b.f <- -0.6922935 | ||
+ | > | ||
+ | > c.b.i.sq <- c.b.i^2 ## (3)^2 | ||
+ | > sp.b.i.sq <- sp.b.i^2 | ||
+ | > | ||
+ | > c.b.i.sq - sp.b.i.sq | ||
+ | [1] 0.3123 | ||
+ | > | ||
+ | > c.b.f.sq <- c.b.f^2 ## (4)^2 | ||
+ | > sp.b.f.sq <- sp.b.f^2 | ||
+ | > | ||
+ | > c.b.f.sq - sp.b.f.sq | ||
+ | [1] 0.3123 | ||
+ | </ | ||
+ | |||
+ | 0.3123 가 두 독립변인이 DV에 같이 (공히) 미치는 영향력 분량이다. | ||
+ | |||
+ | < | ||
+ | p.b.i <- pcor.test(datavar$bankaccount, |
+ | p.b.i | ||
+ | p.b.i$estimate | ||
+ | |||
+ | p.b.f <- pcor.test(datavar$bankaccount, | ||
+ | p.b.f | ||
+ | p.b.f$estimate | ||
+ | |||
+ | sp.b.i <- spcor.test(datavar$bankaccount, | ||
+ | sp.b.i | ||
+ | sp.b.i$estimate | ||
+ | sp.b.f <- spcor.test(datavar$bankaccount, | ||
+ | sp.b.f | ||
+ | sp.b.f$estimate | ||
+ | |||
+ | |||
+ | zc.b.i <- cor(datavar$bankaccount, | ||
+ | zc.b.i | ||
+ | zc.b.f <- cor(datavar$bankaccount, | ||
+ | zc.b.f | ||
+ | |||
+ | zc.b.i^2 - (sp.b.i$estimate)^2 | ||
+ | zc.b.f^2 - (sp.b.f$estimate)^2 | ||
- | spcor.test(datavar$bankaccount, | ||
- | spcor.test(datavar$bankaccount, | ||
</ | </ | ||
. . . | . . . | ||
< | < | ||
- | > pcor.test(datavar$bankaccount, | + | > p.b.i <- pcor.test(datavar$bankaccount, |
- | | + | > p.b.i |
- | 1 0.7825112 | + | |
- | > pcor.test(datavar$bankaccount, | + | 1 |
- | | + | > p.b.i$estimate |
- | 1 -0.672856 | + | [1] 0.7825 |
- | > | + | > |
- | > spcor.test(datavar$bankaccount, | + | > p.b.f <- pcor.test(datavar$bankaccount, |
- | estimate | + | > p.b.f |
- | 1 0.5646726 | + | |
- | > spcor.test(datavar$bankaccount, | + | 1 -0.6729 0.04702 |
- | estimate | + | > p.b.f$estimate |
- | 1 -0.4086619 | + | [1] -0.6729 |
+ | > | ||
+ | > sp.b.i <- spcor.test(datavar$bankaccount, | ||
+ | > sp.b.i | ||
+ | estimate | ||
+ | 1 | ||
+ | > sp.b.i$estimate | ||
+ | [1] 0.5647 | ||
+ | > sp.b.f <- spcor.test(datavar$bankaccount, | ||
+ | > sp.b.f | ||
+ | | ||
+ | 1 -0.4087 | ||
+ | > sp.b.f$estimate | ||
+ | [1] -0.4087 | ||
+ | > | ||
+ | > | ||
+ | > zc.b.i <- cor(datavar$bankaccount, | ||
+ | > zc.b.i | ||
+ | [1] 0.7944 | ||
+ | > zc.b.f <- cor(datavar$bankaccount, | ||
+ | > zc.b.f | ||
+ | [1] -0.6923 | ||
+ | > | ||
+ | > zc.b.i^2 - (sp.b.i$estimate)^2 | ||
+ | [1] 0.3123 | ||
+ | > zc.b.f^2 - (sp.b.f$estimate)^2 | ||
+ | [1] 0.3123 | ||
+ | > | ||
> | > | ||
> | > | ||
- | |||
</ | </ | ||
+ | |||
+ | ====== e.g. 3. College enrollment at the University of New Mexico ====== |
+ | < | ||
+ | > datavar <- read.csv(" | ||
+ | > str(datavar) | ||
+ | ' | ||
+ | $ YEAR : int 1 2 3 4 5 6 7 8 9 10 ... | ||
+ | $ ROLL : int 5501 5945 6629 7556 8716 9369 9920 10167 11084 12504 ... | ||
+ | $ UNEM : num 8.1 7 7.3 7.5 7 6.4 6.5 6.4 6.3 7.7 ... | ||
+ | $ HGRAD: int 9552 9680 9731 11666 14675 15265 15484 15723 16501 16890 ... | ||
+ | $ INC : int 1923 1961 1979 2030 2112 2192 2235 2351 2411 2475 ... | ||
+ | > | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | onePredictorModel <- lm(ROLL ~ UNEM, data = datavar) | ||
+ | twoPredictorModel <- lm(ROLL ~ UNEM + HGRAD, data = datavar) | ||
+ | threePredictorModel <- lm(ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | summary(twoPredictorModel) | ||
+ | summary(threePredictorModel) | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM, data = datavar) | ||
+ | |||
+ | Residuals: | ||
+ | Min 1Q Median | ||
+ | -7640.0 -1046.5 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) | ||
+ | UNEM 1133.8 | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 3049 on 27 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD, data = datavar) | ||
+ | |||
+ | Residuals: | ||
+ | Min 1Q Median | ||
+ | -2102.2 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) -8.256e+03 | ||
+ | UNEM | ||
+ | HGRAD 9.423e-01 | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 1313 on 26 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > </ | ||
+ | < | ||
+ | > summary(threePredictorModel) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
+ | |||
+ | Residuals: | ||
+ | | ||
+ | -1148.84 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) -9.153e+03 | ||
+ | UNEM | ||
+ | HGRAD 4.065e-01 | ||
+ | INC 4.275e+00 | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 670.4 on 25 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | </ | ||
+ | |||
+ | < | ||
+ | Analysis of Variance Table | ||
+ | |||
+ | Model 1: ROLL ~ UNEM | ||
+ | Model 2: ROLL ~ UNEM + HGRAD | ||
+ | Model 3: ROLL ~ UNEM + HGRAD + INC | ||
+ | Res.Df | ||
+ | 1 27 251084710 | ||
+ | 2 | ||
+ | 3 | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | > | ||
+ | </ | ||
+ | |||
+ | ====== e.g. 4. Happiness | ||
+ | {{: | ||
+ | |||
+ | < | ||
+ | # Import data (simulated data for this example) | ||
+ | # myData <- read.csv(' | ||
+ | myData <- read.csv(" | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > str(myData) | ||
+ | ' | ||
+ | $ happiness: int 5 5 6 4 3 5 5 5 4 4 ... | ||
+ | $ age : int 24 28 25 26 20 25 24 24 26 26 ... | ||
+ | $ gender | ||
+ | $ friends | ||
+ | $ pets : int 3 1 0 2 0 0 5 2 1 4 ... | ||
+ | > myData$gender <- factor(myData$gender) | ||
+ | > str(myData) | ||
+ | ' | ||
+ | $ happiness: int 5 5 6 4 3 5 5 5 4 4 ... | ||
+ | $ age : int 24 28 25 26 20 25 24 24 26 26 ... | ||
+ | $ gender | ||
+ | $ friends | ||
+ | $ pets : int 3 1 0 2 0 0 5 2 1 4 ... | ||
+ | > | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > m0 <- lm(happiness ~ 1, data = myData) | ||
+ | > anova(m0) | ||
+ | Analysis of Variance Table | ||
+ | |||
+ | Response: happiness | ||
+ | Df Sum Sq Mean Sq F value Pr(>F) | ||
+ | Residuals 99 240.84 | ||
+ | > | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | # 불필요하지만 위의 분석이 variance와 | ||
+ | # 같은 것이라는 것을 아래처럼 확인한다. | ||
+ | > attach(myData) | ||
+ | The following objects are masked from myData (pos = 3): | ||
+ | |||
+ | age, friends, gender, happiness, pets | ||
+ | |||
+ | > var(happiness) | ||
+ | [1] 2.432727 | ||
+ | > length(happiness) | ||
+ | [1] 100 | ||
+ | > df.happiness <- length(happiness) - 1 | ||
+ | > df.happiness # degrees of freedom | ||
+ | [1] 99 | ||
+ | > ss.happiness <- var(happiness)* df.happiness # sum of square (ss) value for happiness variable | ||
+ | > ss.happiness | ||
+ | [1] 240.84 | ||
+ | > | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > m1 <- lm(happiness ~ age + gender, data=myData) | ||
+ | > summary(m1) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = happiness ~ age + gender, data = myData) | ||
+ | |||
+ | Residuals: | ||
+ | Min 1Q Median | ||
+ | -3.6688 -1.0094 -0.1472 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) | ||
+ | age | ||
+ | genderMale | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 1.553 on 97 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | # m1은 이미 위에서 실행 | ||
+ | > m2 <- lm(happiness ~ age + gender + friends, data=myData) | ||
+ | > m3 <- lm(happiness ~ age + gender + friends + pets, data = myData) # Model 3: Adding pets variable | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > anova(m1, m2, m3) | ||
+ | Analysis of Variance Table | ||
+ | |||
+ | Model 1: happiness ~ age + gender | ||
+ | Model 2: happiness ~ age + gender + friends | ||
+ | Model 3: happiness ~ age + gender + friends + pets | ||
+ | Res.Df | ||
+ | 1 97 233.97 | ||
+ | 2 96 209.27 | ||
+ | 3 95 193.42 | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | > | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > summary(m1) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = happiness ~ age + gender, data = myData) | ||
+ | |||
+ | Residuals: | ||
+ | Min 1Q Median | ||
+ | -3.6688 -1.0094 -0.1472 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) | ||
+ | age | ||
+ | genderMale | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 1.553 on 97 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > summary(m2) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = happiness ~ age + gender + friends, data = myData) | ||
+ | |||
+ | Residuals: | ||
+ | Min 1Q Median | ||
+ | -3.5758 -1.0204 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) | ||
+ | age | ||
+ | genderMale | ||
+ | friends | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 1.476 on 96 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > summary(m3) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = happiness ~ age + gender + friends + pets, data = myData) | ||
+ | |||
+ | Residuals: | ||
+ | Min 1Q Median | ||
+ | -3.0556 -1.0183 -0.1109 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) | ||
+ | age | ||
+ | genderMale | ||
+ | friends | ||
+ | pets | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 1.427 on 95 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > | ||
+ | </ | ||
+ | |||
+ | Report in research paper | ||
+ | {{: | ||
+ | {{: | ||
+ | |||
+ | ====== e.g. 5: Stock Market ====== | ||
+ | see [[: | ||
+ | |||
+ | ====== e.g. 6: SWISS ====== | ||
+ | |||
+ |
sequential_regression.1606347312.txt.gz · Last modified: 2020/11/26 08:35 by hkimscil