sequential_regression
                Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
| sequential_regression [2020/11/26 08:35] – [r] hkimscil | sequential_regression [2024/06/12 08:30] (current) – [r] hkimscil | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| + | ====== Sequential or Hierarchical regression ====== | ||
| + | Sequential (hierarchical) regression은 연구자가 이론적 판단에 따라 독립변인들 중 필요한 것들을 묶어서 단계(스테이지)별로 투입하며 분석하는 것을 말한다. 반면 Stepwise regression은 이러한 변인 투입을 연구자의 판단이 아니라 컴퓨터의 계산 기준에 따라 수행한다. | ||
| ====== 데이터 ====== | ====== 데이터 ====== | ||
| ^  DATA for regression analysis  | ^  DATA for regression analysis  | ||
| Line 44: | Line 46: | ||
| The below is just an exercise for figuring out the unique part of r< | The below is just an exercise for figuring out the unique part of r< | ||
| - | |  zero-order  | + | |  zero-order  | 
| | x1 | x2 | x1p | x2p | | | x1 | x2 | x1p | x2p | | ||
| | .794 | -.692 | .565 | -.409 | | | .794 | -.692 | .565 | -.409 | | ||
| |  zero-order square  | |  zero-order square  | ||
| - | | x1 sq (x1sq) | x2 sq (x1sq) | x1 part sq (x1psq) | x2 part sq (x1psq) | | + | | x1 zsq (x1zsq) | x2 zsq (x2zsq) | x1 semi-partial (or part) sq (x1spsq) | x2 semi-partial (or part) sq (x2spsq) | |
| | .630436  | | .630436  | ||
| | a+b / a+b+c+d  | | a+b / a+b+c+d  | ||
| - | x1sq - x1psq   | + | x1zsq - x1spsq  | 
| 0.311211 ~= 0.311583 | 0.311211 ~= 0.311583 | ||
| + | |||
| + | 아래는 R에서 계산한 것이다. | ||
| + | < | ||
| + | > .794^2 - .565^2 | ||
| + | [1] 0.3112 | ||
| + | > .692^2 - .409^2 | ||
| + | [1] 0.3116 | ||
| + | </ | ||
| R에서 보는 예는 아래를 참조 | R에서 보는 예는 아래를 참조 | ||
| Line 230: | Line 240: | ||
| | famnum  | | famnum  | ||
| + | < | ||
| + | sp.b.i <- 0.5646726 ## (1) | ||
| + | c.b.i <- 0.7944312 ## (3) | ||
| - | sp.b.i <- 0.5646726 | + | sp.b.f <- -0.4086619  | 
| - | c.b.i <- 0.7944312 | + | c.b.f <- -0.6922935  | 
| - | + | ||
| - | sp.b.f <- -0.4086619 | + | |
| - | c.b.f <- -0.6922935 | + | |
| c.b.i.sq <- c.b.i^2 ## (3)^2 | c.b.i.sq <- c.b.i^2 ## (3)^2 | ||
| - | sp.b.i.sq <- sp.b.i^2 | + | sp.b.i.sq <- sp.b.i^2  | 
| c.b.i.sq - sp.b.i.sq  | c.b.i.sq - sp.b.i.sq  | ||
| c.b.f.sq <- c.b.f^2 ## (4)^2 | c.b.f.sq <- c.b.f^2 ## (4)^2 | ||
| - | sp.b.f.sq <- sp.b.f^2 | + | sp.b.f.sq <- sp.b.f^2  | 
| c.b.f.sq - sp.b.f.sq | c.b.f.sq - sp.b.f.sq | ||
| + | </ | ||
| < | < | ||
| - | pcor.test(datavar$bankaccount, | + | > sp.b.i <- 0.5646726 | 
| - | pcor.test(datavar$bankaccount, | + | > c.b.i <- 0.7944312 | 
| + | > | ||
| + | > sp.b.f <- -0.4086619 | ||
| + | > c.b.f <- -0.6922935 | ||
| + | > | ||
| + | > c.b.i.sq <- c.b.i^2 ## (3)^2 | ||
| + | > sp.b.i.sq <- sp.b.i^2 | ||
| + | > | ||
| + | > c.b.i.sq - sp.b.i.sq | ||
| + | [1] 0.3123 | ||
| + | > | ||
| + | > c.b.f.sq <- c.b.f^2 ## (4)^2 | ||
| + | > sp.b.f.sq <- sp.b.f^2 | ||
| + | > | ||
| + | > c.b.f.sq - sp.b.f.sq | ||
| + | [1] 0.3123 | ||
| + | </ | ||
| + | |||
| + | 0.3123이 두 독립변인이 DV에 공통으로 (공히) 미치는 영향력의 분량이다. | ||
| + | |||
| + | < | ||
| + | p.b.i <- pcor.test(datavar$bankaccount, | ||
| + | p.b.i | ||
| + | p.b.i$estimate | ||
| + | |||
| + | p.b.f <- pcor.test(datavar$bankaccount, | ||
| + | p.b.f | ||
| + | p.b.f$estimate | ||
| + | |||
| + | sp.b.i <- spcor.test(datavar$bankaccount, | ||
| + | sp.b.i | ||
| + | sp.b.i$estimate | ||
| + | sp.b.f <- spcor.test(datavar$bankaccount, | ||
| + | sp.b.f | ||
| + | sp.b.f$estimate | ||
| + | |||
| + | |||
| + | zc.b.i <- cor(datavar$bankaccount, | ||
| + | zc.b.i | ||
| + | zc.b.f <- cor(datavar$bankaccount, | ||
| + | zc.b.f | ||
| + | |||
| + | zc.b.i^2 - (sp.b.i$estimate)^2 | ||
| + | zc.b.f^2 - (sp.b.f$estimate)^2 | ||
| - | spcor.test(datavar$bankaccount, | ||
| - | spcor.test(datavar$bankaccount, | ||
| </ | </ | ||
| . . . | . . . | ||
| < | < | ||
| - | > pcor.test(datavar$bankaccount, | + | > p.b.i <- pcor.test(datavar$bankaccount, |
| - |     | + | > p.b.i | 
| - | 1 0.7825112  | + |    | 
| - | > pcor.test(datavar$bankaccount, | + | 1    | 
| - |     | + | > p.b.i$estimate | 
| - | 1 -0.672856  | + | [1] 0.7825 | 
| - | > | + | > | 
| - | > spcor.test(datavar$bankaccount, | + | > p.b.f <- pcor.test(datavar$bankaccount, | 
| - |    estimate  | + | > p.b.f | 
| - | 1 0.5646726  | + |    | 
| - | > spcor.test(datavar$bankaccount, | + | 1  -0.6729 0.04702     | 
| - |     estimate  | + | > p.b.f$estimate | 
| - | 1 -0.4086619  | + | [1] -0.6729 | 
| + | > | ||
| + | > sp.b.i <- spcor.test(datavar$bankaccount, | ||
| + | > sp.b.i | ||
| + |   estimate  | ||
| + | 1    | ||
| + | > sp.b.i$estimate | ||
| + | [1] 0.5647 | ||
| + | > sp.b.f <- spcor.test(datavar$bankaccount, | ||
| + | > sp.b.f | ||
| + |    | ||
| + | 1  -0.4087   | ||
| + | > sp.b.f$estimate | ||
| + | [1] -0.4087 | ||
| + | > | ||
| + | > | ||
| + | > zc.b.i <- cor(datavar$bankaccount, | ||
| + | > zc.b.i | ||
| + | [1] 0.7944 | ||
| + | > zc.b.f <- cor(datavar$bankaccount, | ||
| + | > zc.b.f | ||
| + | [1] -0.6923 | ||
| + | > | ||
| + | > zc.b.i^2 - (sp.b.i$estimate)^2 | ||
| + | [1] 0.3123 | ||
| + | > zc.b.f^2 - (sp.b.f$estimate)^2 | ||
| + | [1] 0.3123 | ||
| + | > | ||
| > | > | ||
| > | > | ||
| - | |||
| </ | </ | ||
| + | |||
| + | ====== e.g. 3. College enrollment in New Mexico University ====== | ||
| + | < | ||
| + | > datavar <- read.csv(" | ||
| + | > str(datavar) | ||
| + | ' | ||
| + | $ YEAR : int 1 2 3 4 5 6 7 8 9 10 ... | ||
| + | $ ROLL : int 5501 5945 6629 7556 8716 9369 9920 10167 11084 12504 ... | ||
| + | $ UNEM : num 8.1 7 7.3 7.5 7 6.4 6.5 6.4 6.3 7.7 ... | ||
| + | $ HGRAD: int 9552 9680 9731 11666 14675 15265 15484 15723 16501 16890 ... | ||
| + | $ INC : int 1923 1961 1979 2030 2112 2192 2235 2351 2411 2475 ... | ||
| + | > | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | onePredictorModel <- lm(ROLL ~ UNEM, data = datavar) | ||
| + | twoPredictorModel <- lm(ROLL ~ UNEM + HGRAD, data = datavar) | ||
| + | threePredictorModel <- lm(ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | summary(onePredictorModel) | ||
| + | summary(twoPredictorModel) | ||
| + | summary(threePredictorModel) | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | |||
| + | Call: | ||
| + | lm(formula = ROLL ~ UNEM, data = datavar) | ||
| + | |||
| + | Residuals: | ||
| + |     Min      1Q  Median  | ||
| + | -7640.0 -1046.5  | ||
| + | |||
| + | Coefficients: | ||
| + |             Estimate Std. Error t value Pr(> | ||
| + | (Intercept)  | ||
| + | UNEM          1133.8  | ||
| + | --- | ||
| + | Signif. codes:  | ||
| + | |||
| + | Residual standard error: 3049 on 27 degrees of freedom | ||
| + | Multiple R-squared:  | ||
| + | F-statistic: | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | |||
| + | Call: | ||
| + | lm(formula = ROLL ~ UNEM + HGRAD, data = datavar) | ||
| + | |||
| + | Residuals: | ||
| + |     Min      1Q  Median  | ||
| + | -2102.2  | ||
| + | |||
| + | Coefficients: | ||
| + |               Estimate Std. Error t value Pr(> | ||
| + | (Intercept) -8.256e+03  | ||
| + | UNEM          | ||
| + | HGRAD        9.423e-01  | ||
| + | --- | ||
| + | Signif. codes:  | ||
| + | |||
| + | Residual standard error: 1313 on 26 degrees of freedom | ||
| + | Multiple R-squared:  | ||
| + | F-statistic: | ||
| + | |||
| + | > </ | ||
| + | < | ||
| + | > summary(threePredictorModel) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
| + | |||
| + | Residuals: | ||
| + |       | ||
| + | -1148.84  | ||
| + | |||
| + | Coefficients: | ||
| + |               Estimate Std. Error t value Pr(> | ||
| + | (Intercept) -9.153e+03  | ||
| + | UNEM          | ||
| + | HGRAD        4.065e-01  | ||
| + | INC          4.275e+00  | ||
| + | --- | ||
| + | Signif. codes:  | ||
| + | |||
| + | Residual standard error: 670.4 on 25 degrees of freedom | ||
| + | Multiple R-squared:  | ||
| + | F-statistic: | ||
| + | |||
| + | </ | ||
| + | |||
| + | < | ||
| + | > anova(onePredictorModel, twoPredictorModel, threePredictorModel) | ||
| + | Analysis of Variance Table | ||
| + | |||
| + | Model 1: ROLL ~ UNEM | ||
| + | Model 2: ROLL ~ UNEM + HGRAD | ||
| + | Model 3: ROLL ~ UNEM + HGRAD + INC | ||
| + |   Res.Df  | ||
| + | 1     27 251084710  | ||
| + | 2      | ||
| + | 3      | ||
| + | --- | ||
| + | Signif. codes:  | ||
| + | > | ||
| + | </ | ||
| + | |||
| + | ====== e.g. 4. Happiness ====== | ||
| + | {{: | ||
| + | |||
| + | < | ||
| + | # Import data (simulated data for this example) | ||
| + | # myData <- read.csv(' | ||
| + | myData <- read.csv(" | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | > str(myData) | ||
| + | ' | ||
| + | $ happiness: int 5 5 6 4 3 5 5 5 4 4 ... | ||
| + | $ age : int 24 28 25 26 20 25 24 24 26 26 ... | ||
| + |  $ gender  | ||
| + |  $ friends  | ||
| + | $ pets : int 3 1 0 2 0 0 5 2 1 4 ... | ||
| + | > myData$gender <- factor(myData$gender) | ||
| + | > str(myData) | ||
| + | ' | ||
| + | $ happiness: int 5 5 6 4 3 5 5 5 4 4 ... | ||
| + | $ age : int 24 28 25 26 20 25 24 24 26 26 ... | ||
| + |  $ gender  | ||
| + |  $ friends  | ||
| + | $ pets : int 3 1 0 2 0 0 5 2 1 4 ... | ||
| + | > | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | > m0 <- lm(happiness ~ 1, data = myData) | ||
| + | > anova(m0) | ||
| + | Analysis of Variance Table | ||
| + | |||
| + | Response: happiness | ||
| + | Df Sum Sq Mean Sq F value Pr(>F) | ||
| + | Residuals 99 240.84  | ||
| + | > | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | # 불필요하지만 위의 분석이 variance와  | ||
| + | # 같은 것이라는 것을 아래처럼 확인한다. | ||
| + | > attach(myData) | ||
| + | The following objects are masked from myData (pos = 3): | ||
| + | |||
| + | age, friends, gender, happiness, pets | ||
| + | |||
| + | > var(happiness) | ||
| + | [1] 2.432727 | ||
| + | > length(happiness) | ||
| + | [1] 100 | ||
| + | > df.happiness <- length(happiness) - 1 | ||
| + | > df.happiness # degrees of freedom  | ||
| + | [1] 99 | ||
| + | > ss.happiness <- var(happiness)* df.happiness # sum of square (ss) value for happiness variable | ||
| + | > ss.happiness  | ||
| + | [1] 240.84 | ||
| + | > | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | > m1 <- lm(happiness ~ age + gender, data=myData)  | ||
| + | > summary(m1) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = happiness ~ age + gender, data = myData) | ||
| + | |||
| + | Residuals: | ||
| + |     Min      1Q  Median  | ||
| + | -3.6688 -1.0094 -0.1472  | ||
| + | |||
| + | Coefficients: | ||
| + |             Estimate Std. Error t value Pr(> | ||
| + | (Intercept)  | ||
| + | age          | ||
| + | genderMale  | ||
| + | --- | ||
| + | Signif. codes:  | ||
| + | |||
| + | Residual standard error: 1.553 on 97 degrees of freedom | ||
| + | Multiple R-squared:  | ||
| + | F-statistic: | ||
| + | |||
| + | > | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | # m1은 이미 위에서 실행 | ||
| + | > m2 <- lm(happiness ~ age + gender + friends, data=myData)  | ||
| + | > m3 <- lm(happiness ~ age + gender + friends + pets, data = myData) # Model 3: Adding pets variable | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | > anova(m1, m2, m3) | ||
| + | Analysis of Variance Table | ||
| + | |||
| + | Model 1: happiness ~ age + gender | ||
| + | Model 2: happiness ~ age + gender + friends | ||
| + | Model 3: happiness ~ age + gender + friends + pets | ||
| + |   Res.Df  | ||
| + | 1     97 233.97  | ||
| + | 2     96 209.27  | ||
| + | 3     95 193.42  | ||
| + | --- | ||
| + | Signif. codes:  | ||
| + | > | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | > summary(m1) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = happiness ~ age + gender, data = myData) | ||
| + | |||
| + | Residuals: | ||
| + |     Min      1Q  Median  | ||
| + | -3.6688 -1.0094 -0.1472  | ||
| + | |||
| + | Coefficients: | ||
| + |             Estimate Std. Error t value Pr(> | ||
| + | (Intercept)  | ||
| + | age          | ||
| + | genderMale  | ||
| + | --- | ||
| + | Signif. codes:  | ||
| + | |||
| + | Residual standard error: 1.553 on 97 degrees of freedom | ||
| + | Multiple R-squared:  | ||
| + | F-statistic: | ||
| + | |||
| + | > summary(m2) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = happiness ~ age + gender + friends, data = myData) | ||
| + | |||
| + | Residuals: | ||
| + |     Min      1Q  Median  | ||
| + | -3.5758 -1.0204  | ||
| + | |||
| + | Coefficients: | ||
| + |             Estimate Std. Error t value Pr(> | ||
| + | (Intercept)  | ||
| + | age          | ||
| + | genderMale  | ||
| + | friends  | ||
| + | --- | ||
| + | Signif. codes:  | ||
| + | |||
| + | Residual standard error: 1.476 on 96 degrees of freedom | ||
| + | Multiple R-squared:  | ||
| + | F-statistic: | ||
| + | |||
| + | > summary(m3) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = happiness ~ age + gender + friends + pets, data = myData) | ||
| + | |||
| + | Residuals: | ||
| + |     Min      1Q  Median  | ||
| + | -3.0556 -1.0183 -0.1109  | ||
| + | |||
| + | Coefficients: | ||
| + |             Estimate Std. Error t value Pr(> | ||
| + | (Intercept)  | ||
| + | age          | ||
| + | genderMale  | ||
| + | friends  | ||
| + | pets          | ||
| + | --- | ||
| + | Signif. codes:  | ||
| + | |||
| + | Residual standard error: 1.427 on 95 degrees of freedom | ||
| + | Multiple R-squared:  | ||
| + | F-statistic: | ||
| + | |||
| + | > | ||
| + | </ | ||
| + | |||
| + | Report in research paper | ||
| + | {{: | ||
| + | {{: | ||
| + | |||
| + | ====== e.g. 5: Stock Market ====== | ||
| + | see [[: | ||
| + | |||
| + | ====== e.g. 6: SWISS ====== | ||
| + | |||
| + | |||
sequential_regression.1606347312.txt.gz · Last modified:  by hkimscil
                
                