beta_coefficients
Differences
This shows you the differences between two versions of the page.
Next revision | Previous revision | ||
beta_coefficients [2019/05/21 11:40] – created hkimscil | beta_coefficients [2020/12/09 18:47] (current) – [e.g.] hkimscil | ||
---|---|---|---|
Line 1: | Line 1: | ||
- | ====== Beta coefficients in regression ====== | + | ====== Beta coefficients in linear |
- | $$ \beta = b * \frac{sd(x)}{sd(y)} | + | |
+ | {{: | ||
+ | |||
+ | \begin{align*} | ||
+ | \large{\beta = b * \frac{sd(x)}{sd(y)}} \\ | ||
+ | \end{align*} | ||
+ | |||
+ | < | ||
+ | # import test score data " | ||
+ | tests <- read.csv(" | ||
+ | colnames(tests) <- c(" | ||
+ | tests <- subset(tests, | ||
+ | attach(tests) | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | summary(lm.gpa.clepsat) | ||
+ | Call: | ||
+ | lm(formula = gpa ~ clep + sat, data = tests) | ||
+ | |||
+ | Residuals: | ||
+ | Min 1Q Median | ||
+ | -0.197888 -0.128974 -0.000528 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) | ||
+ | clep | ||
+ | sat | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | ||
+ | |||
+ | Residual standard error: 0.1713 on 7 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > sd.sat <- sd(sat) | ||
+ | > sd.gpa <- sd(gpa) | ||
+ | > lm.gpa.clepsat <- lm(gpa ~ clep + sat, data = tests) | ||
+ | > summary(lm.gpa.clepsat) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = gpa ~ clep + sat, data = tests) | ||
+ | |||
+ | Residuals: | ||
+ | Min 1Q Median | ||
+ | -0.197888 -0.128974 -0.000528 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) | ||
+ | clep | ||
+ | sat | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 0.1713 on 7 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > b.clep <- 0.0729294 | ||
+ | > b.sat <- -0.0007015 | ||
+ | > beta.clep <- b.clep * (sd.clep/ | ||
+ | > beta.sat <- b.sat * (sd.sat/ | ||
+ | > lm.beta(lm.gpa.clepsat) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = gpa ~ clep + sat, data = tests) | ||
+ | |||
+ | Standardized Coefficients:: | ||
+ | (Intercept) | ||
+ | 0.0000000 | ||
+ | |||
+ | > beta.clep | ||
+ | [1] 1.055648 | ||
+ | > beta.sat | ||
+ | [1] -0.2051187 | ||
+ | > | ||
+ | </ | ||
+ | ====== e.g. ====== | ||
+ | |||
+ | < | ||
+ | # get marketing data | ||
+ | marketing <- read.csv(" | ||
+ | head(marketing) | ||
+ | # note that I need - X to get rid of X column in the marketing data | ||
+ | mod <- lm(sales ~ . - X, data=marketing) | ||
+ | summary(mod) | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > marketing <- read.csv(" | ||
+ | > head(marketing) | ||
+ | X youtube facebook newspaper sales | ||
+ | 1 1 276.12 | ||
+ | 2 2 | ||
+ | 3 3 | ||
+ | 4 4 181.80 | ||
+ | 5 5 216.96 | ||
+ | 6 6 | ||
+ | # note that I need - X to get rid of X column in the marketing data | ||
+ | > mod <- lm(sales ~ . - X, data=marketing) | ||
+ | > summary(mod) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = sales ~ . - X, data = marketing) | ||
+ | |||
+ | Residuals: | ||
+ | | ||
+ | -10.5932 | ||
+ | |||
+ | Coefficients: | ||
+ | | ||
+ | (Intercept) | ||
+ | youtube | ||
+ | facebook | ||
+ | newspaper | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 2.023 on 196 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | </ | ||
+ | |||
+ | |||
+ | |||
+ | < | ||
+ | install.packages("lm.beta") | ||
+ | library(lm.beta) | ||
+ | lm.beta(mod) | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | lm.beta(mod) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = sales ~ . - X, data = marketing) | ||
+ | |||
+ | Standardized Coefficients:: | ||
+ | | ||
+ | | ||
+ | > | ||
+ | </ | ||
+ | |||
+ | These beta coefficients can also be obtained as the coefficients of a model fitted to standardized data. | ||
+ | |||
+ | < | ||
+ | mod.formula <- sales ~ youtube + facebook + newspaper | ||
+ | all.vars(mod.formula) | ||
+ | marketing.temp <- sapply(marketing[ , all.vars(mod.formula)], | ||
+ | head(marketing.temp) | ||
+ | marketing.scaled <- as.data.frame(marketing.temp) | ||
+ | mod.scaled <- lm(sales ~ ., data=marketing.scaled) | ||
+ | head(marketing.scaled) | ||
+ | coefficients(mod.scaled) | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > all.vars(mod.formula) | ||
+ | [1] " | ||
+ | > marketing.temp <- sapply(marketing[ , all.vars(mod.formula)], | ||
+ | > head(marketing.temp) | ||
+ | sales | ||
+ | [1,] 1.5481681 | ||
+ | [2,] -0.6943038 -1.19437904 | ||
+ | [3,] -0.9051345 -1.51235985 | ||
+ | [4,] 0.8581768 | ||
+ | [5,] -0.2151431 | ||
+ | [6,] -1.3076295 -1.61136487 | ||
+ | > mod.scaled <- lm(sales ~ ., data=marketing.scaled) | ||
+ | > head(marketing.scaled) | ||
+ | | ||
+ | 1 1.5481681 | ||
+ | 2 -0.6943038 -1.19437904 | ||
+ | 3 -0.9051345 -1.51235985 | ||
+ | 4 0.8581768 | ||
+ | 5 -0.2151431 | ||
+ | 6 -1.3076295 -1.61136487 | ||
+ | > coefficients(mod.scaled) | ||
+ | (Intercept) | ||
+ | -5.034110e-16 | ||
+ | > | ||
+ | > </ | ||
+ | |||
+ | check out that | ||
+ | '' | ||
+ | |||
+ | and | ||
+ | 베타를 구하고 나면 서로의 계수값을 절대비교할 수 있다. | ||
+ | '' | ||
+ | '' | ||
+ |
beta_coefficients.1558406454.txt.gz · Last modified: 2019/05/21 11:40 by hkimscil