regression:note2
# Simple linear regression of `bankaccount` on `income`, computed by hand
# (sums of squares, slope/intercept, R^2, t and F tests) and compared with
# the results reported by lm(), summary() and anova().
#
# The workspace is deliberately NOT cleared with rm(list = ls()): a script
# should not destroy objects it did not create.

dat <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")
head(dat)

# Extract the two columns as plain vectors instead of attach(dat).
# attach() adds another copy of the data to the search path on every run and
# produces "objects are masked" messages; explicit vectors keep the names
# used below unambiguous.
bankaccount <- dat[["bankaccount"]]
income <- dat[["income"]]

# Reference fit: every hand-computed quantity below is checked against this.
mod <- lm(bankaccount ~ income, data = dat)
summary(mod)

# Total variation in the outcome: SS = variance * (n - 1).
df.tot <- length(bankaccount) - 1
ss.tot <- var(bankaccount) * df.tot
var.tot <- var(bankaccount)

# Variation in the predictor.
df.x <- length(income) - 1
ss.x <- var(income) * df.x

m.y <- mean(bankaccount)
m.x <- mean(income)

# Sum of cross-products and the covariance derived from it;
# cov.xy is printed next to cov() for comparison.
sp.xy <- sum((income - m.x) * (bankaccount - m.y))
cov.xy <- sp.xy / df.tot
cov.xy
cov(income, bankaccount)

sd.x <- sd(income)
sd.y <- sd(bankaccount)

# Pearson correlation = covariance / (sd.x * sd.y); compared with cor().
r.xy <- cov.xy / (sd.x * sd.y)
r.xy
cor(income, bankaccount)

# Least-squares slope (b) and intercept (a).
b <- sp.xy / ss.x
b
a <- m.y - (b * m.x)
a

# Predicted values; these correspond to mod$fitted.values.
y.pred <- a + (b * income)
y.pred
# mod$fitted.values
y.obs <- bankaccount
y.mean <- m.y

# Total sum of squares of the outcome, recomputed directly.
# (Despite its name, ss.xy holds SS total of y; it is printed beside ss.tot.)
ss.xy <- sum((bankaccount - y.mean)^2)
ss.xy
ss.tot

# Residual (unexplained) sum of squares.
ss.res <- sum((bankaccount - y.pred)^2)
ss.res

# Regression (explained) sum of squares.
ss.reg <- sum((y.pred - y.mean)^2)
ss.reg

# Partition check: ss.reg + ss.res is printed beside the total.
ss.reg + ss.res
ss.xy

# Degrees of freedom: residual = n - 1 - 1, regression = 2 - 1
# (two estimated coefficients, a and b).
df.tot
df.res <- length(bankaccount) - 1 - 1
df.reg <- 2 - 1

# R^2 as the explained share of total variation; its square root is printed
# beside the correlation computed above.
r.sq <- ss.reg / ss.xy
r.sq

sqrt(r.sq)
r.xy
cor(income, bankaccount)

# Standard error of the slope: sqrt(MS residual / SS of x).
se.b <- sqrt((ss.res / df.res) / ss.x)
se.b
t.b <- b / se.b
t.b
# Two-sided p-value. abs() keeps the result valid (<= 1) when the slope,
# and therefore t.b, is negative; for a positive t.b it changes nothing.
p.b <- pt(abs(t.b), df.res, lower.tail = FALSE) * 2
p.b

# F test from the mean squares; t.b^2 is printed beside f.cal for comparison.
ms.reg <- ss.reg / df.reg
ms.res <- ss.res / df.res
f.cal <- ms.reg / ms.res
f.cal
t.b^2
p.f <- pf(f.cal, df.reg, df.res, lower.tail = FALSE)
p.f

# Reference output from R's own fit, followed by the hand-computed values.
summary(mod)
anova(mod)

a
b
se.b
t.b
p.b

r.sq
f.cal
p.f
> rm(list = ls()) > > dat <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv") > head(dat) bankaccount income famnum 1 6 220 5 2 5 190 6 3 7 260 3 4 7 200 4 5 8 330 2 6 10 490 4 > attach(dat) The following objects are masked from dat (pos = 4): bankaccount, famnum, income The following objects are masked from dat (pos = 7): bankaccount, famnum, income The following objects are masked from dat (pos = 8): bankaccount, famnum, income The following objects are masked from dat (pos = 9): bankaccount, famnum, income The following object is masked from df (pos = 10): income The following object is masked from df (pos = 11): income The following object is masked from df (pos = 12): income The following object is masked from df (pos = 13): income The following object is masked from df (pos = 15): income > > mod <- lm(bankaccount ~ income, data = dat) > summary(mod) Call: lm(formula = bankaccount ~ income, data = dat) Residuals: Min 1Q Median 3Q Max -1.5189 -0.8969 -0.1297 1.0058 1.5800 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 3.617781 1.241518 2.914 0.01947 * income 0.015269 0.004127 3.700 0.00605 ** --- Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 1.176 on 8 degrees of freedom Multiple R-squared: 0.6311, Adjusted R-squared: 0.585 F-statistic: 13.69 on 1 and 8 DF, p-value: 0.006046 > df.tot <- length(bankaccount) - 1 > ss.tot <- var(bankaccount)*df.tot > var.tot <- var(bankaccount) > > df.x <- length(income)-1 > ss.x <- var(income)*df.x > > > m.y <- mean(bankaccount) > m.x <- mean(income) > > sp.xy <- sum((income-m.x)*(bankaccount-m.y)) > cov.xy <- sp.xy / df.tot > cov.xy [1] 137.7778 > cov(income, bankaccount) [1] 137.7778 > > sd.x <- sd(income) > sd.y <- sd(bankaccount) > > r.xy <- cov.xy / (sd.x * sd.y) > r.xy [1] 0.7944312 > cor(income, bankaccount) [1] 0.7944312 > > b <- sp.xy / ss.x > b [1] 0.01526906 > a <- m.y - (b * m.x) > a [1] 3.617781 > > y.pred <- a + (b * income) > y.pred [1] 6.976973 6.518902 7.587736 6.671592 8.656569 11.099618 6.824283 9.420022 [9] 8.503879 7.740426 > # mod$fitted.values > y.obs <- bankaccount > y.mean <- m.y > > ss.xy <- sum((bankaccount-y.mean)^2) > ss.xy [1] 30 > ss.tot [1] 30 > > ss.res <- sum((bankaccount-y.pred)^2) > ss.res [1] 11.06637 > > ss.reg <- sum((y.pred-y.mean)^2) > ss.reg [1] 18.93363 > > ss.reg + ss.res [1] 30 > ss.xy [1] 30 > > df.tot [1] 9 > df.res <- length(bankaccount) - 1 - 1 > df.reg <- 2 - 1 > > r.sq <- ss.reg / ss.xy > r.sq [1] 0.631121 > > sqrt(r.sq) [1] 0.7944312 > r.xy [1] 0.7944312 > cor(income, bankaccount) [1] 0.7944312 > > se.b <- sqrt((ss.res/df.res)/ss.x) > se.b [1] 0.004127175 > t.b <- b / se.b > t.b [1] 3.699639 > p.b <- pt(t.b, df.res, lower.tail = F) * 2 > p.b [1] 0.006045749 > > ms.reg <- ss.reg / df.reg > ms.res <- ss.res / df.res > f.cal <- ms.reg / ms.res > f.cal [1] 13.68733 > t.b^2 [1] 13.68733 > p.f <- pf(f.cal, df.reg, df.res, lower.tail = F) > p.f [1] 0.006045749 > > summary(mod) Call: lm(formula = bankaccount ~ income, data = dat) Residuals: Min 1Q Median 3Q Max -1.5189 -0.8969 -0.1297 1.0058 1.5800 Coefficients: Estimate Std. 
Error t value Pr(>|t|) (Intercept) 3.617781 1.241518 2.914 0.01947 * income 0.015269 0.004127 3.700 0.00605 ** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 1.176 on 8 degrees of freedom Multiple R-squared: 0.6311, Adjusted R-squared: 0.585 F-statistic: 13.69 on 1 and 8 DF, p-value: 0.006046 > anova(mod) Analysis of Variance Table Response: bankaccount Df Sum Sq Mean Sq F value Pr(>F) income 1 18.934 18.9336 13.687 0.006046 ** Residuals 8 11.066 1.3833 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > > a [1] 3.617781 > b [1] 0.01526906 > se.b [1] 0.004127175 > t.b [1] 3.699639 > p.b [1] 0.006045749 > > r.sq [1] 0.631121 > f.cal [1] 13.68733 > p.f [1] 0.006045749 > >
regression/note2.txt · Last modified: 2024/09/30 10:41 by hkimscil