# Load the bank-account example data (read over HTTP, so network access is
# required) and fit the simple regression of bankaccount on income.
# The workspace is intentionally not cleared here: rm(list = ls()) would
# delete unrelated objects in whatever session runs this script.
dat <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")
head(dat)

# Pull the two analysis columns out as plain vectors instead of calling
# attach(dat). Each attach() puts another copy of dat on the search path, so
# re-running the script accumulates "objects are masked" messages and can
# leave stale copies shadowing fresh data. famnum is not used by the
# calculations below and stays reachable as dat[["famnum"]].
bankaccount <- dat[["bankaccount"]]
income <- dat[["income"]]

# Reference fit from lm(); the hand calculations that follow are meant to
# reproduce the numbers in this summary.
mod <- lm(bankaccount ~ income, data = dat)
summary(mod)
# Descriptive building blocks for the regression of bankaccount (y) on
# income (x): means, standard deviations, degrees of freedom, variance and
# sums of squares.
m.x <- mean(income)
m.y <- mean(bankaccount)
sd.x <- sd(income)
sd.y <- sd(bankaccount)
df.x <- length(income) - 1
df.tot <- length(bankaccount) - 1
var.tot <- var(bankaccount)

# A sum of squares is the sample variance scaled back up by its degrees of
# freedom.
ss.tot <- df.tot * var.tot
ss.x <- df.x * var(income)

# Sum of cross-products of the deviations from the means, and the covariance
# derived from it. The hand-computed value is printed next to cov() so the
# two can be compared.
sp.xy <- sum((bankaccount - m.y) * (income - m.x))
cov.xy <- sp.xy / df.tot
cov.xy
cov(income, bankaccount)

# Pearson correlation: covariance divided by the product of the standard
# deviations, printed next to cor() for comparison.
r.xy <- cov.xy / (sd.y * sd.x)
r.xy
cor(income, bankaccount)
# Least-squares slope: sum of cross-products over the predictor's sum of
# squares.
b <- sp.xy / ss.x
b

# Intercept chosen so that the fitted line passes through the point of
# means (m.x, m.y).
a <- m.y - m.x * b
a

# Predicted bankaccount value for every observed income.
y.pred <- income * b + a
y.pred
# For a cross-check, compare with the model's own values: mod$fitted.values
# Split the variation in the outcome into the part reproduced by the fitted
# line and the part left over.
y.mean <- m.y
y.obs <- bankaccount

# Total sum of squares of the outcome around its mean. Despite the "xy" in
# the name, only bankaccount is involved; ss.tot is printed right after it
# so the two values can be compared.
ss.xy <- sum((y.obs - y.mean)^2)
ss.xy
ss.tot

# Residual sum of squares: observed minus predicted.
ss.res <- sum((y.obs - y.pred)^2)
ss.res

# Regression sum of squares: predicted minus mean.
ss.reg <- sum((y.pred - y.mean)^2)
ss.reg

# The two components are added and printed next to the total.
ss.res + ss.reg
ss.xy

# Degrees of freedom: total (printed), residual (observations minus the two
# estimated coefficients) and regression (two coefficients minus one).
df.tot
df.res <- length(y.obs) - 2
df.reg <- 1

# R-squared as the regression share of the total sum of squares. Its square
# root is printed alongside the correlation computed by hand and by cor().
r.sq <- ss.reg / ss.xy
r.sq
sqrt(r.sq)
r.xy
cor(income, bankaccount)
# Inference for the slope and for the model as a whole.

# Mean squares: each sum of squares divided by its degrees of freedom.
ms.reg <- ss.reg / df.reg
ms.res <- ss.res / df.res

# Standard error of the slope: square root of the residual mean square over
# the predictor's sum of squares.
se.b <- sqrt(ms.res / ss.x)
se.b
t.b <- b / se.b
t.b

# Two-sided p-value for the slope. abs() is required: doubling the upper
# tail of a negative t statistic would give a value above 1 instead of a
# p-value. FALSE is spelled out because F is an ordinary variable that can
# be reassigned.
p.b <- 2 * pt(abs(t.b), df.res, lower.tail = FALSE)
p.b

# Overall F test. t.b^2 is printed after f.cal because, with a single
# predictor, the F statistic equals the squared t statistic of the slope.
f.cal <- ms.reg / ms.res
f.cal
t.b^2
p.f <- pf(f.cal, df.reg, df.res, lower.tail = FALSE)
p.f
# Reference output from R's own routines, for comparison with the
# hand-computed values printed underneath.
summary(mod)
anova(mod)
# Hand-computed coefficient results: intercept, slope, the slope's standard
# error, its t statistic and its p-value.
a
b
se.b
t.b
p.b
# Hand-computed model-level results: R-squared, the F statistic and its
# p-value.
r.sq
f.cal
p.f
> rm(list = ls())
>
> dat <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")
> head(dat)
bankaccount income famnum
1 6 220 5
2 5 190 6
3 7 260 3
4 7 200 4
5 8 330 2
6 10 490 4
> attach(dat)
The following objects are masked from dat (pos = 4):
bankaccount, famnum, income
The following objects are masked from dat (pos = 7):
bankaccount, famnum, income
The following objects are masked from dat (pos = 8):
bankaccount, famnum, income
The following objects are masked from dat (pos = 9):
bankaccount, famnum, income
The following object is masked from df (pos = 10):
income
The following object is masked from df (pos = 11):
income
The following object is masked from df (pos = 12):
income
The following object is masked from df (pos = 13):
income
The following object is masked from df (pos = 15):
income
>
> mod <- lm(bankaccount ~ income, data = dat)
> summary(mod)
Call:
lm(formula = bankaccount ~ income, data = dat)
Residuals:
Min 1Q Median 3Q Max
-1.5189 -0.8969 -0.1297 1.0058 1.5800
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.617781 1.241518 2.914 0.01947 *
income 0.015269 0.004127 3.700 0.00605 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.176 on 8 degrees of freedom
Multiple R-squared: 0.6311, Adjusted R-squared: 0.585
F-statistic: 13.69 on 1 and 8 DF, p-value: 0.006046
> df.tot <- length(bankaccount) - 1
> ss.tot <- var(bankaccount)*df.tot
> var.tot <- var(bankaccount)
>
> df.x <- length(income)-1
> ss.x <- var(income)*df.x
>
>
> m.y <- mean(bankaccount)
> m.x <- mean(income)
>
> sp.xy <- sum((income-m.x)*(bankaccount-m.y))
> cov.xy <- sp.xy / df.tot
> cov.xy
[1] 137.7778
> cov(income, bankaccount)
[1] 137.7778
>
> sd.x <- sd(income)
> sd.y <- sd(bankaccount)
>
> r.xy <- cov.xy / (sd.x * sd.y)
> r.xy
[1] 0.7944312
> cor(income, bankaccount)
[1] 0.7944312
>
> b <- sp.xy / ss.x
> b
[1] 0.01526906
> a <- m.y - (b * m.x)
> a
[1] 3.617781
>
> y.pred <- a + (b * income)
> y.pred
[1] 6.976973 6.518902 7.587736 6.671592 8.656569 11.099618 6.824283 9.420022
[9] 8.503879 7.740426
> # mod$fitted.values
> y.obs <- bankaccount
> y.mean <- m.y
>
> ss.xy <- sum((bankaccount-y.mean)^2)
> ss.xy
[1] 30
> ss.tot
[1] 30
>
> ss.res <- sum((bankaccount-y.pred)^2)
> ss.res
[1] 11.06637
>
> ss.reg <- sum((y.pred-y.mean)^2)
> ss.reg
[1] 18.93363
>
> ss.reg + ss.res
[1] 30
> ss.xy
[1] 30
>
> df.tot
[1] 9
> df.res <- length(bankaccount) - 1 - 1
> df.reg <- 2 - 1
>
> r.sq <- ss.reg / ss.xy
> r.sq
[1] 0.631121
>
> sqrt(r.sq)
[1] 0.7944312
> r.xy
[1] 0.7944312
> cor(income, bankaccount)
[1] 0.7944312
>
> se.b <- sqrt((ss.res/df.res)/ss.x)
> se.b
[1] 0.004127175
> t.b <- b / se.b
> t.b
[1] 3.699639
> p.b <- pt(t.b, df.res, lower.tail = F) * 2
> p.b
[1] 0.006045749
>
> ms.reg <- ss.reg / df.reg
> ms.res <- ss.res / df.res
> f.cal <- ms.reg / ms.res
> f.cal
[1] 13.68733
> t.b^2
[1] 13.68733
> p.f <- pf(f.cal, df.reg, df.res, lower.tail = F)
> p.f
[1] 0.006045749
>
> summary(mod)
Call:
lm(formula = bankaccount ~ income, data = dat)
Residuals:
Min 1Q Median 3Q Max
-1.5189 -0.8969 -0.1297 1.0058 1.5800
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.617781 1.241518 2.914 0.01947 *
income 0.015269 0.004127 3.700 0.00605 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.176 on 8 degrees of freedom
Multiple R-squared: 0.6311, Adjusted R-squared: 0.585
F-statistic: 13.69 on 1 and 8 DF, p-value: 0.006046
> anova(mod)
Analysis of Variance Table
Response: bankaccount
Df Sum Sq Mean Sq F value Pr(>F)
income 1 18.934 18.9336 13.687 0.006046 **
Residuals 8 11.066 1.3833
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
>
> a
[1] 3.617781
> b
[1] 0.01526906
> se.b
[1] 0.004127175
> t.b
[1] 3.699639
> p.b
[1] 0.006045749
>
> r.sq
[1] 0.631121
> f.cal
[1] 13.68733
> p.f
[1] 0.006045749
>
>