# regression.lecturenote.r
# (The file name above used to be a bare expression; evaluated as code it
# stops the script with "object 'regression.lecturenote.r' not found".)

# Simulate a small sample: x is normal(100, 10) and y is a linear function of
# x (slope 1.4, intercept 2) plus normal noise with sd 10.
# The seed is fixed so the lecture numbers are reproducible.
set.seed(401)
sn <- 25
x <- rnorm(sn, mean = 100, sd = 10)
x
y <- 1.4 * x + 2 + rnorm(sn, mean = 0, sd = 10)
y
# Collect both variables in one data frame for plotting and modelling.
df <- data.frame(x, y)
# density graph
# Install ggplot2 only when it is missing, so re-running the script does not
# trigger a download / reinstall every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Histogram of y with a dashed red line at the mean of y; coord_flip() turns
# the plot so that y runs along the vertical axis.
ggplot(data = df, aes(y)) +
  geom_histogram() +
  geom_vline(aes(xintercept = mean(y)),
             color = "red", linetype = "dashed", size = 1) +
  coord_flip()

# Same view using a kernel density estimate instead of a histogram.
ggplot(data = df, aes(y)) +
  geom_density(color = "blue", size = 1.5) +
  geom_vline(aes(xintercept = mean(y)),
             color = "red", linetype = "dashed", size = 1) +
  coord_flip()
# Regress y on x by ordinary least squares and inspect the fitted object.
lm.mod <- lm(formula = y ~ x, data = df)
summary(lm.mod)
str(lm.mod)

# Keep the estimated intercept and slope for drawing the fitted line later.
inc.y <- coef(lm.mod)["(Intercept)"]
slope.x <- coef(lm.mod)["x"]
inc.y
slope.x
# Scatter plot of the sample with a horizontal line at mean(y) and the fitted
# regression line (intercept inc.y, slope slope.x), default line styling.
# NOTE(review): shape 1.5 is not an integer shape code -- confirm the intended
# point symbol.
ggplot(df, aes(x = x, y = y)) +
  geom_point(colour = "blue", size = 1.5, shape = 1.5) +
  geom_hline(aes(yintercept = mean(y))) +
  geom_abline(slope = slope.x, intercept = inc.y)

# Same plot with larger triangles and thicker, coloured reference lines:
# red for the mean of y, dark green for the fitted line.
ggplot(df, aes(x = x, y = y)) +
  geom_point(colour = "blue", size = 2.5, shape = 2) +
  geom_hline(aes(yintercept = mean(y)), colour = "red", size = 1.5) +
  geom_abline(slope = slope.x, intercept = inc.y,
              colour = "darkgreen", size = 1.5)
################################
################################
################################
################################
# Second example: a larger sample (400 points) from the same linear relation,
# this time with noisier errors (sd 16). A new fixed seed keeps it reproducible.
set.seed(101)
sn <- 400
x <- rnorm(n = sn, mean = 100, sd = 10)
x
y <- 1.4 * x + 2 + rnorm(n = sn, mean = 0, sd = 16)
y
# Overwrites the data frame from the first example.
df <- data.frame(x = x, y = y)
# density graph
# Histogram of y for the second sample; the dashed red line marks mean(y) and
# coord_flip() puts y on the vertical axis.
ggplot(df, aes(x = y)) +
  geom_histogram() +
  geom_vline(aes(xintercept = mean(y)),
             linetype = "dashed", colour = "red", size = 1) +
  coord_flip()

# Kernel density version of the same picture.
ggplot(df, aes(x = y)) +
  geom_density(colour = "blue", size = 1.5) +
  geom_vline(aes(xintercept = mean(y)),
             linetype = "dashed", colour = "red", size = 1) +
  coord_flip()
# Scatter plot with the mean of y (dark green) and a hand-picked candidate line
# y = 10 + 1.5 * x (red), drawn before any model has been fitted to this sample.
ggplot(df, aes(x = x, y = y)) +
  geom_point(colour = "blue", size = 1.5, shape = 2) +
  geom_hline(aes(yintercept = mean(y)), colour = "darkgreen", size = 1) +
  geom_abline(slope = 1.5, intercept = 10, colour = "red", size = 1.5)
# Fit the regression on the second sample and keep its summary for later
# comparison with the hand-computed R-squared.
lm.mod2 <- lm(formula = y ~ x, data = df)
sum.lm.mod2 <- summary(lm.mod2)
sum.lm.mod2

# b is the estimated slope, a the estimated intercept; show each one.
b <- coef(lm.mod2)["x"]
a <- coef(lm.mod2)["(Intercept)"]
b
a
# Scatter plot again, now with the estimated regression line (intercept a,
# slope b) in red instead of the hand-picked line; mean(y) stays dark green.
ggplot(df, aes(x = x, y = y)) +
  geom_point(colour = "blue", size = 1.5, shape = 2) +
  geom_hline(aes(yintercept = mean(y)), colour = "darkgreen", size = 1) +
  geom_abline(slope = b, intercept = a, colour = "red", size = 1.5)
# Decompose the variation of y by hand and rebuild R-squared and r from it.

# Residual (unexplained) sum of squares: squared distances from the fitted line.
lm.mod2$residuals
sum(lm.mod2$residuals^2)
ss.res <- sum(lm.mod2$residuals^2)

# Total sum of squares, recovered as sample variance times its degrees of
# freedom (n - 1).
mean.y <- mean(df$y)
var.tot <- var(df$y)
df.tot <- length(df$y) - 1
ss.tot <- var.tot * df.tot
ss.tot

# Explained sum of squares: squared distances of the fitted values from mean(y).
y.hat <- lm.mod2$fitted.values
y.hat - mean.y
explained <- y.hat - mean.y
ss.exp <- sum(explained^2)
ss.exp
ss.res
# For a least-squares fit with an intercept these two should add up to ss.tot.
ss.exp + ss.res
ss.tot

# R-squared = explained / total; compare with the value in the model summary.
r.square <- ss.exp / ss.tot
r.square
sum.lm.mod2

# sqrt() alone only yields |r|; the correlation carries the sign of the fitted
# slope, so apply it explicitly. Otherwise a negative relationship would not
# match cor() below.
r.coeff <- sign(unname(lm.mod2$coefficients[2])) * sqrt(r.square)
r.coeff
cor(df$x, df$y)
###
# Let ggplot2 fit and draw the linear model itself (stat_smooth with
# method "lm") on top of the scatter plot and the mean-of-y line.
# NOTE(review): shape 1.5 is not an integer shape code -- confirm the intended
# point symbol.
ggplot(df, aes(x = x, y = y)) +
  geom_point(colour = "blue", size = 1.5, shape = 1.5) +
  geom_hline(aes(yintercept = mean(y)), colour = "darkgreen", size = 1) +
  stat_smooth(geom = "smooth", method = "lm", formula = y ~ x,
              colour = "red", size = 1)