c:ms:2025:w07.2_factorial_anova
This is an old revision of the document!
Two-way ANOVA
Factorial ANOVA
################################################# # two-way anova # subject = factor(paste('sub', 1:30, sep='')) ################################################# n.a.group <- 3 # a treatment 숫자 n.b.group <- 2 # b 그룹 숫자 n.sub <- 30 # 총 샘플 숫자 n.sub/n.a.group # 데이터 생성 set.seed(9) a <- gl(3, n.sub/n.a.group, n.sub, labels=c('a1', 'a2', 'a3')) b <- gl(2, (n.sub/n.a.group)/2, n.sub, labels=c('b1', 'b2')) a b y <- rnorm(30, mean=10) + 3.14 * (a=='a1' & b=='b2') + 1.43 * (a=='a3' & b=='b2') y dat <- data.frame(a, b, y) dat # aov.dat <- aov(y~a*b) # anova test # summary(aov.dat) # summary of the test output # hand calculation table(a,b) tapply(y, list(a,b), mean) # 각 셀에서의 평균 tapply(y, a, mean) tapply(y, b, mean) n.within.each <- tapply(y, list(a,b), length) # the same as table(a, b) n.within.each df.within.each <- n.within.each - 1 # 각 셀에서의 샘플숫자 df.within.each df.within <- sum(df.within.each) # df within df.within var.within <- tapply(y, list(a,b), var) # var.within var.within ss.within.each <- tapply(y, list(a,b), var) * df.within.each ss.within.each ss.within <- sum(ss.within.each) # ss.within ss.within ms.within <- ss.within / df.within ms.within # interaction.plot(a,b,y) mean.a <- tapply(y, list(a), mean) mean.b <- tapply(y, list(b), mean) mean.a mean.b var.a <- tapply(y, list(a), var) var.b <- tapply(y, list(b), var) var.a var.b mean.tot <- mean(dat$y) var.tot <- var(dat$y) df.tot <- n.sub - 1 ss.tot <- var.tot * df.tot ms.tot <- ss.tot/df.tot mean.tot var.tot df.tot ss.tot ms.tot ss.tot/df.tot ## between mean.each <- tapply(y, list(a,b), mean) mean.each mean.tot <- mean(y) mean.tot n.each <- tapply(y, list(a,b), length) n.a.each <- tapply(y, list(a), length) n.b.each <- tapply(y, list(b), length) n.each n.a.each n.b.each ss.bet <- sum(n.each*(mean.each-mean.tot)^2) ss.bet ss.tot ss.within ss.bet ss.bet + ss.within ss.a <- sum(n.a.each * ((mean.tot - mean.a)^2)) ss.b <- sum(n.b.each * ((mean.tot - mean.b)^2)) ss.a ss.b ss.ab <- ss.bet - (ss.a + ss.b) # ss.ab = ss.interaction ss.ab ss.tot ss.bet ss.within ss.a ss.b ss.ab df.tot <- n.sub - 1 df.bet <- (n.a.group * n.b.group) - 1 df.a <- n.a.group - 1 df.b <- n.b.group - 1 df.ab <- df.bet - (df.a + df.b) df.within <- sum(df.within.each) df.tot df.bet df.a df.b df.ab df.within ms.tot <- ss.tot / df.tot # we did it above ms.bet <- ss.bet / df.bet ms.a <- ss.a / df.a ms.b <- ss.b / df.b ms.ab <- ss.ab / df.ab ms.within <- ss.within / df.within ms.tot ms.bet ms.within ms.a ms.b ms.ab f.a <- ms.a / ms.within f.b <- ms.b / ms.within f.ab <- ms.ab / ms.within alpha <- .05 # confidence interval ci <- 1 - alpha f.a # 봐야할 F분포표에서의 F-value # qt 처럼 qf 사용 # qf(alpha, df.between, df.within, lower.tail=F) 처럼 사용 qf(ci, df.a, df.within) # or qf(alpha, df.a, df.within, lower.tail = F) # 혹은 # qf(alpha, df.a, df.within, lower.tail = F) # 도 마찬가지 f.a.crit <- qf(ci, df.a, df.within) f.a > f.a.crit # 만약 f.a가 크다면 # 혹은 f.a 값에 해당하는 percentage를 구하면 pf(f.a, df.a, df.within, lower.tail = F) f.b qf(ci, df.b, df.within) f.b > qf(ci, df.b, df.within) pf(f.b, df.b, df.within, lower.tail = F) f.ab qf(ci, df.ab, df.within) f.ab > qf(ci, df.ab, df.within) pf(f.ab, df.ab, df.within, lower.tail = F) # aov result aov.dat.all <- aov(y ~ a * b, data = dat) # anova test summary(aov.dat.all) # summary of the test output aov.dat.noint <- aov(y ~ a + b, data = dat) # interaction이 갖는 SS와 df를 residuals이 갖음을 주목 summary(aov.dat.noint) aov.dat.all2 <- aov(y ~ a + b + a:b, data = dat) summary(aov.dat.all2) aov.dat.all # what is standard error of residuals
Output, two-way ANOVA
> ################################################# > # two-way anova > # subject = factor(paste('sub', 1:30, sep='')) > ################################################# > > n.a.group <- 3 # a treatment 숫자 > n.b.group <- 2 # b 그룹 숫자 > n.sub <- 30 # 총 샘플 숫자 > n.sub/n.a.group [1] 10 > > # 데이터 생성 > set.seed(9) > a <- gl(3, n.sub/n.a.group, n.sub, labels=c('a1', 'a2', 'a3')) > b <- gl(2, (n.sub/n.a.group)/2, n.sub, labels=c('b1', 'b2')) > a [1] a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a2 a2 a2 a2 a2 a2 a2 a2 [19] a2 a2 a3 a3 a3 a3 a3 a3 a3 a3 a3 a3 Levels: a1 a2 a3 > b [1] b1 b1 b1 b1 b1 b2 b2 b2 b2 b2 b1 b1 b1 b1 b1 b2 b2 b2 [19] b2 b2 b1 b1 b1 b1 b1 b2 b2 b2 b2 b2 Levels: b1 b2 > y <- rnorm(30, mean=10) + + 3.14 * (a=='a1' & b=='b2') + + 1.43 * (a=='a3' & b=='b2') > y [1] 9.233204 9.183542 9.858465 9.722395 10.436307 [6] 11.953127 14.331987 13.121810 12.891915 12.777063 [11] 11.277571 9.531103 10.071054 9.733962 11.845257 [16] 9.160550 9.922552 7.382294 10.887884 9.292509 [21] 11.756993 10.182252 9.733111 10.926422 9.306668 [26] 14.111990 11.652524 10.723328 11.847213 11.799557 > > dat <- data.frame(a, b, y) > dat a b y 1 a1 b1 9.233204 2 a1 b1 9.183542 3 a1 b1 9.858465 4 a1 b1 9.722395 5 a1 b1 10.436307 6 a1 b2 11.953127 7 a1 b2 14.331987 8 a1 b2 13.121810 9 a1 b2 12.891915 10 a1 b2 12.777063 11 a2 b1 11.277571 12 a2 b1 9.531103 13 a2 b1 10.071054 14 a2 b1 9.733962 15 a2 b1 11.845257 16 a2 b2 9.160550 17 a2 b2 9.922552 18 a2 b2 7.382294 19 a2 b2 10.887884 20 a2 b2 9.292509 21 a3 b1 11.756993 22 a3 b1 10.182252 23 a3 b1 9.733111 24 a3 b1 10.926422 25 a3 b1 9.306668 26 a3 b2 14.111990 27 a3 b2 11.652524 28 a3 b2 10.723328 29 a3 b2 11.847213 30 a3 b2 11.799557 > # aov.dat <- aov(y~a*b) # anova test > # summary(aov.dat) # summary of the test output > > # hand calculation > table(a,b) b a b1 b2 a1 5 5 a2 5 5 a3 5 5 > tapply(y, list(a,b), mean) # 각 셀에서의 평균 b1 b2 a1 9.686782 13.015181 a2 10.491789 9.329158 a3 10.381089 12.026922 > tapply(y, a, mean) a1 a2 a3 11.350981 9.910474 11.204006 > tapply(y, b, mean) b1 b2 10.18655 11.45709 > n.within.each <- tapply(y, list(a,b), length) # the same as table(a, b) > n.within.each b1 b2 a1 5 5 a2 5 5 a3 5 5 > df.within.each <- n.within.each - 1 # 각 셀에서의 샘플숫자 > df.within.each b1 b2 a1 4 4 a2 4 4 a3 4 4 > df.within <- sum(df.within.each) # df within > df.within [1] 24 > > var.within <- tapply(y, list(a,b), var) # var.within > var.within b1 b2 a1 0.2628787 0.7362999 a2 1.0308917 1.6504481 a3 0.9510727 1.5677577 > ss.within.each <- tapply(y, list(a,b), var) * df.within.each > ss.within.each b1 b2 a1 1.051515 2.945200 a2 4.123567 6.601793 a3 3.804291 6.271031 > ss.within <- sum(ss.within.each) # ss.within > ss.within [1] 24.7974 > > ms.within <- ss.within / df.within > ms.within [1] 1.033225 > > # interaction.plot(a,b,y) > > mean.a <- tapply(y, list(a), mean) > mean.b <- tapply(y, list(b), mean) > mean.a a1 a2 a3 11.350981 9.910474 11.204006 > mean.b b1 b2 10.18655 11.45709 > > var.a <- tapply(y, list(a), var) > var.b <- tapply(y, list(b), var) > var.a a1 a2 a3 3.521366 1.567182 1.871915 > var.b b1 b2 0.7773781 3.7300196 > > mean.tot <- mean(dat$y) > var.tot <- var(dat$y) > df.tot <- n.sub - 1 > ss.tot <- var.tot * df.tot > ms.tot <- ss.tot/df.tot > > mean.tot [1] 10.82182 > var.tot [1] 2.593465 > df.tot [1] 29 > ss.tot [1] 75.21048 > ms.tot [1] 2.593465 > ss.tot/df.tot [1] 2.593465 > > > ## between > mean.each <- tapply(y, list(a,b), mean) > mean.each b1 b2 a1 9.686782 13.015181 a2 10.491789 9.329158 a3 10.381089 12.026922 > mean.tot <- mean(y) > mean.tot [1] 10.82182 > n.each <- tapply(y, list(a,b), length) > n.a.each <- tapply(y, list(a), length) > n.b.each <- tapply(y, list(b), length) > n.each b1 b2 a1 5 5 a2 5 5 a3 5 5 > n.a.each a1 a2 a3 10 10 10 > n.b.each b1 b2 15 15 > > > > ss.bet <- sum(n.each*(mean.each-mean.tot)^2) > ss.bet [1] 50.41308 > > ss.tot [1] 75.21048 > ss.within [1] 24.7974 > ss.bet [1] 50.41308 > ss.bet + ss.within [1] 75.21048 > > ss.a <- sum(n.a.each * ((mean.tot - mean.a)^2)) > ss.b <- sum(n.b.each * ((mean.tot - mean.b)^2)) > ss.a [1] 12.5663 > ss.b [1] 12.10691 > ss.ab <- ss.bet - (ss.a + ss.b) # ss.ab = ss.interaction > ss.ab [1] 25.73987 > > ss.tot [1] 75.21048 > ss.bet [1] 50.41308 > ss.within [1] 24.7974 > ss.a [1] 12.5663 > ss.b [1] 12.10691 > ss.ab [1] 25.73987 > > df.tot <- n.sub - 1 > df.bet <- (n.a.group * n.b.group) - 1 > df.a <- n.a.group - 1 > df.b <- n.b.group - 1 > df.ab <- df.bet - (df.a + df.b) > df.within <- sum(df.within.each) > > df.tot [1] 29 > df.bet [1] 5 > df.a [1] 2 > df.b [1] 1 > df.ab [1] 2 > df.within [1] 24 > > ms.tot <- ss.tot / df.tot # we did it above > ms.bet <- ss.bet / df.bet > ms.a <- ss.a / df.a > ms.b <- ss.b / df.b > ms.ab <- ss.ab / df.ab > ms.within <- ss.within / df.within > > ms.tot [1] 2.593465 > ms.bet [1] 10.08262 > ms.within [1] 1.033225 > ms.a [1] 6.283151 > ms.b [1] 12.10691 > ms.ab [1] 12.86993 > > > f.a <- ms.a / ms.within > f.b <- ms.b / ms.within > f.ab <- ms.ab / ms.within > > alpha <- .05 > # confidence interval > ci <- 1 - alpha > > f.a [1] 6.081107 > # 봐야할 F분포표에서의 F-value > # qt 처럼 qf 사용 > # qf(alpha, df.between, df.within, lower.tail=F) 처럼 사용 > qf(ci, df.a, df.within) # or [1] 3.402826 > qf(alpha, df.a, df.within, lower.tail = F) [1] 3.402826 > # 혹은 > # qf(alpha, df.a, df.within, lower.tail = F) > # 도 마찬가지 > f.a.crit <- qf(ci, df.a, df.within) > f.a > f.a.crit # 만약 f.a가 크다면 [1] TRUE > # 혹은 f.a 값에 해당하는 percentage를 구하면 > pf(f.a, df.a, df.within, lower.tail = F) [1] 0.007302552 > > f.b [1] 11.7176 > qf(ci, df.b, df.within) [1] 4.259677 > f.b > qf(ci, df.b, df.within) [1] TRUE > pf(f.b, df.b, df.within, lower.tail = F) [1] 0.00222738 > > f.ab [1] 12.45608 > qf(ci, df.ab, df.within) [1] 3.402826 > f.ab > qf(ci, df.ab, df.within) [1] TRUE > pf(f.ab, df.ab, df.within, lower.tail = F) [1] 0.0001947745 > > # aov result > aov.dat.all <- aov(y ~ a * b, data = dat) # anova test > summary(aov.dat.all) # summary of the test output Df Sum Sq Mean Sq F value Pr(>F) a 2 12.57 6.283 6.081 0.007303 ** b 1 12.11 12.107 11.718 0.002227 ** a:b 2 25.74 12.870 12.456 0.000195 *** Residuals 24 24.80 1.033 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > aov.dat.noint <- aov(y ~ a + b, data = dat) > # interaction이 갖는 SS와 df를 residuals이 갖음을 주목 > summary(aov.dat.noint) Df Sum Sq Mean Sq F value Pr(>F) a 2 12.57 6.283 3.233 0.0558 . b 1 12.11 12.107 6.229 0.0192 * Residuals 26 50.54 1.944 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > aov.dat.all2 <- aov(y ~ a + b + a:b, data = dat) > summary(aov.dat.all2) Df Sum Sq Mean Sq F value Pr(>F) a 2 12.57 6.283 6.081 0.007303 ** b 1 12.11 12.107 11.718 0.002227 ** a:b 2 25.74 12.870 12.456 0.000195 *** Residuals 24 24.80 1.033 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > aov.dat.all # what is standard error of residuals Call: aov(formula = y ~ a * b, data = dat) Terms: a b a:b Residuals Sum of Squares 12.56630 12.10691 25.73987 24.79740 Deg. of Freedom 2 1 2 24 Residual standard error: 1.016477 Estimated effects may be unbalanced > > >
c/ms/2025/w07.2_factorial_anova.1745190679.txt.gz · Last modified: 2025/04/21 08:11 by hkimscil