c:ms:2025:w07.2_factorial_anova
Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
| c:ms:2025:w07.2_factorial_anova [2025/04/20 23:11] – hkimscil | c:ms:2025:w07.2_factorial_anova [2026/04/14 23:34] (current) – removed hkimscil | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| - | ====== Two-way ANOVA ====== | ||
| - | Factorial ANOVA | ||
| - | < | ||
| - | ################################################# | ||
| - | # two-way anova | ||
| - | # subject = factor(paste(' | ||
| - | ################################################# | ||
| - | n.a.group <- 3 # a treatment 숫자 | ||
| - | n.b.group <- 2 # b 그룹 숫자 | ||
| - | n.sub <- 30 # 총 샘플 숫자 | ||
| - | n.sub/ | ||
| - | |||
| - | # 데이터 생성 | ||
| - | set.seed(9) | ||
| - | a <- gl(3, n.sub/ | ||
| - | b <- gl(2, (n.sub/ | ||
| - | a | ||
| - | b | ||
| - | y <- rnorm(30, mean=10) + | ||
| - | 3.14 * (a==' | ||
| - | 1.43 * (a==' | ||
| - | y | ||
| - | |||
| - | dat <- data.frame(a, | ||
| - | dat | ||
| - | # aov.dat <- aov(y~a*b) # anova test | ||
| - | # summary(aov.dat) # summary of the test output | ||
| - | |||
| - | # hand calculation | ||
| - | table(a,b) | ||
| - | tapply(y, list(a,b), mean) # 각 셀에서의 평균 | ||
| - | tapply(y, a, mean) | ||
| - | tapply(y, b, mean) | ||
| - | n.within.each <- tapply(y, list(a,b), length) # the same as table(a, b) | ||
| - | n.within.each | ||
| - | df.within.each <- n.within.each - 1 # 각 셀에서의 샘플숫자 | ||
| - | df.within.each | ||
| - | df.within <- sum(df.within.each) # df within | ||
| - | df.within | ||
| - | |||
| - | var.within <- tapply(y, list(a,b), var) # var.within | ||
| - | var.within | ||
| - | ss.within.each <- tapply(y, list(a,b), var) * df.within.each | ||
| - | ss.within.each | ||
| - | ss.within <- sum(ss.within.each) # ss.within | ||
| - | ss.within | ||
| - | |||
| - | ms.within <- ss.within / df.within | ||
| - | ms.within | ||
| - | |||
| - | # interaction.plot(a, | ||
| - | |||
| - | mean.a <- tapply(y, list(a), mean) | ||
| - | mean.b <- tapply(y, list(b), mean) | ||
| - | mean.a | ||
| - | mean.b | ||
| - | |||
| - | var.a <- tapply(y, list(a), var) | ||
| - | var.b <- tapply(y, list(b), var) | ||
| - | var.a | ||
| - | var.b | ||
| - | |||
| - | mean.tot <- mean(dat$y) | ||
| - | var.tot <- var(dat$y) | ||
| - | df.tot <- n.sub - 1 | ||
| - | ss.tot <- var.tot * df.tot | ||
| - | ms.tot <- ss.tot/ | ||
| - | |||
| - | mean.tot | ||
| - | var.tot | ||
| - | df.tot | ||
| - | ss.tot | ||
| - | ms.tot | ||
| - | ss.tot/ | ||
| - | |||
| - | |||
| - | ## between | ||
| - | mean.each <- tapply(y, list(a,b), mean) | ||
| - | mean.each | ||
| - | mean.tot <- mean(y) | ||
| - | mean.tot | ||
| - | n.each <- tapply(y, list(a,b), length) | ||
| - | n.a.each <- tapply(y, list(a), length) | ||
| - | n.b.each <- tapply(y, list(b), length) | ||
| - | n.each | ||
| - | n.a.each | ||
| - | n.b.each | ||
| - | |||
| - | |||
| - | |||
| - | ss.bet <- sum(n.each*(mean.each-mean.tot)^2) | ||
| - | ss.bet | ||
| - | |||
| - | ss.tot | ||
| - | ss.within | ||
| - | ss.bet | ||
| - | ss.bet + ss.within | ||
| - | |||
| - | ss.a <- sum(n.a.each * ((mean.tot - mean.a)^2)) | ||
| - | ss.b <- sum(n.b.each * ((mean.tot - mean.b)^2)) | ||
| - | ss.a | ||
| - | ss.b | ||
| - | ss.ab <- ss.bet - (ss.a + ss.b) # ss.ab = ss.interaction | ||
| - | ss.ab | ||
| - | |||
| - | ss.tot | ||
| - | ss.bet | ||
| - | ss.within | ||
| - | ss.a | ||
| - | ss.b | ||
| - | ss.ab | ||
| - | |||
| - | df.tot <- n.sub - 1 | ||
| - | df.bet <- (n.a.group * n.b.group) - 1 | ||
| - | df.a <- n.a.group - 1 | ||
| - | df.b <- n.b.group - 1 | ||
| - | df.ab <- df.bet - (df.a + df.b) | ||
| - | df.within <- sum(df.within.each) | ||
| - | |||
| - | df.tot | ||
| - | df.bet | ||
| - | df.a | ||
| - | df.b | ||
| - | df.ab | ||
| - | df.within | ||
| - | |||
| - | ms.tot <- ss.tot / df.tot # we did it above | ||
| - | ms.bet <- ss.bet / df.bet | ||
| - | ms.a <- ss.a / df.a | ||
| - | ms.b <- ss.b / df.b | ||
| - | ms.ab <- ss.ab / df.ab | ||
| - | ms.within <- ss.within / df.within | ||
| - | |||
| - | ms.tot | ||
| - | ms.bet | ||
| - | ms.within | ||
| - | ms.a | ||
| - | ms.b | ||
| - | ms.ab | ||
| - | |||
| - | |||
| - | f.a <- ms.a / ms.within | ||
| - | f.b <- ms.b / ms.within | ||
| - | f.ab <- ms.ab / ms.within | ||
| - | |||
| - | alpha <- .05 | ||
| - | # confidence interval | ||
| - | ci <- 1 - alpha | ||
| - | |||
| - | f.a | ||
| - | # 봐야할 F분포표에서의 F-value | ||
| - | # qt 처럼 qf 사용 | ||
| - | # qf(alpha, df.between, df.within, lower.tail=F) 처럼 사용 | ||
| - | qf(ci, df.a, df.within) # or | ||
| - | qf(alpha, df.a, df.within, lower.tail = F) | ||
| - | # 혹은 | ||
| - | # qf(alpha, df.a, df.within, lower.tail = F) | ||
| - | # 도 마찬가지 | ||
| - | f.a.crit <- qf(ci, df.a, df.within) | ||
| - | f.a > f.a.crit # 만약 f.a가 크다면 | ||
| - | # 혹은 f.a 값에 해당하는 percentage를 구하면 | ||
| - | pf(f.a, df.a, df.within, lower.tail = F) | ||
| - | |||
| - | f.b | ||
| - | qf(ci, df.b, df.within) | ||
| - | f.b > qf(ci, df.b, df.within) | ||
| - | pf(f.b, df.b, df.within, lower.tail = F) | ||
| - | |||
| - | f.ab | ||
| - | qf(ci, df.ab, df.within) | ||
| - | f.ab > qf(ci, df.ab, df.within) | ||
| - | pf(f.ab, df.ab, df.within, lower.tail = F) | ||
| - | |||
| - | # aov result | ||
| - | aov.dat.all <- aov(y ~ a * b, data = dat) # anova test | ||
| - | summary(aov.dat.all) # summary of the test output | ||
| - | aov.dat.noint <- aov(y ~ a + b, data = dat) | ||
| - | # interaction이 갖는 SS와 df를 residuals이 갖음을 주목 | ||
| - | summary(aov.dat.noint) | ||
| - | aov.dat.all2 <- aov(y ~ a + b + a:b, data = dat) | ||
| - | summary(aov.dat.all2) | ||
| - | aov.dat.all # what is standard error of residuals | ||
| - | </ | ||
| - | |||
| - | ====== Output, two-way ANOVA ====== | ||
| - | < | ||
| - | > ################################################# | ||
| - | > # two-way anova | ||
| - | > # subject = factor(paste(' | ||
| - | > ################################################# | ||
| - | > | ||
| - | > n.a.group <- 3 # a treatment 숫자 | ||
| - | > n.b.group <- 2 # b 그룹 숫자 | ||
| - | > n.sub <- 30 # 총 샘플 숫자 | ||
| - | > n.sub/ | ||
| - | [1] 10 | ||
| - | > | ||
| - | > # 데이터 생성 | ||
| - | > set.seed(9) | ||
| - | > a <- gl(3, n.sub/ | ||
| - | > b <- gl(2, (n.sub/ | ||
| - | > a | ||
| - | [1] a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a2 a2 a2 a2 a2 a2 a2 a2 | ||
| - | [19] a2 a2 a3 a3 a3 a3 a3 a3 a3 a3 a3 a3 | ||
| - | Levels: a1 a2 a3 | ||
| - | > b | ||
| - | [1] b1 b1 b1 b1 b1 b2 b2 b2 b2 b2 b1 b1 b1 b1 b1 b2 b2 b2 | ||
| - | [19] b2 b2 b1 b1 b1 b1 b1 b2 b2 b2 b2 b2 | ||
| - | Levels: b1 b2 | ||
| - | > y <- rnorm(30, mean=10) + | ||
| - | + 3.14 * (a==' | ||
| - | + 1.43 * (a==' | ||
| - | > y | ||
| - | | ||
| - | [6] 11.953127 14.331987 13.121810 12.891915 12.777063 | ||
| - | [11] 11.277571 | ||
| - | [16] 9.160550 | ||
| - | [21] 11.756993 10.182252 | ||
| - | [26] 14.111990 11.652524 10.723328 11.847213 11.799557 | ||
| - | > | ||
| - | > dat <- data.frame(a, | ||
| - | > dat | ||
| - | a b y | ||
| - | 1 a1 b1 9.233204 | ||
| - | 2 a1 b1 9.183542 | ||
| - | 3 a1 b1 9.858465 | ||
| - | 4 a1 b1 9.722395 | ||
| - | 5 a1 b1 10.436307 | ||
| - | 6 a1 b2 11.953127 | ||
| - | 7 a1 b2 14.331987 | ||
| - | 8 a1 b2 13.121810 | ||
| - | 9 a1 b2 12.891915 | ||
| - | 10 a1 b2 12.777063 | ||
| - | 11 a2 b1 11.277571 | ||
| - | 12 a2 b1 9.531103 | ||
| - | 13 a2 b1 10.071054 | ||
| - | 14 a2 b1 9.733962 | ||
| - | 15 a2 b1 11.845257 | ||
| - | 16 a2 b2 9.160550 | ||
| - | 17 a2 b2 9.922552 | ||
| - | 18 a2 b2 7.382294 | ||
| - | 19 a2 b2 10.887884 | ||
| - | 20 a2 b2 9.292509 | ||
| - | 21 a3 b1 11.756993 | ||
| - | 22 a3 b1 10.182252 | ||
| - | 23 a3 b1 9.733111 | ||
| - | 24 a3 b1 10.926422 | ||
| - | 25 a3 b1 9.306668 | ||
| - | 26 a3 b2 14.111990 | ||
| - | 27 a3 b2 11.652524 | ||
| - | 28 a3 b2 10.723328 | ||
| - | 29 a3 b2 11.847213 | ||
| - | 30 a3 b2 11.799557 | ||
| - | > # aov.dat <- aov(y~a*b) # anova test | ||
| - | > # summary(aov.dat) # summary of the test output | ||
| - | > | ||
| - | > # hand calculation | ||
| - | > table(a,b) | ||
| - | b | ||
| - | a b1 b2 | ||
| - | a1 5 5 | ||
| - | a2 5 5 | ||
| - | a3 5 5 | ||
| - | > tapply(y, list(a,b), mean) # 각 셀에서의 평균 | ||
| - | b1 b2 | ||
| - | a1 9.686782 13.015181 | ||
| - | a2 10.491789 | ||
| - | a3 10.381089 12.026922 | ||
| - | > tapply(y, a, mean) | ||
| - | | ||
| - | 11.350981 | ||
| - | > tapply(y, b, mean) | ||
| - | b1 | ||
| - | 10.18655 11.45709 | ||
| - | > n.within.each <- tapply(y, list(a,b), length) # the same as table(a, b) | ||
| - | > n.within.each | ||
| - | b1 b2 | ||
| - | a1 5 5 | ||
| - | a2 5 5 | ||
| - | a3 5 5 | ||
| - | > df.within.each <- n.within.each - 1 # 각 셀에서의 샘플숫자 | ||
| - | > df.within.each | ||
| - | b1 b2 | ||
| - | a1 4 4 | ||
| - | a2 4 4 | ||
| - | a3 4 4 | ||
| - | > df.within <- sum(df.within.each) # df within | ||
| - | > df.within | ||
| - | [1] 24 | ||
| - | > | ||
| - | > var.within <- tapply(y, list(a,b), var) # var.within | ||
| - | > var.within | ||
| - | b1 b2 | ||
| - | a1 0.2628787 0.7362999 | ||
| - | a2 1.0308917 1.6504481 | ||
| - | a3 0.9510727 1.5677577 | ||
| - | > ss.within.each <- tapply(y, list(a,b), var) * df.within.each | ||
| - | > ss.within.each | ||
| - | | ||
| - | a1 1.051515 2.945200 | ||
| - | a2 4.123567 6.601793 | ||
| - | a3 3.804291 6.271031 | ||
| - | > ss.within <- sum(ss.within.each) # ss.within | ||
| - | > ss.within | ||
| - | [1] 24.7974 | ||
| - | > | ||
| - | > ms.within <- ss.within / df.within | ||
| - | > ms.within | ||
| - | [1] 1.033225 | ||
| - | > | ||
| - | > # interaction.plot(a, | ||
| - | > | ||
| - | > mean.a <- tapply(y, list(a), mean) | ||
| - | > mean.b <- tapply(y, list(b), mean) | ||
| - | > mean.a | ||
| - | | ||
| - | 11.350981 | ||
| - | > mean.b | ||
| - | b1 | ||
| - | 10.18655 11.45709 | ||
| - | > | ||
| - | > var.a <- tapply(y, list(a), var) | ||
| - | > var.b <- tapply(y, list(b), var) | ||
| - | > var.a | ||
| - | a1 | ||
| - | 3.521366 1.567182 1.871915 | ||
| - | > var.b | ||
| - | | ||
| - | 0.7773781 3.7300196 | ||
| - | > | ||
| - | > mean.tot <- mean(dat$y) | ||
| - | > var.tot <- var(dat$y) | ||
| - | > df.tot <- n.sub - 1 | ||
| - | > ss.tot <- var.tot * df.tot | ||
| - | > ms.tot <- ss.tot/ | ||
| - | > | ||
| - | > mean.tot | ||
| - | [1] 10.82182 | ||
| - | > var.tot | ||
| - | [1] 2.593465 | ||
| - | > df.tot | ||
| - | [1] 29 | ||
| - | > ss.tot | ||
| - | [1] 75.21048 | ||
| - | > ms.tot | ||
| - | [1] 2.593465 | ||
| - | > ss.tot/ | ||
| - | [1] 2.593465 | ||
| - | > | ||
| - | > | ||
| - | > ## between | ||
| - | > mean.each <- tapply(y, list(a,b), mean) | ||
| - | > mean.each | ||
| - | b1 b2 | ||
| - | a1 9.686782 13.015181 | ||
| - | a2 10.491789 | ||
| - | a3 10.381089 12.026922 | ||
| - | > mean.tot <- mean(y) | ||
| - | > mean.tot | ||
| - | [1] 10.82182 | ||
| - | > n.each <- tapply(y, list(a,b), length) | ||
| - | > n.a.each <- tapply(y, list(a), length) | ||
| - | > n.b.each <- tapply(y, list(b), length) | ||
| - | > n.each | ||
| - | b1 b2 | ||
| - | a1 5 5 | ||
| - | a2 5 5 | ||
| - | a3 5 5 | ||
| - | > n.a.each | ||
| - | a1 a2 a3 | ||
| - | 10 10 10 | ||
| - | > n.b.each | ||
| - | b1 b2 | ||
| - | 15 15 | ||
| - | > | ||
| - | > | ||
| - | > | ||
| - | > ss.bet <- sum(n.each*(mean.each-mean.tot)^2) | ||
| - | > ss.bet | ||
| - | [1] 50.41308 | ||
| - | > | ||
| - | > ss.tot | ||
| - | [1] 75.21048 | ||
| - | > ss.within | ||
| - | [1] 24.7974 | ||
| - | > ss.bet | ||
| - | [1] 50.41308 | ||
| - | > ss.bet + ss.within | ||
| - | [1] 75.21048 | ||
| - | > | ||
| - | > ss.a <- sum(n.a.each * ((mean.tot - mean.a)^2)) | ||
| - | > ss.b <- sum(n.b.each * ((mean.tot - mean.b)^2)) | ||
| - | > ss.a | ||
| - | [1] 12.5663 | ||
| - | > ss.b | ||
| - | [1] 12.10691 | ||
| - | > ss.ab <- ss.bet - (ss.a + ss.b) # ss.ab = ss.interaction | ||
| - | > ss.ab | ||
| - | [1] 25.73987 | ||
| - | > | ||
| - | > ss.tot | ||
| - | [1] 75.21048 | ||
| - | > ss.bet | ||
| - | [1] 50.41308 | ||
| - | > ss.within | ||
| - | [1] 24.7974 | ||
| - | > ss.a | ||
| - | [1] 12.5663 | ||
| - | > ss.b | ||
| - | [1] 12.10691 | ||
| - | > ss.ab | ||
| - | [1] 25.73987 | ||
| - | > | ||
| - | > df.tot <- n.sub - 1 | ||
| - | > df.bet <- (n.a.group * n.b.group) - 1 | ||
| - | > df.a <- n.a.group - 1 | ||
| - | > df.b <- n.b.group - 1 | ||
| - | > df.ab <- df.bet - (df.a + df.b) | ||
| - | > df.within <- sum(df.within.each) | ||
| - | > | ||
| - | > df.tot | ||
| - | [1] 29 | ||
| - | > df.bet | ||
| - | [1] 5 | ||
| - | > df.a | ||
| - | [1] 2 | ||
| - | > df.b | ||
| - | [1] 1 | ||
| - | > df.ab | ||
| - | [1] 2 | ||
| - | > df.within | ||
| - | [1] 24 | ||
| - | > | ||
| - | > ms.tot <- ss.tot / df.tot # we did it above | ||
| - | > ms.bet <- ss.bet / df.bet | ||
| - | > ms.a <- ss.a / df.a | ||
| - | > ms.b <- ss.b / df.b | ||
| - | > ms.ab <- ss.ab / df.ab | ||
| - | > ms.within <- ss.within / df.within | ||
| - | > | ||
| - | > ms.tot | ||
| - | [1] 2.593465 | ||
| - | > ms.bet | ||
| - | [1] 10.08262 | ||
| - | > ms.within | ||
| - | [1] 1.033225 | ||
| - | > ms.a | ||
| - | [1] 6.283151 | ||
| - | > ms.b | ||
| - | [1] 12.10691 | ||
| - | > ms.ab | ||
| - | [1] 12.86993 | ||
| - | > | ||
| - | > | ||
| - | > f.a <- ms.a / ms.within | ||
| - | > f.b <- ms.b / ms.within | ||
| - | > f.ab <- ms.ab / ms.within | ||
| - | > | ||
| - | > alpha <- .05 | ||
| - | > # confidence interval | ||
| - | > ci <- 1 - alpha | ||
| - | > | ||
| - | > f.a | ||
| - | [1] 6.081107 | ||
| - | > # 봐야할 F분포표에서의 F-value | ||
| - | > # qt 처럼 qf 사용 | ||
| - | > # qf(alpha, df.between, df.within, lower.tail=F) 처럼 사용 | ||
| - | > qf(ci, df.a, df.within) # or | ||
| - | [1] 3.402826 | ||
| - | > qf(alpha, df.a, df.within, lower.tail = F) | ||
| - | [1] 3.402826 | ||
| - | > # 혹은 | ||
| - | > # qf(alpha, df.a, df.within, lower.tail = F) | ||
| - | > # 도 마찬가지 | ||
| - | > f.a.crit <- qf(ci, df.a, df.within) | ||
| - | > f.a > f.a.crit # 만약 f.a가 크다면 | ||
| - | [1] TRUE | ||
| - | > # 혹은 f.a 값에 해당하는 percentage를 구하면 | ||
| - | > pf(f.a, df.a, df.within, lower.tail = F) | ||
| - | [1] 0.007302552 | ||
| - | > | ||
| - | > f.b | ||
| - | [1] 11.7176 | ||
| - | > qf(ci, df.b, df.within) | ||
| - | [1] 4.259677 | ||
| - | > f.b > qf(ci, df.b, df.within) | ||
| - | [1] TRUE | ||
| - | > pf(f.b, df.b, df.within, lower.tail = F) | ||
| - | [1] 0.00222738 | ||
| - | > | ||
| - | > f.ab | ||
| - | [1] 12.45608 | ||
| - | > qf(ci, df.ab, df.within) | ||
| - | [1] 3.402826 | ||
| - | > f.ab > qf(ci, df.ab, df.within) | ||
| - | [1] TRUE | ||
| - | > pf(f.ab, df.ab, df.within, lower.tail = F) | ||
| - | [1] 0.0001947745 | ||
| - | > | ||
| - | > # aov result | ||
| - | > aov.dat.all <- aov(y ~ a * b, data = dat) # anova test | ||
| - | > summary(aov.dat.all) # summary of the test output | ||
| - | Df Sum Sq Mean Sq F value | ||
| - | a 2 12.57 | ||
| - | b 1 12.11 12.107 | ||
| - | a:b 2 25.74 12.870 | ||
| - | Residuals | ||
| - | --- | ||
| - | Signif. codes: | ||
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | ||
| - | > aov.dat.noint <- aov(y ~ a + b, data = dat) | ||
| - | > # interaction이 갖는 SS와 df를 residuals이 갖음을 주목 | ||
| - | > summary(aov.dat.noint) | ||
| - | Df Sum Sq Mean Sq F value Pr(> | ||
| - | a 2 12.57 | ||
| - | b 1 12.11 12.107 | ||
| - | Residuals | ||
| - | --- | ||
| - | Signif. codes: | ||
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | ||
| - | > aov.dat.all2 <- aov(y ~ a + b + a:b, data = dat) | ||
| - | > summary(aov.dat.all2) | ||
| - | Df Sum Sq Mean Sq F value | ||
| - | a 2 12.57 | ||
| - | b 1 12.11 12.107 | ||
| - | a:b 2 25.74 12.870 | ||
| - | Residuals | ||
| - | --- | ||
| - | Signif. codes: | ||
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | ||
| - | > aov.dat.all # what is standard error of residuals | ||
| - | Call: | ||
| - | | ||
| - | |||
| - | Terms: | ||
| - | | ||
| - | Sum of Squares | ||
| - | Deg. of Freedom | ||
| - | |||
| - | Residual standard error: 1.016477 | ||
| - | Estimated effects may be unbalanced | ||
| - | > | ||
| - | > | ||
| - | > | ||
| - | </ | ||
c/ms/2025/w07.2_factorial_anova.1745190679.txt.gz · Last modified: by hkimscil
