User Tools

Site Tools


c:ms:2025:w07.2_factorial_anova

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
Next revision
Previous revision
c:ms:2025:w07.2_factorial_anova [2025/04/20 23:27] – [Output, two-way ANOVA] hkimscilc:ms:2025:w07.2_factorial_anova [2026/04/20 04:04] (current) hkimscil
Line 1: Line 1:
-====== Two-way ANOVA ====== +~~REDIRECT>:Factorial Anova~~
-Factorial ANOVA +
-<code> +
-################################################# +
-# two-way anova +
-# subject = factor(paste('sub', 1:30, sep='')) +
-################################################# +
- +
-n.a.group <- 3 # a treatment 숫자 +
-n.b.group <- 2 # b 그룹 숫자 +
-n.sub <- 30 # 총 샘플 숫자 +
-n.sub/n.a.group +
- +
-# 데이터 생성 +
-set.seed(9) +
-a <- gl(3, n.sub/n.a.group, n.sub, labels=c('a1', 'a2', 'a3')) +
-b <- gl(2, (n.sub/n.a.group)/2, n.sub, labels=c('b1', 'b2')) +
-+
-+
-y <- rnorm(30, mean=10) +  +
-  3.14 * (a=='a1' & b=='b2') +  +
-  1.43 * (a=='a3' & b=='b2')  +
-+
- +
-dat <- data.frame(a, b, y) +
-dat +
-# aov.dat <- aov(y~a*b) # anova test +
-# summary(aov.dat) # summary of the test output +
- +
-# hand calculation +
-table(a,b) +
-tapply(y, list(a,b), mean) # 각 셀에서의 평균 +
-tapply(y, a, mean) +
-tapply(y, b, mean) +
-n.within.each <- tapply(y, list(a,b), length) # the same as table(a, b) +
-n.within.each +
-df.within.each <- n.within.each - 1  # 각 셀에서의 샘플숫자 +
-df.within.each +
-df.within <- sum(df.within.each) # df within +
-df.within +
- +
-var.within <- tapply(y, list(a,b), var) # var.within +
-var.within +
-ss.within.each <- tapply(y, list(a,b), var) * df.within.each +
-ss.within.each +
-ss.within <- sum(ss.within.each) # ss.within +
-ss.within +
- +
-ms.within <- ss.within / df.within +
-ms.within +
- +
-# interaction.plot(a,b,y) +
- +
-mean.a <- tapply(y, list(a), mean) +
-mean.b <- tapply(y, list(b), mean) +
-mean.a +
-mean.b +
- +
-var.a <- tapply(y, list(a), var) +
-var.b <- tapply(y, list(b), var) +
-var.a +
-var.b +
- +
-mean.tot <- mean(dat$y) +
-var.tot <- var(dat$y) +
-df.tot <- n.sub - 1  +
-ss.tot <- var.tot * df.tot +
-ms.tot <- ss.tot/df.tot +
- +
-mean.tot +
-var.tot +
-df.tot +
-ss.tot +
-ms.tot +
-ss.tot/df.tot +
- +
- +
-## between +
-mean.each <- tapply(y, list(a,b), mean) +
-mean.each  +
-mean.tot <- mean(y) +
-mean.tot +
-n.each <- tapply(y, list(a,b), length) +
-n.a.each <- tapply(y, list(a), length) +
-n.b.each <- tapply(y, list(b), length) +
-n.each +
-n.a.each +
-n.b.each +
- +
- +
- +
-ss.bet <- sum(n.each*(mean.each-mean.tot)^2) +
-ss.bet +
- +
-ss.tot +
-ss.within +
-ss.bet +
-ss.bet + ss.within +
- +
-ss.a <- sum(n.a.each * ((mean.tot - mean.a)^2)) +
-ss.b <- sum(n.b.each * ((mean.tot - mean.b)^2)) +
-ss.a +
-ss.b +
-ss.ab <- ss.bet - (ss.a + ss.b) # ss.ab = ss.interaction +
-ss.ab +
- +
-ss.tot +
-ss.bet +
-ss.within +
-ss.a +
-ss.b +
-ss.ab +
- +
-df.tot <- n.sub - 1 +
-df.bet <- (n.a.group * n.b.group) - 1 +
-df.a <- n.a.group - 1 +
-df.b <- n.b.group - 1 +
-df.ab <- df.bet - (df.a + df.b) +
-df.within <- sum(df.within.each) +
- +
-df.tot +
-df.bet +
-df.a +
-df.b +
-df.ab +
-df.within +
- +
-ms.tot <- ss.tot / df.tot # we did it above  +
-ms.bet <- ss.bet / df.bet +
-ms.a <- ss.a / df.a +
-ms.b <- ss.b / df.b +
-ms.ab <- ss.ab / df.ab +
-ms.within <- ss.within / df.within +
- +
-ms.tot +
-ms.bet +
-ms.within +
-ms.a +
-ms.b +
-ms.ab +
- +
- +
-f.a <- ms.a / ms.within +
-f.b <- ms.b / ms.within +
-f.ab <- ms.ab / ms.within +
- +
-alpha <- .05 +
-# confidence interval +
-ci <- 1 - alpha +
- +
-f.a +
-# 봐야할 F분포표에서의 F-value +
-# qt 처럼 qf 사용 +
-# qf(alpha, df.between, df.within, lower.tail=F) 처럼 사용 +
-qf(ci, df.a, df.within) # or +
-qf(alpha, df.a, df.within, lower.tail = F) +
-# 혹은  +
-# qf(alpha, df.a, df.within, lower.tail = F) +
-# 도 마찬가지 +
-f.a.crit <- qf(ci, df.a, df.within)  +
-f.a > f.a.crit # 만약 f.a가 크다면  +
-# 혹은 f.a 값에 해당하는 percentage를 구하면 +
-pf(f.a, df.a, df.within, lower.tail = F) +
- +
-f.b +
-qf(ci, df.b, df.within) +
-f.b > qf(ci, df.b, df.within) +
-pf(f.b, df.b, df.within, lower.tail = F) +
- +
-f.ab +
-qf(ci, df.ab, df.within) +
-f.ab > qf(ci, df.ab, df.within) +
-pf(f.ab, df.ab, df.within, lower.tail = F) +
- +
-# aov result +
-aov.dat.all <- aov(y a * b, data = dat) # anova test +
-summary(aov.dat.all) # summary of the test output +
-aov.dat.noint <- aov(y ~ a + b, data = dat) +
-# interaction이 갖는 SS와 df를 residuals이 갖음을 주목 +
-summary(aov.dat.noint)  +
-aov.dat.all2 <- aov(y ~ a + b + a:b, data = dat) +
-summary(aov.dat.all2) +
-aov.dat.all # what is standard error of residuals  +
-</code> +
- +
-====== Output, two-way ANOVA ====== +
-<code> +
-> ################################################# +
-> # two-way anova +
-> # subject = factor(paste('sub', 1:30, sep='')) +
-> ################################################# +
->  +
-> n.a.group <- 3 # a treatment 숫자 +
-> n.b.group <- 2 # b 그룹 숫자 +
-> n.sub <- 30 # 총 샘플 숫자 +
-> n.sub/n.a.group +
-[1] 10 +
->  +
-> # 데이터 생성 +
-> set.seed(9) +
-> a <- gl(3, n.sub/n.a.group, n.sub, labels=c('a1', 'a2', 'a3')) +
-> b <- gl(2, (n.sub/n.a.group)/2, n.sub, labels=c('b1', 'b2')) +
-> a +
- [1] a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a2 a2 a2 a2 a2 a2 a2 a2 +
-[19] a2 a2 a3 a3 a3 a3 a3 a3 a3 a3 a3 a3 +
-Levels: a1 a2 a3 +
-> b +
- [1] b1 b1 b1 b1 b1 b2 b2 b2 b2 b2 b1 b1 b1 b1 b1 b2 b2 b2 +
-[19] b2 b2 b1 b1 b1 b1 b1 b2 b2 b2 b2 b2 +
-Levels: b1 b2 +
-> y <- rnorm(30, mean=10) +  +
-+   3.14 * (a=='a1' & b=='b2') +  +
-+   1.43 * (a=='a3' & b=='b2')  +
-> y +
- [1]  9.233204  9.183542  9.858465  9.722395 10.436307 +
- [6] 11.953127 14.331987 13.121810 12.891915 12.777063 +
-[11] 11.277571  9.531103 10.071054  9.733962 11.845257 +
-[16]  9.160550  9.922552  7.382294 10.887884  9.292509 +
-[21] 11.756993 10.182252  9.733111 10.926422  9.306668 +
-[26] 14.111990 11.652524 10.723328 11.847213 11.799557 +
->  +
-> dat <- data.frame(a, b, y) +
-> dat +
-    a  b         y +
-1  a1 b1  9.233204 +
-2  a1 b1  9.183542 +
-3  a1 b1  9.858465 +
-4  a1 b1  9.722395 +
-5  a1 b1 10.436307 +
-6  a1 b2 11.953127 +
-7  a1 b2 14.331987 +
-8  a1 b2 13.121810 +
-9  a1 b2 12.891915 +
-10 a1 b2 12.777063 +
-11 a2 b1 11.277571 +
-12 a2 b1  9.531103 +
-13 a2 b1 10.071054 +
-14 a2 b1  9.733962 +
-15 a2 b1 11.845257 +
-16 a2 b2  9.160550 +
-17 a2 b2  9.922552 +
-18 a2 b2  7.382294 +
-19 a2 b2 10.887884 +
-20 a2 b2  9.292509 +
-21 a3 b1 11.756993 +
-22 a3 b1 10.182252 +
-23 a3 b1  9.733111 +
-24 a3 b1 10.926422 +
-25 a3 b1  9.306668 +
-26 a3 b2 14.111990 +
-27 a3 b2 11.652524 +
-28 a3 b2 10.723328 +
-29 a3 b2 11.847213 +
-30 a3 b2 11.799557 +
-> # aov.dat <- aov(y~a*b) # anova test +
-> # summary(aov.dat) # summary of the test output +
->  +
-> # hand calculation +
-> table(a,b) +
-    b +
-a    b1 b2 +
-  a1  5  5 +
-  a2  5  5 +
-  a3  5  5 +
-> tapply(y, list(a,b), mean) # 각 셀에서의 평균 +
-          b1        b2 +
-a1  9.686782 13.015181 +
-a2 10.491789  9.329158 +
-a3 10.381089 12.026922 +
-> tapply(y, a, mean) +
-       a1        a2        a3  +
-11.350981  9.910474 11.204006  +
-> tapply(y, b, mean) +
-      b1       b2  +
-10.18655 11.45709  +
-> n.within.each <- tapply(y, list(a,b), length) # the same as table(a, b) +
-> n.within.each +
-   b1 b2 +
-a1  5  5 +
-a2  5  5 +
-a3  5  5 +
-> df.within.each <- n.within.each - 1  # 각 셀에서의 샘플숫자 +
-> df.within.each +
-   b1 b2 +
-a1  4  4 +
-a2  4  4 +
-a3  4  4 +
-> df.within <- sum(df.within.each) # df within +
-> df.within +
-[1] 24 +
->  +
-> var.within <- tapply(y, list(a,b), var) # var.within +
-> var.within +
-          b1        b2 +
-a1 0.2628787 0.7362999 +
-a2 1.0308917 1.6504481 +
-a3 0.9510727 1.5677577 +
-> ss.within.each <- tapply(y, list(a,b), var) * df.within.each +
-> ss.within.each +
-         b1       b2 +
-a1 1.051515 2.945200 +
-a2 4.123567 6.601793 +
-a3 3.804291 6.271031 +
-> ss.within <- sum(ss.within.each) # ss.within +
-> ss.within +
-[1] 24.7974 +
->  +
-> ms.within <- ss.within / df.within +
-> ms.within +
-[1] 1.033225 +
->  +
-> # interaction.plot(a,b,y) +
->  +
-> mean.a <- tapply(y, list(a), mean) +
-> mean.b <- tapply(y, list(b), mean) +
-> mean.a +
-       a1        a2        a3  +
-11.350981  9.910474 11.204006  +
-> mean.b +
-      b1       b2  +
-10.18655 11.45709  +
->  +
-> var.a <- tapply(y, list(a), var) +
-> var.b <- tapply(y, list(b), var) +
-> var.a +
-      a1       a2       a3  +
-3.521366 1.567182 1.871915  +
-> var.b +
-       b1        b2  +
-0.7773781 3.7300196  +
->  +
-> mean.tot <- mean(dat$y) +
-> var.tot <- var(dat$y) +
-> df.tot <- n.sub - 1  +
-> ss.tot <- var.tot * df.tot +
-> ms.tot <- ss.tot/df.tot +
->  +
-> mean.tot +
-[1] 10.82182 +
-> var.tot +
-[1] 2.593465 +
-> df.tot +
-[1] 29 +
-> ss.tot +
-[1] 75.21048 +
-> ms.tot +
-[1] 2.593465 +
-> ss.tot/df.tot +
-[1] 2.593465 +
->  +
->  +
-> ## between +
-> mean.each <- tapply(y, list(a,b), mean) +
-> mean.each  +
-          b1        b2 +
-a1  9.686782 13.015181 +
-a2 10.491789  9.329158 +
-a3 10.381089 12.026922 +
-> mean.tot <- mean(y) +
-> mean.tot +
-[1] 10.82182 +
-> n.each <- tapply(y, list(a,b), length) +
-> n.a.each <- tapply(y, list(a), length) +
-> n.b.each <- tapply(y, list(b), length) +
-> n.each +
-   b1 b2 +
-a1  5  5 +
-a2  5  5 +
-a3  5  5 +
-> n.a.each +
-a1 a2 a3  +
-10 10 10  +
-> n.b.each +
-b1 b2  +
-15 15  +
->  +
->  +
->  +
-> ss.bet <- sum(n.each*(mean.each-mean.tot)^2) +
-> ss.bet +
-[1] 50.41308 +
->  +
-> ss.tot +
-[1] 75.21048 +
-> ss.within +
-[1] 24.7974 +
-> ss.bet +
-[1] 50.41308 +
-> ss.bet + ss.within +
-[1] 75.21048 +
->  +
-> ss.a <- sum(n.a.each * ((mean.tot - mean.a)^2)) +
-> ss.b <- sum(n.b.each * ((mean.tot - mean.b)^2)) +
-> ss.a +
-[1] 12.5663 +
-> ss.b +
-[1] 12.10691 +
-> ss.ab <- ss.bet - (ss.a + ss.b) # ss.ab = ss.interaction +
-> ss.ab +
-[1] 25.73987 +
->  +
-> ss.tot +
-[1] 75.21048 +
-> ss.bet +
-[1] 50.41308 +
-> ss.within +
-[1] 24.7974 +
-> ss.a +
-[1] 12.5663 +
-> ss.b +
-[1] 12.10691 +
-> ss.ab +
-[1] 25.73987 +
->  +
-> df.tot <- n.sub - 1 +
-> df.bet <- (n.a.group * n.b.group) - 1 +
-> df.a <- n.a.group - 1 +
-> df.b <- n.b.group - 1 +
-> df.ab <- df.bet - (df.a + df.b) +
-> df.within <- sum(df.within.each) +
->  +
-> df.tot +
-[1] 29 +
-> df.bet +
-[1] 5 +
-> df.a +
-[1] 2 +
-> df.b +
-[1] 1 +
-> df.ab +
-[1] 2 +
-> df.within +
-[1] 24 +
->  +
-> ms.tot <- ss.tot / df.tot # we did it above  +
-> ms.bet <- ss.bet / df.bet +
-> ms.a <- ss.a / df.a +
-> ms.b <- ss.b / df.b +
-> ms.ab <- ss.ab / df.ab +
-> ms.within <- ss.within / df.within +
->  +
-> ms.tot +
-[1] 2.593465 +
-> ms.bet +
-[1] 10.08262 +
-> ms.within +
-[1] 1.033225 +
-> ms.a +
-[1] 6.283151 +
-> ms.b +
-[1] 12.10691 +
-> ms.ab +
-[1] 12.86993 +
->  +
->  +
-> f.a <- ms.a / ms.within +
-> f.b <- ms.b / ms.within +
-> f.ab <- ms.ab / ms.within +
->  +
-> alpha <- .05 +
-> # confidence interval +
-> ci <- 1 - alpha +
->  +
-> f.a +
-[1] 6.081107 +
-> # 봐야할 F분포표에서의 F-value +
-> # qt 처럼 qf 사용 +
-> # qf(alpha, df.between, df.within, lower.tail=F) 처럼 사용 +
-> qf(ci, df.a, df.within) # or +
-[1] 3.402826 +
-> qf(alpha, df.a, df.within, lower.tail = F) +
-[1] 3.402826 +
-> # 혹은  +
-> # qf(alpha, df.a, df.within, lower.tail = F) +
-> # 도 마찬가지 +
-> f.a.crit <- qf(ci, df.a, df.within)  +
-> f.a > f.a.crit # 만약 f.a가 크다면  +
-[1] TRUE +
-> # 혹은 f.a 값에 해당하는 percentage를 구하면 +
-> pf(f.a, df.a, df.within, lower.tail = F) +
-[1] 0.007302552 +
->  +
-> f.b +
-[1] 11.7176 +
-> qf(ci, df.b, df.within) +
-[1] 4.259677 +
-> f.b > qf(ci, df.b, df.within) +
-[1] TRUE +
-> pf(f.b, df.b, df.within, lower.tail = F) +
-[1] 0.00222738 +
->  +
-> f.ab +
-[1] 12.45608 +
-> qf(ci, df.ab, df.within) +
-[1] 3.402826 +
-> f.ab > qf(ci, df.ab, df.within) +
-[1] TRUE +
-> pf(f.ab, df.ab, df.within, lower.tail = F) +
-[1] 0.0001947745 +
->  +
-> # aov result +
-> aov.dat.all <- aov(y a * b, data = dat) # anova test +
-> summary(aov.dat.all) # summary of the test output +
-            Df Sum Sq Mean Sq F value   Pr(>F)     +
-a            2  12.57   6.283   6.081 0.007303 **  +
-b            1  12.11  12.107  11.718 0.002227 **  +
-a:b          2  25.74  12.870  12.456 0.000195 *** +
-Residuals   24  24.80   1.033                      +
---- +
-Signif. codes:   +
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 +
-> aov.dat.noint <- aov(y ~ a + b, data = dat) +
-> # interaction이 갖는 SS와 df를 residuals이 갖음을 주목 +
-> summary(aov.dat.noint)  +
-            Df Sum Sq Mean Sq F value Pr(>F)   +
-a            2  12.57   6.283   3.233 0.0558 . +
-b            1  12.11  12.107   6.229 0.0192 * +
-Residuals   26  50.54   1.944                  +
---- +
-Signif. codes:   +
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 +
-> aov.dat.all2 <- aov(y ~ a + b + a:b, data = dat) +
-> summary(aov.dat.all2) +
-            Df Sum Sq Mean Sq F value   Pr(>F)     +
-a            2  12.57   6.283   6.081 0.007303 **  +
-b            1  12.11  12.107  11.718 0.002227 **  +
-a:b          2  25.74  12.870  12.456 0.000195 *** +
-Residuals   24  24.80   1.033                      +
---- +
-Signif. codes:   +
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 +
-> aov.dat.all # what is standard error of residuals  +
-Call: +
-   aov(formula = y ~ a * b, data = dat) +
- +
-Terms: +
-                              b      a:b Residuals +
-Sum of Squares  12.56630 12.10691 25.73987  24.79740 +
-Deg. of Freedom        2        1        2        24 +
- +
-Residual standard error: 1.016477 +
-Estimated effects may be unbalanced +
->  +
->  +
->  +
-</code> +
-{{:c:ms:2025:pasted:20250421-082701.png}} +
-{{:c:ms:2025:pasted:20250421-082710.png}} +
-{{:c:ms:2025:pasted:20250421-082636.png?500}}+
c/ms/2025/w07.2_factorial_anova.1745191666.txt.gz · Last modified: by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki