Differences

This shows you the differences between two versions of the page.

--- c:ms:2025:w07.2_factorial_anova [2025/04/21 08:43] – [Output, two-way ANOVA] hkimscil
+++ c:ms:2025:w07.2_factorial_anova [2025/05/07 07:25] (current) – hkimscil
@@ Line 2: / Line 2: @@
 Factorial ANOVA
 <code>
+rm(list=ls(all=TRUE))
 #################################################
 # two-way anova
@@ Line 7: / Line 9: @@
 #################################################
-n.a.group <- 3 # a treatment 숫자
+n.a.group <- 3 # a treatment 숫자 e.g: drug a1, a2, a3
-n.b.group <- 2 # b 그룹 숫자
+n.b.group <- 2 # b 그룹 숫자 e.g.: exercise no(b1), yes(b2)
 n.sub <- 30 # 총 샘플 숫자
 n.sub/n.a.group
@@ Line 14: / Line 16: @@
 # 데이터 생성
 set.seed(9)
-a <- gl(3, n.sub/n.a.group, n.sub, labels=c('a1', 'a2', 'a3'))
+a <- gl(n.a.group,
-b <- gl(2, (n.sub/n.a.group)/2, n.sub, labels=c('b1', 'b2'))
+        n.sub/n.a.group,
+        n.sub,
+        labels=c('drg1', 'drg2', 'drg3'))
+b <- gl(n.b.group,
+        (n.sub/n.a.group)/2,
+        n.sub,
+        labels=c('noex', 'exerc'))
 a
 b
 y <- rnorm(30, mean=10) +
-.14 * (a=='a1' & b=='b2') +
+.14 * (a=='drg1' & b=='exerc') +
-.43 * (a=='a3' & b=='b2')
+.43 * (a=='drg3' & b=='exerc')
 y
@@ Line 182: / Line 190: @@
 summary(aov.dat.all2)
 aov.dat.all # what is standard error of residuals
+# post-hoc test
+TukeyHSD(aov.dat.all)
+TukeyHSD(aov.dat.all, which = "a")
+TukeyHSD(aov.dat.all, which = "b")
+TukeyHSD(aov.dat.all, which = "a:b")
+boxplot(dat$y~dat$a)
+boxplot(dat$y~dat$b)
+plot(TukeyHSD(aov.dat.all, which = "a:b"),
+     las = 2 # rotate x-axis ticks
+)
+#
+#
+# see another e.g. at
+# https://statsandr.com/blog/two-way-anova-in-r/
+#
+#
 </code>
 ====== Output, two-way ANOVA ======
 <code>
+> rm(list=ls(all=TRUE))
+>
 > #################################################
 > # two-way anova
@@ Line 191: / Line 221: @@
 > #################################################
 >
-> n.a.group <- 3 # a treatment 숫자
+> n.a.group <- 3 # a treatment 숫자 e.g: drug a1, a2, a3
-> n.b.group <- 2 # b 그룹 숫자
+> n.b.group <- 2 # b 그룹 숫자 e.g.: exercise no(b1), yes(b2)
 > n.sub <- 30 # 총 샘플 숫자
 > n.sub/n.a.group
@@ Line 199: / Line 229: @@
 > # 데이터 생성
 > set.seed(9)
-> a <- gl(3, n.sub/n.a.group, n.sub, labels=c('a1', 'a2', 'a3'))
+> a <- gl(n.a.group,
-> b <- gl(2, (n.sub/n.a.group)/2, n.sub, labels=c('b1', 'b2'))
++         n.sub/n.a.group,
++         n.sub,
++         labels=c('drg1', 'drg2', 'drg3'))
+> b <- gl(n.b.group,
++         (n.sub/n.a.group)/2,
++         n.sub,
++         labels=c('noex', 'exerc'))
 > a
- [1] a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a2 a2 a2 a2 a2 a2 a2 a2
+ [1] drg1 drg1 drg1 drg1 drg1 drg1 drg1 drg1 drg1 drg1 drg2 drg2 drg2 drg2 drg2 drg2 drg2 drg2 drg2 drg2 drg3
-[19] a2 a2 a3 a3 a3 a3 a3 a3 a3 a3 a3 a3
+[22] drg3 drg3 drg3 drg3 drg3 drg3 drg3 drg3 drg3
-Levels: a1 a2 a3
+Levels: drg1 drg2 drg3
 > b
- [1] b1 b1 b1 b1 b1 b2 b2 b2 b2 b2 b1 b1 b1 b1 b1 b2 b2 b2
+ [1] noex  noex  noex  noex  noex  exerc exerc exerc exerc exerc noex  noex  noex  noex  noex  exerc exerc exerc
-[19] b2 b2 b1 b1 b1 b1 b1 b2 b2 b2 b2 b2
+[19] exerc exerc noex  noex  noex  noex  noex  exerc exerc exerc exerc exerc
-Levels: b1 b2
+Levels: noex exerc
 > y <- rnorm(30, mean=10) +
-+   3.14 * (a=='a1' & b=='b2') +
++   3.14 * (a=='drg1' & b=='exerc') +
-+   1.43 * (a=='a3' & b=='b2')
++   1.43 * (a=='drg3' & b=='exerc')
 > y
- [1]  9.233204  9.183542  9.858465  9.722395 10.436307
+ [1]  9.233204  9.183542  9.858465  9.722395 10.436307 11.953127 14.331987 13.121810 12.891915 12.777063
- [6] 11.953127 14.331987 13.121810 12.891915 12.777063
+[11] 11.277571  9.531103 10.071054  9.733962 11.845257  9.160550  9.922552  7.382294 10.887884  9.292509
-[11] 11.277571  9.531103 10.071054  9.733962 11.845257
+[21] 11.756993 10.182252  9.733111 10.926422  9.306668 14.111990 11.652524 10.723328 11.847213 11.799557
-[16]  9.160550  9.922552  7.382294 10.887884  9.292509
-[21] 11.756993 10.182252  9.733111 10.926422  9.306668
-[26] 14.111990 11.652524 10.723328 11.847213 11.799557
 >
 > dat <- data.frame(a, b, y)
 > dat
-    a  b         y
+      a     b         y
-  a1 b1  9.233204
+  drg1  noex  9.233204
-  a1 b1  9.183542
+  drg1  noex  9.183542
-  a1 b1  9.858465
+  drg1  noex  9.858465
-  a1 b1  9.722395
+  drg1  noex  9.722395
-  a1 b1 10.436307
+  drg1  noex 10.436307
-  a1 b2 11.953127
+  drg1 exerc 11.953127
-  a1 b2 14.331987
+  drg1 exerc 14.331987
-  a1 b2 13.121810
+  drg1 exerc 13.121810
-  a1 b2 12.891915
+  drg1 exerc 12.891915
- a1 b2 12.777063
+ drg1 exerc 12.777063
- a2 b1 11.277571
+ drg2  noex 11.277571
- a2 b1  9.531103
+ drg2  noex  9.531103
- a2 b1 10.071054
+ drg2  noex 10.071054
- a2 b1  9.733962
+ drg2  noex  9.733962
- a2 b1 11.845257
+ drg2  noex 11.845257
- a2 b2  9.160550
+ drg2 exerc  9.160550
- a2 b2  9.922552
+ drg2 exerc  9.922552
- a2 b2  7.382294
+ drg2 exerc  7.382294
- a2 b2 10.887884
+ drg2 exerc 10.887884
- a2 b2  9.292509
+ drg2 exerc  9.292509
- a3 b1 11.756993
+ drg3  noex 11.756993
- a3 b1 10.182252
+ drg3  noex 10.182252
- a3 b1  9.733111
+ drg3  noex  9.733111
- a3 b1 10.926422
+ drg3  noex 10.926422
- a3 b1  9.306668
+ drg3  noex  9.306668
- a3 b2 14.111990
+ drg3 exerc 14.111990
- a3 b2 11.652524
+ drg3 exerc 11.652524
- a3 b2 10.723328
+ drg3 exerc 10.723328
- a3 b2 11.847213
+ drg3 exerc 11.847213
- a3 b2 11.799557
+ drg3 exerc 11.799557
 > # aov.dat <- aov(y~a*b) # anova test
 > # summary(aov.dat) # summary of the test output
@@ Line 258: / Line 291: @@
 > # hand calculation
 > table(a,b)
-    b
+      b
-a    b1 b2
+a      noex exerc
-  a1  5  5
+  drg1    5     5
-  a2  5  5
+  drg2    5     5
-  a3  5  5
+  drg3    5     5
 > tapply(y, list(a,b), mean) # 각 셀에서의 평균
-          b1        b2
+          noex     exerc
-a1  9.686782 13.015181
+drg1  9.686782 13.015181
-a2 10.491789  9.329158
+drg2 10.491789  9.329158
-a3 10.381089 12.026922
+drg3 10.381089 12.026922
 > tapply(y, a, mean)
-       a1        a2        a3
+     drg1      drg2      drg3
 .350981  9.910474 11.204006
 > tapply(y, b, mean)
-      b1       b2
+    noex    exerc
 .18655 11.45709
 > n.within.each <- tapply(y, list(a,b), length) # the same as table(a, b)
 > n.within.each
-   b1 b2
+     noex exerc
-a1  5  5
+drg1    5     5
-a2  5  5
+drg2    5     5
-a3  5  5
+drg3    5     5
 > df.within.each <- n.within.each - 1  # 각 셀에서의 샘플숫자
 > df.within.each
-   b1 b2
+     noex exerc
-a1  4  4
+drg1    4     4
-a2  4  4
+drg2    4     4
-a3  4  4
+drg3    4     4
 > df.within <- sum(df.within.each) # df within
 > df.within
@@ Line 292: / Line 325: @@
 > var.within <- tapply(y, list(a,b), var) # var.within
 > var.within
-          b1        b2
+          noex     exerc
-a1 0.2628787 0.7362999
+drg1 0.2628787 0.7362999
-a2 1.0308917 1.6504481
+drg2 1.0308917 1.6504481
-a3 0.9510727 1.5677577
+drg3 0.9510727 1.5677577
 > ss.within.each <- tapply(y, list(a,b), var) * df.within.each
 > ss.within.each
-         b1       b2
+         noex    exerc
-a1 1.051515 2.945200
+drg1 1.051515 2.945200
-a2 4.123567 6.601793
+drg2 4.123567 6.601793
-a3 3.804291 6.271031
+drg3 3.804291 6.271031
 > ss.within <- sum(ss.within.each) # ss.within
 > ss.within
@@ Line 315: / Line 348: @@
 > mean.b <- tapply(y, list(b), mean)
 > mean.a
-       a1        a2        a3
+     drg1      drg2      drg3
 .350981  9.910474 11.204006
 > mean.b
-      b1       b2
+    noex    exerc
 .18655 11.45709
 >
@@ Line 324: / Line 357: @@
 > var.b <- tapply(y, list(b), var)
 > var.a
-      a1       a2       a3
+    drg1     drg2     drg3
 .521366 1.567182 1.871915
 > var.b
-       b1        b2
+     noex     exerc
 .7773781 3.7300196
 >
@@ Line 353: / Line 386: @@
 > mean.each <- tapply(y, list(a,b), mean)
 > mean.each
-          b1        b2
+          noex     exerc
-a1  9.686782 13.015181
+drg1  9.686782 13.015181
-a2 10.491789  9.329158
+drg2 10.491789  9.329158
-a3 10.381089 12.026922
+drg3 10.381089 12.026922
 > mean.tot <- mean(y)
 > mean.tot
@@ Line 364: / Line 397: @@
 > n.b.each <- tapply(y, list(b), length)
 > n.each
-   b1 b2
+     noex exerc
-a1  5  5
+drg1    5     5
-a2  5  5
+drg2    5     5
-a3  5  5
+drg3    5     5
 > n.a.each
-a1 a2 a3
+drg1 drg2 drg3
-10 10
+   10   10
 > n.b.each
-b1 b2
+ noex exerc
-15
+    15
 >
 >
@@ Line 508: / Line 541: @@
 Residuals   24  24.80   1.033
 ---
-Signif. codes:
+Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
 > aov.dat.noint <- aov(y ~ a + b, data = dat)
 > # interaction이 갖는 SS와 df를 residuals이 갖음을 주목
@@ Line 518: / Line 550: @@
 Residuals   26  50.54   1.944
 ---
-Signif. codes:
+Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
 > aov.dat.all2 <- aov(y ~ a + b + a:b, data = dat)
 > summary(aov.dat.all2)
@@ Line 528: / Line 559: @@
 Residuals   24  24.80   1.033
 ---
-Signif. codes:
+Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
 > aov.dat.all # what is standard error of residuals
 Call:
@@ Line 542: / Line 572: @@
 Estimated effects may be unbalanced
 >
+> # post-hoc test
+> TukeyHSD(aov.dat.all)
+  Tukey multiple comparisons of means
+% family-wise confidence level
+Fit: aov(formula = y ~ a * b, data = dat)
+$a
+                diff       lwr        upr     p adj
+drg2-drg1 -1.4405079 -2.575730 -0.3052857 0.0111258
+drg3-drg1 -0.1469757 -1.282198  0.9882466 0.9441388
+drg3-drg2  1.2935323  0.158310  2.4287545 0.0233996
+$b
+               diff       lwr     upr     p adj
+exerc-noex 1.270533 0.5044869 2.03658 0.0022274
+$`a:b`
+                            diff        lwr        upr     p adj
+drg2:noex-drg1:noex    0.8050068 -1.1827224  2.7927360 0.8069478
+drg3:noex-drg1:noex    0.6943068 -1.2934224  2.6820359 0.8845124
+drg1:exerc-drg1:noex   3.3283980  1.3406689  5.3161272 0.0003437
+drg2:exerc-drg1:noex  -0.3576246 -2.3453538  1.6301046 0.9929417
+drg3:exerc-drg1:noex   2.3401400  0.3524108  4.3278691 0.0145532
+drg3:noex-drg2:noex   -0.1107000 -2.0984292  1.8770291 0.9999759
+drg1:exerc-drg2:noex   2.5233913  0.5356621  4.5111204 0.0074152
+drg2:exerc-drg2:noex  -1.1626314 -3.1503606  0.8250978 0.4792630
+drg3:exerc-drg2:noex   1.5351332 -0.4525960  3.5228624 0.2000613
+drg1:exerc-drg3:noex   2.6340913  0.6463621  4.6218205 0.0048987
+drg2:exerc-drg3:noex  -1.0519314 -3.0396605  0.9357978 0.5839905
+drg3:exerc-drg3:noex   1.6458332 -0.3418960  3.6335624 0.1464756
+drg2:exerc-drg1:exerc -3.6860226 -5.6737518 -1.6982935 0.0000874
+drg3:exerc-drg1:exerc -0.9882581 -2.9759873  0.9994711 0.6448824
+drg3:exerc-drg2:exerc  2.6977646  0.7100354  4.6854937 0.0038521
+> TukeyHSD(aov.dat.all, which = "a")
+  Tukey multiple comparisons of means
+% family-wise confidence level
+Fit: aov(formula = y ~ a * b, data = dat)
+$a
+                diff       lwr        upr     p adj
+drg2-drg1 -1.4405079 -2.575730 -0.3052857 0.0111258
+drg3-drg1 -0.1469757 -1.282198  0.9882466 0.9441388
+drg3-drg2  1.2935323  0.158310  2.4287545 0.0233996
+> TukeyHSD(aov.dat.all, which = "b")
+  Tukey multiple comparisons of means
+% family-wise confidence level
+Fit: aov(formula = y ~ a * b, data = dat)
+$b
+               diff       lwr     upr     p adj
+exerc-noex 1.270533 0.5044869 2.03658 0.0022274
+> TukeyHSD(aov.dat.all, which = "a:b")
+  Tukey multiple comparisons of means
+% family-wise confidence level
+Fit: aov(formula = y ~ a * b, data = dat)
+$`a:b`
+                            diff        lwr        upr     p adj
+drg2:noex-drg1:noex    0.8050068 -1.1827224  2.7927360 0.8069478
+drg3:noex-drg1:noex    0.6943068 -1.2934224  2.6820359 0.8845124
+drg1:exerc-drg1:noex   3.3283980  1.3406689  5.3161272 0.0003437
+drg2:exerc-drg1:noex  -0.3576246 -2.3453538  1.6301046 0.9929417
+drg3:exerc-drg1:noex   2.3401400  0.3524108  4.3278691 0.0145532
+drg3:noex-drg2:noex   -0.1107000 -2.0984292  1.8770291 0.9999759
+drg1:exerc-drg2:noex   2.5233913  0.5356621  4.5111204 0.0074152
+drg2:exerc-drg2:noex  -1.1626314 -3.1503606  0.8250978 0.4792630
+drg3:exerc-drg2:noex   1.5351332 -0.4525960  3.5228624 0.2000613
+drg1:exerc-drg3:noex   2.6340913  0.6463621  4.6218205 0.0048987
+drg2:exerc-drg3:noex  -1.0519314 -3.0396605  0.9357978 0.5839905
+drg3:exerc-drg3:noex   1.6458332 -0.3418960  3.6335624 0.1464756
+drg2:exerc-drg1:exerc -3.6860226 -5.6737518 -1.6982935 0.0000874
+drg3:exerc-drg1:exerc -0.9882581 -2.9759873  0.9994711 0.6448824
+drg3:exerc-drg2:exerc  2.6977646  0.7100354  4.6854937 0.0038521
+> boxplot(dat$y~dat$a)
+> boxplot(dat$y~dat$b)
+> plot(TukeyHSD(aov.dat.all, which = "a:b"),
++      las = 2 # rotate x-axis ticks
++ )
 >
 >
+> #
+> #
+> # see another e.g. at
+> # https://statsandr.com/blog/two-way-anova-in-r/
+> #
+> #
+>
+>
 </code>
+====== output interpretation ======
 {{:c:ms:2025:pasted:20250421-082701.png}}
 {{:c:ms:2025:pasted:20250421-082710.png}}
-{{:c:ms:2025:pasted:20250421-084309.png}}
+{{:c:ms:2025:pasted:20250421-084815.png}}
-{{:c:ms:2025:pasted:20250421-083739.png}}
+{{:c:ms:2025:pasted:20250430-125904.png}}
+{{:c:ms:2025:pasted:20250430-124435.png}}
+상호작용효과 포함 해석
+  * drug1의 효과에 운동의 역할은 아주 중요한 것으로 밝혀졌다.
+  * 반면에 drug2, 3에 한해서는 운동의 역할은 제한적이었다.
+  * 특히 drg3의 경우 운동이 효과를 증대시키기는 하였지만 그것이 통계학적으로 의미있는 것은 아니었다.
+  * 또한 drg2의 경우 운동이 오히려 효과를 감소하는 것으로 나타났다. 그렇지만 이 또한 통계학적으로 의미가 있는 것은 아니었다.
+  * 이를 통해서 drg1을 처치받는 환자의 경우에는 운동을 꼭 하도록 하여야 함을 알 수 있고
+  * drg2의 경우에는 그 효과가 미미하므로 권장을 하는 정도가 필요함을 알 수 있었다.
+혹은
+  * drg1과 운동이 병행 된 경우에는 대부분의 다른 처방보다 월등한 효과가 있는 것으로 밝혀졌다.
+  * 운동이 병행된 drg1과 drg3의  효과에는 차이가 없었다.
+  * drg2의 운동유무, 그리고 drg3의 운동을 하지않음과 통계학적으로 유의미한 차이가 있었다.
+{{:c:ms:2025:pasted:20250430-130525.png}}