This is an old revision of the document!

Twoway ANOVA

recap of oneway ANOVA

# One Way Anova (Completely Randomized Design)
fit <- aov(y ~ A, data=mydataframe)

Twoway ANOVA without interaction

# Randomized Block Design (B is the blocking factor) 
fit <- aov(y ~ A + B, data=mydataframe)

Twoway ANOVA with interaction

# Two Way Factorial Design 
fit <- aov(y ~ A + B + A:B, data=mydataframe)
fit <- aov(y ~ A*B, data=mydataframe) # same thing

E.g. 1

> delivery.df = data.frame(
     Service = c(rep("Carrier 1", 15), rep("Carrier 2", 15),
                 rep("Carrier 3", 15)),
     Destination = c(rep(c("Office 1", "Office 2", "Office 3",
                           "Office 4", "Office 5"), 9)),
     Time = c(15.23, 14.32, 14.77, 15.12, 14.05,
              15.48, 14.13, 14.46, 15.62, 14.23, 15.19, 14.67, 14.48, 15.34, 14.22,
              16.66, 16.27, 16.35, 16.93, 15.05, 16.98, 16.43, 15.95, 16.73, 15.62,
              16.53, 16.26, 15.69, 16.97, 15.37, 17.12, 16.65, 15.73, 17.77, 15.52,
              16.15, 16.86, 15.18, 17.96, 15.26, 16.36, 16.44, 14.82, 17.62, 15.04)
)
> delivery.df
     Service Destination  Time
1  Carrier 1    Office 1 15.23
2  Carrier 1    Office 2 14.32
3  Carrier 1    Office 3 14.77
4  Carrier 1    Office 4 15.12
5  Carrier 1    Office 5 14.05
6  Carrier 1    Office 1 15.48
7  Carrier 1    Office 2 14.13
8  Carrier 1    Office 3 14.46
9  Carrier 1    Office 4 15.62
10 Carrier 1    Office 5 14.23
11 Carrier 1    Office 1 15.19
12 Carrier 1    Office 2 14.67
13 Carrier 1    Office 3 14.48
14 Carrier 1    Office 4 15.34
15 Carrier 1    Office 5 14.22
16 Carrier 2    Office 1 16.66
17 Carrier 2    Office 2 16.27
18 Carrier 2    Office 3 16.35
19 Carrier 2    Office 4 16.93
20 Carrier 2    Office 5 15.05
21 Carrier 2    Office 1 16.98
22 Carrier 2    Office 2 16.43
23 Carrier 2    Office 3 15.95
24 Carrier 2    Office 4 16.73
25 Carrier 2    Office 5 15.62
26 Carrier 2    Office 1 16.53
27 Carrier 2    Office 2 16.26
28 Carrier 2    Office 3 15.69
29 Carrier 2    Office 4 16.97
30 Carrier 2    Office 5 15.37
31 Carrier 3    Office 1 17.12
32 Carrier 3    Office 2 16.65
33 Carrier 3    Office 3 15.73
34 Carrier 3    Office 4 17.77
35 Carrier 3    Office 5 15.52
36 Carrier 3    Office 1 16.15
37 Carrier 3    Office 2 16.86
38 Carrier 3    Office 3 15.18
39 Carrier 3    Office 4 17.96
40 Carrier 3    Office 5 15.26
41 Carrier 3    Office 1 16.36
42 Carrier 3    Office 2 16.44
43 Carrier 3    Office 3 14.82
44 Carrier 3    Office 4 17.62
45 Carrier 3    Office 5 15.04

> library(ggplot2)
> ggplot(delivery.df, aes(Time, Destination, colour = Service)) + geom_point()
>

> delivery.mod1 = aov(Time ~ Destination*Service, data = delivery.df)

> summary(delivery.mod1)
                    Df  Sum Sq Mean Sq  F value    Pr(>F)    
Destination          4 17.5415  4.3854  61.1553 5.408e-14 ***
Service              2 23.1706 11.5853 161.5599 < 2.2e-16 ***
Destination:Service  8  4.1888  0.5236   7.3018 2.360e-05 ***
Residuals           30  2.1513  0.0717                       
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

> TukeyHSD(delivery.mod1, which = "Service")
  Tukey multiple comparisons of means
    95% family-wise confidence level
 
Fit: aov(formula = Time ~ Destination * Service, data = delivery.df)
 
$Service
                        diff        lwr       upr     p adj
Carrier 2-Carrier 1 1.498667  1.2576092 1.7397241 0.0000000
Carrier 3-Carrier 1 1.544667  1.3036092 1.7857241 0.0000000
Carrier 3-Carrier 2 0.046000 -0.1950575 0.2870575 0.8856246

E.g. 2

plant.df = PlantGrowth
plant.df$group = factor(plant.df$group,
  labels = c("Control", "Treatment 1", "Treatment 2"))

require(ggplot2)
 
ggplot(plant.df, aes(x = group, y = weight)) +
  geom_boxplot(fill = "grey80", colour = "blue") +
  scale_x_discrete() + xlab("Treatment Group") +
  ylab("Dried weight of plants")

> plant.mod2 = aov(weight ~ group, data = plant.df)
> summary(plant.mod2)
            Df  Sum Sq Mean Sq F value  Pr(>F)  
group        2  3.7663  1.8832  4.8461 0.01591 *
Residuals   27 10.4921  0.3886                  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

> TukeyHSD(plant.mod2)
  Tukey multiple comparisons of means
    95% family-wise confidence level
 
Fit: aov(formula = weight ~ group, data = plant.df)
 
$group
                          diff        lwr       upr     p adj
Treatment 1-Control     -0.371 -1.0622161 0.3202161 0.3908711
Treatment 2-Control      0.494 -0.1972161 1.1852161 0.1979960
Treatment 2-Treatment 1  0.865  0.1737839 1.5562161 0.0120064

plot(TukeyHSD(plant.mod2))

3

basketball.txt

Time	Shoes	Made
Morning	Others	25
Morning	Others	26
Night	Others	27
Night	Others	27
Morning	Favorite	32
Morning	Favorite	22
Night	Favorite	30
Night	Favorite	34
Morning	Others	35
Morning	Others	34
Night	Others	33
Night	Others	30
Morning	Favorite	33
Morning	Favorite	37
Night	Favorite	36
Night	Favorite	38

> basketball <- read.table("http://commres.net/wiki/_export/code/r/twoway_anova?codeblock=11",header=TRUE)
> basketball
      Time    Shoes Made
1  Morning   Others   25
2  Morning   Others   26
3    Night   Others   27
4    Night   Others   27
5  Morning Favorite   32
6  Morning Favorite   22
7    Night Favorite   30
8    Night Favorite   34
9  Morning   Others   35
10 Morning   Others   34
11   Night   Others   33
12   Night   Others   30
13 Morning Favorite   33
14 Morning Favorite   37
15   Night Favorite   36
16   Night Favorite   38

> attach(basketball)
>
> b.model <- aov(Made~Time+Shoes)
> summary(b.model)
            Df Sum Sq Mean Sq F value Pr(>F)
Time         1   7.56    7.56   0.349  0.565
Shoes        1  39.06   39.06   1.802  0.202
Residuals   13 281.81   21.68   

> b.model2 <- aov(Made~Time*Shoes)
> summary(b.model2)
            Df Sum Sq Mean Sq F value Pr(>F)
Time         1   7.56    7.56   0.344  0.568
Shoes        1  39.06   39.06   1.777  0.207
Time:Shoes   1  18.06   18.06   0.822  0.382
Residuals   12 263.75   21.98

4 Interaction effects

Interaction effect example 1

Interaction effect example 2

Interaction effect example 3

Interaction effect example 4

5

download dataset_anova_twoWay_interactions.csv

data <- read.csv("http://commres.net/wiki/_media/r/dataset_anova_twoway_comparisons.csv")
> #display the data
> data
   Treatment   Age StressReduction
1     mental young              10
2     mental young               9
3     mental young               8
4     mental   mid               7
5     mental   mid               6
6     mental   mid               5
7     mental   old               4
8     mental   old               3
9     mental   old               2
10  physical young               9
11  physical young               8
12  physical young               7
13  physical   mid               6
14  physical   mid               5
15  physical   mid               4
16  physical   old               3
17  physical   old               2
18  physical   old               1
19   medical young               8
20   medical young               7
21   medical young               6
22   medical   mid               5
23   medical   mid               4
24   medical   mid               3
25   medical   old               2
26   medical   old               1
27   medical   old               0

> aov(d$StressReduction~d$Treatment*d$Age, d)
Call:
   aov(formula = d$StressReduction ~ d$Treatment * d$Age, data = d)

Terms:
                d$Treatment d$Age d$Treatment:d$Age Residuals
Sum of Squares           18   162                 0        18
Deg. of Freedom           2     2                 4        18

Residual standard error: 1
Estimated effects may be unbalanced
> a.mod <- aov(d$StressReduction~d$Treatment*d$Age, d)
> summary(a.mod)
                  Df Sum Sq Mean Sq F value  Pr(>F)    
d$Treatment        2     18       9       9 0.00195 ** 
d$Age              2    162      81      81   1e-09 ***
d$Treatment:d$Age  4      0       0       0 1.00000    
Residuals         18     18       1                    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
>

> pairwise.t.test(d$StressReduction, d$Treatment, p.adj = "none")

	Pairwise comparisons using t tests with pooled SD 

data:  d$StressReduction and d$Treatment 

         medical mental
mental   0.13    -     
physical 0.45    0.45  

P value adjustment method: none

> pairwise.t.test(d$StressReduction, d$Age, p.adj = "none")

	Pairwise comparisons using t tests with pooled SD 

data:  d$StressReduction and d$Age 

      mid     old    
old   2.5e-05 -      
young 2.5e-05 2.3e-10

P value adjustment method: none 
>

위는 아래의 linear model 을 이용하여도 가능. 사실 모든 ANOVA 테스트는 linear model이기도 함(lm)

anova(lm(d$StressReduction ~ d$Treatment * d$Age, d))
Analysis of Variance Table

Response: d$StressReduction
                  Df Sum Sq Mean Sq F value   Pr(>F)    
d$Treatment        2     18       9       9 0.001953 ** 
d$Age              2    162      81      81    1e-09 ***
d$Treatment:d$Age  4      0       0       0 1.000000    
Residuals         18     18       1                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
>

> pairwise.t.test(d$StressReduction, d$Treatment, p.adj = "none")

	Pairwise comparisons using t tests with pooled SD 

data:  d$StressReduction and d$Treatment 

         medical mental
mental   0.13    -     
physical 0.45    0.45  

P value adjustment method: none

> pairwise.t.test(d$StressReduction, d$Age, p.adj = "none")

	Pairwise comparisons using t tests with pooled SD 

data:  d$StressReduction and d$Age 

      mid     old    
old   2.5e-05 -      
young 2.5e-05 2.3e-10

P value adjustment method: none 
>

e.g., 6

tdata <- ToothGrowth
tdata
str(tdata)
tdata$dose <- factor(tdata$dose)
# or a nicer way
tdata$dose <- factor(tdata$dose, levels=c(0.5, 1, 2), labels=c("dhalf", "d1", "d2"))

tdata

table(tdata$supp, tdata$dose)

install.packages("ggpubr")

library("ggpubr")
ggboxplot(tdata, x = "dose", y = "len", color = "supp",
          palette = c("red", "blue"))
ggline(tdata, x = "dose", y = "len", color = "supp",
       add = c("mean_se", "dotplot"),
       palette = c("red", "blue"))

# prettier
boxplot(len ~ supp * dose, data=tdata, frame = FALSE, 
      col = c("#00AFBB", "#E7B800"), ylab="Tooth Length")

# or 
interaction.plot(x.factor = tdata$dose, trace.factor = 
      tdata$supp, response = tdata$len, fun = mean, 
      type = "b", legend = TRUE, 
      xlab = "Dose", ylab="Tooth Length",
      pch=c(1,19), col = c("#00AFBB", "#E7B800"))

res.aov2 <- aov(len ~ supp + dose, data = tdata)
summary(res.aov2)
            Df Sum Sq Mean Sq F value   Pr(>F)    
supp         1  205.4   205.4   14.02 0.000429 ***
dose         2 2426.4  1213.2   82.81  < 2e-16 ***
Residuals   56  820.4    14.7                     
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

res.aov3 <- aov(len ~ supp * dose, data = tdata)
res.aov3 <- aov(len ~ supp + dose + supp:dose, data = tdata)
summary(res.aov3)
            Df Sum Sq Mean Sq F value   Pr(>F)    
supp         1  205.4   205.4  15.572 0.000231 ***
dose         2 2426.4  1213.2  92.000  < 2e-16 ***
supp:dose    2  108.3    54.2   4.107 0.021860 *  
Residuals   54  712.1    13.2                     
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

From the ANOVA results, you can conclude the following, based on the p-values and a significance level of 0.05:

the p-value of supp is 0.000429 (significant), which indicates that the levels of supp are associated with significant different tooth length.
the p-value of dose is < 2e-16 (significant), which indicates that the levels of dose are associated with significant different tooth length.
the p-value for the interaction between supp*dose is 0.02 (significant), which indicates that the relationships between dose and tooth length depends on the supp method.

> TukeyHSD(res.aov3, which = "dose")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = len ~ supp * dose, data = tdata)

$dose
        diff       lwr       upr   p adj
d1-dh  9.130  6.362488 11.897512 0.0e+00
d2-dh 15.495 12.727488 18.262512 0.0e+00
d2-d1  6.365  3.597488  9.132512 2.7e-06

pairwise.t.test(tdata$len, tdata$dose, p.adjust.method = "BH")

plot(res.aov3, 1)

install.packages("car")
library(car)
leveneTest(len ~ supp*dose, data = tdata)

Levene's Test for Homogeneity of Variance (center = median)
      Df F value Pr(>F)
group  5  1.7086 0.1484
      54

p value is greater than .05, which indicates that there is no evidence of significant difference between variances of groups.

COMMunication
RESearch.NET

Table of Contents

Twoway ANOVA

E.g. 1

E.g. 2

3

4 Interaction effects

5

e.g., 6