r:twoway_anova
This is an old revision of the document!
Table of Contents
Twoway ANOVA
recap of oneway ANOVA
# One Way Anova (Completely Randomized Design) fit <- aov(y ~ A, data=mydataframe)
Twoway ANOVA without interaction
# Randomized Block Design (B is the blocking factor) fit <- aov(y ~ A + B, data=mydataframe)
Twoway ANOVA with interaction
# Two Way Factorial Design fit <- aov(y ~ A + B + A:B, data=mydataframe) fit <- aov(y ~ A*B, data=mydataframe) # same thing
E.g. 1
> delivery.df = data.frame( Service = c(rep("Carrier 1", 15), rep("Carrier 2", 15), rep("Carrier 3", 15)), Destination = c(rep(c("Office 1", "Office 2", "Office 3", "Office 4", "Office 5"), 9)), Time = c(15.23, 14.32, 14.77, 15.12, 14.05, 15.48, 14.13, 14.46, 15.62, 14.23, 15.19, 14.67, 14.48, 15.34, 14.22, 16.66, 16.27, 16.35, 16.93, 15.05, 16.98, 16.43, 15.95, 16.73, 15.62, 16.53, 16.26, 15.69, 16.97, 15.37, 17.12, 16.65, 15.73, 17.77, 15.52, 16.15, 16.86, 15.18, 17.96, 15.26, 16.36, 16.44, 14.82, 17.62, 15.04) ) > delivery.df Service Destination Time 1 Carrier 1 Office 1 15.23 2 Carrier 1 Office 2 14.32 3 Carrier 1 Office 3 14.77 4 Carrier 1 Office 4 15.12 5 Carrier 1 Office 5 14.05 6 Carrier 1 Office 1 15.48 7 Carrier 1 Office 2 14.13 8 Carrier 1 Office 3 14.46 9 Carrier 1 Office 4 15.62 10 Carrier 1 Office 5 14.23 11 Carrier 1 Office 1 15.19 12 Carrier 1 Office 2 14.67 13 Carrier 1 Office 3 14.48 14 Carrier 1 Office 4 15.34 15 Carrier 1 Office 5 14.22 16 Carrier 2 Office 1 16.66 17 Carrier 2 Office 2 16.27 18 Carrier 2 Office 3 16.35 19 Carrier 2 Office 4 16.93 20 Carrier 2 Office 5 15.05 21 Carrier 2 Office 1 16.98 22 Carrier 2 Office 2 16.43 23 Carrier 2 Office 3 15.95 24 Carrier 2 Office 4 16.73 25 Carrier 2 Office 5 15.62 26 Carrier 2 Office 1 16.53 27 Carrier 2 Office 2 16.26 28 Carrier 2 Office 3 15.69 29 Carrier 2 Office 4 16.97 30 Carrier 2 Office 5 15.37 31 Carrier 3 Office 1 17.12 32 Carrier 3 Office 2 16.65 33 Carrier 3 Office 3 15.73 34 Carrier 3 Office 4 17.77 35 Carrier 3 Office 5 15.52 36 Carrier 3 Office 1 16.15 37 Carrier 3 Office 2 16.86 38 Carrier 3 Office 3 15.18 39 Carrier 3 Office 4 17.96 40 Carrier 3 Office 5 15.26 41 Carrier 3 Office 1 16.36 42 Carrier 3 Office 2 16.44 43 Carrier 3 Office 3 14.82 44 Carrier 3 Office 4 17.62 45 Carrier 3 Office 5 15.04 > library(ggplot2) > ggplot(delivery.df, aes(Time, Destination, colour = Service)) + geom_point() >
> delivery.mod1 = aov(Time ~ Destination*Service, data = delivery.df)
> summary(delivery.mod1) Df Sum Sq Mean Sq F value Pr(>F) Destination 4 17.5415 4.3854 61.1553 5.408e-14 *** Service 2 23.1706 11.5853 161.5599 < 2.2e-16 *** Destination:Service 8 4.1888 0.5236 7.3018 2.360e-05 *** Residuals 30 2.1513 0.0717 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> TukeyHSD(delivery.mod1, which = "Service") Tukey multiple comparisons of means 95% family-wise confidence level Fit: aov(formula = Time ~ Destination * Service, data = delivery.df) $Service diff lwr upr p adj Carrier 2-Carrier 1 1.498667 1.2576092 1.7397241 0.0000000 Carrier 3-Carrier 1 1.544667 1.3036092 1.7857241 0.0000000 Carrier 3-Carrier 2 0.046000 -0.1950575 0.2870575 0.8856246
E.g. 2
plant.df = PlantGrowth plant.df$group = factor(plant.df$group, labels = c("Control", "Treatment 1", "Treatment 2"))
require(ggplot2) ggplot(plant.df, aes(x = group, y = weight)) + geom_boxplot(fill = "grey80", colour = "blue") + scale_x_discrete() + xlab("Treatment Group") + ylab("Dried weight of plants")
> plant.mod2 = aov(weight ~ group, data = plant.df) > summary(plant.mod2) Df Sum Sq Mean Sq F value Pr(>F) group 2 3.7663 1.8832 4.8461 0.01591 * Residuals 27 10.4921 0.3886 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> TukeyHSD(plant.mod2) Tukey multiple comparisons of means 95% family-wise confidence level Fit: aov(formula = weight ~ group, data = plant.df) $group diff lwr upr p adj Treatment 1-Control -0.371 -1.0622161 0.3202161 0.3908711 Treatment 2-Control 0.494 -0.1972161 1.1852161 0.1979960 Treatment 2-Treatment 1 0.865 0.1737839 1.5562161 0.0120064 plot(TukeyHSD(plant.mod2))
3
- basketball.txt
Time Shoes Made Morning Others 25 Morning Others 26 Night Others 27 Night Others 27 Morning Favorite 32 Morning Favorite 22 Night Favorite 30 Night Favorite 34 Morning Others 35 Morning Others 34 Night Others 33 Night Others 30 Morning Favorite 33 Morning Favorite 37 Night Favorite 36 Night Favorite 38
> basketball <- read.table("http://commres.net/wiki/_export/code/r/twoway_anova?codeblock=11",header=TRUE) > basketball Time Shoes Made 1 Morning Others 25 2 Morning Others 26 3 Night Others 27 4 Night Others 27 5 Morning Favorite 32 6 Morning Favorite 22 7 Night Favorite 30 8 Night Favorite 34 9 Morning Others 35 10 Morning Others 34 11 Night Others 33 12 Night Others 30 13 Morning Favorite 33 14 Morning Favorite 37 15 Night Favorite 36 16 Night Favorite 38 > attach(basketball) > > b.model <- aov(Made~Time+Shoes) > summary(b.model) Df Sum Sq Mean Sq F value Pr(>F) Time 1 7.56 7.56 0.349 0.565 Shoes 1 39.06 39.06 1.802 0.202 Residuals 13 281.81 21.68 > b.model2 <- aov(Made~Time*Shoes) > summary(b.model2) Df Sum Sq Mean Sq F value Pr(>F) Time 1 7.56 7.56 0.344 0.568 Shoes 1 39.06 39.06 1.777 0.207 Time:Shoes 1 18.06 18.06 0.822 0.382 Residuals 12 263.75 21.98
4 Interaction effects
5
download dataset_anova_twoWay_interactions.csv
data <- read.csv("http://commres.net/wiki/_media/r/dataset_anova_twoway_comparisons.csv") > #display the data > data Treatment Age StressReduction 1 mental young 10 2 mental young 9 3 mental young 8 4 mental mid 7 5 mental mid 6 6 mental mid 5 7 mental old 4 8 mental old 3 9 mental old 2 10 physical young 9 11 physical young 8 12 physical young 7 13 physical mid 6 14 physical mid 5 15 physical mid 4 16 physical old 3 17 physical old 2 18 physical old 1 19 medical young 8 20 medical young 7 21 medical young 6 22 medical mid 5 23 medical mid 4 24 medical mid 3 25 medical old 2 26 medical old 1 27 medical old 0
> aov(d$StressReduction~d$Treatment*d$Age, d) Call: aov(formula = d$StressReduction ~ d$Treatment * d$Age, data = d) Terms: d$Treatment d$Age d$Treatment:d$Age Residuals Sum of Squares 18 162 0 18 Deg. of Freedom 2 2 4 18 Residual standard error: 1 Estimated effects may be unbalanced > a.mod <- aov(d$StressReduction~d$Treatment*d$Age, d) > summary(a.mod) Df Sum Sq Mean Sq F value Pr(>F) d$Treatment 2 18 9 9 0.00195 ** d$Age 2 162 81 81 1e-09 *** d$Treatment:d$Age 4 0 0 0 1.00000 Residuals 18 18 1 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 >
> pairwise.t.test(d$StressReduction, d$Treatment, p.adj = "none") Pairwise comparisons using t tests with pooled SD data: d$StressReduction and d$Treatment medical mental mental 0.13 - physical 0.45 0.45 P value adjustment method: none
> pairwise.t.test(d$StressReduction, d$Age, p.adj = "none") Pairwise comparisons using t tests with pooled SD data: d$StressReduction and d$Age mid old old 2.5e-05 - young 2.5e-05 2.3e-10 P value adjustment method: none >
위는 아래의 linear model 을 이용하여도 가능. 사실 모든 ANOVA 테스트는 linear model이기도 함(lm)
anova(lm(d$StressReduction ~ d$Treatment * d$Age, d)) Analysis of Variance Table Response: d$StressReduction Df Sum Sq Mean Sq F value Pr(>F) d$Treatment 2 18 9 9 0.001953 ** d$Age 2 162 81 81 1e-09 *** d$Treatment:d$Age 4 0 0 0 1.000000 Residuals 18 18 1 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 >
> pairwise.t.test(d$StressReduction, d$Treatment, p.adj = "none") Pairwise comparisons using t tests with pooled SD data: d$StressReduction and d$Treatment medical mental mental 0.13 - physical 0.45 0.45 P value adjustment method: none
> pairwise.t.test(d$StressReduction, d$Age, p.adj = "none") Pairwise comparisons using t tests with pooled SD data: d$StressReduction and d$Age mid old old 2.5e-05 - young 2.5e-05 2.3e-10 P value adjustment method: none >
e.g., 6
tdata <- ToothGrowth tdata str(tdata) tdata$dose <- factor(tdata$dose) # or a nicer way tdata$dose <- factor(tdata$dose, levels=c(0.5, 1, 2), labels=c("dhalf", "d1", "d2")) tdata table(tdata$supp, tdata$dose) install.packages("ggpubr") library("ggpubr") ggboxplot(tdata, x = "dose", y = "len", color = "supp", palette = c("red", "blue")) ggline(tdata, x = "dose", y = "len", color = "supp", add = c("mean_se", "dotplot"), palette = c("red", "blue")) # prettier boxplot(len ~ supp * dose, data=tdata, frame = FALSE, col = c("#00AFBB", "#E7B800"), ylab="Tooth Length") # or interaction.plot(x.factor = tdata$dose, trace.factor = tdata$supp, response = tdata$len, fun = mean, type = "b", legend = TRUE, xlab = "Dose", ylab="Tooth Length", pch=c(1,19), col = c("#00AFBB", "#E7B800"))
r/twoway_anova.1509929691.txt.gz · Last modified: 2017/11/06 09:24 by hkimscil