> rm(list=ls())
> # set.seed(101)
> rnorm2 <- function(n,mean,sd){ mean+sd*scale(rnorm(n)) }
> ss <- function(x) {
+     sum((x-mean(x))^2)
+ }
> 
> n.o <- 30
> n.p <- 30
> o <- rnorm(n.o, 100, 10)
> p <- rnorm(n.p, 105, 10)
> t.test(o,p, var.equal=T)

	Two Sample t-test

data:  o and p
t = -3.3581, df = 58, p-value = 0.001391
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -14.78163  -3.74076
sample estimates:
mean of x mean of y 
 97.31987 106.58106 

> comb <- list(o = o, p = p)
> op <- stack(comb)
> head(op)
     values ind
1  83.05641   o
2 108.25078   o
3  90.81559   o
4  99.74236   o
5  82.61865   o
6  85.64129   o
> colnames(op)[1] <- "values"
> colnames(op)[2] <- "group"
> op$group <- factor(op$group)
> head(op)
     values group
1  83.05641     o
2 108.25078     o
3  90.81559     o
4  99.74236     o
5  82.61865     o
6  85.64129     o
> boxplot(op$values~op$group)
> 
> 

> plot(op$values~op$group)
> boxplot(op$values~op$group, main="values by group",
+         yaxt="n", xlab="value", horizontal=TRUE,
+         col=terrain.colors(2))
> abline(v=mean(op$values), col="red", lwd=3)
> legend("topleft", inset=.05, title="group",
+        c("o","p"), fill=terrain.colors(2), horiz=TRUE)
> 
> m.tot <- mean(op$values)
> m.o <- mean(o)
> m.p <- mean(p)
> 
> min.x <- min(op$values)
> max.x <- max(op$values)
> br <- seq(floor(min.x), ceiling(max.x), by = 1) 
> 

> hist(o, breaks=br, 
+      col=rgb(1,1,1,.5))
> abline(v=m.o, col="red", lwd=3)
> hist(p, add=T, breaks=br,
+      col=rgb(.5,1,1,.5))
> abline(v=m.p, col="blue", lwd=3)
> abline(v=m.tot, col='black', lwd=3)
> 
> ss.tot <- ss(op$values)
> df.tot <- length(op$values)-1
> ss.tot/df.tot
[1] 133.9582
> var(op$values)
[1] 133.9582
> ss.tot
[1] 7903.533
> 
> ss.o <- ss(o)
> ss.p <- ss(p)
> df.o <- length(o)-1
> df.p <- length(p)-1
> 
> m.tot
[1] 101.9505
> m.o
[1] 97.31987
> m.p
[1] 106.5811
> ss.o
[1] 3647.085
> ss.p
[1] 2969.903
> 
> ss.bet <- length(o)*(m.tot-m.o)^2+length(p)*(m.tot-m.p)^2
> ss.tot
[1] 7903.533
> ss.bet
[1] 1286.546
> ss.wit <- ss.o+ss.p 
> ss.wit
[1] 6616.987
> ss.bet+ss.wit
[1] 7903.533
> ss.tot
[1] 7903.533
> 
> df.tot <- length(op$values)-1
> df.bet <- nlevels(op$group) - 1
> df.wit <- (length(o)-1)+(length(p)-1)
> df.tot
[1] 59
> df.bet
[1] 1
> df.wit
[1] 58
> 
> ms.tot <- ss.tot / df.tot
> ms.bet <- ss.bet / df.bet
> ms.wit <- ss.wit / df.wit
> 
> f.cal <- ms.bet / ms.wit
> f.cal
[1] 11.27699
> p.val <- pf(f.cal, df1=df.bet, df2=df.wit, lower.tail = F)
> p.val
[1] 0.001390984
> summary(aov(op$values~op$group))
            Df Sum Sq Mean Sq F value  Pr(>F)   
op$group     1   1287  1286.5   11.28 0.00139 **
Residuals   58   6617   114.1                   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> t.test(o,p, var.equal = T)

	Two Sample t-test

data:  o and p
t = -3.3581, df = 58, p-value = 0.001391
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -14.78163  -3.74076
sample estimates:
mean of x mean of y 
 97.31987 106.58106 

> 
> diff <- m.o - m.p
> ssp <- (ss.o + ss.p) / (df.o + df.p)
> se <- sqrt(ssp/n.o+ssp/n.p)
> t.cal <- diff/se
> t.cal
[1] -3.358122
> p.t.cal <- pt(abs(t.cal), df=df.o+df.p, lower.tail = F)*2
> p.t.cal
[1] 0.001390984
> t.cal^2
[1] 11.27699
> f.cal
[1] 11.27699
> 
> df.bet
[1] 1
> df.wit
[1] 58
> f.cal
[1] 11.27699
> 
>