t-test sum up

rs.hypothesis.testing

rm(list=ls())
rnorm2 <- function(n,mean,sd){ mean+sd*scale(rnorm(n)) }
ss <- function(x) { sum((x-mean(x))^2) }
z.test <- function(sam, mu, sigma) {
  diff <-mean(sam) - mu
  se <- sigma / sqrt(length(sam))
  z.cal <- diff / se
  p.val <- pnorm(abs(z.cal), lower.tail = F)*2
  two <- abs(qnorm(.05/2))
  return(cat(
    " z value:", round(z.cal,5),  '\n', 
    "p value:", round(p.val,8), '\n',
    "diff:   ", mean(sam), "-", mu, "=", mean(sam)-mu, '\n',
    "se:     ", se, '\n',
    "95% CI: ", c(mu-two*se, mu+two*se)   ) )
}

################################
set.seed(1001)
N.p <- 1000000
m.p <- 100
sd.p <- 10

p1 <- rnorm2(N.p, m.p, sd.p)
p2 <- rnorm2(N.p, m.p+6, sd.p)

################################
sz <- 16
iter <- 100000
# n = 16 일 때의 p1에 대한 sampling dist 은 아래 시뮬레이션으로
# 구해볼 수 있다.
means <- rep(NA, iter)
for (i in 1:iter) {
  s1 <- sample(p1, sz, replace = T)
  means[i] <- mean(s1)
}
mean(means)
var(means)
sd(means)

# CLT에 의하면 위이 값은
m.means <- mean(p1)
ms.means <- c(var(p1)/sz)
sd.means <- c(sqrt(var(p1)/sz))
m.means
ms.means
sd.means

# 위 집합을 표준화하게 되면 그 집합의 평균과
# 표준편차는 0, 1이 (분산도 1) 된다.
m.zmeans <- 0
ms.zmeans <- 1
sd.zmeans <- 1

sd.means
# p2에서 구하는 샘플링 디스트리뷰션의 표준점수를
# p1의 표준점수에 (0, 1) 비교해서 배치하자면 
z.p2 <- c((mean(p2)-mean(p1))/sd.means) # 표준점수 평균  
sd.means <- c(sqrt(var(p1)/sz)) # 표준편차 

curve(dnorm(x), from = -4, to = z.p2+4, 
      main = "normalized distribution of sample \n means from p1 and p2 (n=10)", 
      ylab = "Density", xlab = "z-value", col = "black", lwd = 2)
curve(dnorm(x-c(z.p2)), from = z.p2-3, to = z.p2+3, add = T,
      main = "", 
      ylab = "Density", xlab = "z-value", col = "blue", lwd = 2, lty=2)
abline(v=0, col='black', lwd=2)
abline(v=z.p2, col='blue', lwd=2)
text(x=0, y=.1, label=paste(round(0, 4)), pos=4)
text(x=z.p2, y=.1, label=paste(round(z.p2, 4)), pos=4)

#######################################
#######################################
lo1 <- qnorm(.32/2)
hi1 <- -lo1
lo2 <- qnorm(.05/2)
hi2 <- -lo2
lo3 <- qnorm(.01/2)
hi3 <- -lo3
c(lo1,hi1)
c(lo2,hi2)
c(lo3,hi3)

curve(dnorm(x), from = -4, to = 2+4, 
      main = "normalized distribution of sample means from p1", 
      ylab = "Density", xlab = "z-value", col = "black", lwd = 2)
abline(v=0, col="black", lwd=2)
abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3),
       col=c("red","red", "blue", "blue", "orange", "orange"), 
       lwd=2)
text(x=hi1, y=.2, label=paste(round(hi1,3), "(1)", "\n","86%"), pos=4)
text(x=hi2, y=.15, label=paste(round(hi2,3),"(2)", "\n","95%"), pos=4)
text(x=hi3, y=.1, label=paste(round(hi3,3), "(3)", "\n","99%"), pos=4)

mean.of.sample.a <- m.means+ 1.5*sd.means
mean.of.sample.a
diff <- (mean.of.sample.a - m.means)
se.z <- sd(p1)/sqrt(sz)
diff
se.z
z.score  <- diff / se.z
z.score
prob <- pnorm(z.score, lower.tail = F)*2
prob

curve(dnorm(x), from = -4, to = z.p2+4, 
      main = "normalized distribution of sample means from p1 with z score 1.5", 
      ylab = "Density", xlab = "z-value", col = "black", lwd = 2)
abline(v=0, col="black", lwd=2)
abline(v=c(lo2, hi2), col=c("blue"), lwd=2)
abline(v=z.score,col='red', lwd=2)
abline(v=-z.score, col='red', lwd=2)
text(x=z.score, y=0.2, 
     label = paste(" z score =", z.score, 
                   "\n", "pnorm(-z.score)*2 =", round(prob,5)), 
     pos=4, col='red')

# 새로운 UI로 게임을 하도록 한 후
# UI점수를 sz 명에게 구했다고 가정하고
# 새로운 UI점수가 기존의 p1 paramter와
# 다른지 테스트 해보라 

# z-test 가설검증에 실패하는 경우, 사실은 성공해야 하는데
# p2의 평균이 106점이고 p1은 100점이니 p2에서 sample을 취
# 하면 샘플의 평균과 p1의 평균은 다르다고 판단될 것이다.
# 아래는 그럼에도 불구하고 실패하는 경우이다.
set.seed(110)
smp <- sample(p2, sz, replace=T)
m.smp <- mean(smp)
m.smp
diff <- m.smp - mean(p1)
se.z <- sqrt(var(p1)/sz)
z.cal1 <- diff / se.z
prob1 <- pnorm(abs(z.cal1), lower.tail = F)*2
print(c(z.cal1, sz, prob1))
z.test(smp, mean(p1), sd(p1))

curve(dnorm(x), from = -4, to = z.p2+4, 
      main = "normalized distribution of sample means 
      testing with a sample from p2 (failed)", 
      ylab = "Density", xlab = "z-value", col = "black", lwd = 2)
abline(v=0, col="black", lwd=2)
abline(v=c(lo2, hi2), col=c("blue"), lwd=2)
abline(v=z.cal1,col='red', lwd=2)
abline(v=-z.cal1, col='red', lwd=2)
text(x=z.cal1, y=0.2, 
     label = paste(" z score =", round(z.cal1,4), 
                   "\n", "pnorm(-z.cal1)*2 =", round(prob1,4)), 
     pos=4, col='red')


# 같은 방법으로 했는데 성공한 경우
set.seed(211)
smp <- sample(p2,sz,replace=T)
m.smp <- mean(smp)
m.smp
diff <- m.smp - mean(p1)
se.z <- sqrt(var(p1)/sz)
z.cal2 <- diff / se.z
prob2 <- pnorm(abs(z.cal2), lower.tail = F)*2
print(c(z.cal2, sz, prob2))
z.test(smp, mean(p1), sd(p1))

z.p2 <- (mean(p2)-mean(p1))/se.z
z.p2
curve(dnorm(x), from = -5, to = z.p2+5, 
      main = "normalized distribution of sample means \n testing with a sample from p2 (succeeded)", 
      ylab = "Density", xlab = "z-value", col = "black", lwd = 2)
abline(v=0, col='black', lwd=2)
z.cal1
z.cal2
two <- qnorm(.05/2)
two
abline(v=c(two, -two), col='black', lwd=2)
abline(v=c(-z.cal2, z.cal2), col='green', lwd=2)
text(x=z.cal2, y=.3, 
     label=paste("z.cal =", round(z.cal2,4), "\n",
                 "pnorm(-z.cal2)*2 =", "\n", round(prob2,5)),
     col="darkgreen", cex=1, pos=4)
text(x=-z.cal2, y=.3, 
     label=paste(round(-z.cal2,4)),
     col="darkgreen", cex=1, pos=2)


# type i and type ii error
two <- qnorm(.05/2)
two

curve(dnorm(x), from = -4.7, to = z.p2+4, 
      main = "Distribution Curve", 
      ylab = "Density", xlab = "z-value", col = "black", lwd = 2)
curve(dnorm(x-(z.p2)), from = z.p2-3, to = z.p2+3, add = T,
      main = "Distribution Curve", 
      ylab = "Density", xlab = "z-value", col = "blue", lwd = 2, lty=2)
abline(v=0, col='black', lwd=2)
abline(v=c(two, -two), col='black', lwd=2)
abline(v=c(-z.cal1, z.cal1), col='red', lwd=2)
text(x=z.cal1, y=.1, 
     label=paste("z.cal =", round(z.cal1,4), "\n",
                 "pnorm(-z.cal1)*2 =", "\n", round(prob1,3)),
     col="red", cex=1, pos=4)
text(x=-z.cal1, y=.1, 
     label=paste(round(-z.cal1,4)),
     col="red", cex=1, pos=2)

abline(v=c(-z.cal2, z.cal2), col='green', lwd=2)
text(x=z.cal2, y=.3, 
     label=paste("z.cal =", round(z.cal2,4), "\n",
                 "pnorm(-z.cal2)*2 =", "\n", round(prob2,5)),
     col="darkgreen", cex=1, pos=4)
text(x=-z.cal2, y=.3, 
     label=paste(round(-z.cal2,4)),
     col="darkgreen", cex=1, pos=2)


############################
# one sample t-test
############################
set.seed(99)
sz <- 20
smp <- sample(p2, sz, replace = T)
m.smp <- mean(smp)
diff <- m.smp - mean(p1)
se.z <- sqrt(var(smp)/sz)
df.smp <- sz - 1 
t.cal <- (diff/se.z)
prob <- pt(t.cal, df.smp, lower.tail = F)*2
se.z
qt(.05/2, df.smp)
lo2 <- qt(.05/2, df.smp)
hi2 <- -lo2
m.smp+lo2*se.z

curve(dt(x, df.smp), from = -6, to = 7, 
      main = "t distribution", 
      ylab = "Density", xlab = "t-value", col = "black", lwd = 2)
abline(v=0, col="black", lwd=2)
abline(v=c(lo2, hi2), col="green", lwd=2)
abline(v=c(-t.cal, t.cal), col="red", lwd=2)
text(x=t.cal,y=.2, 
     label=paste("t =", round(t.cal,3),
                 "\n", "df =", df.smp, 
                 "\n", "p value =", round(prob, 5)),
     pos = 4, col="red", cex=1)

prob

print(c(t.cal, df.smp, prob))
print(c(m.smp+lo2*se.z, m.smp+hi2*se.z))
cat(" t =", t.cal, ", df =", round(df.smp,0), ", p-value =", prob, 
"\n", "95% confidence interval =", m.smp+lo2*se.z, m.smp+hi2*se.z)
t.test(smp, mu=mean(p1))

#################################
# t-test 2 group 
#################################
set.seed(169)
sz.a <- 25
sz.b <- 25
group.a <- sample(p1, sz.a)
group.b <- sample(p2, sz.b)
m.a <- mean(group.a)
m.b <- mean(group.b)
ss.a <- ss(group.a)
ss.b <- ss(group.b)
df.a <- sz.a - 1
df.b <- sz.b - 1
df <- df.a+df.b
ss.a
ss.b 
df.a
df.b

pooled.v <- (ss.a+ss.b)/(df.a+df.b)
pooled.v
se.s <- sqrt(pooled.v/sz.a+pooled.v/sz.b)
se.s
diff <- m.a-m.b
t.cal <- diff/se.s
t.cal

prob <- pt(abs(t.cal), df=df, lower.tail = F)*2 

t.cal
df
prob

t.test(group.a, group.b, var.equal = T)

lo2 <- qt(.05/2, df)
hi2 <- -lo2 
c(lo2, hi2)

curve(dt(x, df=df), from = -6, to = 6, 
      main = "t distribution curve", 
      ylab = "Density", xlab = "t-value", col = "black", lwd = 2)
abline(v=0, col="black", lwd=2)
abline(v=c(lo2, hi2), col=c("blue"), lwd=2)
abline(v=c(t.cal, -t.cal), col='red', lwd=2)
text(x=t.cal, y=0.2, 
     label = paste(" t =", round(t.cal, 4), 
                   "\n", 
                   "df =", df, "\n",
                   "pt(-t.cal)*2 =", round(prob, 4)), 
     pos=4, col='red')

print(paste(t.cal, df, prob))
t.test(group.a, group.b, var.equal = T)
t.cal

######################
# 4번째 케이스 t-test
######################
set.seed(37)
sz <- 40
time.a <- sample(p1,sz)
time.b <- sample(p2,sz)
diff.time <- time.a-time.b
m.a <- mean(time.a)
m.b <- mean(time.b)
diff <- m.a-m.b
diff
se.s <- sd(diff.time)/sqrt(sz)
t.cal <- diff/se.s
df <- sz - 1
prob <- pt(abs(t.cal), df=sz-1, lower.tail = F)*2
t.cal
df
prob
diff

m.diff.time <- mean(diff.time)
se.s

t.test(time.a, time.b, paired=T)

m.diff.time 
se.s
lo1 <- qt(0.32/2,sz-1)
hi1 <- -lo1
lo2 <- qt(0.05/2,sz-1)
hi2 <- -lo2
lo3 <- qt(0.01/2, sz-1)
hi3 <- -lo3

curve(dt(x, df=sz-1), from = -6, to = 7, 
      main = "t distribution", 
      ylab = "Density", xlab = "t-value", col = "black", lwd = 2)

abline(v=0, col="black", lwd=2)
abline(v=c(lo2, hi2), col="green", lwd=2)
abline(v=c(t.cal,-t.cal), col="red", lwd=2)
text(x=-t.cal, y=.2, label=paste("t = ", round(-t.cal,4), 
     "\n", "df =", sz-1, "\n", "p-value =", round(prob,5)),
     col="red", pos=4)
text(x=t.cal, y=.2, label=c(round(t.cal,3)), col="red", pos=2)

cat(t.cal, sz-1, prob)

ro.hypothesis.testing

> rm(list=ls())
> rnorm2 <- function(n,mean,sd){ mean+sd*scale(rnorm(n)) }
> ss <- function(x) { sum((x-mean(x))^2) }
> z.test <- function(sam, mu, sigma) {
+   diff <-mean(sam) - mu
+   se <- sigma / sqrt(length(sam))
+   z.cal <- diff / se
+   p.val <- pnorm(abs(z.cal), lower.tail = F)*2
+   two <- abs(qnorm(.05/2))
+   return(cat(
+     " z value:", round(z.cal,5),  '\n', 
+     "p value:", round(p.val,8), '\n',
+     "diff:   ", mean(sam), "-", mu, "=", mean(sam)-mu, '\n',
+     "se:     ", se, '\n',
+     "95% CI: ", c(mu-two*se, mu+two*se)   ) )
+ }
> 
> ################################
> set.seed(1001)
> N.p <- 1000000
> m.p <- 100
> sd.p <- 10
> 
> p1 <- rnorm2(N.p, m.p, sd.p)
> p2 <- rnorm2(N.p, m.p+6, sd.p)
> 
> ################################
> sz <- 16
> iter <- 100000
> # n = 16 일 때의 p1에 대한 sampling dist 은 아래 시뮬레이션으로
> # 구해볼 수 있다.
> means <- rep(NA, iter)
> for (i in 1:iter) {
+   s1 <- sample(p1, sz, replace = T)
+   means[i] <- mean(s1)
+ }
> mean(means)
[1] 99.997
> var(means)
[1] 6.215719
> sd(means)
[1] 2.493134
> 
> # CLT에 의하면 위이 값은
> m.means <- mean(p1)
> ms.means <- c(var(p1)/sz)
> sd.means <- c(sqrt(var(p1)/sz))
> m.means
[1] 100
> ms.means
[1] 6.25
> sd.means
[1] 2.5
> 
> # 위 집합을 표준화하게 되면 그 집합의 평균과
> # 표준편차는 0, 1이 (분산도 1) 된다.
> m.zmeans <- 0
> ms.zmeans <- 1
> sd.zmeans <- 1
> 
> sd.means
[1] 2.5
> # p2에서 구하는 샘플링 디스트리뷰션의 표준점수를
> # p1의 표준점수에 (0, 1) 비교해서 배치하자면 
> z.p2 <- c((mean(p2)-mean(p1))/sd.means) # 표준점수 평균  
> sd.means <- c(sqrt(var(p1)/sz)) # 표준편차 
> 
> curve(dnorm(x), from = -4, to = z.p2+4, 
+       main = "normalized distribution of sample \n means from p1 and p2 (n=10)", 
+       ylab = "Density", xlab = "z-value", col = "black", lwd = 2)
> curve(dnorm(x-c(z.p2)), from = z.p2-3, to = z.p2+3, add = T,
+       main = "", 
+       ylab = "Density", xlab = "z-value", col = "blue", lwd = 2, lty=2)
> abline(v=0, col='black', lwd=2)
> abline(v=z.p2, col='blue', lwd=2)
> text(x=0, y=.1, label=paste(round(0, 4)), pos=4)
> text(x=z.p2, y=.1, label=paste(round(z.p2, 4)), pos=4)
>

> #######################################
> #######################################
> lo1 <- qnorm(.32/2)
> hi1 <- -lo1
> lo2 <- qnorm(.05/2)
> hi2 <- -lo2
> lo3 <- qnorm(.01/2)
> hi3 <- -lo3
> c(lo1,hi1)
[1] -0.9944579  0.9944579
> c(lo2,hi2)
[1] -1.959964  1.959964
> c(lo3,hi3)
[1] -2.575829  2.575829
> 
> curve(dnorm(x), from = -4, to = 2+4, 
+       main = "normalized distribution of sample means from p1", 
+       ylab = "Density", xlab = "z-value", col = "black", lwd = 2)
> abline(v=0, col="black", lwd=2)
> abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3),
+        col=c("red","red", "blue", "blue", "orange", "orange"), 
+        lwd=2)
> text(x=hi1, y=.2, label=paste(round(hi1,3), "(1)", "\n","86%"), pos=4)
> text(x=hi2, y=.15, label=paste(round(hi2,3),"(2)", "\n","95%"), pos=4)
> text(x=hi3, y=.1, label=paste(round(hi3,3), "(3)", "\n","99%"), pos=4)
>

> mean.of.sample.a <- m.means+ 1.5*sd.means
> mean.of.sample.a
[1] 103.75
> diff <- (mean.of.sample.a - m.means)
> se.z <- sd(p1)/sqrt(sz)
> diff
[1] 3.75
> se.z
[1] 2.5
> z.score  <- diff / se.z
> z.score
[1] 1.5
> prob <- pnorm(z.score, lower.tail = F)*2
> prob
[1] 0.1336144
> 
> curve(dnorm(x), from = -4, to = z.p2+4, 
+       main = "normalized distribution of sample means from p1 with z score 1.5", 
+       ylab = "Density", xlab = "z-value", col = "black", lwd = 2)
> abline(v=0, col="black", lwd=2)
> abline(v=c(lo2, hi2), col=c("blue"), lwd=2)
> abline(v=z.score,col='red', lwd=2)
> abline(v=-z.score, col='red', lwd=2)
> text(x=z.score, y=0.2, 
+      label = paste(" z score =", z.score, 
+                    "\n", "pnorm(-z.score)*2 =", round(prob,5)), 
+      pos=4, col='red')
>

 
> # 새로운 UI로 게임을 하도록 한 후
> # UI점수를 10명에게 구했다고 가정하고
> # 새로운 UI점수가 기존의 p1 paramter와
> # 다른지 테스트 해보라 
> 
> # z-test 가설검증에 실패하는 경우, 사실은 성공해야 하는데
> # p2의 평균이 106점이고 p1은 100점이니 p2에서 sample을 취
> # 하면 샘플의 평균과 p1의 평균은 다르다고 판단될 것이다.
> # 아래는 그럼에도 불구하고 실패하는 경우이다.
> set.seed(110)
> smp <- sample(p2, sz, replace=T)
> m.smp <- mean(smp)
> m.smp
[1] 104.5958
> diff <- m.smp - mean(p1)
> se.z <- sqrt(var(p1)/sz)
> z.cal1 <- diff / se.z
> prob1 <- pnorm(abs(z.cal1), lower.tail = F)*2
> print(c(z.cal1, sz, prob1))
[1]  2.906626913 40.000000000  0.003653487
> z.test(smp, mean(p1), sd(p1))
 z value: 2.90663 
 p value: 0.00365349 
 diff:    104.5958 - 100 = 4.595781 
 se:      1.581139 
 95% CI:  96.90102 103.099> 
> curve(dnorm(x), from = -4, to = z.p2+4, 
+       main = "normalized distribution of sample means 
+       testing with a sample from p2 (failed)", 
+       ylab = "Density", xlab = "z-value", col = "black", lwd = 2)
> abline(v=0, col="black", lwd=2)
> abline(v=c(lo2, hi2), col=c("blue"), lwd=2)
> abline(v=z.cal1,col='red', lwd=2)
> abline(v=-z.cal1, col='red', lwd=2)
> text(x=z.cal1, y=0.2, 
+      label = paste(" z score =", round(z.cal1,4), 
+                    "\n", "pnorm(-z.cal1)*2 =", round(prob1,4)), 
+      pos=4, col='red')
> 
>

> # 같은 방법으로 했는데 성공한 경우
> set.seed(211)
> smp <- sample(p2,sz,replace=T)
> m.smp <- mean(smp)
> m.smp
[1] 107.6795
> diff <- m.smp - mean(p1)
> se.z <- sqrt(var(p1)/sz)
> z.cal2 <- diff / se.z
> prob2 <- pnorm(abs(z.cal2), lower.tail = F)*2
> print(c(z.cal2, sz, prob2))
[1] 4.856940e+00 4.000000e+01 1.192138e-06
> z.test(smp, mean(p1), sd(p1))
 z value: 4.85694 
 p value: 1.19e-06 
 diff:    107.6795 - 100 = 7.679496 
 se:      1.581139 
 95% CI:  96.90102 103.099> 
> z.p2 <- (mean(p2)-mean(p1))/se.z
> z.p2
         [,1]
[1,] 3.794733
> curve(dnorm(x), from = -5, to = z.p2+5, 
+       main = "normalized distribution of sample means \n testing with a sample from p2 (succeeded)", 
+       ylab = "Density", xlab = "z-value", col = "black", lwd = 2)
> abline(v=0, col='black', lwd=2)
> z.cal1
         [,1]
[1,] 2.906627
> z.cal2
        [,1]
[1,] 4.85694
> two <- qnorm(.05/2)
> two
[1] -1.959964
> abline(v=c(two, -two), col='black', lwd=2)
> abline(v=c(-z.cal2, z.cal2), col='green', lwd=2)
> text(x=z.cal2, y=.3, 
+      label=paste("z.cal =", round(z.cal2,4), "\n",
+                  "pnorm(-z.cal2)*2 =", "\n", round(prob2,5)),
+      col="darkgreen", cex=1, pos=4)
> text(x=-z.cal2, y=.3, 
+      label=paste(round(-z.cal2,4)),
+      col="darkgreen", cex=1, pos=2)
> 
>

 
> # type i and type ii error
> two <- qnorm(.05/2)
> two
[1] -1.959964
> 
> curve(dnorm(x), from = -4.7, to = z.p2+4, 
+       main = "Distribution Curve", 
+       ylab = "Density", xlab = "z-value", col = "black", lwd = 2)
> curve(dnorm(x-c(z.p2)), from = z.p2-3, to = z.p2+3, add = T,
+       main = "Distribution Curve", 
+       ylab = "Density", xlab = "z-value", col = "blue", lwd = 2, lty=2)
> abline(v=0, col='black', lwd=2)
> abline(v=c(two, -two), col='black', lwd=2)
> abline(v=c(-z.cal1, z.cal1), col='red', lwd=2)
> text(x=z.cal1, y=.1, 
+      label=paste("z.cal =", round(z.cal1,4), "\n",
+                  "pnorm(-z.cal1)*2 =", "\n", round(prob1,3)),
+      col="red", cex=1, pos=4)
> text(x=-z.cal1, y=.1, 
+      label=paste(round(-z.cal1,4)),
+      col="red", cex=1, pos=2)
> 
> abline(v=c(-z.cal2, z.cal2), col='green', lwd=2)
> text(x=z.cal2, y=.3, 
+      label=paste("z.cal =", round(z.cal2,4), "\n",
+                  "pnorm(-z.cal2)*2 =", "\n", round(prob2,5)),
+      col="darkgreen", cex=1, pos=4)
> text(x=-z.cal2, y=.3, 
+      label=paste(round(-z.cal2,4)),
+      col="darkgreen", cex=1, pos=2)
> 
>

> ############################
> # one sample t-test
> ############################
> set.seed(99)
> sz <- 20
> smp <- sample(p2, sz, replace = T)
> m.smp <- mean(smp)
> diff <- m.smp - mean(p1)
> se.z <- sqrt(var(smp)/sz)
> df.smp <- sz - 1 
> t.cal <- (diff/se.z)
> prob <- pt(t.cal, df.smp, lower.tail = F)*2
> se.z
[1] 1.809134
> qt(.05/2, df.smp)
[1] -2.093024
> lo2 <- qt(.05/2, df.smp)
> hi2 <- -lo2
> m.smp+lo2*se.z
[1] 102.5239
> 
> curve(dt(x, df.smp), from = -6, to = 7, 
+       main = "t distribution", 
+       ylab = "Density", xlab = "t-value", col = "black", lwd = 2)
> abline(v=0, col="black", lwd=2)
> abline(v=c(lo2, hi2), col="green", lwd=2)
> abline(v=c(-t.cal, t.cal), col="red", lwd=2)
> text(x=t.cal,y=.2, 
+      label=paste("t =", round(t.cal,3),
+                  "\n", "df =", df.smp, 
+                  "\n", "p value =", round(prob, 5)),
+      pos = 4, col="red", cex=1)
>

> prob
[1] 0.002460977
> 
> print(c(t.cal, df.smp, prob))
[1]  3.488086575 19.000000000  0.002460977
> print(c(m.smp+lo2*se.z, m.smp+hi2*se.z))
[1] 102.5239 110.0970
> cat(" t =", t.cal, ", df =", round(df.smp,0), ", p-value =", prob, 
+ "\n", "95% confidence interval =", m.smp+lo2*se.z, m.smp+hi2*se.z)
 t = 3.488087 , df = 19 , p-value = 0.002460977 
 95% confidence interval = 102.5239 110.097> t.test(smp, mu=mean(p1))

	One Sample t-test

data:  smp
t = 3.4881, df = 19, p-value = 0.002461
alternative hypothesis: true mean is not equal to 100
95 percent confidence interval:
 102.5239 110.0970
sample estimates:
mean of x 
 106.3104 
> 
> #################################
> # t-test 2 group 
> #################################
> set.seed(169)
> sz.a <- 25
> sz.b <- 25
> group.a <- sample(p1, sz.a)
> group.b <- sample(p2, sz.b)
> m.a <- mean(group.a)
> m.b <- mean(group.b)
> ss.a <- ss(group.a)
> ss.b <- ss(group.b)
> df.a <- sz.a - 1
> df.b <- sz.b - 1
> df <- df.a+df.b
> ss.a
[1] 2225.751
> ss.b 
[1] 2783.816
> df.a
[1] 24
> df.b
[1] 24
> 
> pooled.v <- (ss.a+ss.b)/(df.a+df.b)
> pooled.v
[1] 104.366
> se.s <- sqrt(pooled.v/sz.a+pooled.v/sz.b)
> se.s
[1] 2.889512
> diff <- m.a-m.b
> t.cal <- diff/se.s
> t.cal
[1] -3.070212
> 
> prob <- pt(abs(t.cal), df=df, lower.tail = F)*2 
> 
> t.cal
[1] -3.070212
> df
[1] 48
> prob
[1] 0.003515457
> 
> t.test(group.a, group.b, var.equal = T)

	Two Sample t-test

data:  group.a and group.b
t = -3.0702, df = 48, p-value = 0.003515
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -14.681167  -3.061661
sample estimates:
mean of x mean of y 
 101.0286  109.9000 

> 
> lo2 <- qt(.05/2, df)
> hi2 <- -lo2 
> c(lo2, hi2)
[1] -2.010635  2.010635
> 
> curve(dt(x, df=df), from = -6, to = 6, 
+       main = "t distribution curve", 
+       ylab = "Density", xlab = "t-value", col = "black", lwd = 2)
> abline(v=0, col="black", lwd=2)
> abline(v=c(lo2, hi2), col=c("blue"), lwd=2)
> abline(v=c(t.cal, -t.cal), col='red', lwd=2)
> text(x=t.cal, y=0.2, 
+      label = paste(" t =", round(t.cal, 4), 
+                    "\n", 
+                    "df =", df, "\n",
+                    "pt(-t.cal)*2 =", round(prob, 4)), 
+      pos=4, col='red')
>

> print(paste(t.cal, df, prob))
[1] "-3.07021182079817 48 0.00351545738746208"
> t.test(group.a, group.b, var.equal = T)

	Two Sample t-test

data:  group.a and group.b
t = -3.0702, df = 48, p-value = 0.003515
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -14.681167  -3.061661
sample estimates:
mean of x mean of y 
 101.0286  109.9000 

> t.cal
[1] -3.070212
> 
> ######################
> # 4번째 케이스 t-test
> ######################
> set.seed(37)
> sz <- 40
> time.a <- sample(p1,sz)
> time.b <- sample(p2,sz)
> diff.time <- time.a-time.b
> m.a <- mean(time.a)
> m.b <- mean(time.b)
> diff <- m.a-m.b
> diff
[1] -8.674375
> se.s <- sd(diff.time)/sqrt(sz)
> t.cal <- diff/se.s
> df <- sz - 1
> prob <- pt(abs(t.cal), df=sz-1, lower.tail = F)*2
> t.cal
[1] -3.88213
> df
[1] 39
> prob
[1] 0.0003888961
> diff
[1] -8.674375
> 
> m.diff.time <- mean(diff.time)
> se.s
[1] 2.234437
> 
> t.test(time.a, time.b, paired=T)

	Paired t-test

data:  time.a and time.b
t = -3.8821, df = 39, p-value = 0.0003889
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 -13.193950  -4.154799
sample estimates:
mean difference 
      -8.674375 

> 
> m.diff.time 
[1] -8.674375
> se.s
[1] 2.234437
> lo1 <- qt(0.32/2,sz-1)
> hi1 <- -lo1
> lo2 <- qt(0.05/2,sz-1)
> hi2 <- -lo2
> lo3 <- qt(0.01/2, sz-1)
> hi3 <- -lo3
> 
> curve(dt(x, df=sz-1), from = -6, to = 7, 
+       main = "t distribution", 
+       ylab = "Density", xlab = "t-value", col = "black", lwd = 2)
> 
> abline(v=0, col="black", lwd=2)
> abline(v=c(lo2, hi2), col="green", lwd=2)
> abline(v=c(t.cal,-t.cal), col="red", lwd=2)
> text(x=-t.cal, y=.2, label=paste("t = ", round(-t.cal,4), 
+      "\n", "df =", sz-1, "\n", "p-value =", round(prob,5)),
+      col="red", pos=4)
> text(x=t.cal, y=.2, label=c(round(t.cal,3)), col="red", pos=2)
> 
> cat(t.cal, sz-1, prob)
-3.88213 39 0.0003888961
>

 
> cat(t.cal, sz-1, prob)
-3.88213 39 0.0003888961
>