User Tools

Site Tools


t-test_summary

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
Next revision
Previous revision
t-test_summary [2026/04/12 06:34] – [ro.hypothesis.testing] hkimscil t-test_summary [2026/04/16 01:27] (current) hkimscil
Line 29: Line 29:
  
 ################################ ################################
-sz <- 10+sz <- 16
 iter <- 100000 iter <- 100000
-# n = 10 일 때의 p1에 대한 sampling dist 은 아래 시뮬레이션으로+# n = 16 일 때의 p1에 대한 sampling dist 은 아래 시뮬레이션으로
 # 구해볼 수 있다. # 구해볼 수 있다.
 means <- rep(NA, iter) means <- rep(NA, iter)
Line 50: Line 50:
 sd.means sd.means
  
-# 위의 시뮬레이션으로 구한 sampling dist 대신에  +# 위 집합을 표준화하게 되면 그 집합의 평균과 
-# 정확한 mean과 se 값을 갖는 집합 sdc를 만든다 +# 표준편차는 0, 1이 (분산도 1) 된다.
-sdc <- rnorm2(iter, m.means, sd.means) +m.zmeans <- 
-mean(sdc) +ms.zmeans <- 1 
-var(sdc) +sd.zmeans <- 1
-sd(sdc)+
  
-zsdc <- scale(sdc) +sd.means 
-m.zsdc <- mean(zsdc) +# p2에서 구하는 샘플링 디스트리뷰션의 표준점수를 
-ms.zsdc <- var(zsdc) +# p1의 표준점수에 (0, 1) 비교해서 배치하자면  
-sd.zsdc <- sd(zsdc) +z.p2 <- c((mean(p2)-mean(p1))/sd.means# 표준점수 평균   
-m.zsdc +sd.means <- c(sqrt(var(p1)/sz)) # 표준편차 
-ms.zsdc +
-sd.zsdc +
- +
-se2 <- c(sqrt(var(p2)/sz)+
-z.p2 <- c((mean(p2)-mean(p1))/se2)  +
-sdc2 <- rnorm2(iter, mean(p2), se2) +
-zsdc2 <- scale(sdc2)+z.p2 +
-mean(zsdc2) +
-sd(zsdc2)+
  
 curve(dnorm(x), from = -4, to = z.p2+4,  curve(dnorm(x), from = -4, to = z.p2+4, 
-      main = "normalized distribution of sample means from p1 and p2",  +      main = "normalized distribution of sample \n means from p1 and p2 (n=10)",  
-      ylab = "Density", xlab = "t-value", col = "black", lwd = 2) +      ylab = "Density", xlab = "z-value", col = "black", lwd = 2) 
-curve(dnorm(x-(z.p2)), from = z.p2-3, to = z.p2+3, add = T, +curve(dnorm(x-c(z.p2)), from = z.p2-3, to = z.p2+3, add = T, 
-      main = "Distribution Curve",  +      main = "",  
-      ylab = "Density", xlab = "t-value", col = "blue", lwd = 2, lty=2) +      ylab = "Density", xlab = "z-value", col = "blue", lwd = 2, lty=2) 
-abline(v=mean(zsdc), col='black', lwd=2) +abline(v=0, col='black', lwd=2) 
-abline(v=mean(zsdc2), col='blue', lwd=2+abline(v=z.p2, col='blue', lwd=2) 
-mean(zsdc2+text(x=0, y=.1, label=paste(round(0, 4)), pos=4) 
-text(x=mean(zsdc), y=.1, label=paste(round(mean(zsdc),4)), pos=4) +text(x=z.p2, y=.1, label=paste(round(z.p2, 4)), pos=4)
-text(x=mean(zsdc2), y=.1, label=paste(round(mean(zsdc2),4)), pos=4)+
  
-+####################################### 
 +#######################################
 lo1 <- qnorm(.32/2) lo1 <- qnorm(.32/2)
 hi1 <- -lo1 hi1 <- -lo1
Line 106: Line 96:
 text(x=hi3, y=.1, label=paste(round(hi3,3), "(3)", "\n","99%"), pos=4) text(x=hi3, y=.1, label=paste(round(hi3,3), "(3)", "\n","99%"), pos=4)
  
-mean.of.sample.a <- mean(sdc)+ 1.5*sd(sdc)+mean.of.sample.a <- m.means+ 1.5*sd.means
 mean.of.sample.a mean.of.sample.a
-diff <- (mean.of.sample.a - mean(sdc))+diff <- (mean.of.sample.a - m.means)
 se.z <- sd(p1)/sqrt(sz) se.z <- sd(p1)/sqrt(sz)
 diff diff
Line 130: Line 120:
  
 # 새로운 UI로 게임을 하도록 한 후 # 새로운 UI로 게임을 하도록 한 후
-# UI점수를 10명에게 구했다고 가정하고+# UI점수를 sz 명에게 구했다고 가정하고
 # 새로운 UI점수가 기존의 p1 parameter와 # 새로운 UI점수가 기존의 p1 parameter와
 # 다른지 테스트 해보라  # 다른지 테스트 해보라 
Line 138: Line 128:
 # 하면 샘플의 평균과 p1의 평균은 다르다고 판단될 것이다. # 하면 샘플의 평균과 p1의 평균은 다르다고 판단될 것이다.
 # 아래는 그럼에도 불구하고 실패하는 경우이다. # 아래는 그럼에도 불구하고 실패하는 경우이다.
-set.seed(111)+set.seed(110)
 smp <- sample(p2, sz, replace=T) smp <- sample(p2, sz, replace=T)
 m.smp <- mean(smp) m.smp <- mean(smp)
Line 150: Line 140:
  
 curve(dnorm(x), from = -4, to = z.p2+4,  curve(dnorm(x), from = -4, to = z.p2+4, 
-      main = "normalized distribution of sample means \n testing with a sample from p2 (failed)", +      main = "normalized distribution of sample means  
 +      testing with a sample from p2 (failed)", 
       ylab = "Density", xlab = "z-value", col = "black", lwd = 2)       ylab = "Density", xlab = "z-value", col = "black", lwd = 2)
 abline(v=0, col="black", lwd=2) abline(v=0, col="black", lwd=2)
Line 174: Line 165:
 z.test(smp, mean(p1), sd(p1)) z.test(smp, mean(p1), sd(p1))
  
-z.p2 <- (mean(p2)-mean(p1))/se2+z.p2 <- (mean(p2)-mean(p1))/se.z
 z.p2 z.p2
 curve(dnorm(x), from = -5, to = z.p2+5,  curve(dnorm(x), from = -5, to = z.p2+5, 
Line 196: Line 187:
  
 # type i and type ii error # type i and type ii error
-z.p2 <- (mean(p2)-mean(p1))/se2 +two <- qnorm(.05/2
-z.p2+two 
 curve(dnorm(x), from = -4.7, to = z.p2+4,  curve(dnorm(x), from = -4.7, to = z.p2+4, 
       main = "Distribution Curve",        main = "Distribution Curve", 
Line 205: Line 197:
       ylab = "Density", xlab = "z-value", col = "blue", lwd = 2, lty=2)       ylab = "Density", xlab = "z-value", col = "blue", lwd = 2, lty=2)
 abline(v=0, col='black', lwd=2) abline(v=0, col='black', lwd=2)
-z.cal1 
-z.cal2 
-two <- qnorm(.05/2) 
-two 
 abline(v=c(two, -two), col='black', lwd=2) abline(v=c(two, -two), col='black', lwd=2)
 abline(v=c(-z.cal1, z.cal1), col='red', lwd=2) abline(v=c(-z.cal1, z.cal1), col='red', lwd=2)
Line 263: Line 251:
 print(c(t.cal, df.smp, prob)) print(c(t.cal, df.smp, prob))
 print(c(m.smp+lo2*se.z, m.smp+hi2*se.z)) print(c(m.smp+lo2*se.z, m.smp+hi2*se.z))
-cat("t =", t.cal, ", df =", round(df.smp,0), ", p-value =", prob, +cat(" t =", t.cal, ", df =", round(df.smp,0), ", p-value =", prob, 
 "\n", "95% confidence interval =", m.smp+lo2*se.z, m.smp+hi2*se.z) "\n", "95% confidence interval =", m.smp+lo2*se.z, m.smp+hi2*se.z)
 t.test(smp, mu=mean(p1)) t.test(smp, mu=mean(p1))
Line 323: Line 311:
 t.test(group.a, group.b, var.equal = T) t.test(group.a, group.b, var.equal = T)
 t.cal t.cal
-# t.cal=diff/se 
-t.cal * se.s 
-diff 
-diff+lo2*se.s 
-diff+hi2*se.s 
-(t.cal+lo2)*se.s 
-(t.cal+hi2)*se.s 
  
 ###################### ######################
Line 383: Line 364:
 <tabbox ro.hypothesis.testing> <tabbox ro.hypothesis.testing>
 <code> <code>
- 
 > rm(list=ls()) > rm(list=ls())
 > rnorm2 <- function(n,mean,sd){ mean+sd*scale(rnorm(n)) } > rnorm2 <- function(n,mean,sd){ mean+sd*scale(rnorm(n)) }
Line 411: Line 391:
  
 > ################################ > ################################
-> sz <- 10+> sz <- 16
 > iter <- 100000 > iter <- 100000
-> # n = 10 일 때의 p1에 대한 sampling dist 은 아래 시뮬레이션으로+> # n = 16 일 때의 p1에 대한 sampling dist 은 아래 시뮬레이션으로
 > # 구해볼 수 있다. > # 구해볼 수 있다.
 > means <- rep(NA, iter) > means <- rep(NA, iter)
Line 421: Line 401:
 + } + }
 > mean(means) > mean(means)
-[1] 99.9946+[1] 99.997
 > var(means) > var(means)
-[1] 9.95743+[1] 6.215719
 > sd(means) > sd(means)
-[1] 3.15554+[1] 2.493134
  
 > # CLT에 의하면 위의 값은 > # CLT에 의하면 위의 값은
Line 434: Line 414:
 [1] 100 [1] 100
 > ms.means > ms.means
-[1] 10+[1] 6.25
 > sd.means > sd.means
-[1] 3.162278+[1] 2.5
  
-> # 위의 시뮬레이션으로 구한 sampling dist 대신에  +> # 위 집합을 표준화하게 되면 그 집합의 평균과 
-> # 정확한 mean과 se 값을 갖는 집합 sdc를 만든다 +> # 표준편차는 0, 1이 (분산도 1) 된다.
-sdc <- rnorm2(iter, m.means, sd.means) +m.zmeans <- 0 
-mean(sdc) +ms.zmeans <- 
-[1] 100 +> sd.zmeans <- 1
-> var(sdc) +
-     [,1] +
-[1,]   10 +
-> sd(sdc) +
-[1] 3.162278+
  
-> zsdc <- scale(sdc) +> sd.means 
-> m.zsdc <- mean(zsdc) +[1] 2.5 
-> ms.zsdc <- var(zsdc) +# p2에서 구하는 샘플링 디스트리뷰션의 표준점수를 
-> sd.zsdc <- sd(zsdc) +> # p1의 표준점수에 (0, 1) 비교해서 배치하자면  
-> m.zsdc +> z.p2 <- c((mean(p2)-mean(p1))/sd.means# 표준점수 평균   
-[1] -2.40102e-17 +sd.means <- c(sqrt(var(p1)/sz)) # 표준편차 
-ms.zsdc +
-     [,1+
-[1,]    1 +
-> sd.zsdc +
-[1] 1 +
->  +
-> se2 <- c(sqrt(var(p2)/sz)+
-> z.p2 <- c((mean(p2)-mean(p1))/se2)  +
-sdc2 <- rnorm2(iter, mean(p2), se2) +
-> zsdc2 <- scale(sdc2)+z.p2 +
-> mean(zsdc2) +
-[1] 1.897367 +
-> sd(zsdc2) +
-[1] 1+
  
 > curve(dnorm(x), from = -4, to = z.p2+4,  > curve(dnorm(x), from = -4, to = z.p2+4, 
-+       main = "normalized distribution of sample means from p1 and p2",  ++       main = "normalized distribution of sample \n means from p1 and p2 (n=10)",  
-+       ylab = "Density", xlab = "t-value", col = "black", lwd = 2) ++       ylab = "Density", xlab = "z-value", col = "black", lwd = 2) 
-> curve(dnorm(x-(z.p2)), from = z.p2-3, to = z.p2+3, add = T, +> curve(dnorm(x-c(z.p2)), from = z.p2-3, to = z.p2+3, add = T, 
-+       main = "Distribution Curve",  ++       main = "",  
-+       ylab = "Density", xlab = "t-value", col = "blue", lwd = 2, lty=2) ++       ylab = "Density", xlab = "z-value", col = "blue", lwd = 2, lty=2) 
-> abline(v=mean(zsdc), col='black', lwd=2) +> abline(v=0, col='black', lwd=2) 
-> abline(v=mean(zsdc2), col='blue', lwd=2) +> abline(v=z.p2, col='blue', lwd=2) 
-> mean(zsdc2) +> text(x=0, y=.1, label=paste(round(0, 4)), pos=4) 
-[1] 1.897367 +> text(x=z.p2, y=.1, label=paste(round(z.p2, 4)), pos=4)
-> text(x=mean(zsdc), y=.1, label=paste(round(mean(zsdc),4)), pos=4) +
-> text(x=mean(zsdc2), y=.1, label=paste(round(mean(zsdc2),4)), pos=4)+
  
-> # +</code> 
 +{{pasted:20260412-232126.png}} 
 + 
 +<code> 
 +> ####################################### 
 +> #######################################
 > lo1 <- qnorm(.32/2) > lo1 <- qnorm(.32/2)
 > hi1 <- -lo1 > hi1 <- -lo1
Line 508: Line 472:
 > text(x=hi3, y=.1, label=paste(round(hi3,3), "(3)", "\n","99%"), pos=4) > text(x=hi3, y=.1, label=paste(round(hi3,3), "(3)", "\n","99%"), pos=4)
  
-> mean.of.sample.a <- mean(sdc)+ 1.5*sd(sdc)+</code> 
 +{{pasted:20260412-063531.png}} 
 + 
 +<code> 
 +> mean.of.sample.a <- m.means+ 1.5*sd.means
 > mean.of.sample.a > mean.of.sample.a
-[1] 104.7434 +[1] 103.75 
-> diff <- (mean.of.sample.a - mean(sdc))+> diff <- (mean.of.sample.a - m.means)
 > se.z <- sd(p1)/sqrt(sz) > se.z <- sd(p1)/sqrt(sz)
 > diff > diff
-[1] 4.743416+[1] 3.75
 > se.z > se.z
-[1] 3.162278+[1] 2.5
 > z.score  <- diff / se.z > z.score  <- diff / se.z
 > z.score > z.score
Line 536: Line 504:
 +      pos=4, col='red') +      pos=4, col='red')
  
 +</code>
 +{{pasted:20260412-063608.png}}
 +
 +<code> 
 > # 새로운 UI로 게임을 하도록 한 후 > # 새로운 UI로 게임을 하도록 한 후
 > # UI점수를 10명에게 구했다고 가정하고 > # UI점수를 10명에게 구했다고 가정하고
Line 545: Line 517:
 > # 하면 샘플의 평균과 p1의 평균은 다르다고 판단될 것이다. > # 하면 샘플의 평균과 p1의 평균은 다르다고 판단될 것이다.
 > # 아래는 그럼에도 불구하고 실패하는 경우이다. > # 아래는 그럼에도 불구하고 실패하는 경우이다.
-> set.seed(111)+> set.seed(110)
 > smp <- sample(p2, sz, replace=T) > smp <- sample(p2, sz, replace=T)
 > m.smp <- mean(smp) > m.smp <- mean(smp)
 > m.smp > m.smp
-[1] 104.4742+[1] 104.5958
 > diff <- m.smp - mean(p1) > diff <- m.smp - mean(p1)
 > se.z <- sqrt(var(p1)/sz) > se.z <- sqrt(var(p1)/sz)
Line 555: Line 527:
 > prob1 <- pnorm(abs(z.cal1), lower.tail = F)*2 > prob1 <- pnorm(abs(z.cal1), lower.tail = F)*2
 > print(c(z.cal1, sz, prob1)) > print(c(z.cal1, sz, prob1))
-[1]  1.4148817 10.0000000  0.1571032+[1]  2.906626913 40.000000000  0.003653487
 > z.test(smp, mean(p1), sd(p1)) > z.test(smp, mean(p1), sd(p1))
- z value: 1.41488  + z value: 2.90663  
- p value: 0.1571032  + p value: 0.00365349  
- diff:    104.4742 - 100 = 4.474249  + diff:    104.5958 - 100 = 4.595781  
- se:      3.162278  + se:      1.581139  
- 95% CI:  93.80205 106.198+ 95% CI:  96.90102 103.099
 > curve(dnorm(x), from = -4, to = z.p2+4,  > curve(dnorm(x), from = -4, to = z.p2+4, 
-+       main = "normalized distribution of sample means \n testing with a sample from p2 (failed)", ++       main = "normalized distribution of sample means  
 ++       testing with a sample from p2 (failed)", 
 +       ylab = "Density", xlab = "z-value", col = "black", lwd = 2) +       ylab = "Density", xlab = "z-value", col = "black", lwd = 2)
 > abline(v=0, col="black", lwd=2) > abline(v=0, col="black", lwd=2)
Line 575: Line 548:
  
  
 +</code>
 +{{pasted:20260412-233253.png}}
 +
 +<code>
 > # 같은 방법으로 했는데 성공한 경우 > # 같은 방법으로 했는데 성공한 경우
 > set.seed(211) > set.seed(211)
Line 580: Line 557:
 > m.smp <- mean(smp) > m.smp <- mean(smp)
 > m.smp > m.smp
-[1] 110.1154+[1] 107.6795
 > diff <- m.smp - mean(p1) > diff <- m.smp - mean(p1)
 > se.z <- sqrt(var(p1)/sz) > se.z <- sqrt(var(p1)/sz)
Line 586: Line 563:
 > prob2 <- pnorm(abs(z.cal2), lower.tail = F)*2 > prob2 <- pnorm(abs(z.cal2), lower.tail = F)*2
 > print(c(z.cal2, sz, prob2)) > print(c(z.cal2, sz, prob2))
-[1]  3.198763975 10.000000000  0.001380181+[1] 4.856940e+00 4.000000e+01 1.192138e-06
 > z.test(smp, mean(p1), sd(p1)) > z.test(smp, mean(p1), sd(p1))
- z value: 3.19876  + z value: 4.85694  
- p value: 0.00138018  + p value: 1.19e-06  
- diff:    110.1154 - 100 = 10.11538  + diff:    107.6795 - 100 = 7.679496  
- se:      3.162278  + se:      1.581139  
- 95% CI:  93.80205 106.198>  + 95% CI:  96.90102 103.099>  
-> z.p2 <- (mean(p2)-mean(p1))/se2+> z.p2 <- (mean(p2)-mean(p1))/se.z
 > z.p2 > z.p2
-[1] 1.897367+         [,1] 
 +[1,] 3.794733
 > curve(dnorm(x), from = -5, to = z.p2+5,  > curve(dnorm(x), from = -5, to = z.p2+5, 
 +       main = "normalized distribution of sample means \n testing with a sample from p2 (succeeded)",  +       main = "normalized distribution of sample means \n testing with a sample from p2 (succeeded)", 
Line 602: Line 580:
 > z.cal1 > z.cal1
          [,1]          [,1]
-[1,] 1.414882+[1,] 2.906627
 > z.cal2 > z.cal2
-         [,1] +        [,1] 
-[1,] 3.198764+[1,] 4.85694
 > two <- qnorm(.05/2) > two <- qnorm(.05/2)
 > two > two
Line 620: Line 598:
  
  
 +</code>
 +{{pasted:20260412-233208.png}}
 +
 +<code> 
 > # type i and type ii error > # type i and type ii error
-z.p2 <- (mean(p2)-mean(p1))/se2 +two <- qnorm(.05/2
-z.p2 +two 
-[1] 1.897367+[1] -1.959964 
 +
 > curve(dnorm(x), from = -4.7, to = z.p2+4,  > curve(dnorm(x), from = -4.7, to = z.p2+4, 
 +       main = "Distribution Curve",  +       main = "Distribution Curve", 
 +       ylab = "Density", xlab = "z-value", col = "black", lwd = 2) +       ylab = "Density", xlab = "z-value", col = "black", lwd = 2)
-> curve(dnorm(x-(z.p2)), from = z.p2-3, to = z.p2+3, add = T,+> curve(dnorm(x-c(z.p2)), from = z.p2-3, to = z.p2+3, add = T,
 +       main = "Distribution Curve",  +       main = "Distribution Curve", 
 +       ylab = "Density", xlab = "z-value", col = "blue", lwd = 2, lty=2) +       ylab = "Density", xlab = "z-value", col = "blue", lwd = 2, lty=2)
 > abline(v=0, col='black', lwd=2) > abline(v=0, col='black', lwd=2)
-> z.cal1 
-         [,1] 
-[1,] 1.414882 
-> z.cal2 
-         [,1] 
-[1,] 3.198764 
-> two <- qnorm(.05/2) 
-> two 
-[1] -1.959964 
 > abline(v=c(two, -two), col='black', lwd=2) > abline(v=c(two, -two), col='black', lwd=2)
 > abline(v=c(-z.cal1, z.cal1), col='red', lwd=2) > abline(v=c(-z.cal1, z.cal1), col='red', lwd=2)
Line 660: Line 634:
  
  
 +</code>
 +{{pasted:20260412-233411.png}}
 +
 +<code>
 > ############################ > ############################
 > # one sample t-test > # one sample t-test
Line 693: Line 671:
 +      pos = 4, col="red", cex=1) +      pos = 4, col="red", cex=1)
  
 +</code>
 +{{pasted:20260412-063722.png}}
 +
 +<code>
 > prob > prob
 [1] 0.002460977 [1] 0.002460977
Line 700: Line 682:
 > print(c(m.smp+lo2*se.z, m.smp+hi2*se.z)) > print(c(m.smp+lo2*se.z, m.smp+hi2*se.z))
 [1] 102.5239 110.0970 [1] 102.5239 110.0970
-> cat("t =", t.cal, ", df =", round(df.smp,0), ", p-value =", prob, +> cat(" t =", t.cal, ", df =", round(df.smp,0), ", p-value =", prob, 
 + "\n", "95% confidence interval =", m.smp+lo2*se.z, m.smp+hi2*se.z) + "\n", "95% confidence interval =", m.smp+lo2*se.z, m.smp+hi2*se.z)
-t = 3.488087 , df = 19 , p-value = 0.002460977 + t = 3.488087 , df = 19 , p-value = 0.002460977 
  95% confidence interval = 102.5239 110.097> t.test(smp, mu=mean(p1))  95% confidence interval = 102.5239 110.097> t.test(smp, mu=mean(p1))
  
Line 715: Line 697:
 mean of x  mean of x 
  106.3104   106.3104 
- 
  
 > ################################# > #################################
Line 793: Line 774:
 +      pos=4, col='red') +      pos=4, col='red')
  
 +
 +</code>
 +{{pasted:20260412-063739.png}}
 +
 +<code>
 > print(paste(t.cal, df, prob)) > print(paste(t.cal, df, prob))
 [1] "-3.07021182079817 48 0.00351545738746208" [1] "-3.07021182079817 48 0.00351545738746208"
Line 810: Line 796:
 > t.cal > t.cal
 [1] -3.070212 [1] -3.070212
-> # t.cal=diff/se 
-> t.cal * se.s 
-[1] -8.871414 
-> diff 
-[1] -8.871414 
-> diff+lo2*se.s 
-[1] -14.68117 
-> diff+hi2*se.s 
-[1] -3.061661 
-> (t.cal+lo2)*se.s 
-[1] -14.68117 
-> (t.cal+hi2)*se.s 
-[1] -3.061661 
  
 > ###################### > ######################
Line 891: Line 864:
 > text(x=t.cal, y=.2, label=c(round(t.cal,3)), col="red", pos=2) > text(x=t.cal, y=.2, label=c(round(t.cal,3)), col="red", pos=2)
  
 +> cat(t.cal, sz-1, prob)
 +-3.88213 39 0.0003888961
 +
 +</code>
 +{{pasted:20260412-063758.png}}
 +
 +<code> 
 > cat(t.cal, sz-1, prob) > cat(t.cal, sz-1, prob)
 -3.88213 39 0.0003888961 -3.88213 39 0.0003888961
t-test_summary.1775975646.txt.gz · Last modified: by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki