User Tools

Site Tools


r:two_sample_t-test

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision | Previous revision
r:two_sample_t-test [2026/04/07 23:01] – [rs.two.sample.t-test] hkimscil | r:two_sample_t-test [2026/04/07 23:02] (current) – [ro.two.sample.t-test] hkimscil
Line 261: Line 261:
 <tabbox ro.two.sample.t-test> <tabbox ro.two.sample.t-test>
 <code> <code>
 +
 +
 > rm(list=ls()) > rm(list=ls())
 > rnorm2 <- function(n,mean,sd){  > rnorm2 <- function(n,mean,sd){ 
Line 286: Line 288:
 [1] 10 [1] 10
  
-> sz1 <- sz2 <- 50+> sz1 <- 50 
 +sz2 <- 50
 > df1 <- sz1 - 1 > df1 <- sz1 - 1
 > df2 <- sz2 - 1 > df2 <- sz2 - 1
Line 366: Line 369:
  
 > # 이것을 그래프로 그려보면 > # 이것을 그래프로 그려보면
-> hist(mdiffs, breaks=50)+> hist(mdiffs, breaks=50,  
 ++      main=paste("Histogram of mdiffs", 
 ++                 "\n","with 95% CI (blue)"))
 > abline(v=mean(mdiffs),  > abline(v=mean(mdiffs), 
 +        col="black", lwd=2) +        col="black", lwd=2)
Line 391: Line 396:
 +      col="blue", pos = 4) +      col="blue", pos = 4)
  
 +
 +> # 위는 이론적으로 생각해 보는 것이고 
 +> # 아래는 실제로 2 샘플을 취한 것
 > s1 <- sample(p1, sz1, replace = T) > s1 <- sample(p1, sz1, replace = T)
 > s2 <- sample(p2, sz2, replace = T) > s2 <- sample(p2, sz2, replace = T)
Line 397: Line 405:
 [1] -6.959851 [1] -6.959851
  
 +> # 이 때의 pooled variance는 ss(s1)을 이용해서 구함
 +> # population의 parameter는 모르는 상태로 가정
 > pv <- (ss(s1) + ss(s2))/(df1 + df2) > pv <- (ss(s1) + ss(s2))/(df1 + df2)
 > pv > pv
Line 410: Line 420:
 [1] 106.6359 [1] 106.6359
  
-> # se <- sqrt(ms.a/sz1 + ms.b/sz2)+> # se <- sqrt(ms1/sz1 + ms2/sz2)
 > # se > # se
 > se.z <- sqrt(pv/sz1 + pv/sz2) > se.z <- sqrt(pv/sz1 + pv/sz2)
Line 418: Line 428:
 > diff <- mean(s1)-mean(s2) > diff <- mean(s1)-mean(s2)
 > t.cal <- diff / se.z > t.cal <- diff / se.z
 +
 +
 +> t.cal
 +[1] -3.369909
 +> df.tot
 +[1] 98
 +> p.val <- pt(abs(t.cal), df.tot, lower.tail = F)*2
 +> mean.diff <- mean(s1)-mean(s2)
 +> # two <-  -2 에 해당하는 정확한 값
 +> two <- qt(.05/2, df.tot)
 +> lo2 <- se.z * two
 +> lo2
 +[1] -4.098508
 +
 +> paste(c(t.cal, df.tot, p.val))
 +[1] "-3.36990898869323"   "98"                  "0.00107663367234877"
 +> mean.diff + c(lo2, -lo2)
 +[1] -11.058359  -2.861343
  
 > t.test(s1,s2, var.equal = T) > t.test(s1,s2, var.equal = T)
Line 433: Line 461:
  
  
-> t.cal 
-[1] -3.369909 
-> df.tot 
-[1] 98 
-> print(p.val <- pt(abs(t.cal), df.tot, lower.tail = F)*2) 
-[1] 0.001076634 
-> print(mean.diff <- mean(s1)-mean(s2)) 
-[1] -6.959851 
-> two <- qt(.05/2, df.tot) 
-> two 
-[1] -1.984467 
-> # two <-  -2 
-> lo2 <- se.z * two 
-> lo2 
-[1] -4.098508 
-> mean.diff+c(lo2,-lo2) 
-[1] -11.058359  -2.861343 
  
 > zdiffs <- scale(mdiffs) > zdiffs <- scale(mdiffs)
Line 544: Line 555:
 > c(lo, hi) > c(lo, hi)
 [1] -4.098424  4.098424 [1] -4.098424  4.098424
- 
  
 > # let's see > # let's see
-+# but, this is exactly what the distribution of sample differences was
 +> # in the first place
 > iter <- 1000 > iter <- 1000
 > means.s3 <- means.s4 <- rep(NA, iter) > means.s3 <- means.s4 <- rep(NA, iter)
r/two_sample_t-test.txt · Last modified: by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki