r:two_sample_t-test
Differences
This shows you the differences between two versions of the page.
| Both sides previous revision · Previous revision | |||
| r:two_sample_t-test [2026/04/07 23:01] – [rs.two.sample.t-test] hkimscil | r:two_sample_t-test [2026/04/07 23:02] (current) – [ro.two.sample.t-test] hkimscil | ||
|---|---|---|---|
| Line 261: | Line 261: | ||
| <tabbox ro.two.sample.t-test> | <tabbox ro.two.sample.t-test> | ||
| < | < | ||
| + | > | ||
| + | > | ||
| > rm(list=ls()) | > rm(list=ls()) | ||
| > rnorm2 <- function(n, | > rnorm2 <- function(n, | ||
| Line 286: | Line 288: | ||
| [1] 10 | [1] 10 | ||
| > | > | ||
| - | > sz1 <- sz2 <- 50 | + | > sz1 <- 50 |
| + | > sz2 <- 50 | ||
| > df1 <- sz1 - 1 | > df1 <- sz1 - 1 | ||
| > df2 <- sz2 - 1 | > df2 <- sz2 - 1 | ||
| Line 366: | Line 369: | ||
| > | > | ||
| > # 이것을 그래프로 그려보면 | > # 이것을 그래프로 그려보면 | ||
| - | > hist(mdiffs, | + | > hist(mdiffs, |
| + | + main=paste(" | ||
| + | + " | ||
| > abline(v=mean(mdiffs), | > abline(v=mean(mdiffs), | ||
| + col=" | + col=" | ||
| Line 391: | Line 396: | ||
| + col=" | + col=" | ||
| > | > | ||
| + | > | ||
| + | > # 위는 이론적으로 생각해 보는 것이고 | ||
| + | > # 아래는 실제로 2 샘플을 취한 것 | ||
| > s1 <- sample(p1, sz1, replace = T) | > s1 <- sample(p1, sz1, replace = T) | ||
| > s2 <- sample(p2, sz2, replace = T) | > s2 <- sample(p2, sz2, replace = T) | ||
| Line 397: | Line 405: | ||
| [1] -6.959851 | [1] -6.959851 | ||
| > | > | ||
| + | > # 이 때의 pooled variance는 ss(s1)을 이용해서 구함 | ||
| + | > # population의 parameter는 모르는 상태로 가정 | ||
| > pv <- (ss(s1) + ss(s2))/ | > pv <- (ss(s1) + ss(s2))/ | ||
| > pv | > pv | ||
| Line 410: | Line 420: | ||
| [1] 106.6359 | [1] 106.6359 | ||
| > | > | ||
| - | > # se <- sqrt(ms.a/sz1 + ms.b/sz2) | + | > # se <- sqrt(ms1/sz1 + ms2/sz2) |
| > # se | > # se | ||
| > se.z <- sqrt(pv/sz1 + pv/sz2) | > se.z <- sqrt(pv/sz1 + pv/sz2) | ||
| Line 418: | Line 428: | ||
| > diff <- mean(s1)-mean(s2) | > diff <- mean(s1)-mean(s2) | ||
| > t.cal <- diff / se.z | > t.cal <- diff / se.z | ||
| + | > | ||
| + | > | ||
| + | > t.cal | ||
| + | [1] -3.369909 | ||
| + | > df.tot | ||
| + | [1] 98 | ||
| + | > p.val <- pt(abs(t.cal), | ||
| + | > mean.diff <- mean(s1)-mean(s2) | ||
| + | > # two <- -2 에 해당하는 정확한 값 | ||
| + | > two <- qt(.05/2, df.tot) | ||
| + | > lo2 <- se.z * two | ||
| + | > lo2 | ||
| + | [1] -4.098508 | ||
| + | > | ||
| + | > paste(c(t.cal, | ||
| + | [1] " | ||
| + | > mean.diff + c(lo2, -lo2) | ||
| + | [1] -11.058359 | ||
| > | > | ||
| > t.test(s1, | > t.test(s1, | ||
| Line 433: | Line 461: | ||
| > | > | ||
| - | > t.cal | ||
| - | [1] -3.369909 | ||
| - | > df.tot | ||
| - | [1] 98 | ||
| - | > print(p.val <- pt(abs(t.cal), | ||
| - | [1] 0.001076634 | ||
| - | > print(mean.diff <- mean(s1)-mean(s2)) | ||
| - | [1] -6.959851 | ||
| - | > two <- qt(.05/2, df.tot) | ||
| - | > two | ||
| - | [1] -1.984467 | ||
| - | > # two <- -2 | ||
| - | > lo2 <- se.z * two | ||
| - | > lo2 | ||
| - | [1] -4.098508 | ||
| - | > mean.diff+c(lo2, | ||
| - | [1] -11.058359 | ||
| > | > | ||
| > zdiffs <- scale(mdiffs) | > zdiffs <- scale(mdiffs) | ||
| Line 544: | Line 555: | ||
| > c(lo, hi) | > c(lo, hi) | ||
| [1] -4.098424 | [1] -4.098424 | ||
| - | > | ||
| > | > | ||
| > # let's see | > # let's see | ||
| - | > | + | > # but this is exactly what the distribution of sample differences |
| + | > # was in the first place | ||
| > iter <- 1000 | > iter <- 1000 | ||
| > means.s3 <- means.s4 <- rep(NA, iter) | > means.s3 <- means.s4 <- rep(NA, iter) | ||
r/two_sample_t-test.txt · Last modified: by hkimscil
