r:two_sample_t-test
Differences
This shows you the differences between two versions of the page.
| Both sides previous revision / Previous revision / Next revision | Previous revision | ||
| r:two_sample_t-test [2026/04/07 22:18] – hkimscil | r:two_sample_t-test [2026/04/07 23:02] (current) – [ro.two.sample.t-test] hkimscil | ||
|---|---|---|---|
| Line 48: | Line 48: | ||
| sd(p2) | sd(p2) | ||
| - | sz1 <- sz2 <- 50 | + | sz1 <- 50 |
| + | sz2 <- 50 | ||
| df1 <- sz1 - 1 | df1 <- sz1 - 1 | ||
| df2 <- sz2 - 1 | df2 <- sz2 - 1 | ||
| Line 103: | Line 104: | ||
| # 이것을 그래프로 그려보면 | # 이것을 그래프로 그려보면 | ||
| - | hist(mdiffs, | + | hist(mdiffs, |
| + | | ||
| + | " | ||
| abline(v=mean(mdiffs), | abline(v=mean(mdiffs), | ||
| | | ||
| Line 125: | Line 128: | ||
| | | ||
| | | ||
| - | </ | ||
| - | < | + | # 위는 이론적으로 생각해 보는 것이고 |
| + | # 아래는 실제로 2 샘플을 취한 것 | ||
| s1 <- sample(p1, sz1, replace = T) | s1 <- sample(p1, sz1, replace = T) | ||
| s2 <- sample(p2, sz2, replace = T) | s2 <- sample(p2, sz2, replace = T) | ||
| Line 134: | Line 137: | ||
| m.diff | m.diff | ||
| + | # 이 때의 pooled variance는 ss(s1)을 이용해서 구함 | ||
| + | # population의 parameter는 모르는 상태로 가정 | ||
| pv <- (ss(s1) + ss(s2))/ | pv <- (ss(s1) + ss(s2))/ | ||
| pv | pv | ||
| Line 143: | Line 148: | ||
| (ms1 + ms2)/2 | (ms1 + ms2)/2 | ||
| - | # se <- sqrt(ms.a/sz1 + ms.b/sz2) | + | # se <- sqrt(ms1/sz1 + ms2/sz2) |
| # se | # se | ||
| se.z <- sqrt(pv/sz1 + pv/sz2) | se.z <- sqrt(pv/sz1 + pv/sz2) | ||
| Line 151: | Line 156: | ||
| t.cal <- diff / se.z | t.cal <- diff / se.z | ||
| - | t.test(s1, | ||
| t.cal | t.cal | ||
| df.tot | df.tot | ||
| - | print(p.val <- pt(abs(t.cal), | + | p.val <- pt(abs(t.cal), |
| - | print(mean.diff <- mean(s1)-mean(s2)) | + | mean.diff <- mean(s1)-mean(s2) |
| + | # two <- -2 에 해당하는 정확한 값 | ||
| two <- qt(.05/2, df.tot) | two <- qt(.05/2, df.tot) | ||
| - | two | ||
| - | # two <- -2 | ||
| lo2 <- se.z * two | lo2 <- se.z * two | ||
| lo2 | lo2 | ||
| - | mean.diff+c(lo2, | + | |
| + | paste(c(t.cal, | ||
| + | mean.diff + c(lo2, -lo2) | ||
| + | |||
| + | t.test(s1, | ||
| zdiffs <- scale(mdiffs) | zdiffs <- scale(mdiffs) | ||
| Line 176: | Line 184: | ||
| text(x=t.cal, | text(x=t.cal, | ||
| p.val | p.val | ||
| - | </ | ||
| - | < | ||
| ### | ### | ||
| # what if s1 and s2 are from | # what if s1 and s2 are from | ||
| Line 216: | Line 222: | ||
| var.diff | var.diff | ||
| sd.diff | sd.diff | ||
| - | </ | ||
| - | < | ||
| s3 <- sample(p1, sz1, replace=T) | s3 <- sample(p1, sz1, replace=T) | ||
| s4 <- sample(p1, sz2, replace=T) | s4 <- sample(p1, sz2, replace=T) | ||
| Line 234: | Line 238: | ||
| hi <- -lo | hi <- -lo | ||
| c(lo, hi) | c(lo, hi) | ||
| - | </ | ||
| - | < | ||
| # let's see | # let's see | ||
| + | # but, this is exactly what the distribution of sample differences | ||
| + | # was in the first place | ||
| iter <- 1000 | iter <- 1000 | ||
| means.s3 <- means.s4 <- rep(NA, iter) | means.s3 <- means.s4 <- rep(NA, iter) | ||
| Line 252: | Line 256: | ||
| table(mdiffs < -4 | mdiffs > 4) | table(mdiffs < -4 | mdiffs > 4) | ||
| + | |||
| </ | </ | ||
| <tabbox ro.two.sample.t-test> | <tabbox ro.two.sample.t-test> | ||
| < | < | ||
| + | > | ||
| + | > | ||
| > rm(list=ls()) | > rm(list=ls()) | ||
| > rnorm2 <- function(n, | > rnorm2 <- function(n, | ||
| Line 281: | Line 288: | ||
| [1] 10 | [1] 10 | ||
| > | > | ||
| - | > sz1 <- sz2 <- 50 | + | > sz1 <- 50 |
| + | > sz2 <- 50 | ||
| > df1 <- sz1 - 1 | > df1 <- sz1 - 1 | ||
| > df2 <- sz2 - 1 | > df2 <- sz2 - 1 | ||
| Line 361: | Line 369: | ||
| > | > | ||
| > # 이것을 그래프로 그려보면 | > # 이것을 그래프로 그려보면 | ||
| - | > hist(mdiffs, | + | > hist(mdiffs, |
| + | + main=paste(" | ||
| + | + " | ||
| > abline(v=mean(mdiffs), | > abline(v=mean(mdiffs), | ||
| + col=" | + col=" | ||
| Line 386: | Line 396: | ||
| + col=" | + col=" | ||
| > | > | ||
| + | > | ||
| + | > # 위는 이론적으로 생각해 보는 것이고 | ||
| + | > # 아래는 실제로 2 샘플을 취한 것 | ||
| > s1 <- sample(p1, sz1, replace = T) | > s1 <- sample(p1, sz1, replace = T) | ||
| > s2 <- sample(p2, sz2, replace = T) | > s2 <- sample(p2, sz2, replace = T) | ||
| Line 392: | Line 405: | ||
| [1] -6.959851 | [1] -6.959851 | ||
| > | > | ||
| + | > # 이 때의 pooled variance는 ss(s1)을 이용해서 구함 | ||
| + | > # population의 parameter는 모르는 상태로 가정 | ||
| > pv <- (ss(s1) + ss(s2))/ | > pv <- (ss(s1) + ss(s2))/ | ||
| > pv | > pv | ||
| Line 405: | Line 420: | ||
| [1] 106.6359 | [1] 106.6359 | ||
| > | > | ||
| - | > # se <- sqrt(ms.a/sz1 + ms.b/sz2) | + | > # se <- sqrt(ms1/sz1 + ms2/sz2) |
| > # se | > # se | ||
| > se.z <- sqrt(pv/sz1 + pv/sz2) | > se.z <- sqrt(pv/sz1 + pv/sz2) | ||
| Line 413: | Line 428: | ||
| > diff <- mean(s1)-mean(s2) | > diff <- mean(s1)-mean(s2) | ||
| > t.cal <- diff / se.z | > t.cal <- diff / se.z | ||
| + | > | ||
| + | > | ||
| + | > t.cal | ||
| + | [1] -3.369909 | ||
| + | > df.tot | ||
| + | [1] 98 | ||
| + | > p.val <- pt(abs(t.cal), | ||
| + | > mean.diff <- mean(s1)-mean(s2) | ||
| + | > # two <- -2 에 해당하는 정확한 값 | ||
| + | > two <- qt(.05/2, df.tot) | ||
| + | > lo2 <- se.z * two | ||
| + | > lo2 | ||
| + | [1] -4.098508 | ||
| + | > | ||
| + | > paste(c(t.cal, | ||
| + | [1] " | ||
| + | > mean.diff + c(lo2, -lo2) | ||
| + | [1] -11.058359 | ||
| > | > | ||
| > t.test(s1, | > t.test(s1, | ||
| Line 428: | Line 461: | ||
| > | > | ||
| - | > t.cal | ||
| - | [1] -3.369909 | ||
| - | > df.tot | ||
| - | [1] 98 | ||
| - | > print(p.val <- pt(abs(t.cal), | ||
| - | [1] 0.001076634 | ||
| - | > print(mean.diff <- mean(s1)-mean(s2)) | ||
| - | [1] -6.959851 | ||
| - | > two <- qt(.05/2, df.tot) | ||
| - | > two | ||
| - | [1] -1.984467 | ||
| - | > # two <- -2 | ||
| - | > lo2 <- se.z * two | ||
| - | > lo2 | ||
| - | [1] -4.098508 | ||
| - | > mean.diff+c(lo2, | ||
| - | [1] -11.058359 | ||
| > | > | ||
| > zdiffs <- scale(mdiffs) | > zdiffs <- scale(mdiffs) | ||
| Line 539: | Line 555: | ||
| > c(lo, hi) | > c(lo, hi) | ||
| [1] -4.098424 | [1] -4.098424 | ||
| - | > | ||
| > | > | ||
| > # let's see | > # let's see | ||
| - | > | + | > # but, this is exactly what the distribution of sample differences |
| + | > # was in the first place | ||
| > iter <- 1000 | > iter <- 1000 | ||
| > means.s3 <- means.s4 <- rep(NA, iter) | > means.s3 <- means.s4 <- rep(NA, iter) | ||
r/two_sample_t-test.1775600320.txt.gz · Last modified: by hkimscil
