note.w02
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
note.w02 [2025/09/11 10:36] – hkimscil | note.w02 [2025/09/12 19:39] (current) – [Sampling Distribution and z-test] hkimscil | ||
---|---|---|---|
Line 23: | Line 23: | ||
sd(p1) | sd(p1) | ||
- | p2 <- rnorm2(N.p, m.p+5, sd.p) | + | p2 <- rnorm2(N.p, m.p+20, sd.p) |
mean(p2) | mean(p2) | ||
sd(p2) | sd(p2) | ||
Line 31: | Line 31: | ||
var(p1) | var(p1) | ||
+ | hist(p1) | ||
hist(p1, breaks=50, col = rgb(1, 1, 1, 0.5), | hist(p1, breaks=50, col = rgb(1, 1, 1, 0.5), | ||
main = " | main = " | ||
Line 41: | Line 41: | ||
hist(p1, breaks=50, col=rgb(0, | hist(p1, breaks=50, col=rgb(0, | ||
abline(v=mean(p1), | abline(v=mean(p1), | ||
- | abline(v=mean(p1)-sd(p1), lwd=2) | + | abline(v=m.p1-sd.p1, lwd=2) |
abline(v=mean(p1)+sd(p1), | abline(v=mean(p1)+sd(p1), | ||
abline(v=c(m.p1-2*sd.p1, | abline(v=c(m.p1-2*sd.p1, | ||
Line 60: | Line 60: | ||
pnorm(m.p1+3*sd.p1, | pnorm(m.p1+3*sd.p1, | ||
pnorm(m.p1-3*sd.p1, | pnorm(m.p1-3*sd.p1, | ||
+ | |||
+ | pnorm(121, 100, 10) - pnorm(85, 100, 10) | ||
m.p1 | m.p1 | ||
Line 69: | Line 71: | ||
pnorm(1)-pnorm(-1) | pnorm(1)-pnorm(-1) | ||
pnorm(2)-pnorm(-2) | pnorm(2)-pnorm(-2) | ||
- | pnorm(3)-pnorm(3) | + | pnorm(3)-pnorm(-3) |
1-pnorm(-2)*2 | 1-pnorm(-2)*2 | ||
Line 154: | Line 156: | ||
################################ | ################################ | ||
- | s.size <- 50 | + | s.size <- 10 |
means.temp <- c() | means.temp <- c() | ||
Line 199: | Line 201: | ||
se.z <- sqrt(var(p1)/ | se.z <- sqrt(var(p1)/ | ||
+ | se.z | ||
se.z <- c(se.z) | se.z <- c(se.z) | ||
se.z | se.z | ||
Line 301: | Line 304: | ||
sd(means) | sd(means) | ||
- | tmp <- mean(means) - (m.s.from.p2 - mean(means)) | + | m.k <- mean(s.from.p2) |
+ | se.k <- sd(s.from.p2)/ | ||
+ | |||
+ | |||
+ | tmp <- mean(means) - (m.s.from.p2 | ||
+ | | ||
tmp | tmp | ||
Line 315: | Line 323: | ||
m.s.from.p2 | m.s.from.p2 | ||
pnorm(m.s.from.p2, | pnorm(m.s.from.p2, | ||
+ | pnorm(m.s.from.p2, | ||
# then, what is the probabilty of getting | # then, what is the probabilty of getting | ||
# greater than m.sample.i.got and | # greater than m.sample.i.got and | ||
Line 323: | Line 331: | ||
abline(v=tmp, | abline(v=tmp, | ||
2 * pnorm(m.s.from.p2, | 2 * pnorm(m.s.from.p2, | ||
+ | |||
+ | 2 * pnorm(m.s.from.p2, | ||
+ | |||
se.z | se.z | ||
Line 338: | Line 349: | ||
pt(z.cal, 49, lower.tail = F)*2 | pt(z.cal, 49, lower.tail = F)*2 | ||
t.test(s.from.p2, | t.test(s.from.p2, | ||
+ | |||
+ | |||
+ | |||
</ | </ | ||
====== output ====== | ====== output ====== | ||
+ | ===== 1 ===== | ||
+ | |||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
< | < | ||
> rm(list=ls()) | > rm(list=ls()) | ||
Line 355: | Line 373: | ||
+ } | + } | ||
> | > | ||
+ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ........................................................................... | ||
+ | </ | ||
+ | </ | ||
+ | ===== 2 ===== | ||
+ | |||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
+ | < | ||
> ################################ | > ################################ | ||
> N.p <- 1000000 | > N.p <- 1000000 | ||
Line 386: | Line 415: | ||
> | > | ||
> | > | ||
+ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ........................................................................... | ||
+ | </ | ||
+ | </ | ||
+ | |||
+ | ===== 3 ===== | ||
+ | |||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
+ | < | ||
> hist(p1, breaks=50, col=rgb(0, | > hist(p1, breaks=50, col=rgb(0, | ||
> abline(v=mean(p1), | > abline(v=mean(p1), | ||
Line 413: | Line 454: | ||
[1] 0.9973002 | [1] 0.9973002 | ||
> | > | ||
+ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ........................................................................... | ||
+ | </ | ||
+ | </ | ||
+ | |||
+ | ===== 4 ===== | ||
+ | |||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
+ | < | ||
> m.p1 | > m.p1 | ||
[1] 100 | [1] 100 | ||
Line 463: | Line 516: | ||
[1] 0.03593032 | [1] 0.03593032 | ||
> | > | ||
+ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ........................................................................... | ||
+ | </ | ||
+ | </ | ||
+ | |||
+ | ===== 5 ===== | ||
+ | |||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
+ | < | ||
> z.p1 <- (p1-mean(p1))/ | > z.p1 <- (p1-mean(p1))/ | ||
> mean(z.p1) | > mean(z.p1) | ||
Line 493: | Line 558: | ||
> | > | ||
> # | > # | ||
+ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ........................................................................... | ||
+ | </ | ||
+ | </ | ||
+ | |||
+ | ===== 6 ===== | ||
+ | |||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
+ | < | ||
> hist(p1, breaks=50, col=rgb(.9, | > hist(p1, breaks=50, col=rgb(.9, | ||
> abline(v=mean(p1), | > abline(v=mean(p1), | ||
Line 553: | Line 630: | ||
> | > | ||
> | > | ||
+ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ........................................................................... | ||
+ | </ | ||
+ | </ | ||
+ | |||
+ | ===== 7 ===== | ||
+ | |||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
+ | < | ||
> ################################ | > ################################ | ||
> s.size <- 50 | > s.size <- 50 | ||
Line 568: | Line 657: | ||
[1] 98.76098 | [1] 98.76098 | ||
> | > | ||
+ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ........................................................................... | ||
+ | </ | ||
+ | </ | ||
+ | |||
+ | ===== 8 ===== | ||
+ | |||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
+ | < | ||
> iter <- 1000000 | > iter <- 1000000 | ||
> # means <- c() | > # means <- c() | ||
Line 600: | Line 701: | ||
+ lwd=2) | + lwd=2) | ||
> | > | ||
+ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ........................................................................... | ||
+ | </ | ||
+ | </ | ||
+ | |||
+ | ===== 9 ===== | ||
+ | |||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
+ | < | ||
> # meanwhile . . . . | > # meanwhile . . . . | ||
> se.s | > se.s | ||
[1] 1.414035 | [1] 1.414035 | ||
- | > | + | # se.s = sd(means) |
+ | |||
+ | # The below is from CLT | ||
+ | # see http:// | ||
+ | # | ||
> se.z <- sqrt(var(p1)/ | > se.z <- sqrt(var(p1)/ | ||
> se.z <- c(se.z) | > se.z <- c(se.z) | ||
Line 645: | Line 762: | ||
> se.s | > se.s | ||
[1] 1.414035 | [1] 1.414035 | ||
- | > se.z | ||
- | [1] 1.414214 | ||
> | > | ||
- | > # because CLT | + | </ |
+ | </ | ||
+ | <WRAP column half> | ||
+ | ........................................................................... | ||
+ | </ | ||
+ | </ | ||
+ | |||
+ | |||
+ | ===== 10 ===== | ||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
+ | < | ||
+ | > # because | ||
+ | > # below instead of | ||
+ | > # mean(means)+-se.s | ||
+ | > # | ||
> loz1 <- mean(p1)-se.z | > loz1 <- mean(p1)-se.z | ||
> hiz1 <- mean(p1)+se.z | > hiz1 <- mean(p1)+se.z | ||
Line 671: | Line 801: | ||
> | > | ||
> | > | ||
+ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ........................................................................... | ||
+ | </ | ||
+ | </ | ||
+ | |||
+ | ===== 11 ===== | ||
+ | |||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
+ | < | ||
> hist(means, breaks=50, | > hist(means, breaks=50, | ||
+ xlim = c(mean(means)-5*sd(means), | + xlim = c(mean(means)-5*sd(means), | ||
Line 694: | Line 836: | ||
[1] 96 104 | [1] 96 104 | ||
> | > | ||
+ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ........................................................................... | ||
+ | </ | ||
+ | </ | ||
+ | |||
+ | ===== 12 ===== | ||
+ | |||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
+ | < | ||
> m.sample.i.got <- mean(means)+ 1.5*sd(means) | > m.sample.i.got <- mean(means)+ 1.5*sd(means) | ||
> m.sample.i.got | > m.sample.i.got | ||
Line 726: | Line 880: | ||
[1] 102.1193 | [1] 102.1193 | ||
> | > | ||
+ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ........................................................................... | ||
+ | </ | ||
+ | </ | ||
+ | |||
+ | |||
+ | ===== 13 ===== | ||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
+ | < | ||
> ### one more time | > ### one more time | ||
> # this time, with a story | > # this time, with a story | ||
> mean(p2) | > mean(p2) | ||
- | [1] 105 | + | [1] 120 |
> sd(p2) | > sd(p2) | ||
[1] 10 | [1] 10 | ||
Line 735: | Line 901: | ||
> m.s.from.p2 <- mean(s.from.p2) | > m.s.from.p2 <- mean(s.from.p2) | ||
> m.s.from.p2 | > m.s.from.p2 | ||
- | [1] 103.1283 | + | [1] 119.0929 |
> | > | ||
> se.s | > se.s | ||
- | [1] 1.414035 | + | [1] 3.163228 |
> se.z | > se.z | ||
- | [1] 1.414214 | + | [1] 3.162278 |
> sd(means) | > sd(means) | ||
- | [1] 1.414035 | + | [1] 3.163228 |
> | > | ||
- | > tmp <- mean(means) - (m.s.from.p2 - mean(means)) | + | > m.k <- mean(s.from.p2) |
+ | > se.k <- sd(s.from.p2)/ | ||
+ | > | ||
+ | > | ||
+ | > tmp <- mean(means) - (m.s.from.p2 | ||
+ | + - mean(means)) | ||
> tmp | > tmp | ||
- | [1] 96.86822 | + | [1] 80.90409 |
> | > | ||
> hist(means, breaks=30, | > hist(means, breaks=30, | ||
Line 758: | Line 929: | ||
> # m.sample.i.got? | > # m.sample.i.got? | ||
> m.s.from.p2 | > m.s.from.p2 | ||
- | [1] 103.1283 | + | [1] 119.0929 |
> pnorm(m.s.from.p2, | > pnorm(m.s.from.p2, | ||
- | [1] 0.01348266 | + | [1] 7.816511e-10 |
- | > | + | > pnorm(m.s.from.p2, |
+ | [1] 0.5 | ||
> # then, what is the probabilty of getting | > # then, what is the probabilty of getting | ||
> # greater than m.sample.i.got and | > # greater than m.sample.i.got and | ||
Line 769: | Line 941: | ||
> abline(v=tmp, | > abline(v=tmp, | ||
> 2 * pnorm(m.s.from.p2, | > 2 * pnorm(m.s.from.p2, | ||
- | [1] 0.02696533 | + | [1] 1.563302e-09 |
+ | > | ||
+ | > 2 * pnorm(m.s.from.p2, | ||
+ | [1] 1 | ||
+ | > | ||
> | > | ||
> se.z | > se.z | ||
- | [1] 1.414214 | + | [1] 3.162278 |
> sd(s.from.p2)/ | > sd(s.from.p2)/ | ||
- | [1] 1.414296 | + | [1] 3.35771 |
> se.z.adjusted <- sqrt(var(s.from.p2)/ | > se.z.adjusted <- sqrt(var(s.from.p2)/ | ||
> se.z.adjusted | > se.z.adjusted | ||
- | [1] 1.414296 | + | [1] 3.35771 |
> 2 * pnorm(m.s.from.p2, | > 2 * pnorm(m.s.from.p2, | ||
+ | + | ||
- | [1] 0.02697421 | + | [1] 1.298387e-08 |
> | > | ||
> z.cal <- (m.s.from.p2 - mean(p1))/ | > z.cal <- (m.s.from.p2 - mean(p1))/ | ||
> z.cal | > z.cal | ||
- | [1] 2.211891 | + | [1] 5.686277 |
> pnorm(z.cal, | > pnorm(z.cal, | ||
- | [1] 0.02697421 | + | [1] 1.298387e-08 |
> | > | ||
> | > | ||
> pt(z.cal, 49, lower.tail = F)*2 | > pt(z.cal, 49, lower.tail = F)*2 | ||
- | [1] 0.03166797 | + | [1] 7.095934e-07 |
> t.test(s.from.p2, | > t.test(s.from.p2, | ||
Line 796: | Line 972: | ||
data: s.from.p2 | data: s.from.p2 | ||
- | t = 2.2119, df = 49, p-value = 0.03167 | + | t = 5.6863, df = 9, p-value = 0.0002995 |
alternative hypothesis: true mean is not equal to 100 | alternative hypothesis: true mean is not equal to 100 | ||
95 percent confidence interval: | 95 percent confidence interval: | ||
- | 100.2861 105.9704 | + | 111.4972 126.6885 |
sample estimates: | sample estimates: | ||
mean of x | mean of x | ||
- | 103.1283 | + | 119.0929 |
+ | > | ||
> | > | ||
</ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ........................................................................... | ||
+ | {{: | ||
+ | </ | ||
+ | </ | ||
+ | |||
+ | ====== T-test sum up ====== | ||
+ | < | ||
+ | |||
+ | rm(list=ls()) | ||
+ | rnorm2 <- function(n, | ||
+ | mean+sd*scale(rnorm(n)) | ||
+ | } | ||
+ | se <- function(sample) { | ||
+ | sd(sample)/ | ||
+ | } | ||
+ | ss <- function(x) { | ||
+ | sum((x-mean(x))^2) | ||
+ | } | ||
+ | |||
+ | N.p <- 1000000 | ||
+ | m.p <- 100 | ||
+ | sd.p <- 10 | ||
+ | |||
+ | |||
+ | p1 <- rnorm2(N.p, m.p, sd.p) | ||
+ | mean(p1) | ||
+ | sd(p1) | ||
+ | |||
+ | p2 <- rnorm2(N.p, m.p+10, sd.p) | ||
+ | mean(p2) | ||
+ | sd(p2) | ||
+ | |||
+ | hist(p1, breaks=50, col = rgb(1, 0, 0, 0.5), | ||
+ | main = " | ||
+ | abline(v=mean(p1), | ||
+ | hist(p2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5)) | ||
+ | abline(v=mean(p2), | ||
+ | |||
+ | s.size <- 1000 | ||
+ | s2 <- sample(p2, s.size) | ||
+ | mean(s2) | ||
+ | sd(s2) | ||
+ | |||
+ | se.z <- sqrt(var(p1)/ | ||
+ | se.z <- c(se.z) | ||
+ | se.z.range <- c(-2*se.z, | ||
+ | se.z.range | ||
+ | |||
+ | mean(p1)+se.z.range | ||
+ | mean(s2) | ||
+ | |||
+ | z.cal <- (mean(s2) - mean(p1)) / se.z | ||
+ | z.cal | ||
+ | pnorm(z.cal, | ||
+ | |||
+ | z.cal | ||
+ | |||
+ | # principles . . . | ||
+ | # distribution of sample means | ||
+ | iter <- 100000 | ||
+ | means <- c() | ||
+ | for (i in 1:iter) { | ||
+ | m.of.s <- mean(sample(p1, | ||
+ | means <- append(means, | ||
+ | } | ||
+ | |||
+ | hist(means, | ||
+ | xlim = c(mean(means)-3*sd(means), | ||
+ | col = rgb(1, 0, 0, 0.5)) | ||
+ | abline(v=mean(p1), | ||
+ | abline(v=mean(s2), | ||
+ | | ||
+ | lo1 <- mean(p1)-se.z | ||
+ | hi1 <- mean(p1)+se.z | ||
+ | lo2 <- mean(p1)-2*se.z | ||
+ | hi2 <- mean(p1)+2*se.z | ||
+ | abline(v=c(lo1, | ||
+ | | ||
+ | | ||
+ | se.z | ||
+ | c(lo2, hi2) | ||
+ | pnorm(z.cal, | ||
+ | |||
+ | |||
+ | # Note that we use sqrt(var(s2)/ | ||
+ | # as our se value instread of | ||
+ | # sqrt(var(p1)/ | ||
+ | # This is a common practice for R | ||
+ | # In fact, some z.test (made by someone) | ||
+ | # function uses the former rather than | ||
+ | # latter. | ||
+ | |||
+ | sqrt(var(p1)/ | ||
+ | se.z | ||
+ | |||
+ | sqrt(var(s2)/ | ||
+ | se(s2) | ||
+ | |||
+ | t.cal.os <- (mean(s2) - mean(p1))/ se(s2) | ||
+ | z.cal <- (mean(s2) - mean(p1))/ se.z | ||
+ | t.cal.os | ||
+ | z.cal | ||
+ | |||
+ | df.s2 <- length(s2)-1 | ||
+ | df.s2 | ||
+ | p.t.os <- pt(abs(t.cal.os), | ||
+ | p.t.os | ||
+ | t.out <- t.test(s2, mu=mean(p1)) | ||
+ | |||
+ | library(BSDA) | ||
+ | z.out <- z.test(s2, p1, sigma.x = sd(s2), sigma.y = sd(p1)) | ||
+ | |||
+ | z.out$statistic # se.z 대신에 se(s2) 값으로 구한 z 값 | ||
+ | z.cal # se.z으로 (sqrt(var(p1)/ | ||
+ | |||
+ | t.out$statistic # se(s2)를 분모로 하여 구한 t 값 | ||
+ | t.cal.os # se(s2)를 이용하여 손으로 구한 t 값 | ||
+ | |||
+ | # But, after all, we use t.test method regardless of | ||
+ | # variation | ||
+ | |||
+ | hist(means, | ||
+ | xlim = c(mean(means)-3*sd(means), | ||
+ | col = rgb(1, 0, 0, 0.5)) | ||
+ | abline(v=mean(p1), | ||
+ | abline(v=mean(s2), | ||
+ | | ||
+ | lo1 <- mean(p1)-se.z | ||
+ | hi1 <- mean(p1)+se.z | ||
+ | lo2 <- mean(p1)-2*se.z | ||
+ | hi2 <- mean(p1)+2*se.z | ||
+ | abline(v=c(lo1, | ||
+ | | ||
+ | | ||
+ | |||
+ | # difference between black and blue line | ||
+ | # divided by | ||
+ | # se(s2) (= random difference) | ||
+ | # t.value | ||
+ | mean(s2) | ||
+ | mean(p1) | ||
+ | diff <- mean(s2)-mean(p1) | ||
+ | diff | ||
+ | se(s2) | ||
+ | diff/se(s2) | ||
+ | |||
+ | t.cal.os | ||
+ | |||
+ | ######################## | ||
+ | # 2 sample t-test | ||
+ | ######################## | ||
+ | # 가정. 아래에서 추출하는 두 | ||
+ | # 샘플의 모집단의 파라미터를 | ||
+ | # 모른다. | ||
+ | s1 <- sample(p1, s.size) | ||
+ | s2 <- sample(p2, s.size) | ||
+ | |||
+ | mean(s1) | ||
+ | mean(s2) | ||
+ | ss(s1) | ||
+ | ss(s2) | ||
+ | df.s1 <- length(s1)-1 | ||
+ | df.s2 <- length(s2)-1 | ||
+ | df.s1 | ||
+ | df.s2 | ||
+ | |||
+ | pooled.variance <- (ss(s1)+ss(s2))/ | ||
+ | pooled.variance | ||
+ | se.ts <- sqrt((pooled.variance/ | ||
+ | se.ts | ||
+ | t.cal.ts <- (mean(s1)-mean(s2))/ | ||
+ | t.cal.ts | ||
+ | p.val.ts <- pt(abs(t.cal.ts), | ||
+ | p.val.ts | ||
+ | |||
+ | t.test(s1, s2, var.equal = T) | ||
+ | |||
+ | se(s1) | ||
+ | se(s2) | ||
+ | |||
+ | mean(s1)+c(-se(s1)*2, | ||
+ | mean(s2)+c(-se(s2)*2, | ||
+ | |||
+ | mean(p1) | ||
+ | mean(p2) | ||
+ | |||
+ | hist(s1, breaks=50, | ||
+ | col = rgb(1, 0, 0, 0.5)) | ||
+ | hist(s2, breaks=50, add=T, col=rgb(0, | ||
+ | abline(v=mean(s1), | ||
+ | # hist(s2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5)) | ||
+ | abline(v=mean(s2), | ||
+ | |||
+ | diff <- mean(s1)-mean(s2) | ||
+ | se.ts | ||
+ | diff/se.ts | ||
+ | |||
+ | #### | ||
+ | # repeated measure t-test | ||
+ | # we can use the above case | ||
+ | # pop paramter unknown | ||
+ | # two consecutive measurement | ||
+ | # for the same sample | ||
+ | |||
+ | t1 <- s1 | ||
+ | t2 <- s2 | ||
+ | mean(t1) | ||
+ | mean(t2) | ||
+ | diff.s <- t1 - t2 | ||
+ | diff.s | ||
+ | t.cal.rm <- mean(diff.s)/ | ||
+ | t.cal.rm | ||
+ | p.val.rm <- pt(abs(t.cal.rm), | ||
+ | p.val.rm | ||
+ | t.test(s1, s2, paired = T) | ||
+ | |||
+ | # create multiple histogram | ||
+ | s.all <- c(s1,s2) | ||
+ | mean(s.all) | ||
+ | hist(s1, col=' | ||
+ | hist(s2, col=' | ||
+ | abline(v=c(mean(s.all)), | ||
+ | | ||
+ | abline(v=c(mean(s1), | ||
+ | | ||
+ | |||
+ | comb <- data.frame(s1, | ||
+ | dat <- stack(comb) | ||
+ | head(dat) | ||
+ | |||
+ | m.tot <- mean(s.all) | ||
+ | m.s1 <- mean(s1) | ||
+ | m.s2 <- mean(s2) | ||
+ | |||
+ | ss.tot <- ss(s.all) | ||
+ | ss.s1 <- ss(s1) | ||
+ | ss.s2 <- ss(s2) | ||
+ | |||
+ | df.tot <- length(s.all)-1 | ||
+ | df.s1 <- length(s1)-1 | ||
+ | df.s2 <- length(s2)-1 | ||
+ | |||
+ | ms.tot <- var(s.all) | ||
+ | ms.tot | ||
+ | ss.tot/ | ||
+ | |||
+ | var(s1) | ||
+ | ss.s1 / df.s1 | ||
+ | |||
+ | var(s2) | ||
+ | ss.s2 / df.s2 | ||
+ | |||
+ | ss.b.s1 <- length(s1) * ((m.tot - m.s1)^2) | ||
+ | ss.b.s2 <- length(s2) * ((m.tot - m.s1)^2) | ||
+ | ss.bet <- ss.b.s1+ss.b.s2 | ||
+ | ss.bet | ||
+ | |||
+ | ss.wit <- ss.s1 + ss.s2 | ||
+ | ss.wit | ||
+ | |||
+ | ss.bet + ss.wit | ||
+ | ss.tot | ||
+ | |||
+ | library(dplyr) | ||
+ | # df.bet <- length(unique(dat)) - 1 | ||
+ | df.bet <- nlevels(dat$ind) - 1 | ||
+ | df.wit <- df.s1+df.s2 | ||
+ | df.bet | ||
+ | df.wit | ||
+ | df.bet+df.wit | ||
+ | df.tot | ||
+ | |||
+ | ms.bet <- ss.bet / df.bet | ||
+ | ms.wit <- ss.wit / df.wit | ||
+ | ms.bet | ||
+ | ms.wit | ||
+ | |||
+ | f.cal <- ms.bet / ms.wit | ||
+ | f.cal | ||
+ | pf(f.cal, df.bet, df.wit, lower.tail = F) | ||
+ | |||
+ | |||
+ | f.test <- aov(dat$values~ dat$ind, data = dat) | ||
+ | summary(f.test) | ||
+ | |||
+ | sqrt(f.cal) | ||
+ | t.cal.ts | ||
+ | |||
+ | # this is anova after all. | ||
+ | </ | ||
+ | |||
+ | ====== output ====== | ||
+ | < | ||
+ | > rm(list=ls()) | ||
+ | > rnorm2 <- function(n, | ||
+ | + | ||
+ | + } | ||
+ | > se <- function(sample) { | ||
+ | + | ||
+ | + } | ||
+ | > ss <- function(x) { | ||
+ | + | ||
+ | + } | ||
+ | > | ||
+ | > N.p <- 1000000 | ||
+ | > m.p <- 100 | ||
+ | > sd.p <- 10 | ||
+ | > | ||
+ | > | ||
+ | > p1 <- rnorm2(N.p, m.p, sd.p) | ||
+ | > mean(p1) | ||
+ | [1] 100 | ||
+ | > sd(p1) | ||
+ | [1] 10 | ||
+ | > | ||
+ | > p2 <- rnorm2(N.p, m.p+10, sd.p) | ||
+ | > mean(p2) | ||
+ | [1] 110 | ||
+ | > sd(p2) | ||
+ | [1] 10 | ||
+ | > | ||
+ | > hist(p1, breaks=50, col = rgb(1, 0, 0, 0.5), | ||
+ | + main = " | ||
+ | > abline(v=mean(p1), | ||
+ | > hist(p2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5)) | ||
+ | > abline(v=mean(p2), | ||
+ | > | ||
+ | > s.size <- 1000 | ||
+ | > s2 <- sample(p2, s.size) | ||
+ | > mean(s2) | ||
+ | [1] 110.4892 | ||
+ | > sd(s2) | ||
+ | [1] 10.36614 | ||
+ | > | ||
+ | > se.z <- sqrt(var(p1)/ | ||
+ | > se.z <- c(se.z) | ||
+ | > se.z.range <- c(-2*se.z, | ||
+ | > se.z.range | ||
+ | [1] -0.6324555 | ||
+ | > | ||
+ | > mean(p1)+se.z.range | ||
+ | [1] 99.36754 100.63246 | ||
+ | > mean(s2) | ||
+ | [1] 110.4892 | ||
+ | > | ||
+ | > z.cal <- (mean(s2) - mean(p1)) / se.z | ||
+ | > z.cal | ||
+ | [1] 33.16976 | ||
+ | > pnorm(z.cal, | ||
+ | [1] 2.93954e-241 | ||
+ | > | ||
+ | > z.cal | ||
+ | [1] 33.16976 | ||
+ | > | ||
+ | > # principles . . . | ||
+ | > # distribution of sample means | ||
+ | > iter <- 100000 | ||
+ | > means <- c() | ||
+ | > for (i in 1:iter) { | ||
+ | + | ||
+ | + means <- append(means, | ||
+ | + } | ||
+ | > | ||
+ | > hist(means, | ||
+ | + xlim = c(mean(means)-3*sd(means), | ||
+ | + col = rgb(1, 0, 0, 0.5)) | ||
+ | > abline(v=mean(p1), | ||
+ | > abline(v=mean(s2), | ||
+ | + col=" | ||
+ | > lo1 <- mean(p1)-se.z | ||
+ | > hi1 <- mean(p1)+se.z | ||
+ | > lo2 <- mean(p1)-2*se.z | ||
+ | > hi2 <- mean(p1)+2*se.z | ||
+ | > abline(v=c(lo1, | ||
+ | + col=c(" | ||
+ | + lwd=2) | ||
+ | > se.z | ||
+ | [1] 0.3162278 | ||
+ | > c(lo2, hi2) | ||
+ | [1] 99.36754 100.63246 | ||
+ | > pnorm(z.cal, | ||
+ | [1] 2.93954e-241 | ||
+ | > | ||
+ | > | ||
+ | > # Note that we use sqrt(var(s2)/ | ||
+ | > # as our se value instread of | ||
+ | > # sqrt(var(p1)/ | ||
+ | > # This is a common practice for R | ||
+ | > # In fact, some z.test (made by someone) | ||
+ | > # function uses the former rather than | ||
+ | > # latter. | ||
+ | > | ||
+ | > sqrt(var(p1)/ | ||
+ | [,1] | ||
+ | [1,] 0.3162278 | ||
+ | > se.z | ||
+ | [1] 0.3162278 | ||
+ | > | ||
+ | > sqrt(var(s2)/ | ||
+ | [1] 0.3278061 | ||
+ | > se(s2) | ||
+ | [1] 0.3278061 | ||
+ | > | ||
+ | > t.cal.os <- (mean(s2) - mean(p1))/ se(s2) | ||
+ | > z.cal <- (mean(s2) - mean(p1))/ se.z | ||
+ | > t.cal.os | ||
+ | [1] 31.99818 | ||
+ | > z.cal | ||
+ | [1] 33.16976 | ||
+ | > | ||
+ | > df.s2 <- length(s2)-1 | ||
+ | > df.s2 | ||
+ | [1] 999 | ||
+ | > p.t.os <- pt(abs(t.cal.os), | ||
+ | > p.t.os | ||
+ | [1] 3.162139e-155 | ||
+ | > t.out <- t.test(s2, mu=mean(p1)) | ||
+ | > | ||
+ | > library(BSDA) | ||
+ | 필요한 패키지를 로딩중입니다: | ||
+ | |||
+ | 다음의 패키지를 부착합니다: | ||
+ | |||
+ | The following object is masked from ‘package: | ||
+ | |||
+ | Orange | ||
+ | |||
+ | 경고메시지(들): | ||
+ | 패키지 ‘BSDA’는 R 버전 4.3.3에서 작성되었습니다 | ||
+ | > z.out <- z.test(s2, p1, sigma.x = sd(s2), sigma.y = sd(p1)) | ||
+ | > | ||
+ | > z.out$statistic # se.z 대신에 se(s2) 값으로 구한 z 값 | ||
+ | z | ||
+ | 31.9833 | ||
+ | > z.cal # se.z으로 (sqrt(var(p1)/ | ||
+ | [1] 33.16976 | ||
+ | > | ||
+ | > t.out$statistic # se(s2)를 분모로 하여 구한 t 값 | ||
+ | | ||
+ | 31.99818 | ||
+ | > t.cal.os # se(s2)를 이용하여 손으로 구한 t 값 | ||
+ | [1] 31.99818 | ||
+ | > | ||
+ | > # But, after all, we use t.test method regardless of | ||
+ | > # variation | ||
+ | > | ||
+ | > hist(means, | ||
+ | + xlim = c(mean(means)-3*sd(means), | ||
+ | + col = rgb(1, 0, 0, 0.5)) | ||
+ | > abline(v=mean(p1), | ||
+ | > abline(v=mean(s2), | ||
+ | + col=" | ||
+ | > lo1 <- mean(p1)-se.z | ||
+ | > hi1 <- mean(p1)+se.z | ||
+ | > lo2 <- mean(p1)-2*se.z | ||
+ | > hi2 <- mean(p1)+2*se.z | ||
+ | > abline(v=c(lo1, | ||
+ | + col=c(" | ||
+ | + lwd=2) | ||
+ | > | ||
+ | > # difference between black and blue line | ||
+ | > # divided by | ||
+ | > # se(s2) (= random difference) | ||
+ | > # t.value | ||
+ | > mean(s2) | ||
+ | [1] 110.4892 | ||
+ | > mean(p1) | ||
+ | [1] 100 | ||
+ | > diff <- mean(s2)-mean(p1) | ||
+ | > diff | ||
+ | [1] 10.4892 | ||
+ | > se(s2) | ||
+ | [1] 0.3278061 | ||
+ | > diff/se(s2) | ||
+ | [1] 31.99818 | ||
+ | > | ||
+ | > t.cal.os | ||
+ | [1] 31.99818 | ||
+ | > | ||
+ | > ######################## | ||
+ | > # 2 sample t-test | ||
+ | > ######################## | ||
+ | > # 가정. 아래에서 추출하는 두 | ||
+ | > # 샘플의 모집단의 파라미터를 | ||
+ | > # 모른다. | ||
+ | > s1 <- sample(p1, s.size) | ||
+ | > s2 <- sample(p2, s.size) | ||
+ | > | ||
+ | > mean(s1) | ||
+ | [1] 100.7138 | ||
+ | > mean(s2) | ||
+ | [1] 109.8804 | ||
+ | > ss(s1) | ||
+ | [1] 108288.9 | ||
+ | > ss(s2) | ||
+ | [1] 100751 | ||
+ | > df.s1 <- length(s1)-1 | ||
+ | > df.s2 <- length(s2)-1 | ||
+ | > df.s1 | ||
+ | [1] 999 | ||
+ | > df.s2 | ||
+ | [1] 999 | ||
+ | > | ||
+ | > pooled.variance <- (ss(s1)+ss(s2))/ | ||
+ | > pooled.variance | ||
+ | [1] 104.6246 | ||
+ | > se.ts <- sqrt((pooled.variance/ | ||
+ | > se.ts | ||
+ | [1] 0.4574376 | ||
+ | > t.cal.ts <- (mean(s1)-mean(s2))/ | ||
+ | > t.cal.ts | ||
+ | [1] -20.03892 | ||
+ | > p.val.ts <- pt(abs(t.cal.ts), | ||
+ | > p.val.ts | ||
+ | [1] 1.522061e-81 | ||
+ | > | ||
+ | > t.test(s1, s2, var.equal = T) | ||
+ | |||
+ | Two Sample t-test | ||
+ | |||
+ | data: s1 and s2 | ||
+ | t = -20.039, df = 1998, p-value < 2.2e-16 | ||
+ | alternative hypothesis: true difference in means is not equal to 0 | ||
+ | 95 percent confidence interval: | ||
+ | | ||
+ | sample estimates: | ||
+ | mean of x mean of y | ||
+ | | ||
+ | |||
+ | > | ||
+ | > se(s1) | ||
+ | [1] 0.3292374 | ||
+ | > se(s2) | ||
+ | [1] 0.3175719 | ||
+ | > | ||
+ | > mean(s1)+c(-se(s1)*2, | ||
+ | [1] 100.0553 101.3723 | ||
+ | > mean(s2)+c(-se(s2)*2, | ||
+ | [1] 109.2452 110.5155 | ||
+ | > | ||
+ | > mean(p1) | ||
+ | [1] 100 | ||
+ | > mean(p2) | ||
+ | [1] 110 | ||
+ | > | ||
+ | > hist(s1, breaks=50, | ||
+ | + col = rgb(1, 0, 0, 0.5)) | ||
+ | > hist(s2, breaks=50, add=T, col=rgb(0, | ||
+ | > abline(v=mean(s1), | ||
+ | > # hist(s2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5)) | ||
+ | > abline(v=mean(s2), | ||
+ | > | ||
+ | > diff <- mean(s1)-mean(s2) | ||
+ | > se.ts | ||
+ | [1] 0.4574376 | ||
+ | > diff/se.ts | ||
+ | [1] -20.03892 | ||
+ | > | ||
+ | > #### | ||
+ | > # repeated measure t-test | ||
+ | > # we can use the above case | ||
+ | > # pop paramter unknown | ||
+ | > # two consecutive measurement | ||
+ | > # for the same sample | ||
+ | > | ||
+ | > t1 <- s1 | ||
+ | > t2 <- s2 | ||
+ | > mean(t1) | ||
+ | [1] 100.7138 | ||
+ | > mean(t2) | ||
+ | [1] 109.8804 | ||
+ | > diff.s <- t1 - t2 | ||
+ | > diff.s | ||
+ | [1] -11.22445947 | ||
+ | | ||
+ | [15] | ||
+ | [22] -1.00704696 | ||
+ | [29] -10.58244069 -21.50608157 -53.61230898 | ||
+ | [36] | ||
+ | [43] -19.16855657 | ||
+ | [50] | ||
+ | [57] -8.28743679 -38.33995044 -50.60884456 | ||
+ | [64] | ||
+ | [71] -20.50728324 -19.37275012 -23.27866089 -11.74962053 -34.35317908 -26.10460199 | ||
+ | [78] -24.79252799 | ||
+ | [85] -23.66447959 | ||
+ | [92] -21.22842221 -11.68774036 | ||
+ | [99] 19.63435733 -14.40578016 -11.24172079 | ||
+ | | ||
+ | [113] -10.23092427 | ||
+ | [120] -31.50478963 | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | [183] -38.63379525 -16.58800530 -17.62672134 | ||
+ | [190] -22.25276559 | ||
+ | [197] -17.21120659 | ||
+ | | ||
+ | | ||
+ | [218] -24.41623403 | ||
+ | [225] -13.27091592 | ||
+ | [232] -11.52528966 | ||
+ | [239] -13.34969773 -37.98584006 | ||
+ | [246] -24.07983488 | ||
+ | [253] -12.30101864 | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | [288] -10.10160879 -11.59352210 | ||
+ | | ||
+ | [302] -35.18133234 | ||
+ | | ||
+ | | ||
+ | [323] -31.81200194 -12.67414744 | ||
+ | | ||
+ | [337] -17.60471709 | ||
+ | [344] -10.52672545 | ||
+ | | ||
+ | [358] -23.18199896 -18.58488442 -26.09162223 | ||
+ | | ||
+ | | ||
+ | [379] -37.25886118 | ||
+ | [386] -10.17057116 -11.45984282 -11.29130871 -15.60613249 | ||
+ | [393] -21.77681110 | ||
+ | | ||
+ | [407] -17.52668230 | ||
+ | [414] -14.30821541 -10.88427284 | ||
+ | | ||
+ | [428] -23.35919604 | ||
+ | | ||
+ | [442] -19.75974381 | ||
+ | | ||
+ | | ||
+ | [463] -19.77773032 | ||
+ | | ||
+ | | ||
+ | [484] -18.62856464 -25.58573774 -11.07888550 -16.77332405 | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | [519] -27.58434557 | ||
+ | [526] -44.00039083 -11.40638340 -10.42772214 | ||
+ | [533] -27.11548294 -26.69534077 | ||
+ | | ||
+ | [547] -18.87000360 | ||
+ | [554] -17.69850124 -11.92887098 | ||
+ | | ||
+ | | ||
+ | | ||
+ | [582] -12.87197389 -27.14630137 -14.53291112 | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | [645] -42.02480771 -13.42077241 | ||
+ | [652] -17.68303368 | ||
+ | [659] -19.48957914 -28.88885682 -10.59583907 -32.20537872 -11.95661953 -23.77887711 -23.51551361 | ||
+ | [666] -17.32443690 -10.86310515 | ||
+ | [673] -12.41671968 | ||
+ | | ||
+ | [687] -21.75000477 | ||
+ | [694] -34.30010114 | ||
+ | [701] -19.46561809 | ||
+ | [708] -27.91638644 -12.61381331 | ||
+ | [715] -20.55376627 | ||
+ | [722] -14.38944775 | ||
+ | | ||
+ | [736] -20.25705972 | ||
+ | [743] -13.32818441 | ||
+ | | ||
+ | [757] -19.16011286 -13.91801221 -11.66649472 | ||
+ | [764] -14.75337241 | ||
+ | | ||
+ | [778] -34.01869977 | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | [813] -14.60867384 | ||
+ | [820] -13.93270232 -20.21993468 | ||
+ | [827] -29.04193544 -24.04102844 -14.01878071 | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | [862] -30.93593593 | ||
+ | | ||
+ | [876] -14.65449874 -25.51320775 -35.73124575 | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | [918] -21.82098554 | ||
+ | [925] -26.29548705 -28.39677088 | ||
+ | [932] -11.67413566 -11.59680714 | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | [967] -18.49661342 -12.30226946 | ||
+ | [974] -11.26368153 | ||
+ | [981] -25.17421015 -10.35273557 | ||
+ | | ||
+ | [995] -22.27223290 | ||
+ | > t.cal.rm <- mean(diff.s)/ | ||
+ | > t.cal.rm | ||
+ | [1] -19.82846 | ||
+ | > p.val.rm <- pt(abs(t.cal.rm), | ||
+ | > p.val.rm | ||
+ | [1] 4.836389e-74 | ||
+ | > t.test(s1, s2, paired = T) | ||
+ | |||
+ | Paired t-test | ||
+ | |||
+ | data: s1 and s2 | ||
+ | t = -19.828, df = 999, p-value < 2.2e-16 | ||
+ | alternative hypothesis: true mean difference is not equal to 0 | ||
+ | 95 percent confidence interval: | ||
+ | | ||
+ | sample estimates: | ||
+ | mean difference | ||
+ | -9.166555 | ||
+ | |||
+ | > | ||
+ | > # create multiple histogram | ||
+ | > s.all <- c(s1,s2) | ||
+ | > mean(s.all) | ||
+ | [1] 105.2971 | ||
+ | > hist(s1, col=' | ||
+ | > hist(s2, col=' | ||
+ | > abline(v=c(mean(s.all)), | ||
+ | + col=c(" | ||
+ | > abline(v=c(mean(s1), | ||
+ | + col=c(" | ||
+ | > | ||
+ | > comb <- data.frame(s1, | ||
+ | > dat <- stack(comb) | ||
+ | > head(dat) | ||
+ | | ||
+ | 1 93.17788 | ||
+ | 2 103.00254 | ||
+ | 3 104.53388 | ||
+ | 4 88.59698 | ||
+ | 5 105.67789 | ||
+ | 6 112.72657 | ||
+ | > | ||
+ | > m.tot <- mean(s.all) | ||
+ | > m.s1 <- mean(s1) | ||
+ | > m.s2 <- mean(s2) | ||
+ | > | ||
+ | > ss.tot <- ss(s.all) | ||
+ | > ss.s1 <- ss(s1) | ||
+ | > ss.s2 <- ss(s2) | ||
+ | > | ||
+ | > df.tot <- length(s.all)-1 | ||
+ | > df.s1 <- length(s1)-1 | ||
+ | > df.s2 <- length(s2)-1 | ||
+ | > | ||
+ | > ms.tot <- var(s.all) | ||
+ | > ms.tot | ||
+ | [1] 125.5892 | ||
+ | > ss.tot/ | ||
+ | [1] 125.5892 | ||
+ | > | ||
+ | > var(s1) | ||
+ | [1] 108.3973 | ||
+ | > ss.s1 / df.s1 | ||
+ | [1] 108.3973 | ||
+ | > | ||
+ | > var(s2) | ||
+ | [1] 100.8519 | ||
+ | > ss.s2 / df.s2 | ||
+ | [1] 100.8519 | ||
+ | > | ||
+ | > ss.b.s1 <- length(s1) * ((m.tot - m.s1)^2) | ||
+ | > ss.b.s2 <- length(s2) * ((m.tot - m.s1)^2) | ||
+ | > ss.bet <- ss.b.s1+ss.b.s2 | ||
+ | > ss.bet | ||
+ | [1] 42012.87 | ||
+ | > | ||
+ | > ss.wit <- ss.s1 + ss.s2 | ||
+ | > ss.wit | ||
+ | [1] 209039.9 | ||
+ | > | ||
+ | > ss.bet + ss.wit | ||
+ | [1] 251052.8 | ||
+ | > ss.tot | ||
+ | [1] 251052.8 | ||
+ | > | ||
+ | > library(dplyr) | ||
+ | |||
+ | 다음의 패키지를 부착합니다: | ||
+ | |||
+ | The following objects are masked from ‘package: | ||
+ | |||
+ | filter, lag | ||
+ | |||
+ | The following objects are masked from ‘package: | ||
+ | |||
+ | intersect, setdiff, setequal, union | ||
+ | > # df.bet <- length(unique(dat)) - 1 | ||
+ | > df.bet <- nlevels(dat$ind) - 1 | ||
+ | > df.wit <- df.s1+df.s2 | ||
+ | > df.bet | ||
+ | [1] 1 | ||
+ | > df.wit | ||
+ | [1] 1998 | ||
+ | > df.bet+df.wit | ||
+ | [1] 1999 | ||
+ | > df.tot | ||
+ | [1] 1999 | ||
+ | > | ||
+ | > ms.bet <- ss.bet / df.bet | ||
+ | > ms.wit <- ss.wit / df.wit | ||
+ | > ms.bet | ||
+ | [1] 42012.87 | ||
+ | > ms.wit | ||
+ | [1] 104.6246 | ||
+ | > | ||
+ | > f.cal <- ms.bet / ms.wit | ||
+ | > f.cal | ||
+ | [1] 401.5582 | ||
+ | > pf(f.cal, df.bet, df.wit, lower.tail = F) | ||
+ | [1] 1.522061e-81 | ||
+ | > | ||
+ | > | ||
+ | > f.test <- aov(dat$values~ dat$ind, data = dat) | ||
+ | > summary(f.test) | ||
+ | Df Sum Sq Mean Sq F value Pr(> | ||
+ | dat$ind | ||
+ | Residuals | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | > | ||
+ | > sqrt(f.cal) | ||
+ | [1] 20.03892 | ||
+ | > t.cal.ts | ||
+ | [1] -20.03892 | ||
+ | > | ||
+ | > # this is anova after all. | ||
+ | > | ||
+ | </ | ||
+ |
note.w02.1757554571.txt.gz · Last modified: 2025/09/11 10:36 by hkimscil