r:sampling_distribution
====== PS1. week02 ======

<code>
rm(list=ls())
# generate n values with exactly the given mean and sd
rnorm2 <- function(n, mean, sd) {
  mean+sd*scale(rnorm(n))
}

# standard error of a sample
se <- function(sample) {
  sd(sample)/sqrt(length(sample))
}

# sum of squares
ss <- function(x) {
  sum((x-mean(x))^2)
}

################################
N.p <- 1000000
m.p <- 100
sd.p <- 10

p1 <- rnorm2(N.p, m.p, sd.p)
mean(p1)
sd(p1)

p2 <- rnorm2(N.p, m.p+20, sd.p)
mean(p2)
sd(p2)

m.p1 <- mean(p1)
sd.p1 <- sd(p1)
var(p1)

hist(p1, breaks=100, col=rgb(1, 0, 0, .5))
abline(v=mean(p1), lwd=2)
abline(v=mean(p1)-sd(p1), lwd=2)
abline(v=mean(p1)+sd(p1), lwd=2)
abline(v=c(m.p1-2*sd.p1, m.p1+2*sd.p1), lwd=2, col="red")
abline(v=c(m.p1-3*sd.p1, m.p1+3*sd.p1), lwd=2, col="green")

# area between black lines = 68%
# between red = 95%
# between green = 99%

pnorm(m.p1+sd.p1, m.p1, sd.p1)
pnorm(m.p1-sd.p1, m.p1, sd.p1)
pnorm(m.p1+sd.p1, m.p1, sd.p1) -
  pnorm(m.p1-sd.p1, m.p1, sd.p1)

pnorm(m.p1+2*sd.p1, m.p1, sd.p1) -
  pnorm(m.p1-2*sd.p1, m.p1, sd.p1)

pnorm(m.p1+3*sd.p1, m.p1, sd.p1) -
  pnorm(m.p1-3*sd.p1, m.p1, sd.p1)

m.p1
sd.p1

(m.p1-m.p1)/sd.p1
((m.p1-sd.p1) - m.p1) / sd.p1
(120-100)/10
pnorm(1)-pnorm(-1)
pnorm(2)-pnorm(-2)
pnorm(3)-pnorm(-3)

1-pnorm(-2)*2
pnorm(2)-pnorm(-2)

pnorm(120, 100, 10)
pnorm(2)-pnorm(-2)

zscore <- (120-100)/10
pnorm(zscore)-pnorm(-zscore)
zscore

# reminder.
pnorm(-1)
pnorm(1, 0, 1, lower.tail = F)
pnorm(110, 100, 10, lower.tail = F)
zscore <- (110-100)/10
pnorm(zscore, lower.tail = F)

pnorm(118, 100, 10, lower.tail = F)
pnorm(18/10, lower.tail = F)

z.p1 <- (p1-mean(p1))/sd(p1)
mean(z.p1)
round(mean(z.p1), 2)
sd(z.p1)
pnorm(1.8)-pnorm(-1.8)

hist(z.p1, breaks=100, col=rgb(0, 0, 1, .5))
abline(v=c(-1.8, 1.8), lwd=2)
1-(pnorm(1.8)-pnorm(-1.8))


pnorm(1)-pnorm(-1)
1-(pnorm(-1)*2)

pnorm(2)-pnorm(-2)
1-(pnorm(-2)*2)

1-(pnorm(-3)*2)

#
hist(p1, breaks=100, col=rgb(1, 0, 0, .5))
abline(v=mean(p1), lwd=2)
abline(v=mean(p1)-sd(p1), lwd=2)
abline(v=mean(p1)+sd(p1), lwd=2)
abline(v=c(m.p1-2*sd.p1, m.p1+2*sd.p1), lwd=2, col="red")
abline(v=c(m.p1-3*sd.p1, m.p1+3*sd.p1), lwd=2, col="green")

# 68%
a <- qnorm(.32/2)
b <- qnorm(1-.32/2)
c(a, b)
c(-1, 1)

# 95%
c <- qnorm(.05/2)
d <- qnorm(1-.05/2)
c(c, d)
c(-2,2)
# 99%
e <- qnorm(.01/2)
f <- qnorm(1-.01/2)
c(e,f)
c(-3,3)

pnorm(b)-pnorm(a)
c(a, b)
pnorm(d)-pnorm(c)
c(c, d)
pnorm(f)-pnorm(e)
c(e, f)

qnorm(.5)
qnorm(1)


################################
hist(p1, breaks=50, col = rgb(1, 0, 0, 0.5),
     main = "p1 and p2")
abline(v=mean(p1), lwd=2)
hist(p2, add=T, breaks=50, col=rgb(0, 0, 1, 0.5))
abline(v=mean(p2), lwd=2)

s.size <- 10

means.temp <- c()
s1 <- sample(p1, s.size, replace = T)
mean(s1)
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp

iter <- 1000000
# means <- c()
means <- rep(NA, iter)
for (i in 1:iter) {
  # means <- append(means, mean(sample(p1, s.size, replace = T)))
  means[i] <- mean(sample(p1, s.size, replace = T))
}
length(means)
mean(means)
sd(means)
se.s <- sd(means)

hist(means, breaks=100, col=rgb(.1, 0, 0, .5))
abline(v=mean(means), lwd=2, col="red")

# now we want to get the sd of this distribution
lo1 <- mean(means)-se.s
hi1 <- mean(means)+se.s
lo2 <- mean(means)-2*se.s
hi2 <- mean(means)+2*se.s
lo3 <- mean(means)-3*se.s
hi3 <- mean(means)+3*se.s

hist(means, breaks=100,
     xlim = c(mean(means)-5*sd(means), mean(means)+5*sd(means)),
     col = rgb(1, 0, 0, .5))
abline(v=mean(means), lwd=2)
# abline(v=mean(p2), lwd=2)
abline(v=c(lo1, hi1), lwd=2)
abline(v=c(lo2, hi2), lwd=2, col="red")
abline(v=c(lo3, hi3), lwd=2, col="green")

# meanwhile . . . .
se.s

se.z <- sqrt(var(p1)/s.size)
se.z <- c(se.z)
se.z

# sd of sample means (sd(means))
# is sqrt(var(s1)/s.size), i.e.,
# sd(s1) / sqrt(s.size)
# = se.s

# when the iter value goes to
# an unlimited value:
# mean(means) = mean(p1)
# and
# sd(means) = sd(p1) / sqrt(s.size)
# that is, sd(means) = se.z
# This is called the CLT (Central Limit Theorem)
mean(means)
mean(p1)
sd(means)
var(p1)
sqrt(var(p1)/s.size)
se.z

sd(means)
se.s
se.z

# because of the CLT
loz1 <- mean(p1)-se.z
hiz1 <- mean(p1)+se.z
loz2 <- mean(p1)-2*se.z
hiz2 <- mean(p1)+2*se.z
loz3 <- mean(p1)-3*se.z
hiz3 <- mean(p1)+3*se.z

c(lo1, loz1)
c(lo2, loz2)
c(lo3, loz3)

c(hi1, hiz1)
c(hi2, hiz2)
c(hi3, hiz3)


hist(means, breaks=100,
     xlim = c(mean(means)-5*sd(means), mean(means)+5*sd(means)),
     col = rgb(1, 0, 0, .5))
abline(v=mean(means), lwd=2)
# abline(v=mean(p2), lwd=2)
abline(v=c(lo1, hi1), lwd=2)
abline(v=c(lo2, hi2), lwd=2, col="red")
abline(v=c(lo3, hi3), lwd=2, col="green")

round(c(lo1, hi1), 3)
round(c(lo2, hi2), 3)
round(c(lo3, hi3), 3)

round(c(loz1, hiz1), 3)
round(c(loz2, hiz2), 3)
round(c(loz3, hiz3), 3)

m.sample.i.got <- mean(means) + 1.5*sd(means)
m.sample.i.got

hist(means, breaks=100,
     xlim = c(mean(means)-10*sd(means), mean(means)+10*sd(means)),
     col = rgb(1, 0, 0, .5))
abline(v=mean(means), lwd=2)
abline(v=m.sample.i.got, lwd=2, col="blue")

# what is the probability of getting
# a sample mean greater than
# m.sample.i.got?
m.sample.i.got
pnorm(m.sample.i.got, mean(means), sd(means), lower.tail = F)
pnorm(m.sample.i.got, mean(p1), se.z, lower.tail = F)

# then, what is the probability of getting a mean
# greater than m.sample.i.got or
# less than the corresponding value on the other side, which is
# mean(means) - (m.sample.i.got - mean(means))
# (green line)
tmp <- mean(means) - (m.sample.i.got - mean(means))
abline(v=tmp, lwd=2, col="green")
2 * pnorm(m.sample.i.got, mean(means), sd(means), lower.tail = F)
m.sample.i.got

### one more time
mean(p2)
sd(p2)
sample(p2, s.size)
m.sample.i.got <- mean(sample(p2, s.size))
m.sample.i.got

hist(means, breaks=100,
     xlim = c(mean(means)-15*sd(means), mean(means)+15*sd(means)),
     col = rgb(1, 0, 0, .5))
abline(v=mean(means), lwd=2)
abline(v=m.sample.i.got, lwd=2, col="blue")

# what is the probability of getting
# a sample mean greater than
# m.sample.i.got?
m.sample.i.got
pnorm(m.sample.i.got, mean(means), sd(means), lower.tail = F)

# then, what is the probability of getting a mean
# greater than m.sample.i.got or
# less than the corresponding value on the other side, which is
# mean(means) - (m.sample.i.got - mean(means))
# (green line)
tmp <- mean(means) - (m.sample.i.got - mean(means))
abline(v=tmp, lwd=2, col="green")
2 * pnorm(m.sample.i.got, mean(means), sd(means), lower.tail = F)
</code>
====== output ======
<WRAP group>
<WRAP column half>
| - | .... | + | qnorm |
| * qnorm는 pnorm의 반대값을 구하는 명령어 | * qnorm는 pnorm의 반대값을 구하는 명령어 | ||
| * 히스토그램에서 검정 색 부분의 바깥 쪽 부분은 32%이고 왼 쪽의 것은 이것의 반인 16% 이다. | * 히스토그램에서 검정 색 부분의 바깥 쪽 부분은 32%이고 왼 쪽의 것은 이것의 반인 16% 이다. | ||
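A quick check of this relationship in R (a minimal sketch; the probabilities below are illustrative values only):

<code>
# qnorm maps a cumulative probability back to a quantile (the inverse of pnorm)
qnorm(0.16)                        # about -1: roughly 16% of N(0,1) lies below z = -1
qnorm(1 - 0.16)                    # about +1
pnorm(qnorm(0.975))                # 0.975: pnorm undoes qnorm
qnorm(0.025, mean = 100, sd = 10)  # lower 2.5% cut-off of N(100, 10), about 80.4
</code>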
===== distribution of sample means =====
The following all refer to the same thing (a short sketch follows this list):
  * distribution of sample means
  * distribution of the sample mean
  * mean and variance of the sample mean
  * mean and variance of sample means
  * sampling distribution

<WRAP group>
<WRAP column half>