r:sampling_distribution
Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revision | |||
| r:sampling_distribution [2025/11/23 22:57] – [distribution of sample means] hkimscil | r:sampling_distribution [2026/01/11 09:00] (current) – hkimscil | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| - | ====== PS1. week02 ====== | + | <tabbed> |
| - | <code> | + | * :r:sampling distribution: |
| - | rm(list=ls()) | + | * *: |
| - | rnorm2 <- function(n, | + | </ |
| - | | + | |
| - | } | + | |
| - | se <- function(sample) { | ||
| - | sd(sample)/ | ||
| - | } | ||
| - | |||
| - | ss <- function(x) { | ||
| - | sum((x-mean(x))^2) | ||
| - | } | ||
| - | |||
| - | ################################ | ||
| - | N.p <- 1000000 | ||
| - | m.p <- 100 | ||
| - | sd.p <- 10 | ||
| - | |||
| - | p1 <- rnorm2(N.p, m.p, sd.p) | ||
| - | mean(p1) | ||
| - | sd(p1) | ||
| - | |||
| - | p2 <- rnorm2(N.p, m.p+20, sd.p) | ||
| - | mean(p2) | ||
| - | sd(p2) | ||
| - | |||
| - | m.p1 <- mean(p1) | ||
| - | sd.p1 <- sd(p1) | ||
| - | var(p1) | ||
| - | |||
| - | |||
| - | hist(p1, breaks=50, col = rgb(1, 1, 1, 0.5), | ||
| - | main = " | ||
| - | abline(v=mean(p1), | ||
| - | hist(p2, add=T, breaks=50, col=rgb(1, | ||
| - | abline(v=mean(p2), | ||
| - | |||
| - | |||
| - | hist(p1, breaks=50, col=rgb(0, | ||
| - | abline(v=mean(p1), | ||
| - | abline(v=mean(p1)-sd(p1), | ||
| - | abline(v=mean(p1)+sd(p1), | ||
| - | abline(v=c(m.p1-2*sd.p1, | ||
| - | abline(v=c(m.p1-3*sd.p1, | ||
| - | |||
| - | # area bet black = 68% | ||
| - | # between red = 95% | ||
| - | # between green = 99% | ||
| - | |||
| - | pnorm(m.p1+sd.p1, | ||
| - | pnorm(m.p1-sd.p1, | ||
| - | pnorm(m.p1+sd.p1, | ||
| - | pnorm(m.p1-sd.p1, | ||
| - | |||
| - | pnorm(m.p1+2*sd.p1, | ||
| - | pnorm(m.p1-2*sd.p1, | ||
| - | |||
| - | pnorm(m.p1+3*sd.p1, | ||
| - | pnorm(m.p1-3*sd.p1, | ||
| - | |||
| - | m.p1 | ||
| - | sd.p1 | ||
| - | |||
| - | (m.p1-m.p1)/ | ||
| - | ((m.p1-sd.p1) - m.p1) / sd.p1 | ||
| - | (120-100)/ | ||
| - | pnorm(1)-pnorm(-1) | ||
| - | pnorm(2)-pnorm(-2) | ||
| - | pnorm(3)-pnorm(3) | ||
| - | |||
| - | 1-pnorm(-2)*2 | ||
| - | pnorm(2)-pnorm(-2) | ||
| - | |||
| - | pnorm(120, 100, 10) | ||
| - | pnorm(2)-pnorm(-2) | ||
| - | |||
| - | zscore <- (120-100)/ | ||
| - | pnorm(zscore)-pnorm(-zscore) | ||
| - | zscore | ||
| - | |||
| - | # reminder. | ||
| - | pnorm(-1) | ||
| - | pnorm(1, 0, 1, lower.tail = F) | ||
| - | pnorm(110, 100, 10, lower.tail = F) | ||
| - | zscore <- (110-100)/ | ||
| - | pnorm(zscore, | ||
| - | |||
| - | pnorm(118, 100, 10, lower.tail = F) | ||
| - | pnorm(18/ | ||
| - | |||
| - | z.p1 <- (p1-mean(p1))/ | ||
| - | mean(z.p1) | ||
| - | round(mean(z.p1), | ||
| - | sd(z.p1) | ||
| - | pnorm(1.8)-pnorm(-1.8) | ||
| - | |||
| - | hist(z.p1, breaks=50, col=rgb(1, | ||
| - | abline(v=c(m.p1, | ||
| - | 1-(pnorm(1.8)-pnorm(-1.8)) | ||
| - | |||
| - | |||
| - | pnorm(1)-pnorm(-1) | ||
| - | 1-(pnorm(-1)*2) | ||
| - | |||
| - | pnorm(2)-pnorm(-2) | ||
| - | 1-(pnorm(-2)*2) | ||
| - | |||
| - | 1-(pnorm(-3)*2) | ||
| - | |||
| - | # | ||
| - | hist(p1, breaks=50, col=rgb(.9, | ||
| - | abline(v=mean(p1), | ||
| - | abline(v=mean(p1)-sd(p1), | ||
| - | abline(v=mean(p1)+sd(p1), | ||
| - | abline(v=c(m.p1-2*sd.p1, | ||
| - | abline(v=c(m.p1-3*sd.p1, | ||
| - | |||
| - | # 68% | ||
| - | a <- qnorm(.32/ | ||
| - | b <- qnorm(1-.32/ | ||
| - | c(a, b) | ||
| - | c(-1, 1) | ||
| - | # note that | ||
| - | .32/2 | ||
| - | pnorm(-1) | ||
| - | qnorm(.32/ | ||
| - | qnorm(pnorm(-1)) | ||
| - | |||
| - | # 95% | ||
| - | c <- qnorm(.05/ | ||
| - | d <- qnorm(1-.05/ | ||
| - | c(c, d) | ||
| - | c(-2,2) | ||
| - | |||
| - | # 99% | ||
| - | e <- qnorm(.01/ | ||
| - | f <- qnorm(1-.01/ | ||
| - | c(e,f) | ||
| - | c(-3,3) | ||
| - | |||
| - | |||
| - | pnorm(b)-pnorm(a) | ||
| - | c(a, b) | ||
| - | pnorm(d)-pnorm(c) | ||
| - | c(c, d) | ||
| - | pnorm(f)-pnorm(e) | ||
| - | c(e, f) | ||
| - | |||
| - | qnorm(.5) | ||
| - | qnorm(1) | ||
| - | |||
| - | |||
| - | ################################ | ||
| - | s.size <- 10 | ||
| - | |||
| - | means.temp <- c() | ||
| - | s1 <- sample(p1, s.size, replace = T) | ||
| - | mean(s1) | ||
| - | means.temp <- append(means.temp, | ||
| - | means.temp <- append(means.temp, | ||
| - | means.temp <- append(means.temp, | ||
| - | means.temp <- append(means.temp, | ||
| - | means.temp <- append(means.temp, | ||
| - | means.temp | ||
| - | |||
| - | iter <- 1000000 | ||
| - | # means <- c() | ||
| - | means <- rep(NA, iter) | ||
| - | for (i in 1:iter) { | ||
| - | # means <- append(means, | ||
| - | means[i] <- mean(sample(p1, | ||
| - | } | ||
| - | length(means) | ||
| - | mean(means) | ||
| - | sd(means) | ||
| - | se.s <- sd(means) | ||
| - | |||
| - | hist(means, breaks=50, | ||
| - | xlim = c(mean(means)-5*sd(means), | ||
| - | | ||
| - | abline(v=mean(means), | ||
| - | # now we want to get sd of this distribution | ||
| - | lo1 <- mean(means)-se.s | ||
| - | hi1 <- mean(means)+se.s | ||
| - | lo2 <- mean(means)-2*se.s | ||
| - | hi2 <- mean(means)+2*se.s | ||
| - | lo3 <- mean(means)-3*se.s | ||
| - | hi3 <- mean(means)+3*se.s | ||
| - | abline(v=mean(means), | ||
| - | # abline(v=mean(p2), | ||
| - | abline(v=c(lo1, | ||
| - | | ||
| - | | ||
| - | |||
| - | # meanwhile . . . . | ||
| - | se.s | ||
| - | |||
| - | se.z <- sqrt(var(p1)/ | ||
| - | se.z <- c(se.z) | ||
| - | se.z | ||
| - | |||
| - | # sd of sample means (sd(means)) | ||
| - | # = se.s | ||
| - | |||
| - | # when iter value goes to | ||
| - | # infinite value: | ||
| - | # mean(means) = mean(p1) | ||
| - | # and | ||
| - | # sd(means) = sd(p1) / sqrt(s.size) | ||
| - | # that is, se.s = se.z | ||
| - | # This is called CLT (Central Limit Theorem) | ||
| - | # see http:// | ||
| - | |||
| - | mean(means) | ||
| - | mean(p1) | ||
| - | sd(means) | ||
| - | var(p1) | ||
| - | # remember we started talking sample size 10 | ||
| - | sqrt(var(p1)/ | ||
| - | se.z | ||
| - | |||
| - | sd(means) | ||
| - | se.s | ||
| - | se.z | ||
| - | |||
| - | # because CLT | ||
| - | loz1 <- mean(p1)-se.z | ||
| - | hiz1 <- mean(p1)+se.z | ||
| - | loz2 <- mean(p1)-2*se.z | ||
| - | hiz2 <- mean(p1)+2*se.z | ||
| - | loz3 <- mean(p1)-3*se.z | ||
| - | hiz3 <- mean(p1)+3*se.z | ||
| - | |||
| - | c(lo1, loz1) | ||
| - | c(lo2, loz2) | ||
| - | c(lo3, loz3) | ||
| - | |||
| - | c(hi1, hiz1) | ||
| - | c(hi2, hiz2) | ||
| - | c(hi3, hiz3) | ||
| - | |||
| - | |||
| - | hist(means, breaks=50, | ||
| - | xlim = c(mean(means)-5*sd(means), | ||
| - | col = rgb(1, 1, 1, .5)) | ||
| - | abline(v=mean(means), | ||
| - | # abline(v=mean(p2), | ||
| - | abline(v=c(lo1, | ||
| - | | ||
| - | | ||
| - | |||
| - | round(c(lo1, | ||
| - | round(c(lo2, | ||
| - | round(c(lo3, | ||
| - | |||
| - | round(c(loz1, | ||
| - | round(c(loz2, | ||
| - | round(c(loz3, | ||
| - | |||
| - | m.sample.i.got <- mean(means)+ 1.5*sd(means) | ||
| - | m.sample.i.got | ||
| - | |||
| - | hist(means, breaks=30, | ||
| - | xlim = c(mean(means)-7*sd(means), | ||
| - | col = rgb(1, 1, 1, .5)) | ||
| - | abline(v=mean(means), | ||
| - | abline(v=m.sample.i.got, | ||
| - | |||
| - | # what is the probablity of getting | ||
| - | # greater than | ||
| - | # m.sample.i.got? | ||
| - | m.sample.i.got | ||
| - | pnorm(m.sample.i.got, | ||
| - | pnorm(m.sample.i.got, | ||
| - | |||
| - | # then, what is the probabilty of getting | ||
| - | # greater than m.sample.i.got and | ||
| - | # less than corresponding value, which is | ||
| - | # mean(means) - m.sample.i.got - mean(means) | ||
| - | # (green line) | ||
| - | tmp <- mean(means) - (m.sample.i.got - mean(means)) | ||
| - | abline(v=tmp, | ||
| - | 2 * pnorm(m.sample.i.got, | ||
| - | m.sample.i.got | ||
| - | |||
| - | ### one more time | ||
| - | # this time, with a story | ||
| - | mean(p2) | ||
| - | sd(p2) | ||
| - | sample(p2, s.size) | ||
| - | m.sample.i.got <- mean(sample(p2, | ||
| - | m.sample.i.got | ||
| - | |||
| - | tmp <- mean(means) - (m.sample.i.got-mean(means)) | ||
| - | tmp | ||
| - | |||
| - | hist(means, breaks=30, | ||
| - | xlim = c(tmp-4*sd(means), | ||
| - | col = rgb(1, 1, 1, .5)) | ||
| - | abline(v=mean(means), | ||
| - | abline(v=m.sample.i.got, | ||
| - | |||
| - | # what is the probablity of getting | ||
| - | # greater than | ||
| - | # m.sample.i.got? | ||
| - | m.sample.i.got | ||
| - | pnorm(m.sample.i.got, | ||
| - | |||
| - | # then, what is the probabilty of getting | ||
| - | # greater than m.sample.i.got and | ||
| - | # less than corresponding value, which is | ||
| - | # mean(means) - m.sample.i.got - mean(means) | ||
| - | # (green line) | ||
| - | abline(v=tmp, | ||
| - | 2 * pnorm(m.sample.i.got, | ||
| - | </ | ||
| ====== output ====== | ====== output ====== | ||
| <WRAP group> | <WRAP group> | ||
r/sampling_distribution.txt · Last modified: by hkimscil
