r:sampling_distribution
Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision | |||
r:sampling_distribution [2025/09/10 14:03] – [qnorm] hkimscil | r:sampling_distribution [2025/09/10 20:41] (current) – [PS1. week02] hkimscil | ||
---|---|---|---|
Line 1: | Line 1: | ||
====== PS1. week02 ====== | ====== PS1. week02 ====== | ||
- | |||
< | < | ||
rm(list=ls()) | rm(list=ls()) | ||
Line 32: | Line 31: | ||
var(p1) | var(p1) | ||
- | hist(p1, breaks=100, col=rgb(1, | + | |
+ | hist(p1, breaks=50, col = rgb(1, 1, 1, 0.5), | ||
+ | main = " | ||
+ | abline(v=mean(p1), | ||
+ | hist(p2, add=T, breaks=50, col=rgb(1, | ||
+ | abline(v=mean(p2), | ||
+ | |||
+ | |||
+ | hist(p1, breaks=50, col=rgb(0, | ||
abline(v=mean(p1), | abline(v=mean(p1), | ||
abline(v=mean(p1)-sd(p1), | abline(v=mean(p1)-sd(p1), | ||
Line 90: | Line 97: | ||
pnorm(1.8)-pnorm(-1.8) | pnorm(1.8)-pnorm(-1.8) | ||
- | hist(z.p1, breaks=100, col=rgb(0,0,0,0)) | + | hist(z.p1, breaks=50, col=rgb(1,0,0,0)) |
abline(v=c(m.p1, | abline(v=c(m.p1, | ||
1-(pnorm(1.8)-pnorm(-1.8)) | 1-(pnorm(1.8)-pnorm(-1.8)) | ||
Line 104: | Line 111: | ||
# | # | ||
- | hist(p1, breaks=100, col=rgb(1,1,1,1)) | + | hist(p1, breaks=50, col=rgb(.9,.9,.9,.9)) |
abline(v=mean(p1), | abline(v=mean(p1), | ||
abline(v=mean(p1)-sd(p1), | abline(v=mean(p1)-sd(p1), | ||
Line 116: | Line 123: | ||
c(a, b) | c(a, b) | ||
c(-1, 1) | c(-1, 1) | ||
+ | # note that | ||
+ | .32/2 | ||
+ | pnorm(-1) | ||
+ | qnorm(.32/ | ||
+ | qnorm(pnorm(-1)) | ||
# 95% | # 95% | ||
Line 122: | Line 134: | ||
c(c, d) | c(c, d) | ||
c(-2,2) | c(-2,2) | ||
+ | |||
# 99% | # 99% | ||
e <- qnorm(.01/ | e <- qnorm(.01/ | ||
Line 127: | Line 140: | ||
c(e,f) | c(e,f) | ||
c(-3,3) | c(-3,3) | ||
+ | |||
pnorm(b)-pnorm(a) | pnorm(b)-pnorm(a) | ||
Line 140: | Line 154: | ||
################################ | ################################ | ||
- | hist(p1, breaks=50, col = rgb(1, 0, 0, 0.5), | ||
- | main = " | ||
- | abline(v=mean(p1), | ||
- | hist(p2, add=T, breaks=50, col=rgb(0, | ||
- | abline(v=mean(p2), | ||
- | |||
s.size <- 10 | s.size <- 10 | ||
Line 170: | Line 178: | ||
se.s <- sd(means) | se.s <- sd(means) | ||
- | hist(means, breaks=100, col=rgb(.1, 0, 0, .5)) | + | hist(means, breaks=50, |
- | abline(v=mean(means), | + | xlim = c(mean(means)-5*sd(means), |
+ | col=rgb(1, | ||
+ | abline(v=mean(means), | ||
# now we want to get sd of this distribution | # now we want to get sd of this distribution | ||
lo1 <- mean(means)-se.s | lo1 <- mean(means)-se.s | ||
Line 180: | Line 189: | ||
lo3 <- mean(means)-3*se.s | lo3 <- mean(means)-3*se.s | ||
hi3 <- mean(means)+3*se.s | hi3 <- mean(means)+3*se.s | ||
- | |||
- | hist(means, | ||
- | xlim = c(mean(means)-5*sd(means), | ||
- | col = rgb(1, 0, 0, .5)) | ||
abline(v=mean(means), | abline(v=mean(means), | ||
- | # abline(v=mean(p2), | + | # abline(v=mean(p2), |
abline(v=c(lo1, | abline(v=c(lo1, | ||
- | | + | |
| | ||
Line 198: | Line 203: | ||
# sd of sample means (sd(means)) | # sd of sample means (sd(means)) | ||
- | # is sqrt(var(s1)/ | ||
- | # sd(s1) / sqrt(s.size) | ||
# = se.s | # = se.s | ||
# when iter value goes to | # when iter value goes to | ||
- | # unlimited | + | # infinity |
# mean(means) = mean(p1) | # mean(means) = mean(p1) | ||
# and | # and | ||
# sd(means) = sd(p1) / sqrt(s.size) | # sd(means) = sd(p1) / sqrt(s.size) | ||
- | # that is, sd(means) | + | # that is, se.s = se.z |
# This is called CLT (Central Limit Theorem) | # This is called CLT (Central Limit Theorem) | ||
+ | # see http:// | ||
+ | |||
mean(means) | mean(means) | ||
mean(p1) | mean(p1) | ||
sd(means) | sd(means) | ||
var(p1) | var(p1) | ||
+ | # remember we started talking sample size 10 | ||
sqrt(var(p1)/ | sqrt(var(p1)/ | ||
se.z | se.z | ||
Line 237: | Line 243: | ||
- | hist(means, | + | hist(means, breaks=50, |
xlim = c(mean(means)-5*sd(means), | xlim = c(mean(means)-5*sd(means), | ||
- | col = rgb(1, | + | col = rgb(1, |
- | abline(v=mean(means), | + | abline(v=mean(means), |
# abline(v=mean(p2), | # abline(v=mean(p2), | ||
abline(v=c(lo1, | abline(v=c(lo1, | ||
- | | + | |
| | ||
Line 257: | Line 263: | ||
m.sample.i.got | m.sample.i.got | ||
- | hist(means, | + | hist(means, breaks=30, |
- | xlim = c(mean(means)-10*sd(means), mean(means)+10*sd(means)), | + | xlim = c(mean(means)-7*sd(means), mean(means)+10*sd(means)), |
- | col = rgb(1, | + | col = rgb(1, |
abline(v=mean(means), | abline(v=mean(means), | ||
abline(v=m.sample.i.got, | abline(v=m.sample.i.got, | ||
Line 276: | Line 282: | ||
# (green line) | # (green line) | ||
tmp <- mean(means) - (m.sample.i.got - mean(means)) | tmp <- mean(means) - (m.sample.i.got - mean(means)) | ||
- | abline(v=tmp, | + | abline(v=tmp, |
2 * pnorm(m.sample.i.got, | 2 * pnorm(m.sample.i.got, | ||
m.sample.i.got | m.sample.i.got | ||
### one more time | ### one more time | ||
+ | # this time, with a story | ||
mean(p2) | mean(p2) | ||
sd(p2) | sd(p2) | ||
Line 287: | Line 294: | ||
m.sample.i.got | m.sample.i.got | ||
- | hist(means, | + | tmp <- mean(means) - (m.sample.i.got-mean(means)) |
- | xlim = c(mean(means)-15*sd(means), | + | tmp |
- | col = rgb(1, | + | |
- | abline(v=mean(means), | + | hist(means, breaks=30, |
- | abline(v=m.sample.i.got, | + | xlim = c(tmp-4*sd(means), |
+ | col = rgb(1, | ||
+ | abline(v=mean(means), | ||
+ | abline(v=m.sample.i.got, | ||
# what is the probability of getting | # what is the probability of getting | ||
Line 304: | Line 314: | ||
# mean(means) - m.sample.i.got - mean(means) | # mean(means) - m.sample.i.got - mean(means) | ||
# (green line) | # (green line) | ||
- | tmp <- mean(means) - (m.sample.i.got - mean(means)) | + | abline(v=tmp, |
- | abline(v=tmp, | + | |
2 * pnorm(m.sample.i.got, | 2 * pnorm(m.sample.i.got, | ||
- | |||
- | |||
- | |||
- | |||
</ | </ | ||
====== output ====== | ====== output ====== |
r/sampling_distribution.txt · Last modified: 2025/09/10 20:41 by hkimscil