User Tools

Site Tools


r:sampling_distribution

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision | Previous revision
r:sampling_distribution [2025/09/10 14:03] – [qnorm] hkimscil | r:sampling_distribution [2025/09/10 20:41] (current) – [PS1. week02] hkimscil
Line 1: Line 1:
 ====== PS1. week02 ====== ====== PS1. week02 ======
- 
 <code> <code>
 rm(list=ls()) rm(list=ls())
Line 32: Line 31:
 var(p1) var(p1)
  
-hist(p1, breaks=100, col=rgb(1,1,1,1))+ 
 +hist(p1, breaks=50, col = rgb(1, 1, 1, 0.5), 
 +     main = "histogram of p1 and p2",
 +abline(v=mean(p1), col="black", lwd=3) 
 +hist(p2, add=T, breaks=50, col=rgb(1,1,.5,.5)) 
 +abline(v=mean(p2), col="red", lwd=3) 
 + 
 + 
 +hist(p1, breaks=50, col=rgb(0,.5,.5,.5))
 abline(v=mean(p1),lwd=2) abline(v=mean(p1),lwd=2)
 abline(v=mean(p1)-sd(p1), lwd=2) abline(v=mean(p1)-sd(p1), lwd=2)
Line 90: Line 97:
 pnorm(1.8)-pnorm(-1.8) pnorm(1.8)-pnorm(-1.8)
  
-hist(z.p1, breaks=100, col=rgb(0,0,0,0))+hist(z.p1, breaks=50, col=rgb(1,0,0,0))
 abline(v=c(m.p1, -1.8, 1.8), col='red') abline(v=c(m.p1, -1.8, 1.8), col='red')
 1-(pnorm(1.8)-pnorm(-1.8)) 1-(pnorm(1.8)-pnorm(-1.8))
Line 104: Line 111:
  
 # #
-hist(p1, breaks=100, col=rgb(1,1,1,1))+hist(p1, breaks=50, col=rgb(.9,.9,.9,.9))
 abline(v=mean(p1),lwd=2) abline(v=mean(p1),lwd=2)
 abline(v=mean(p1)-sd(p1), lwd=2) abline(v=mean(p1)-sd(p1), lwd=2)
Line 116: Line 123:
 c(a, b) c(a, b)
 c(-1, 1) c(-1, 1)
 +# note that
 +.32/2
 +pnorm(-1)
 +qnorm(.32/2)
 +qnorm(pnorm(-1))
  
 # 95% # 95%
Line 122: Line 134:
 c(c, d) c(c, d)
 c(-2,2) c(-2,2)
 +
 # 99%  # 99% 
 e <- qnorm(.01/2) e <- qnorm(.01/2)
Line 127: Line 140:
 c(e,f) c(e,f)
 c(-3,3) c(-3,3)
 +
  
 pnorm(b)-pnorm(a) pnorm(b)-pnorm(a)
Line 140: Line 154:
  
 ################################ ################################
-hist(p1, breaks=50, col = rgb(1, 0, 0, 0.5), 
-     main = "histogram of p1 and p2",) 
-abline(v=mean(p1), col="black", lwd=3) 
-hist(p2, add=T, breaks=50, col=rgb(0,0,1,.5)) 
-abline(v=mean(p2), col="violet", lwd=3) 
- 
 s.size <- 10 s.size <- 10
  
Line 170: Line 178:
 se.s <- sd(means) se.s <- sd(means)
  
-hist(means, breaks=100, col=rgb(.1, 00, .5)) +hist(means, breaks=50 
-abline(v=mean(means), col="red", lwd=2) +     xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)),  
 +     col=rgb(1, 11, .5)) 
 +abline(v=mean(means), col="black", lwd=3)
 # now we want to get sd of this distribution # now we want to get sd of this distribution
 lo1 <- mean(means)-se.s lo1 <- mean(means)-se.s
Line 180: Line 189:
 lo3 <- mean(means)-3*se.s lo3 <- mean(means)-3*se.s
 hi3 <- mean(means)+3*se.s hi3 <- mean(means)+3*se.s
- 
-hist(means,  
-     xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)),  
-     col = rgb(1, 0, 0, .5)) 
 abline(v=mean(means), col="black", lwd=2) abline(v=mean(means), col="black", lwd=2)
-# abline(v=mean(p2), colo='darkgreen', lwd=3)+# abline(v=mean(p2), colo='darkgreen', lwd=2)
 abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3),  abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3), 
-       col=c("green","green", "blue", "blue", "orange", "orange"), +       col=c("red","red", "blue", "blue", "orange", "orange"), 
        lwd=2)        lwd=2)
  
Line 198: Line 203:
  
 # sd of sample means (sd(means)) # sd of sample means (sd(means))
-# is sqrt(var(s1)/s.size) or  
-# sd(s1) / sqrt(s.size)  
 # = se.s # = se.s
  
 # when iter value goes to  # when iter value goes to 
-unlimited value:+infinite value:
 # mean(means) = mean(p1)  # mean(means) = mean(p1) 
 # and # and
 # sd(means) = sd(p1) / sqrt(s.size) # sd(means) = sd(p1) / sqrt(s.size)
-# that is, sd(means) = se.z+# that is, se.s = se.z
 # This is called CLT (Central Limit Theorem) # This is called CLT (Central Limit Theorem)
 +# see http://commres.net/wiki/cetral_limit_theorem
 +
 mean(means) mean(means)
 mean(p1) mean(p1)
 sd(means) sd(means)
 var(p1)  var(p1) 
 +# remember we started with a sample size of 10
 sqrt(var(p1)/s.size) sqrt(var(p1)/s.size)
 se.z se.z
Line 237: Line 243:
  
  
-hist(means, +hist(means, breaks=50,
      xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)),       xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)), 
-     col = rgb(1, 00, .5)) +     col = rgb(1, 11, .5)) 
-abline(v=mean(means), col="black", lwd=2)+abline(v=mean(means), col="black", lwd=3)
 # abline(v=mean(p2), colo='darkgreen', lwd=3) # abline(v=mean(p2), colo='darkgreen', lwd=3)
 abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3),  abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3), 
-       col=c("green","green", "blue", "blue", "orange", "orange"), +       col=c("darkgreen","darkgreen", "blue", "blue", "orange", "orange"), 
        lwd=2)        lwd=2)
  
Line 257: Line 263:
 m.sample.i.got m.sample.i.got
  
-hist(means,  +hist(means, breaks=30,  
-     xlim = c(mean(means)-10*sd(means), mean(means)+10*sd(means)),  +     xlim = c(mean(means)-7*sd(means), mean(means)+10*sd(means)),  
-     col = rgb(1, 00, .5))+     col = rgb(1, 11, .5))
 abline(v=mean(means), col="black", lwd=3) abline(v=mean(means), col="black", lwd=3)
 abline(v=m.sample.i.got, col='darkgreen', lwd=3) abline(v=m.sample.i.got, col='darkgreen', lwd=3)
Line 276: Line 282:
 # (green line) # (green line)
 tmp <- mean(means) - (m.sample.i.got - mean(means)) tmp <- mean(means) - (m.sample.i.got - mean(means))
-abline(v=tmp, col='green', lwd=3)+abline(v=tmp, col='red', lwd=3)
 2 * pnorm(m.sample.i.got, mean(p1), sd(means), lower.tail = F) 2 * pnorm(m.sample.i.got, mean(p1), sd(means), lower.tail = F)
 m.sample.i.got m.sample.i.got
  
 ### one more time  ### one more time 
 +# this time, with a story
 mean(p2) mean(p2)
 sd(p2) sd(p2)
Line 287: Line 294:
 m.sample.i.got m.sample.i.got
  
-hist(means,  +tmp <- mean(means) - (m.sample.i.got-mean(means)) 
-     xlim = c(mean(means)-15*sd(means), mean(means)+15*sd(means)),  +tmp  
-     col = rgb(1, 00, .5)) + 
-abline(v=mean(means), col="black", lwd=2+hist(means, breaks=30,  
-abline(v=m.sample.i.got, col='darkgreen', lwd=2)+     xlim = c(tmp-4*sd(means), m.sample.i.got+4*sd(means)),  
 +     col = rgb(1, 11, .5)) 
 +abline(v=mean(means), col="black", lwd=3
 +abline(v=m.sample.i.got, col='blue', lwd=3)
  
 # what is the probability of getting # what is the probability of getting
Line 304: Line 314:
 # mean(means) - m.sample.i.got - mean(means) # mean(means) - m.sample.i.got - mean(means)
 # (green line) # (green line)
-tmp <- mean(means) - (m.sample.i.got - mean(means)) +abline(v=tmp, col='red', lwd=3)
-abline(v=tmp, col='green', lwd=2)+
 2 * pnorm(m.sample.i.got, mean(p1), sd(means), lower.tail = F) 2 * pnorm(m.sample.i.got, mean(p1), sd(means), lower.tail = F)
- 
- 
- 
- 
 </code> </code>
 ====== output ====== ====== output ======
r/sampling_distribution.1757480616.txt.gz · Last modified: 2025/09/10 14:03 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki