User Tools

Site Tools


r:sampling_distribution

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
r:sampling_distribution [2025/11/23 22:57] – [distribution of sample means] hkimscilr:sampling_distribution [2026/01/11 09:00] (current) hkimscil
Line 1: Line 1:
-====== PS1. week02 ====== +<tabbed
-<code+  * :r:sampling distribution:code.01 
-rm(list=ls()) +  * *:r:sampling distribution:out.01 
-rnorm2 <- function(n,mean,sd){  +</tabbed>
-  mean+sd*scale(rnorm(n))  +
-}+
  
-se <- function(sample) { 
-  sd(sample)/sqrt(length(sample)) 
-} 
- 
-ss <- function(x) { 
-  sum((x-mean(x))^2) 
-} 
- 
-################################ 
-N.p <- 1000000 
-m.p <- 100 
-sd.p <- 10 
- 
-p1 <- rnorm2(N.p, m.p, sd.p) 
-mean(p1) 
-sd(p1) 
- 
-p2 <- rnorm2(N.p, m.p+20, sd.p) 
-mean(p2) 
-sd(p2) 
- 
-m.p1 <- mean(p1) 
-sd.p1 <- sd(p1) 
-var(p1) 
- 
- 
-hist(p1, breaks=50, col = rgb(1, 1, 1, 0.5), 
-     main = "histogram of p1 and p2",) 
-abline(v=mean(p1), col="black", lwd=3) 
-hist(p2, add=T, breaks=50, col=rgb(1,1,.5,.5)) 
-abline(v=mean(p2), col="red", lwd=3) 
- 
- 
-hist(p1, breaks=50, col=rgb(0,.5,.5,.5)) 
-abline(v=mean(p1),lwd=2) 
-abline(v=mean(p1)-sd(p1), lwd=2) 
-abline(v=mean(p1)+sd(p1), lwd=2) 
-abline(v=c(m.p1-2*sd.p1, m.p1+2*sd.p1), lwd=2, col='red') 
-abline(v=c(m.p1-3*sd.p1, m.p1+3*sd.p1), lwd=2, col='green') 
- 
-# area bet black = 68% 
-# between red = 95% 
-# between green = 99% 
- 
-pnorm(m.p1+sd.p1, m.p1, sd.p1) 
-pnorm(m.p1-sd.p1, m.p1, sd.p1) 
-pnorm(m.p1+sd.p1, m.p1, sd.p1) -  
-  pnorm(m.p1-sd.p1, m.p1, sd.p1) 
- 
-pnorm(m.p1+2*sd.p1, m.p1, sd.p1) -  
-  pnorm(m.p1-2*sd.p1, m.p1, sd.p1) 
- 
-pnorm(m.p1+3*sd.p1, m.p1, sd.p1) -  
-  pnorm(m.p1-3*sd.p1, m.p1, sd.p1) 
- 
-m.p1 
-sd.p1 
- 
-(m.p1-m.p1)/sd.p1 
-((m.p1-sd.p1) - m.p1) / sd.p1 
-(120-100)/10  
-pnorm(1)-pnorm(-1) 
-pnorm(2)-pnorm(-2) 
-pnorm(3)-pnorm(3) 
- 
-1-pnorm(-2)*2 
-pnorm(2)-pnorm(-2) 
- 
-pnorm(120, 100, 10) 
-pnorm(2)-pnorm(-2) 
- 
-zscore <- (120-100)/10 
-pnorm(zscore)-pnorm(-zscore) 
-zscore 
- 
-# reminder.  
-pnorm(-1) 
-pnorm(1, 0, 1, lower.tail = F) 
-pnorm(110, 100, 10, lower.tail = F) 
-zscore <- (110-100)/10 
-pnorm(zscore, lower.tail = F) 
- 
-pnorm(118, 100, 10, lower.tail = F) 
-pnorm(18/10, lower.tail = F) 
- 
-z.p1 <- (p1-mean(p1))/sd(p1) 
-mean(z.p1) 
-round(mean(z.p1),10) 
-sd(z.p1) 
-pnorm(1.8)-pnorm(-1.8) 
- 
-hist(z.p1, breaks=50, col=rgb(1,0,0,0)) 
-abline(v=c(m.p1, -1.8, 1.8), col='red') 
-1-(pnorm(1.8)-pnorm(-1.8)) 
- 
- 
-pnorm(1)-pnorm(-1) 
-1-(pnorm(-1)*2) 
- 
-pnorm(2)-pnorm(-2) 
-1-(pnorm(-2)*2) 
- 
-1-(pnorm(-3)*2) 
- 
-# 
-hist(p1, breaks=50, col=rgb(.9,.9,.9,.9)) 
-abline(v=mean(p1),lwd=2) 
-abline(v=mean(p1)-sd(p1), lwd=2) 
-abline(v=mean(p1)+sd(p1), lwd=2) 
-abline(v=c(m.p1-2*sd.p1, m.p1+2*sd.p1), lwd=2, col='red') 
-abline(v=c(m.p1-3*sd.p1, m.p1+3*sd.p1), lwd=2, col='green') 
- 
-# 68% 
-a <- qnorm(.32/2) 
-b <- qnorm(1-.32/2) 
-c(a, b) 
-c(-1, 1) 
-# note that 
-.32/2 
-pnorm(-1) 
-qnorm(.32/2) 
-qnorm(pnorm(-1)) 
- 
-# 95% 
-c <- qnorm(.05/2) 
-d <- qnorm(1-.05/2) 
-c(c, d) 
-c(-2,2) 
- 
-# 99%  
-e <- qnorm(.01/2) 
-f <- qnorm(1-.01/2) 
-c(e,f) 
-c(-3,3) 
- 
- 
-pnorm(b)-pnorm(a) 
-c(a, b) 
-pnorm(d)-pnorm(c) 
-c(c, d) 
-pnorm(f)-pnorm(e) 
-c(e, f) 
- 
-qnorm(.5) 
-qnorm(1) 
- 
- 
-################################ 
-s.size <- 10 
- 
-means.temp <- c() 
-s1 <- sample(p1, s.size, replace = T) 
-mean(s1) 
-means.temp <- append(means.temp, mean(s1)) 
-means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T))) 
-means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T))) 
-means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T))) 
-means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T))) 
-means.temp 
- 
-iter <- 1000000 
-# means <- c() 
-means <- rep(NA, iter) 
-for (i in 1:iter) { 
-  # means <- append(means, mean(sample(p1, s.size, replace = T))) 
-  means[i] <- mean(sample(p1, s.size, replace = T)) 
-} 
-length(means) 
-mean(means) 
-sd(means) 
-se.s <- sd(means) 
- 
-hist(means, breaks=50,  
-     xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)),  
-     col=rgb(1, 1, 1, .5)) 
-abline(v=mean(means), col="black", lwd=3) 
-# now we want to get sd of this distribution 
-lo1 <- mean(means)-se.s 
-hi1 <- mean(means)+se.s 
-lo2 <- mean(means)-2*se.s 
-hi2 <- mean(means)+2*se.s 
-lo3 <- mean(means)-3*se.s 
-hi3 <- mean(means)+3*se.s 
-abline(v=mean(means), col="black", lwd=2) 
-# abline(v=mean(p2), colo='darkgreen', lwd=2) 
-abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3),  
-       col=c("red","red", "blue", "blue", "orange", "orange"),  
-       lwd=2) 
- 
-# meanwhile . . . . 
-se.s 
- 
-se.z <- sqrt(var(p1)/s.size) 
-se.z <- c(se.z) 
-se.z 
- 
-# sd of sample means (sd(means)) 
-# = se.s 
- 
-# when iter value goes to  
-# infinite value: 
-# mean(means) = mean(p1)  
-# and 
-# sd(means) = sd(p1) / sqrt(s.size) 
-# that is, se.s = se.z 
-# This is called CLT (Central Limit Theorem) 
-# see http://commres.net/wiki/cetral_limit_theorem 
- 
-mean(means) 
-mean(p1) 
-sd(means) 
-var(p1)  
-# remember we started talking sample size 10 
-sqrt(var(p1)/s.size) 
-se.z 
- 
-sd(means) 
-se.s  
-se.z 
- 
-# because CLT 
-loz1 <- mean(p1)-se.z 
-hiz1 <- mean(p1)+se.z 
-loz2 <- mean(p1)-2*se.z 
-hiz2 <- mean(p1)+2*se.z 
-loz3 <- mean(p1)-3*se.z 
-hiz3 <- mean(p1)+3*se.z 
- 
-c(lo1, loz1) 
-c(lo2, loz2) 
-c(lo3, loz3) 
- 
-c(hi1, hiz1) 
-c(hi2, hiz2) 
-c(hi3, hiz3) 
- 
- 
-hist(means, breaks=50, 
-     xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)),  
-     col = rgb(1, 1, 1, .5)) 
-abline(v=mean(means), col="black", lwd=3) 
-# abline(v=mean(p2), colo='darkgreen', lwd=3) 
-abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3),  
-       col=c("darkgreen","darkgreen", "blue", "blue", "orange", "orange"),  
-       lwd=2) 
- 
-round(c(lo1, hi1)) 
-round(c(lo2, hi2)) 
-round(c(lo3, hi3)) 
- 
-round(c(loz1, hiz1)) 
-round(c(loz2, hiz2)) 
-round(c(loz3, hiz3)) 
- 
-m.sample.i.got <- mean(means)+ 1.5*sd(means) 
-m.sample.i.got 
- 
-hist(means, breaks=30,  
-     xlim = c(mean(means)-7*sd(means), mean(means)+10*sd(means)),  
-     col = rgb(1, 1, 1, .5)) 
-abline(v=mean(means), col="black", lwd=3) 
-abline(v=m.sample.i.got, col='darkgreen', lwd=3) 
- 
-# what is the probablity of getting 
-# greater than  
-# m.sample.i.got? 
-m.sample.i.got 
-pnorm(m.sample.i.got, mean(means), sd(means), lower.tail = F) 
-pnorm(m.sample.i.got, mean(p1), se.z, lower.tail = F) 
- 
-# then, what is the probabilty of getting  
-# greater than m.sample.i.got and 
-# less than corresponding value, which is 
-# mean(means) - m.sample.i.got - mean(means) 
-# (green line) 
-tmp <- mean(means) - (m.sample.i.got - mean(means)) 
-abline(v=tmp, col='red', lwd=3) 
-2 * pnorm(m.sample.i.got, mean(p1), sd(means), lower.tail = F) 
-m.sample.i.got 
- 
-### one more time  
-# this time, with a story 
-mean(p2) 
-sd(p2) 
-sample(p2, s.size) 
-m.sample.i.got <- mean(sample(p2, s.size)) 
-m.sample.i.got 
- 
-tmp <- mean(means) - (m.sample.i.got-mean(means)) 
-tmp  
- 
-hist(means, breaks=30,  
-     xlim = c(tmp-4*sd(means), m.sample.i.got+4*sd(means)),  
-     col = rgb(1, 1, 1, .5)) 
-abline(v=mean(means), col="black", lwd=3) 
-abline(v=m.sample.i.got, col='blue', lwd=3) 
- 
-# what is the probablity of getting 
-# greater than  
-# m.sample.i.got? 
-m.sample.i.got 
-pnorm(m.sample.i.got, mean(p1), sd(means), lower.tail = F) 
- 
-# then, what is the probabilty of getting  
-# greater than m.sample.i.got and 
-# less than corresponding value, which is 
-# mean(means) - m.sample.i.got - mean(means) 
-# (green line) 
-abline(v=tmp, col='red', lwd=3) 
-2 * pnorm(m.sample.i.got, mean(p1), sd(means), lower.tail = F) 
-</code> 
 ====== output ====== ====== output ======
 <WRAP group> <WRAP group>
r/sampling_distribution.txt · Last modified: by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki