User Tools

Site Tools


note.w02

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
Next revision
Previous revision
note.w02 [2025/09/12 19:12] – [output] hkimscil note.w02 [2025/09/12 19:39] (current) – [Sampling Distribution and z-test] hkimscil
Line 23: Line 23:
 sd(p1) sd(p1)
  
-p2 <- rnorm2(N.p, m.p+5, sd.p)+p2 <- rnorm2(N.p, m.p+20, sd.p)
 mean(p2) mean(p2)
 sd(p2) sd(p2)
Line 31: Line 31:
 var(p1) var(p1)
  
 +hist(p1)
 hist(p1, breaks=50, col = rgb(1, 1, 1, 0.5), hist(p1, breaks=50, col = rgb(1, 1, 1, 0.5),
      main = "histogram of p1 and p2",)      main = "histogram of p1 and p2",)
Line 41: Line 41:
 hist(p1, breaks=50, col=rgb(0,.5,.5,.5)) hist(p1, breaks=50, col=rgb(0,.5,.5,.5))
 abline(v=mean(p1),lwd=2) abline(v=mean(p1),lwd=2)
-abline(v=mean(p1)-sd(p1), lwd=2)+abline(v=m.p1-sd.p1, lwd=2)
 abline(v=mean(p1)+sd(p1), lwd=2) abline(v=mean(p1)+sd(p1), lwd=2)
 abline(v=c(m.p1-2*sd.p1, m.p1+2*sd.p1), lwd=2, col='red') abline(v=c(m.p1-2*sd.p1, m.p1+2*sd.p1), lwd=2, col='red')
Line 60: Line 60:
 pnorm(m.p1+3*sd.p1, m.p1, sd.p1) -  pnorm(m.p1+3*sd.p1, m.p1, sd.p1) - 
   pnorm(m.p1-3*sd.p1, m.p1, sd.p1)   pnorm(m.p1-3*sd.p1, m.p1, sd.p1)
 +
 +pnorm(121, 100, 10) - pnorm(85, 100, 10)
  
 m.p1 m.p1
Line 69: Line 71:
 pnorm(1)-pnorm(-1) pnorm(1)-pnorm(-1)
 pnorm(2)-pnorm(-2) pnorm(2)-pnorm(-2)
-pnorm(3)-pnorm(3)+pnorm(3)-pnorm(-3)
  
 1-pnorm(-2)*2 1-pnorm(-2)*2
Line 154: Line 156:
  
 ################################ ################################
-s.size <- 50+s.size <- 10
  
 means.temp <- c() means.temp <- c()
Line 199: Line 201:
  
 se.z <- sqrt(var(p1)/s.size) se.z <- sqrt(var(p1)/s.size)
 +se.z
 se.z <- c(se.z) se.z <- c(se.z)
 se.z se.z
Line 301: Line 304:
 sd(means) sd(means)
  
-tmp <- mean(means) - (m.s.from.p2 - mean(means))+m.k <- mean(s.from.p2) 
 +se.k <- sd(s.from.p2)/sqrt(s.size) 
 + 
 + 
 +tmp <- mean(means) - (m.s.from.p2  
 +                    - mean(means))
 tmp  tmp 
  
Line 315: Line 323:
 m.s.from.p2 m.s.from.p2
 pnorm(m.s.from.p2, mean(p1), se.z, lower.tail = F) pnorm(m.s.from.p2, mean(p1), se.z, lower.tail = F)
 +pnorm(m.s.from.p2, m.k, se.k, lower.tail = F)
 # then, what is the probability of getting  # then, what is the probability of getting 
 # greater than m.sample.i.got and # greater than m.sample.i.got and
Line 323: Line 331:
 abline(v=tmp, col='red', lwd=3) abline(v=tmp, col='red', lwd=3)
 2 * pnorm(m.s.from.p2, mean(p1), se.z, lower.tail = F) 2 * pnorm(m.s.from.p2, mean(p1), se.z, lower.tail = F)
 +
 +2 * pnorm(m.s.from.p2, m.k, se.k, lower.tail = F)
 +
  
 se.z se.z
Line 338: Line 349:
 pt(z.cal, 49, lower.tail = F)*2 pt(z.cal, 49, lower.tail = F)*2
 t.test(s.from.p2, mu=mean(p1), var.equal = T) t.test(s.from.p2, mu=mean(p1), var.equal = T)
 +
 +
 +
 </code> </code>
  
 ====== output ====== ====== output ======
 +===== 1 =====
 +
 <WRAP group> <WRAP group>
-<WRAP column 45>+<WRAP column half>
 <code> <code>
 > rm(list=ls()) > rm(list=ls())
Line 359: Line 375:
 </code> </code>
 </WRAP> </WRAP>
-<WRAP column 50>+<WRAP column half> 
 +...........................................................................
 </WRAP> </WRAP>
 </WRAP> </WRAP>
 +===== 2 =====
  
 +<WRAP group> 
 +<WRAP column half>
 <code> <code>
 > ################################ > ################################
Line 396: Line 415:
  
  
 +</code>
 +</WRAP>
 +<WRAP column half>
 +...........................................................................
 +</WRAP>
 +</WRAP>
 +
 +===== 3 =====
 +
 +<WRAP group>
 +<WRAP column half>
 +<code>
 > hist(p1, breaks=50, col=rgb(0,.5,.5,.5)) > hist(p1, breaks=50, col=rgb(0,.5,.5,.5))
 > abline(v=mean(p1),lwd=2) > abline(v=mean(p1),lwd=2)
Line 423: Line 454:
 [1] 0.9973002 [1] 0.9973002
  
 +</code>
 +</WRAP>
 +<WRAP column half>
 +...........................................................................
 +</WRAP>
 +</WRAP>
 +
 +===== 4 =====
 +
 +<WRAP group>
 +<WRAP column half>
 +<code>
 > m.p1 > m.p1
 [1] 100 [1] 100
Line 473: Line 516:
 [1] 0.03593032 [1] 0.03593032
  
 +</code>
 +</WRAP>
 +<WRAP column half>
 +...........................................................................
 +</WRAP>
 +</WRAP>
 +
 +===== 5 =====
 +
 +<WRAP group>
 +<WRAP column half>
 +<code>
 > z.p1 <- (p1-mean(p1))/sd(p1) > z.p1 <- (p1-mean(p1))/sd(p1)
 > mean(z.p1) > mean(z.p1)
Line 503: Line 558:
  
 > # > #
 +</code>
 +</WRAP>
 +<WRAP column half>
 +...........................................................................
 +</WRAP>
 +</WRAP>
 +
 +===== 6 =====
 +
 +<WRAP group>
 +<WRAP column half>
 +<code>
 > hist(p1, breaks=50, col=rgb(.9,.9,.9,.9)) > hist(p1, breaks=50, col=rgb(.9,.9,.9,.9))
 > abline(v=mean(p1),lwd=2) > abline(v=mean(p1),lwd=2)
Line 563: Line 630:
  
  
 +</code>
 +</WRAP>
 +<WRAP column half>
 +...........................................................................
 +</WRAP>
 +</WRAP>
 +
 +===== 7 =====
 +
 +<WRAP group>
 +<WRAP column half>
 +<code>
 > ################################ > ################################
 > s.size <- 50 > s.size <- 50
Line 578: Line 657:
 [1]  98.76098  99.90935  99.29643  99.66014 101.93822 [1]  98.76098  99.90935  99.29643  99.66014 101.93822
  
 +</code>
 +</WRAP>
 +<WRAP column half>
 +...........................................................................
 +</WRAP>
 +</WRAP>
 +
 +===== 8 =====
 +
 +<WRAP group>
 +<WRAP column half>
 +<code>
 > iter <- 1000000 > iter <- 1000000
 > # means <- c() > # means <- c()
Line 610: Line 701:
 +        lwd=2) +        lwd=2)
  
 +</code>
 +</WRAP>
 +<WRAP column half>
 +...........................................................................
 +</WRAP>
 +</WRAP>
 +
 +===== 9 =====
 +
 +<WRAP group>
 +<WRAP column half>
 +<code>
 > # meanwhile . . . . > # meanwhile . . . .
 > se.s > se.s
 [1] 1.414035 [1] 1.414035
-+# se.s = sd(means) 
 + 
 +# The below is from CLT  
 +# see http://commres.org/wiki/central limit theorem 
 +#
 > se.z <- sqrt(var(p1)/s.size) > se.z <- sqrt(var(p1)/s.size)
 > se.z <- c(se.z) > se.z <- c(se.z)
Line 655: Line 762:
 > se.s  > se.s 
 [1] 1.414035 [1] 1.414035
-> se.z 
-[1] 1.414214 
  
-> # because CLT+</code> 
 +</WRAP> 
 +<WRAP column half> 
 +........................................................................... 
 +</WRAP> 
 +</WRAP> 
 + 
 + 
 +===== 10 ===== 
 +<WRAP group> 
 +<WRAP column half> 
 +<code> 
 +> # because of CLT we can use the 
 +> # below instead of  
 +> # mean(means)+-se.s 
 +> #
 > loz1 <- mean(p1)-se.z > loz1 <- mean(p1)-se.z
 > hiz1 <- mean(p1)+se.z > hiz1 <- mean(p1)+se.z
Line 681: Line 801:
  
  
 +</code>
 +</WRAP>
 +<WRAP column half>
 +...........................................................................
 +</WRAP>
 +</WRAP>
 +
 +===== 11 =====
 +
 +<WRAP group>
 +<WRAP column half>
 +<code>
 > hist(means, breaks=50, > hist(means, breaks=50,
 +      xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)),  +      xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)), 
Line 704: Line 836:
 [1]  96 104 [1]  96 104
  
 +</code>
 +</WRAP>
 +<WRAP column half>
 +...........................................................................
 +</WRAP>
 +</WRAP>
 +
 +===== 12 =====
 +
 +<WRAP group>
 +<WRAP column half>
 +<code>
 > m.sample.i.got <- mean(means)+ 1.5*sd(means) > m.sample.i.got <- mean(means)+ 1.5*sd(means)
 > m.sample.i.got > m.sample.i.got
Line 736: Line 880:
 [1] 102.1193 [1] 102.1193
  
 +</code>
 +</WRAP>
 +<WRAP column half>
 +...........................................................................
 +</WRAP>
 +</WRAP>
 +
 +
 +===== 13 =====
 +<WRAP group>
 +<WRAP column half>
 +<code>
 > ### one more time  > ### one more time 
 > # this time, with a story > # this time, with a story
 > mean(p2) > mean(p2)
-[1] 105+[1] 120
 > sd(p2) > sd(p2)
 [1] 10 [1] 10
Line 745: Line 901:
 > m.s.from.p2 <- mean(s.from.p2) > m.s.from.p2 <- mean(s.from.p2)
 > m.s.from.p2 > m.s.from.p2
-[1] 103.1283+[1] 119.0929
  
 > se.s > se.s
-[1] 1.414035+[1] 3.163228
 > se.z > se.z
-[1] 1.414214+[1] 3.162278
 > sd(means) > sd(means)
-[1] 1.414035+[1] 3.163228
  
-> tmp <- mean(means) - (m.s.from.p2 - mean(means))+> m.k <- mean(s.from.p2) 
 +> se.k <- sd(s.from.p2)/sqrt(s.size) 
 +>  
 +>  
 +> tmp <- mean(means) - (m.s.from.p2  
 ++                     - mean(means))
 > tmp  > tmp 
-[1] 96.86822+[1] 80.90409
  
 > hist(means, breaks=30,  > hist(means, breaks=30, 
Line 768: Line 929:
 > # m.sample.i.got? > # m.sample.i.got?
 > m.s.from.p2 > m.s.from.p2
-[1] 103.1283+[1] 119.0929
 > pnorm(m.s.from.p2, mean(p1), se.z, lower.tail = F) > pnorm(m.s.from.p2, mean(p1), se.z, lower.tail = F)
-[1] 0.01348266 +[1] 7.816511e-10 
-+pnorm(m.s.from.p2, m.k, se.k, lower.tail = F) 
 +[1] 0.5
 > # then, what is the probability of getting  > # then, what is the probability of getting 
 > # greater than m.sample.i.got and > # greater than m.sample.i.got and
Line 779: Line 941:
 > abline(v=tmp, col='red', lwd=3) > abline(v=tmp, col='red', lwd=3)
 > 2 * pnorm(m.s.from.p2, mean(p1), se.z, lower.tail = F) > 2 * pnorm(m.s.from.p2, mean(p1), se.z, lower.tail = F)
-[1] 0.02696533+[1] 1.563302e-09 
 +>  
 +> 2 * pnorm(m.s.from.p2, m.k, se.k, lower.tail = F) 
 +[1] 1 
 +
  
 > se.z > se.z
-[1] 1.414214+[1] 3.162278
 > sd(s.from.p2)/sqrt(s.size) > sd(s.from.p2)/sqrt(s.size)
-[1] 1.414296+[1] 3.35771
 > se.z.adjusted <- sqrt(var(s.from.p2)/s.size) > se.z.adjusted <- sqrt(var(s.from.p2)/s.size)
 > se.z.adjusted > se.z.adjusted
-[1] 1.414296+[1] 3.35771
 > 2 * pnorm(m.s.from.p2, mean(p1), se.z.adjusted,  > 2 * pnorm(m.s.from.p2, mean(p1), se.z.adjusted, 
 +           lower.tail = F) +           lower.tail = F)
-[1] 0.02697421+[1] 1.298387e-08
  
 > z.cal <- (m.s.from.p2 - mean(p1))/se.z.adjusted > z.cal <- (m.s.from.p2 - mean(p1))/se.z.adjusted
 > z.cal > z.cal
-[1] 2.211891+[1] 5.686277
 > pnorm(z.cal, lower.tail = F)*2 > pnorm(z.cal, lower.tail = F)*2
-[1] 0.02697421+[1] 1.298387e-08
  
  
 > pt(z.cal, 49, lower.tail = F)*2 > pt(z.cal, 49, lower.tail = F)*2
-[1] 0.03166797+[1] 7.095934e-07
 > t.test(s.from.p2, mu=mean(p1), var.equal = T) > t.test(s.from.p2, mu=mean(p1), var.equal = T)
  
Line 806: Line 972:
  
 data:  s.from.p2 data:  s.from.p2
-t = 2.2119, df = 49, p-value = 0.03167+t = 5.6863, df = 9, p-value = 0.0002995
 alternative hypothesis: true mean is not equal to 100 alternative hypothesis: true mean is not equal to 100
 95 percent confidence interval: 95 percent confidence interval:
- 100.2861 105.9704+ 111.4972 126.6885
 sample estimates: sample estimates:
 mean of x  mean of x 
- 103.1283 + 119.0929 
  
 +
 > >
 </code> </code>
 +</WRAP>
 +<WRAP column half>
 +...........................................................................
 +{{:pasted:20250912-193249.png}}
 +</WRAP>
 +</WRAP>
  
 ====== T-test sum up ====== ====== T-test sum up ======
note.w02.1757671933.txt.gz · Last modified: 2025/09/12 19:12 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki