sampling_distribution_in_r

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
Next revision
Previous revision
sampling_distribution_in_r [2024/03/20 13:53] – [Sampling distribution in R e.g. 1] hkimscilsampling_distribution_in_r [2025/03/24 09:00] (current) hkimscil
Line 1: Line 1:
 ====== Sampling distribution in R e.g. 1 ====== ====== Sampling distribution in R e.g. 1 ======
 <code> <code>
 +# sampling distribution 
 n.ajstu <- 100000 n.ajstu <- 100000
 mean.ajstu <- 100 mean.ajstu <- 100
Line 77: Line 78:
   ses[i] = sqrt(var(ajstu)/sss[i])   ses[i] = sqrt(var(ajstu)/sss[i])
 } }
 +ses.means4 <- sqrt(var(means4))
 +ses.means25 <- sqrt(var(means25))
 +ses.means100 <- sqrt(var(means100))
 +ses.means400 <- sqrt(var(means400))
 +ses.means900 <- sqrt(var(means900))
 +ses.means1600 <- sqrt(var(means1600))
 +ses.means2500 <- sqrt(var(means2500))
 +ses.real <- c(ses.means4, ses.means25,
 +              ses.means100, ses.means400,
 +              ses.means900, ses.means1600,
 +              ses.means2500)
 +ses.real 
  
 ses ses
 se.1 <- ses se.1 <- ses
-two.95 <- qnorm(0.975) +se.2 <- * ses 
-se.2 <- two.95 * ses # 2가 아닌 95%에 해당하는 숫자 (1.96...) +
  
 lower.s2 <- mean(ajstu)-se.2 lower.s2 <- mean(ajstu)-se.2
 upper.s2 <- mean(ajstu)+se.2 upper.s2 <- mean(ajstu)+se.2
-data.frame(cbind(sss, ses, lower.s2, upper.s2)) +data.frame(cbind(sss, ses, ses.real, lower.s2, upper.s2))
 </code> </code>
 +아웃풋 
 +<code> 
 +> n.ajstu <- 100000 
 +> mean.ajstu <- 100 
 +> sd.ajstu <- 10 
 +> set.seed(1024) 
 +> ajstu <- rnorm2(n.ajstu, mean=mean.ajstu, sd=sd.ajstu) 
 +> mean(ajstu) 
 +[1] 100 
 +> sd(ajstu) 
 +[1] 10 
 +> var(ajstu) 
 +     [,1] 
 +[1,]  100 
 +> iter <- 10000 # # of sampling  
 +> n.4 <- 4 
 +> means4 <- rep (NA, iter) 
 +> for(i in 1:iter){ 
 ++   means4[i] = mean(sample(ajstu, n.4)) 
 ++ } 
 +> n.25 <- 25 
 +> means25 <- rep (NA, iter) 
 +> for(i in 1:iter){ 
 ++   means25[i] = mean(sample(ajstu, n.25)) 
 ++ } 
 +> n.100 <- 100 
 +> means100 <- rep (NA, iter) 
 +> for(i in 1:iter){ 
 ++   means100[i] = mean(sample(ajstu, n.100)) 
 ++ } 
 +> n.400 <- 400 
 +> means400 <- rep (NA, iter) 
 +> for(i in 1:iter){ 
 ++   means400[i] = mean(sample(ajstu, n.400)) 
 ++ } 
 +> n.900 <- 900 
 +> means900 <- rep (NA, iter) 
 +> for(i in 1:iter){ 
 ++   means900[i] = mean(sample(ajstu, n.900)) 
 ++ } 
 +> n.1600 <- 1600 
 +> means1600 <- rep (NA, iter) 
 +> for(i in 1:iter){ 
 ++   means1600[i] = mean(sample(ajstu, n.1600)) 
 ++ } 
 +> n.2500 <- 2500 
 +> means2500 <- rep (NA, iter) 
 +> for(i in 1:iter){ 
 ++   means2500[i] = mean(sample(ajstu, n.2500)) 
 ++ } 
 +> h4 <- hist(means4) 
 +> h25 <- hist(means25) 
 +> h100 <- hist(means100) 
 +> h400 <- hist(means400) 
 +> h900 <- hist(means900) 
 +> h1600 <- hist(means1600) 
 +> h2500 <- hist(means2500) 
 +> plot(h4, ylim=c(0,3000), col="red"
 +> plot(h25, add = T, col="blue"
 +> plot(h100, add = T, col="green"
 +> plot(h400, add = T, col="grey"
 +> plot(h900, add = T, col="yellow"
 +> sss <- c(4,25,100,400,900,1600,2500) # sss sample sizes 
 +> ses <- rep (NA, length(sss)) # std errors 
 +> for(i in 1:length(sss)){ 
 ++   ses[i] = sqrt(var(ajstu)/sss[i]) 
 ++ } 
 +> ses 
 +[1] 5.0000000 2.0000000 1.0000000 0.5000000 0.3333333 0.2500000 
 +[7] 0.2000000 
 +> se.1 <- ses 
 +> se.2 <- 2 * ses  
 +> lower.s2 <- mean(ajstu)-se.2 
 +> upper.s2 <- mean(ajstu)+se.2 
 +> data.frame(cbind(sss, ses, lower.s2, upper.s2)) 
 +   sss       ses lower.s2 upper.s2 
 +1    4 5.0000000 90.00000 110.0000 
 +2   25 2.0000000 96.00000 104.0000 
 +3  100 1.0000000 98.00000 102.0000 
 +4  400 0.5000000 99.00000 101.0000 
 +5  900 0.3333333 99.33333 100.6667 
 +6 1600 0.2500000 99.50000 100.5000 
 +7 2500 0.2000000 99.60000 100.4000 
 +> sss <- c(4,25,100,400,900,1600,2500) # sss sample sizes 
 +> ses <- rep (NA, length(sss)) # std errors 
 +> for(i in 1:length(sss)){ 
 ++   ses[i] = sqrt(var(ajstu)/sss[i]) 
 ++ } 
 +> ses.means4 <- sqrt(var(means4)) 
 +> ses.means25 <- sqrt(var(means25)) 
 +> ses.means100 <- sqrt(var(means100)) 
 +> ses.means400 <- sqrt(var(means400)) 
 +> ses.means900 <- sqrt(var(means900)) 
 +> ses.means1600 <- sqrt(var(means1600)) 
 +> ses.means2500 <- sqrt(var(means2500)) 
 +> ses.real <- c(ses.means4, ses.means25, 
 ++               ses.means100, ses.means400, 
 ++               ses.means900, ses.means1600, 
 ++               ses.means2500) 
 +> ses.real  
 +[1] 4.9719142 2.0155741 0.9999527 0.5034433 0.3324414 0.2466634 
 +[7] 0.1965940 
 +> ses 
 +[1] 5.0000000 2.0000000 1.0000000 0.5000000 0.3333333 0.2500000 
 +[7] 0.2000000 
 +> se.1 <- ses 
 +> se.2 <- 2 * ses  
 +> lower.s2 <- mean(ajstu)-se.2 
 +> upper.s2 <- mean(ajstu)+se.2 
 +> data.frame(cbind(sss, ses, ses.real, lower.s2, upper.s2)) 
 +   sss       ses  ses.real lower.s2 upper.s2 
 +1    4 5.0000000 4.9719142 90.00000 110.0000 
 +2   25 2.0000000 2.0155741 96.00000 104.0000 
 +3  100 1.0000000 0.9999527 98.00000 102.0000 
 +4  400 0.5000000 0.5034433 99.00000 101.0000 
 +5  900 0.3333333 0.3324414 99.33333 100.6667 
 +6 1600 0.2500000 0.2466634 99.50000 100.5000 
 +7 2500 0.2000000 0.1965940 99.60000 100.4000 
 +>  
 +</code> 
 +{{:pasted:20240319-120709.png}} 
 +문제 . . . . 
 <code> <code>
 # n =1600 일 경우에  # n =1600 일 경우에 
-# sample의 평균이 101.5보다 작을 +# sample의 평균이 100.15보다 작을 
 # 확률은 어떻게 구해야 할까? # 확률은 어떻게 구해야 할까?
  
Line 102: Line 233:
 # 이것을 standard error라고 부른다 # 이것을 standard error라고 부른다
 # 따라서 # 따라서
-pnorm(101.5, mean(ajstu), sqrt(var(ajstu)/n.1600))+se.1600 <- sqrt(var(ajstu)/n.1600) 
 +pnorm(100.15, mean(ajstu), se.1600)
 </code> </code>
  
-{{:pasted:20240319-120709.png}}+ 
 ===== Sampling distribution in proportion in R ===== ===== Sampling distribution in proportion in R =====
  
sampling_distribution_in_r.1710910391.txt.gz · Last modified: 2024/03/20 13:53 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki