sampling_distribution_in_r

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision | Previous revision
Next revision
Previous revision
sampling_distribution_in_r [2024/03/20 09:18] hkimscil | sampling_distribution_in_r [2025/03/24 09:00] (current) hkimscil
Line 1: Line 1:
 ====== Sampling distribution in R e.g. 1 ====== ====== Sampling distribution in R e.g. 1 ======
 <code> <code>
 +# sampling distribution 
 n.ajstu <- 100000 n.ajstu <- 100000
 mean.ajstu <- 100 mean.ajstu <- 100
Line 71: Line 72:
 plot(h900, add = T, col="yellow") plot(h900, add = T, col="yellow")
  
-se4 <- sqrt(var(ajstu)/4) 
-se25 <- sqrt(var(ajstu)/25) 
-se100 <- sqrt(var(ajstu)/100) 
-se400 <- sqrt(var(ajstu)/400) 
-se900 <- sqrt(var(ajstu)/900) 
-se1600 <- sqrt(var(ajstu)/1600) 
-se2500 <- sqrt(var(ajstu)/2500) 
  
-sss <- c(4,25,100,400,900,1600,2500) +sss <- c(4,25,100,400,900,1600,2500) # sss sample sizes 
-ses <- rep (NA, length(sss))+ses <- rep (NA, length(sss)) # std errors
 for(i in 1:length(sss)){ for(i in 1:length(sss)){
   ses[i] = sqrt(var(ajstu)/sss[i])   ses[i] = sqrt(var(ajstu)/sss[i])
 } }
 +ses.means4 <- sqrt(var(means4))
 +ses.means25 <- sqrt(var(means25))
 +ses.means100 <- sqrt(var(means100))
 +ses.means400 <- sqrt(var(means400))
 +ses.means900 <- sqrt(var(means900))
 +ses.means1600 <- sqrt(var(means1600))
 +ses.means2500 <- sqrt(var(means2500))
 +ses.real <- c(ses.means4, ses.means25,
 +              ses.means100, ses.means400,
 +              ses.means900, ses.means1600,
 +              ses.means2500)
 +ses.real 
 +
 ses ses
 se.1 <- ses se.1 <- ses
-se.2 <- 2*ses +se.2 <- 2 * ses 
-lower.part.2 <- mean(ajstu)-se.2 +
-upper.part.2 <- mean(ajstu)+se.2 +
-data.frame(cbind(sss, ses, lower.part.2, upper.part.2)) +
-</code>+
  
 +lower.s2 <- mean(ajstu)-se.2
 +upper.s2 <- mean(ajstu)+se.2
 +data.frame(cbind(sss, ses, ses.real, lower.s2, upper.s2))
 +</code>
 +아웃풋
 +<code>
 +> n.ajstu <- 100000
 +> mean.ajstu <- 100
 +> sd.ajstu <- 10
 +> set.seed(1024)
 +> ajstu <- rnorm2(n.ajstu, mean=mean.ajstu, sd=sd.ajstu)
 +> mean(ajstu)
 +[1] 100
 +> sd(ajstu)
 +[1] 10
 +> var(ajstu)
 +     [,1]
 +[1,]  100
 +> iter <- 10000 # # of sampling 
 +> n.4 <- 4
 +> means4 <- rep (NA, iter)
 +> for(i in 1:iter){
 ++   means4[i] = mean(sample(ajstu, n.4))
 ++ }
 +> n.25 <- 25
 +> means25 <- rep (NA, iter)
 +> for(i in 1:iter){
 ++   means25[i] = mean(sample(ajstu, n.25))
 ++ }
 +> n.100 <- 100
 +> means100 <- rep (NA, iter)
 +> for(i in 1:iter){
 ++   means100[i] = mean(sample(ajstu, n.100))
 ++ }
 +> n.400 <- 400
 +> means400 <- rep (NA, iter)
 +> for(i in 1:iter){
 ++   means400[i] = mean(sample(ajstu, n.400))
 ++ }
 +> n.900 <- 900
 +> means900 <- rep (NA, iter)
 +> for(i in 1:iter){
 ++   means900[i] = mean(sample(ajstu, n.900))
 ++ }
 +> n.1600 <- 1600
 +> means1600 <- rep (NA, iter)
 +> for(i in 1:iter){
 ++   means1600[i] = mean(sample(ajstu, n.1600))
 ++ }
 +> n.2500 <- 2500
 +> means2500 <- rep (NA, iter)
 +> for(i in 1:iter){
 ++   means2500[i] = mean(sample(ajstu, n.2500))
 ++ }
 +> h4 <- hist(means4)
 +> h25 <- hist(means25)
 +> h100 <- hist(means100)
 +> h400 <- hist(means400)
 +> h900 <- hist(means900)
 +> h1600 <- hist(means1600)
 +> h2500 <- hist(means2500)
 +> plot(h4, ylim=c(0,3000), col="red")
 +> plot(h25, add = T, col="blue")
 +> plot(h100, add = T, col="green")
 +> plot(h400, add = T, col="grey")
 +> plot(h900, add = T, col="yellow")
 +> sss <- c(4,25,100,400,900,1600,2500) # sss sample sizes
 +> ses <- rep (NA, length(sss)) # std errors
 +> for(i in 1:length(sss)){
 ++   ses[i] = sqrt(var(ajstu)/sss[i])
 ++ }
 +> ses
 +[1] 5.0000000 2.0000000 1.0000000 0.5000000 0.3333333 0.2500000
 +[7] 0.2000000
 +> se.1 <- ses
 +> se.2 <- 2 * ses 
 +> lower.s2 <- mean(ajstu)-se.2
 +> upper.s2 <- mean(ajstu)+se.2
 +> data.frame(cbind(sss, ses, lower.s2, upper.s2))
 +   sss       ses lower.s2 upper.s2
 +1    4 5.0000000 90.00000 110.0000
 +2   25 2.0000000 96.00000 104.0000
 +3  100 1.0000000 98.00000 102.0000
 +4  400 0.5000000 99.00000 101.0000
 +5  900 0.3333333 99.33333 100.6667
 +6 1600 0.2500000 99.50000 100.5000
 +7 2500 0.2000000 99.60000 100.4000
 +> sss <- c(4,25,100,400,900,1600,2500) # sss sample sizes
 +> ses <- rep (NA, length(sss)) # std errors
 +> for(i in 1:length(sss)){
 ++   ses[i] = sqrt(var(ajstu)/sss[i])
 ++ }
 +> ses.means4 <- sqrt(var(means4))
 +> ses.means25 <- sqrt(var(means25))
 +> ses.means100 <- sqrt(var(means100))
 +> ses.means400 <- sqrt(var(means400))
 +> ses.means900 <- sqrt(var(means900))
 +> ses.means1600 <- sqrt(var(means1600))
 +> ses.means2500 <- sqrt(var(means2500))
 +> ses.real <- c(ses.means4, ses.means25,
 ++               ses.means100, ses.means400,
 ++               ses.means900, ses.means1600,
 ++               ses.means2500)
 +> ses.real 
 +[1] 4.9719142 2.0155741 0.9999527 0.5034433 0.3324414 0.2466634
 +[7] 0.1965940
 +> ses
 +[1] 5.0000000 2.0000000 1.0000000 0.5000000 0.3333333 0.2500000
 +[7] 0.2000000
 +> se.1 <- ses
 +> se.2 <- 2 * ses 
 +> lower.s2 <- mean(ajstu)-se.2
 +> upper.s2 <- mean(ajstu)+se.2
 +> data.frame(cbind(sss, ses, ses.real, lower.s2, upper.s2))
 +   sss       ses  ses.real lower.s2 upper.s2
 +1    4 5.0000000 4.9719142 90.00000 110.0000
 +2   25 2.0000000 2.0155741 96.00000 104.0000
 +3  100 1.0000000 0.9999527 98.00000 102.0000
 +4  400 0.5000000 0.5034433 99.00000 101.0000
 +5  900 0.3333333 0.3324414 99.33333 100.6667
 +6 1600 0.2500000 0.2466634 99.50000 100.5000
 +7 2500 0.2000000 0.1965940 99.60000 100.4000
 +
 +</code>
 +{{:pasted:20240319-120709.png}}
 +문제 . . . . 
 <code> <code>
 # n = 1600 일 경우에  # n = 1600 일 경우에 
-# sample의 평균이 71보다 작을 +# sample의 평균이 100.15보다 작을 
 # 확률은 어떻게 구해야 할까? # 확률은 어떻게 구해야 할까?
  
 # n = 1600 일 경우에  # n = 1600 일 경우에 
 # sampling distribution은  # sampling distribution은 
-# Xbar ~ N(70, var(ajstu)/n.1600)+# Xbar ~ N(100, var(ajstu)/n.1600)
 # 그리고, 이 sampling distribution의 표준편차는  # 그리고, 이 sampling distribution의 표준편차는 
 # sqrt(var(ajstu)/n.1600) 이고 # sqrt(var(ajstu)/n.1600) 이고
 # 이것을 standard error라고 부른다 # 이것을 standard error라고 부른다
 # 따라서 # 따라서
-pnorm(71, mean(ajstu), sqrt(var(ajstu)/n.1600))+se.1600 <- sqrt(var(ajstu)/n.1600) 
 +pnorm(100.15, mean(ajstu), se.1600)
 </code> </code>
  
-{{:pasted:20240319-120709.png}}+ 
 ===== Sampling distribution in proportion in R ===== ===== Sampling distribution in proportion in R =====
  
sampling_distribution_in_r.1710893885.txt.gz · Last modified: 2024/03/20 09:18 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki