User Tools

Site Tools


summary_of_hypothesis_testing

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
Next revision
Previous revision
summary_of_hypothesis_testing [2025/09/13 12:44] – [Basice] hkimscilsummary_of_hypothesis_testing [2025/11/30 23:00] (current) hkimscil
Line 1: Line 1:
 ====== Hypothesis testing ====== ====== Hypothesis testing ======
 +see also [[:types of error]]
  
 ====== Basic ====== ====== Basic ======
 see first [[:sampling distribution and z-test]] see first [[:sampling distribution and z-test]]
  
-====== Hypothesis testing, exp ====== +====== Hypothesis testing, 가설검증에 실패한 경우 (n=25) ====== 
-<code> +샘플은 p2에서 (mu.p2 104probability sampling을 한 샘플. 그러나, 샘플의 평균이 101.05가 나와서 가설 검증에 실패. 이런 경우가 type 2 error를 범한 경우. 효과가 4만큼 나타나는 모집단에서 샘플이 나왔음에도 불구하고 평균이 100인 집단의 샘플로 추정되어 영가설을 부정하지 못하고, 연구가설을 채택하지 못함.  
-rm(list=ls())+<tabbed> 
 +  * :summary of hypothesis testing:code01 
 +  * *:summary of hypothesis testing:output01 
 +</tabbed>
  
-rnorm2 <- function(n,mean,sd){  +====== Hypothesis testing가설검증에 성공한 경우 (n=25======
-  mean+sd*scale(rnorm(n))  +
-}+
  
-n.p <- 10000 +<tabbed> 
-m.p <- 100 +  * :summary of hypothesis testing:code03 
-sd.p <- 10 +  * *:summary of hypothesis testing:output03 
-p1 <- rnorm2(n.p, m.p, sd.p) +</tabbed>
-m.p1 <- mean(p1) +
-sd.p1 <- sd(p1)+
  
-p2 <- rnorm2(n.p, m.p+10, sd.p) 
-m.p2 <- mean(p2) 
-sd.p2 <- sd(p2) 
  
-n.s <- 100 
-se.z1 <- c(sqrt(var(p1)/n.s)) 
-se.z2 <- c(sqrt(var(p2)/n.s)) 
- 
-x.p1 <- seq(mean(p1)-5*se.z1,  
-                mean(p2)+5*se.z1,  
-                length.out = 500) 
-x.p2 <- seq(mean(p2)-5*se.z1,  
-            mean(p2)+5*se.z1,  
-            length.out = 500) 
- 
-# Calculate the probability  
-# density for a normal distribution 
-y.p1 <- dnorm(x.p1, mean(p1), se.z1) 
-y.p2 <- dnorm(x.p2, mean(p2), se.z2) 
- 
-# Plot the theoretical PDF 
-plot(x.p1, y.p1, type = "l",  
-     lwd=3,  
-     main = "Sample means from p1 and p2 (imaginary)", 
-     xlab = "Value", ylab = "Density") 
-lines(x.p2, y.p2, lty=2, lwd=3, add=T) 
- 
- 
-m.p1 <- mean(p1) 
-se1 <- c(m.p1-se.z1, m.p1+se.z1) 
-se2 <- c(m.p1-2*se.z1, m.p1+2*se.z1) 
-se3 <- c(m.p1-3*se.z1, m.p1+3*se.z1) 
-abline(v=c(m.p1,se1,se2,se3),  
-       col=c('black', 'orange', 'orange',  
-             'green', 'green',  
-             'blue', 'blue'),  
-       lwd=1) 
- 
-treated.s <- sample(p2, n.s) 
-m.treated.s <- mean(treated.s) 
-abline(v=m.treated.s, col='red', lwd=2) 
- 
-se.z1 
- 
-diff <- m.treated.s-mean(p1) 
-diff/se.z1 
- 
-# usual way - using sample's variance  
-# instead of p1's variance to get 
-# standard error value 
-se.s <- sqrt(var(treated.s)/n.s) 
-se.s 
-diff/se.s 
- 
-pt(diff/se.s, df=n.s-1, lower.tail = F) * 2 
-t.test(treated.s, mu=m.p1, var.equal = T) 
- 
-</code> 
-===== output ===== 
-<code> 
- 
- 
-> rm(list=ls()) 
- 
-> rnorm2 <- function(n,mean,sd){  
-+   mean+sd*scale(rnorm(n))  
-+ } 
- 
-> n.p <- 10000 
-> m.p <- 100 
-> sd.p <- 10 
-> p1 <- rnorm2(n.p, m.p, sd.p) 
-> m.p1 <- mean(p1) 
-> sd.p1 <- sd(p1) 
- 
-> p2 <- rnorm2(n.p, m.p+10, sd.p) 
-> m.p2 <- mean(p2) 
-> sd.p2 <- sd(p2) 
- 
-> n.s <- 100 
-> se.z1 <- c(sqrt(var(p1)/n.s)) 
-> se.z2 <- c(sqrt(var(p2)/n.s)) 
- 
-> x.p1 <- seq(mean(p1)-5*se.z1,  
-+                 mean(p2)+5*se.z1,  
-+                 length.out = 500) 
-> x.p2 <- seq(mean(p2)-5*se.z1,  
-+             mean(p2)+5*se.z1,  
-+             length.out = 500) 
- 
-> # Calculate the probability  
-> # density for a normal distribution 
-> y.p1 <- dnorm(x.p1, mean(p1), se.z1) 
-> y.p2 <- dnorm(x.p2, mean(p2), se.z2) 
- 
-> # Plot the theoretical PDF 
-> plot(x.p1, y.p1, type = "l",  
-+      lwd=3,  
-+      main = "Sample means from p1 and p2 (imaginary)", 
-+      xlab = "Value", ylab = "Density") 
-> lines(x.p2, y.p2, lty=2, lwd=3) 
- 
- 
-> m.p1 <- mean(p1) 
-> se1 <- c(m.p1-se.z1, m.p1+se.z1) 
-> se2 <- c(m.p1-2*se.z1, m.p1+2*se.z1) 
-> se3 <- c(m.p1-3*se.z1, m.p1+3*se.z1) 
-> abline(v=c(m.p1,se1,se2,se3),  
-+        col=c('black', 'orange', 'orange',  
-+              'green', 'green',  
-+              'blue', 'blue'),  
-+        lwd=1) 
- 
-> treated.s <- sample(p2, n.s) 
-> m.treated.s <- mean(treated.s) 
-> abline(v=m.treated.s, col='red', lwd=2) 
- 
-</code> 
-{{:pasted:20250913-184943.png}} 
- 
-<code> 
-> se.z1 
-[1] 1 
- 
-> diff <- m.treated.s-mean(p1) 
-> diff/se.z1 
-[1] 9.057418 
- 
-> # usual way - using sample's variance  
-> # instead of p1's variance to get 
-> # standard error value 
-> se.s <- sqrt(var(treated.s)/n.s) 
-> se.s 
-[1] 1.015243 
-> diff/se.s 
-[1] 8.921425 
- 
-> pt(diff/se.s, df=n.s-1, lower.tail = F) * 2 
-[1] 2.455388e-14 
-> t.test(treated.s, mu=m.p1, var.equal = T) 
- 
- One Sample t-test 
- 
-data:  treated.s 
-t = 8.9214, df = 99, p-value = 2.455e-14 
-alternative hypothesis: true mean is not equal to 100 
-95 percent confidence interval: 
- 107.0430 111.0719 
-sample estimates: 
-mean of x  
- 109.0574  
- 
- 
-</code> 
 ====== se value and sample size ====== ====== se value and sample size ======
 +<tabbed>
 +  * :summary of hypothesis testing:code2
 +  * *:summary of hypothesis testing:output2
 +</tabbed>
  
-<code> 
-n.ajstu <- 100000 
-mean.ajstu <- 100 
-sd.ajstu <- 10 
- 
-set.seed(1024) 
-ajstu <- rnorm2(n.ajstu, mean=mean.ajstu, sd=sd.ajstu) 
- 
-mean(ajstu) 
-sd(ajstu) 
-var(ajstu) 
- 
-iter <- 10000 # # of sampling  
- 
-n.4 <- 4 
-means4 <- rep (NA, iter) 
-for(i in 1:iter){ 
-  means4[i] = mean(sample(ajstu, n.4)) 
-} 
- 
-n.25 <- 25 
-means25 <- rep (NA, iter) 
-for(i in 1:iter){ 
-  means25[i] = mean(sample(ajstu, n.25)) 
-} 
- 
-n.100 <- 100 
-means100 <- rep (NA, iter) 
-for(i in 1:iter){ 
-  means100[i] = mean(sample(ajstu, n.100)) 
-} 
- 
-n.400 <- 400 
-means400 <- rep (NA, iter) 
-for(i in 1:iter){ 
-  means400[i] = mean(sample(ajstu, n.400)) 
-} 
- 
-n.900 <- 900 
-means900 <- rep (NA, iter) 
-for(i in 1:iter){ 
-  means900[i] = mean(sample(ajstu, n.900)) 
-} 
- 
-n.1600 <- 1600 
-means1600 <- rep (NA, iter) 
-for(i in 1:iter){ 
-  means1600[i] = mean(sample(ajstu, n.1600)) 
-} 
- 
-n.2500 <- 2500 
-means2500 <- rep (NA, iter) 
-for(i in 1:iter){ 
-  means2500[i] = mean(sample(ajstu, n.2500)) 
-} 
- 
-h4 <- hist(means4) 
-h25 <- hist(means25) 
-h100 <- hist(means100) 
-h400 <- hist(means400) 
-h900 <- hist(means900) 
-h1600 <- hist(means1600) 
-h2500 <- hist(means2500) 
- 
- 
-plot(h4, ylim=c(0,3000), col="red") 
-plot(h25, add = T, col="blue") 
-plot(h100, add = T, col="green") 
-plot(h400, add = T, col="grey") 
-plot(h900, add = T, col="yellow") 
- 
-m4 <- mean(means4) 
-m25 <- mean(means25) 
-m100 <- mean(means100) 
-m400 <- mean(means400) 
-m900 <- mean(means900) 
-m1600 <- mean(means1600) 
-m2500 <- mean(means2500) 
- 
-s4 <- sd(means4) 
-s25 <- sd(means25) 
-s100 <- sd(means100) 
-s400 <- sd(means400) 
-s900 <- sd(means900) 
-s1600 <- sd(means1600) 
-s2500 <- sd(means2500) 
- 
-sss <- c(4,25,100,400,900,1600,2500) # sss sample sizes 
-means <- c(m4, m25, m100, m400, m900, m1600, m2500) 
-sds <- c(s4, s25, s100, s400, s900, s1600, s2500) 
- 
-temp <- data.frame(sss, 
-                   means, 
-                   sds) 
- 
-temp 
- 
-ses <- rep (NA, length(sss)) # std error memory 
-for(i in 1:length(sss)){ 
-  ses[i] = sqrt(var(ajstu)/sss[i])  # std errors by theorem 
-} 
- 
-data.frame(ses) 
-se.1 <- ses 
-se.2 <- 2 * ses  
- 
-lower.s2 <- mean(ajstu)-se.2 
-upper.s2 <- mean(ajstu)+se.2 
-data.frame(cbind(sss, ses, lower.s2, upper.s2)) 
- 
-# 12/2 lecture  
-# note that we draw the statistical calculation  
-# by "diff/se" = "diff/random_error" 
-n <- 80 
-mean.sample <- 103 
- 
-sample <- rnorm2(n, mean.sample, sd.ajstu) 
-mean(sample) 
-sd(sample) 
- 
-diff <- mean.sample - mean.ajstu # this is actual difference 
-se <- sd.ajstu / sqrt(n) # this is random error  
-t.cal <- diff/se 
-t.cal 
-qnorm(0.025, lower.tail = F) 
-qnorm(0.01/2, lower.tail = F) 
-qt(0.05/2, n-1, lower.tail=F) 
- 
-t.test(sample, mu=mean.ajstu) 
- 
-# or we obtain the exact p value 
-p.value <- pt(t.cal, n-1, lower.tail = F) 
-p.value*2 
- 
- 
- 
-</code> 
summary_of_hypothesis_testing.1757767477.txt.gz · Last modified: by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki