summary_of_hypothesis_testing
Differences
This shows you the differences between two versions of the page.
| Both sides previous revision · Previous revision · Next revision | Previous revision | ||
| summary_of_hypothesis_testing [2025/09/13 12:44] – [Basice] hkimscil | summary_of_hypothesis_testing [2025/11/30 23:00] (current) – hkimscil | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| ====== Hypothesis testing ====== | ====== Hypothesis testing ====== | ||
| + | see also [[:types of error]] | ||
| ====== Basic ====== | ====== Basic ====== | ||
| see first [[:sampling distribution and z-test]] | see first [[:sampling distribution and z-test]] | ||
| - | ====== Hypothesis testing, | + | ====== Hypothesis testing, |
| - | < | + | 샘플은 p2에서 |
| - | rm(list=ls()) | + | < |
| + | * :summary of hypothesis testing: | ||
| + | * *:summary of hypothesis testing: | ||
| + | </ | ||
| - | rnorm2 <- function(n,mean,sd){ | + | ====== Hypothesis testing, 가설검증에 성공한 경우 ====== |
| - | mean+sd*scale(rnorm(n)) | + | |
| - | } | + | |
| - | n.p <- 10000 | + | <tabbed> |
| - | m.p <- 100 | + | * :summary of hypothesis testing: |
| - | sd.p <- 10 | + | * *:summary of hypothesis testing: |
| - | p1 <- rnorm2(n.p, m.p, sd.p) | + | </tabbed> |
| - | m.p1 <- mean(p1) | + | |
| - | sd.p1 <- sd(p1) | + | |
| - | p2 <- rnorm2(n.p, m.p+10, sd.p) | ||
| - | m.p2 <- mean(p2) | ||
| - | sd.p2 <- sd(p2) | ||
| - | n.s <- 100 | ||
| - | se.z1 <- c(sqrt(var(p1)/ | ||
| - | se.z2 <- c(sqrt(var(p2)/ | ||
| - | |||
| - | x.p1 <- seq(mean(p1)-5*se.z1, | ||
| - | mean(p2)+5*se.z1, | ||
| - | length.out = 500) | ||
| - | x.p2 <- seq(mean(p2)-5*se.z1, | ||
| - | mean(p2)+5*se.z1, | ||
| - | length.out = 500) | ||
| - | |||
| - | # Calculate the probability | ||
| - | # density for a normal distribution | ||
| - | y.p1 <- dnorm(x.p1, mean(p1), se.z1) | ||
| - | y.p2 <- dnorm(x.p2, mean(p2), se.z2) | ||
| - | |||
| - | # Plot the theoretical PDF | ||
| - | plot(x.p1, y.p1, type = " | ||
| - | | ||
| - | main = " | ||
| - | xlab = " | ||
| - | lines(x.p2, y.p2, lty=2, lwd=3, add=T) | ||
| - | |||
| - | |||
| - | m.p1 <- mean(p1) | ||
| - | se1 <- c(m.p1-se.z1, | ||
| - | se2 <- c(m.p1-2*se.z1, | ||
| - | se3 <- c(m.p1-3*se.z1, | ||
| - | abline(v=c(m.p1, | ||
| - | | ||
| - | ' | ||
| - | ' | ||
| - | | ||
| - | |||
| - | treated.s <- sample(p2, n.s) | ||
| - | m.treated.s <- mean(treated.s) | ||
| - | abline(v=m.treated.s, | ||
| - | |||
| - | se.z1 | ||
| - | |||
| - | diff <- m.treated.s-mean(p1) | ||
| - | diff/se.z1 | ||
| - | |||
| - | # usual way - using sample's variance | ||
| - | # instead of p1's variance to get | ||
| - | # standard error value | ||
| - | se.s <- sqrt(var(treated.s)/ | ||
| - | se.s | ||
| - | diff/se.s | ||
| - | |||
| - | pt(diff/ | ||
| - | t.test(treated.s, | ||
| - | |||
| - | </ | ||
| - | ===== output ===== | ||
| - | < | ||
| - | > | ||
| - | > | ||
| - | > rm(list=ls()) | ||
| - | > | ||
| - | > rnorm2 <- function(n, | ||
| - | + | ||
| - | + } | ||
| - | > | ||
| - | > n.p <- 10000 | ||
| - | > m.p <- 100 | ||
| - | > sd.p <- 10 | ||
| - | > p1 <- rnorm2(n.p, m.p, sd.p) | ||
| - | > m.p1 <- mean(p1) | ||
| - | > sd.p1 <- sd(p1) | ||
| - | > | ||
| - | > p2 <- rnorm2(n.p, m.p+10, sd.p) | ||
| - | > m.p2 <- mean(p2) | ||
| - | > sd.p2 <- sd(p2) | ||
| - | > | ||
| - | > n.s <- 100 | ||
| - | > se.z1 <- c(sqrt(var(p1)/ | ||
| - | > se.z2 <- c(sqrt(var(p2)/ | ||
| - | > | ||
| - | > x.p1 <- seq(mean(p1)-5*se.z1, | ||
| - | + | ||
| - | + | ||
| - | > x.p2 <- seq(mean(p2)-5*se.z1, | ||
| - | + | ||
| - | + | ||
| - | > | ||
| - | > # Calculate the probability | ||
| - | > # density for a normal distribution | ||
| - | > y.p1 <- dnorm(x.p1, mean(p1), se.z1) | ||
| - | > y.p2 <- dnorm(x.p2, mean(p2), se.z2) | ||
| - | > | ||
| - | > # Plot the theoretical PDF | ||
| - | > plot(x.p1, y.p1, type = " | ||
| - | + lwd=3, | ||
| - | + main = " | ||
| - | + xlab = " | ||
| - | > lines(x.p2, y.p2, lty=2, lwd=3) | ||
| - | > | ||
| - | > | ||
| - | > m.p1 <- mean(p1) | ||
| - | > se1 <- c(m.p1-se.z1, | ||
| - | > se2 <- c(m.p1-2*se.z1, | ||
| - | > se3 <- c(m.p1-3*se.z1, | ||
| - | > abline(v=c(m.p1, | ||
| - | + col=c(' | ||
| - | + ' | ||
| - | + ' | ||
| - | + lwd=1) | ||
| - | > | ||
| - | > treated.s <- sample(p2, n.s) | ||
| - | > m.treated.s <- mean(treated.s) | ||
| - | > abline(v=m.treated.s, | ||
| - | > | ||
| - | </ | ||
| - | {{: | ||
| - | |||
| - | < | ||
| - | > se.z1 | ||
| - | [1] 1 | ||
| - | > | ||
| - | > diff <- m.treated.s-mean(p1) | ||
| - | > diff/se.z1 | ||
| - | [1] 9.057418 | ||
| - | > | ||
| - | > # usual way - using sample's variance | ||
| - | > # instead of p1's variance to get | ||
| - | > # standard error value | ||
| - | > se.s <- sqrt(var(treated.s)/ | ||
| - | > se.s | ||
| - | [1] 1.015243 | ||
| - | > diff/se.s | ||
| - | [1] 8.921425 | ||
| - | > | ||
| - | > pt(diff/ | ||
| - | [1] 2.455388e-14 | ||
| - | > t.test(treated.s, | ||
| - | |||
| - | One Sample t-test | ||
| - | |||
| - | data: treated.s | ||
| - | t = 8.9214, df = 99, p-value = 2.455e-14 | ||
| - | alternative hypothesis: true mean is not equal to 100 | ||
| - | 95 percent confidence interval: | ||
| - | | ||
| - | sample estimates: | ||
| - | mean of x | ||
| - | | ||
| - | |||
| - | > | ||
| - | </ | ||
| ====== se value and sample size ====== | ====== se value and sample size ====== | ||
| + | < | ||
| + | * :summary of hypothesis testing: | ||
| + | * *:summary of hypothesis testing: | ||
| + | </ | ||
| - | < | ||
| - | n.ajstu <- 100000 | ||
| - | mean.ajstu <- 100 | ||
| - | sd.ajstu <- 10 | ||
| - | |||
| - | set.seed(1024) | ||
| - | ajstu <- rnorm2(n.ajstu, | ||
| - | |||
| - | mean(ajstu) | ||
| - | sd(ajstu) | ||
| - | var(ajstu) | ||
| - | |||
| - | iter <- 10000 # # of sampling | ||
| - | |||
| - | n.4 <- 4 | ||
| - | means4 <- rep (NA, iter) | ||
| - | for(i in 1:iter){ | ||
| - | means4[i] = mean(sample(ajstu, | ||
| - | } | ||
| - | |||
| - | n.25 <- 25 | ||
| - | means25 <- rep (NA, iter) | ||
| - | for(i in 1:iter){ | ||
| - | means25[i] = mean(sample(ajstu, | ||
| - | } | ||
| - | |||
| - | n.100 <- 100 | ||
| - | means100 <- rep (NA, iter) | ||
| - | for(i in 1:iter){ | ||
| - | means100[i] = mean(sample(ajstu, | ||
| - | } | ||
| - | |||
| - | n.400 <- 400 | ||
| - | means400 <- rep (NA, iter) | ||
| - | for(i in 1:iter){ | ||
| - | means400[i] = mean(sample(ajstu, | ||
| - | } | ||
| - | |||
| - | n.900 <- 900 | ||
| - | means900 <- rep (NA, iter) | ||
| - | for(i in 1:iter){ | ||
| - | means900[i] = mean(sample(ajstu, | ||
| - | } | ||
| - | |||
| - | n.1600 <- 1600 | ||
| - | means1600 <- rep (NA, iter) | ||
| - | for(i in 1:iter){ | ||
| - | means1600[i] = mean(sample(ajstu, | ||
| - | } | ||
| - | |||
| - | n.2500 <- 2500 | ||
| - | means2500 <- rep (NA, iter) | ||
| - | for(i in 1:iter){ | ||
| - | means2500[i] = mean(sample(ajstu, | ||
| - | } | ||
| - | |||
| - | h4 <- hist(means4) | ||
| - | h25 <- hist(means25) | ||
| - | h100 <- hist(means100) | ||
| - | h400 <- hist(means400) | ||
| - | h900 <- hist(means900) | ||
| - | h1600 <- hist(means1600) | ||
| - | h2500 <- hist(means2500) | ||
| - | |||
| - | |||
| - | plot(h4, ylim=c(0, | ||
| - | plot(h25, add = T, col=" | ||
| - | plot(h100, add = T, col=" | ||
| - | plot(h400, add = T, col=" | ||
| - | plot(h900, add = T, col=" | ||
| - | |||
| - | m4 <- mean(means4) | ||
| - | m25 <- mean(means25) | ||
| - | m100 <- mean(means100) | ||
| - | m400 <- mean(means400) | ||
| - | m900 <- mean(means900) | ||
| - | m1600 <- mean(means1600) | ||
| - | m2500 <- mean(means2500) | ||
| - | |||
| - | s4 <- sd(means4) | ||
| - | s25 <- sd(means25) | ||
| - | s100 <- sd(means100) | ||
| - | s400 <- sd(means400) | ||
| - | s900 <- sd(means900) | ||
| - | s1600 <- sd(means1600) | ||
| - | s2500 <- sd(means2500) | ||
| - | |||
| - | sss <- c(4, | ||
| - | means <- c(m4, m25, m100, m400, m900, m1600, m2500) | ||
| - | sds <- c(s4, s25, s100, s400, s900, s1600, s2500) | ||
| - | |||
| - | temp <- data.frame(sss, | ||
| - | | ||
| - | sds) | ||
| - | |||
| - | temp | ||
| - | |||
| - | ses <- rep (NA, length(sss)) # std error memory | ||
| - | for(i in 1: | ||
| - | ses[i] = sqrt(var(ajstu)/ | ||
| - | } | ||
| - | |||
| - | data.frame(ses) | ||
| - | se.1 <- ses | ||
| - | se.2 <- 2 * ses | ||
| - | |||
| - | lower.s2 <- mean(ajstu)-se.2 | ||
| - | upper.s2 <- mean(ajstu)+se.2 | ||
| - | data.frame(cbind(sss, | ||
| - | |||
| - | # 12/2 lecture | ||
| - | # note that we draw the statistical calculation | ||
| - | # by " | ||
| - | n <- 80 | ||
| - | mean.sample <- 103 | ||
| - | |||
| - | sample <- rnorm2(n, mean.sample, | ||
| - | mean(sample) | ||
| - | sd(sample) | ||
| - | |||
| - | diff <- mean.sample - mean.ajstu # this is actual difference | ||
| - | se <- sd.ajstu / sqrt(n) # this is random error | ||
| - | t.cal <- diff/se | ||
| - | t.cal | ||
| - | qnorm(0.025, | ||
| - | qnorm(0.01/ | ||
| - | qt(0.05/2, n-1, lower.tail=F) | ||
| - | |||
| - | t.test(sample, | ||
| - | |||
| - | # or we obtain the exact p value | ||
| - | p.value <- pt(t.cal, n-1, lower.tail = F) | ||
| - | p.value*2 | ||
| - | |||
| - | |||
| - | |||
| - | </ | ||
summary_of_hypothesis_testing.1757767477.txt.gz · Last modified: by hkimscil
