c:ms:2026:lecture_note_week_04
Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revision | |||
| c:ms:2026:lecture_note_week_04 [2026/03/31 16:38] – [Recap] hkimscil | c:ms:2026:lecture_note_week_04 [2026/03/31 22:28] (current) – hkimscil | ||
|---|---|---|---|
| Line 46: | Line 46: | ||
| </ | </ | ||
| - | ====== Recap ====== | ||
| - | Distribution of Sample Means -- mu = 40, sigma = 4 (hence var = 16) 인 모집단에서 크기가 n인 샘플을 뽑는 일을 무한 반복할 때 그 샘플평균들이 모인 집합 | ||
| - | <tabbox rscript01> | ||
| - | < | ||
| - | rm(list=ls()) | ||
| - | rnorm2 <- function(n, | ||
| - | mean+sd*scale(rnorm(n)) | ||
| - | } | ||
| - | ss <- function(x) { | ||
| - | sum((x-mean(x))^2) | ||
| - | } | ||
| - | mu <- 40 | ||
| - | sigma <- 4 | ||
| - | iter <- 1000000 | ||
| - | sz <- 16 | ||
| - | se <- sigma/ | ||
| - | ################################ | ||
| - | means <- rnorm2(iter, | ||
| - | hist(means, breaks=50, | ||
| - | xlim = c(mu-6*se, mu+6*se), | ||
| - | main = paste(" | ||
| - | abline(v=mu, | ||
| - | lo1 <- mu - se*1 | ||
| - | hi1 <- mu + se*1 | ||
| - | lo2 <- mu - se*2 | ||
| - | hi2 <- mu + se*2 | ||
| - | lo3 <- mu - se*3 | ||
| - | hi3 <- mu + se*3 | ||
| - | |||
| - | abline(v=c(lo1, | ||
| - | | ||
| - | | ||
| - | |||
| - | print(c(lo2, | ||
| - | |||
| - | m.samp <- 37 | ||
| - | p.val <- pnorm(m.samp, | ||
| - | p.val | ||
| - | z.cal <- (m.samp-mu)/ | ||
| - | z.cal | ||
| - | p.val <- pnorm(z.cal)*2 | ||
| - | p.val | ||
| - | |||
| - | zmeans <- scale(means) | ||
| - | hist(zmeans, | ||
| - | xlim = c(0-10*1, 0+10*1), | ||
| - | | ||
| - | abline(v=0, col=" | ||
| - | abline(v=z.cal, | ||
| - | abline(v=-z.cal, | ||
| - | text(x=-6, y=50000, | ||
| - | | ||
| - | pos = 1, | ||
| - | | ||
| - | text(x=4, y=50000, | ||
| - | | ||
| - | | ||
| - | | ||
| - | text(x=-6, y=30000, | ||
| - | | ||
| - | | ||
| - | pos = 1, | ||
| - | | ||
| - | |||
| - | hist(zmeans, | ||
| - | xlim = c(0-10*1, 0+10*1), | ||
| - | | ||
| - | abline(v=0, col=" | ||
| - | abline(v=c(-1, | ||
| - | | ||
| - | |||
| - | z.cal | ||
| - | p.val | ||
| - | ##### | ||
| - | # 위의 아이디어로는 z.cal 점수가 | ||
| - | # +-2 밖에 있는지 보면 된다. 즉, | ||
| - | # 이는 prob가 0.05보다 작은지 | ||
| - | # 보면 되는 것이다. | ||
| - | ##### | ||
| - | # +-2 는 정확한 숫자가 아니고 | ||
| - | # qnorm(.05/ | ||
| - | # 가 정확한 숫자 | ||
| - | two.minus.exact <- qnorm(.05/ | ||
| - | two.plus.exact <- qnorm(1-(.05/ | ||
| - | c(two.minus.exact, | ||
| - | ##### | ||
| - | # 그러나 R 사용시에는 z 점수로 | ||
| - | # 판단하기 보다는 | ||
| - | # 직접 구하는 prob.로 판단 | ||
| - | pnorm(z.cal)*2 | ||
| - | p.val | ||
| - | ##### | ||
| - | # 위에서 그룹 간의 차이를 | ||
| - | # standard error로 나누는 것에 주의 | ||
| - | # | ||
| - | |||
| - | |||
| - | ################ | ||
| - | m.samp <- 43 | ||
| - | sd.samp <- 4 | ||
| - | sz <- 16 | ||
| - | samp <- rnorm2(sz, m.samp, sd.samp) | ||
| - | diff <- m.samp - mu | ||
| - | se <- sd.samp / sqrt(sz) | ||
| - | t.cal <- diff/se | ||
| - | df <- sz-1 | ||
| - | p.val <- pt(t.cal, df=df, lower.tail = F)*2 | ||
| - | t.cal | ||
| - | df | ||
| - | p.val | ||
| - | t.test(samp, | ||
| - | |||
| - | </ | ||
| - | <tabbox out01> | ||
| - | < | ||
| - | > rm(list=ls()) | ||
| - | > rnorm2 <- function(n, | ||
| - | + | ||
| - | + } | ||
| - | > ss <- function(x) { | ||
| - | + | ||
| - | + } | ||
| - | > | ||
| - | > mu <- 40 | ||
| - | > sigma <- 4 | ||
| - | > iter <- 1000000 | ||
| - | > sz <- 16 | ||
| - | > se <- sigma/ | ||
| - | > ################################ | ||
| - | > means <- rnorm2(iter, | ||
| - | > hist(means, breaks=50, | ||
| - | + xlim = c(mu-6*se, mu+6*se), | ||
| - | + main = paste(" | ||
| - | > abline(v=mu, | ||
| - | > lo1 <- mu - se*1 | ||
| - | > hi1 <- mu + se*1 | ||
| - | > lo2 <- mu - se*2 | ||
| - | > hi2 <- mu + se*2 | ||
| - | > lo3 <- mu - se*3 | ||
| - | > hi3 <- mu + se*3 | ||
| - | > | ||
| - | > abline(v=c(lo1, | ||
| - | + col=c(" | ||
| - | + lwd=2) | ||
| - | > | ||
| - | > print(c(lo2, | ||
| - | [1] 38 42 | ||
| - | > | ||
| - | > m.samp <- 37 | ||
| - | > p.val <- pnorm(m.samp, | ||
| - | > p.val | ||
| - | [1] 0.002699796 | ||
| - | > z.cal <- (m.samp-mu)/ | ||
| - | > z.cal | ||
| - | [1] -3 | ||
| - | > p.val <- pnorm(z.cal)*2 | ||
| - | > p.val | ||
| - | [1] 0.002699796 | ||
| - | > | ||
| - | > zmeans <- scale(means) | ||
| - | > hist(zmeans, | ||
| - | + xlim = c(0-10*1, 0+10*1), | ||
| - | + main=(" | ||
| - | > abline(v=0, col=" | ||
| - | > abline(v=z.cal, | ||
| - | > abline(v=-z.cal, | ||
| - | > text(x=-6, y=50000, | ||
| - | + label=paste(" | ||
| - | + pos = 1, | ||
| - | + col=" | ||
| - | > text(x=4, y=50000, | ||
| - | + label=paste(-z.cal), | ||
| - | + pos=1, | ||
| - | + col=" | ||
| - | > text(x=-6, y=30000, | ||
| - | + label=paste(" | ||
| - | + round(p.val, | ||
| - | + pos = 1, | ||
| - | + col=" | ||
| - | > | ||
| - | > hist(zmeans, | ||
| - | + xlim = c(0-10*1, 0+10*1), | ||
| - | + main=(" | ||
| - | > abline(v=0, col=" | ||
| - | > abline(v=c(-1, | ||
| - | + col=c(" | ||
| - | > | ||
| - | > z.cal | ||
| - | [1] -3 | ||
| - | > p.val | ||
| - | [1] 0.002699796 | ||
| - | > ##### | ||
| - | > # 위의 아이디어로는 z.cal 점수가 | ||
| - | > # +-2 밖에 있는지 보면 된다. 즉, | ||
| - | > # 이는 prob가 0.05보다 작은지 | ||
| - | > # 보면 되는 것이다. | ||
| - | > ##### | ||
| - | > # +-2 는 정확한 숫자가 아니고 | ||
| - | > # qnorm(.05/ | ||
| - | > # 가 정확한 숫자 | ||
| - | > two.minus.exact <- qnorm(.05/ | ||
| - | > two.plus.exact <- qnorm(1-(.05/ | ||
| - | > c(two.minus.exact, | ||
| - | [1] -1.959964 | ||
| - | > ##### | ||
| - | > # 그러나 R 사용시에는 z 점수로 | ||
| - | > # 판단하기 보다는 | ||
| - | > # 직접 구하는 prob.로 판단 | ||
| - | > pnorm(z.cal)*2 | ||
| - | [1] 0.002699796 | ||
| - | > p.val | ||
| - | [1] 0.002699796 | ||
| - | > ##### | ||
| - | > # 위에서 그룹 간의 차이를 | ||
| - | > # standard error로 나누는 것에 주의 | ||
| - | > # | ||
| - | > | ||
| - | > | ||
| - | > ################ | ||
| - | > m.samp <- 43 | ||
| - | > sd.samp <- 4 | ||
| - | > sz <- 16 | ||
| - | > samp <- rnorm2(sz, m.samp, sd.samp) | ||
| - | > diff <- m.samp - mu | ||
| - | > se <- sd.samp / sqrt(sz) | ||
| - | > t.cal <- diff/se | ||
| - | > df <- sz-1 | ||
| - | > p.val <- pt(t.cal, df=df, lower.tail = F)*2 | ||
| - | > t.cal | ||
| - | [1] 3 | ||
| - | > df | ||
| - | [1] 15 | ||
| - | > p.val | ||
| - | [1] 0.008972737 | ||
| - | > t.test(samp, | ||
| - | |||
| - | One Sample t-test | ||
| - | |||
| - | data: samp | ||
| - | t = 3, df = 15, p-value = 0.008973 | ||
| - | alternative hypothesis: true mean is not equal to 40 | ||
| - | 95 percent confidence interval: | ||
| - | | ||
| - | sample estimates: | ||
| - | mean of x | ||
| - | | ||
| - | |||
| - | > | ||
| - | </ | ||
| - | </ | ||
| - | {{.: | ||
| - | {{.: | ||
| - | {{.: | ||
| - | |||
| - | <tabbox rscript02> | ||
| - | < | ||
| - | ##### | ||
| - | # | ||
| - | m.a <- 5.8 | ||
| - | m.b <- 6.3 | ||
| - | sd.a <- .5 | ||
| - | sd.b <- .5 | ||
| - | sz.a <- 16 | ||
| - | sz.b <- 16 | ||
| - | df.a <- sz.a-1 | ||
| - | df.b <- sz.b-1 | ||
| - | df <- df.a + df.b | ||
| - | a <- rnorm2(sz.a, | ||
| - | b <- rnorm2(sz.b, | ||
| - | diff <- m.a - m.b | ||
| - | pv <- (ss(a)+ss(b))/ | ||
| - | se <- sqrt(pv/ | ||
| - | t.cal <- diff / se | ||
| - | p.val <- pt(t.cal, df=df)*2 | ||
| - | |||
| - | diff | ||
| - | se | ||
| - | t.cal | ||
| - | df | ||
| - | p.val | ||
| - | t.test(a,b, var.equal = T) | ||
| - | diff - se*2 | ||
| - | diff + se*2 | ||
| - | lo <- qt(.05/ | ||
| - | lo | ||
| - | hi <- -lo | ||
| - | diff + se*lo | ||
| - | diff + se*hi | ||
| - | |||
| - | ##### | ||
| - | # t-test repeated measures | ||
| - | ##### | ||
| - | m.t1 <- 103 | ||
| - | m.t2 <- 111 | ||
| - | sd.t1 <- 10 | ||
| - | sd.t2 <- 10 | ||
| - | sz <- 16 | ||
| - | t1 <- rnorm2(sz, m.t1, sd.t1) | ||
| - | t2 <- rnorm2(sz, m.t2, sd.t2) | ||
| - | t1 | ||
| - | t2 | ||
| - | mdiff <- m.t1-m.t2 | ||
| - | diff <- t1-t2 | ||
| - | sd.diff <- sd(diff) | ||
| - | se <- sd.diff/ | ||
| - | t.cal <- mdiff/se | ||
| - | p.val <- pt(t.cal, df=sz-1)*2 | ||
| - | t.cal | ||
| - | sz-1 | ||
| - | p.val | ||
| - | t.test(t1, | ||
| - | two <- qt(.05/2, df=sz-1) | ||
| - | two | ||
| - | lo <- se*two | ||
| - | hi <- -lo | ||
| - | c(lo, hi) | ||
| - | c(mdiff+lo, mdiff+hi) | ||
| - | </ | ||
| - | |||
| - | <tabbox rout02> | ||
| - | < | ||
| - | > ##### | ||
| - | > # | ||
| - | > m.a <- 5.8 | ||
| - | > m.b <- 6.3 | ||
| - | > sd.a <- .5 | ||
| - | > sd.b <- .5 | ||
| - | > sz.a <- 16 | ||
| - | > sz.b <- 16 | ||
| - | > df.a <- sz.a-1 | ||
| - | > df.b <- sz.b-1 | ||
| - | > df <- df.a + df.b | ||
| - | > a <- rnorm2(sz.a, | ||
| - | > b <- rnorm2(sz.b, | ||
| - | > diff <- m.a - m.b | ||
| - | > pv <- (ss(a)+ss(b))/ | ||
| - | > se <- sqrt(pv/ | ||
| - | > t.cal <- diff / se | ||
| - | > p.val <- pt(t.cal, df=df)*2 | ||
| - | > | ||
| - | > diff | ||
| - | [1] -0.5 | ||
| - | > se | ||
| - | [1] 0.1767767 | ||
| - | > t.cal | ||
| - | [1] -2.828427 | ||
| - | > df | ||
| - | [1] 30 | ||
| - | > p.val | ||
| - | [1] 0.008257336 | ||
| - | > t.test(a,b, var.equal = T) | ||
| - | |||
| - | Two Sample t-test | ||
| - | |||
| - | data: a and b | ||
| - | t = -2.8284, df = 30, p-value = 0.008257 | ||
| - | alternative hypothesis: true difference in means is not equal to 0 | ||
| - | 95 percent confidence interval: | ||
| - | | ||
| - | sample estimates: | ||
| - | mean of x mean of y | ||
| - | 5.8 | ||
| - | |||
| - | > diff - se*2 | ||
| - | [1] -0.8535534 | ||
| - | > diff + se*2 | ||
| - | [1] -0.1464466 | ||
| - | > lo <- qt(.05/ | ||
| - | > lo | ||
| - | [1] -2.042272 | ||
| - | > hi <- -lo | ||
| - | > diff + se*lo | ||
| - | [1] -0.8610262 | ||
| - | > diff + se*hi | ||
| - | [1] -0.1389738 | ||
| - | > | ||
| - | > ##### | ||
| - | > # t-test repeated measures | ||
| - | > ##### | ||
| - | > m.t1 <- 103 | ||
| - | > m.t2 <- 111 | ||
| - | > sd.t1 <- 10 | ||
| - | > sd.t2 <- 10 | ||
| - | > sz <- 16 | ||
| - | > t1 <- rnorm2(sz, m.t1, sd.t1) | ||
| - | > t2 <- rnorm2(sz, m.t2, sd.t2) | ||
| - | > t1 | ||
| - | [,1] | ||
| - | | ||
| - | | ||
| - | [3,] 100.82700 | ||
| - | [4,] 120.11867 | ||
| - | [5,] 103.06410 | ||
| - | [6,] 117.36762 | ||
| - | | ||
| - | [8,] 111.72472 | ||
| - | [9,] 100.06093 | ||
| - | [10,] 114.58757 | ||
| - | [11,] 105.99472 | ||
| - | [12,] 84.34803 | ||
| - | [13,] 94.63867 | ||
| - | [14,] 94.49667 | ||
| - | [15,] 106.03514 | ||
| - | [16,] 109.12144 | ||
| - | attr(," | ||
| - | [1] 0.08912759 | ||
| - | attr(," | ||
| - | [1] 0.9759765 | ||
| - | > t2 | ||
| - | [,1] | ||
| - | [1,] 114.76609 | ||
| - | [2,] 111.81937 | ||
| - | [3,] 102.93248 | ||
| - | [4,] 122.85959 | ||
| - | [5,] 105.68180 | ||
| - | [6,] 110.43890 | ||
| - | [7,] 115.34844 | ||
| - | | ||
| - | [9,] 117.00475 | ||
| - | [10,] 98.63924 | ||
| - | [11,] 118.87807 | ||
| - | [12,] 107.55519 | ||
| - | [13,] 128.46569 | ||
| - | [14,] 93.50094 | ||
| - | [15,] 107.15280 | ||
| - | [16,] 123.56487 | ||
| - | attr(," | ||
| - | [1] 0.2000755 | ||
| - | attr(," | ||
| - | [1] 0.8946962 | ||
| - | > mdiff <- m.t1-m.t2 | ||
| - | > diff <- t1-t2 | ||
| - | > sd.diff <- sd(diff) | ||
| - | > se <- sd.diff/ | ||
| - | > t.cal <- mdiff/se | ||
| - | > p.val <- pt(t.cal, df=sz-1)*2 | ||
| - | > t.cal | ||
| - | [1] -2.2741 | ||
| - | > sz-1 | ||
| - | [1] 15 | ||
| - | > p.val | ||
| - | [1] 0.03808083 | ||
| - | > t.test(t1, | ||
| - | |||
| - | Paired t-test | ||
| - | |||
| - | data: t1 and t2 | ||
| - | t = -2.2741, df = 15, p-value = 0.03808 | ||
| - | alternative hypothesis: true mean difference is not equal to 0 | ||
| - | 95 percent confidence interval: | ||
| - | | ||
| - | sample estimates: | ||
| - | mean difference | ||
| - | | ||
| - | |||
| - | > two <- qt(.05/2, df=sz-1) | ||
| - | > two | ||
| - | [1] -2.13145 | ||
| - | > lo <- se*two | ||
| - | > hi <- -lo | ||
| - | > c(lo, hi) | ||
| - | [1] -7.498174 | ||
| - | > c(mdiff+lo, mdiff+hi) | ||
| - | [1] -15.4981736 | ||
| - | > | ||
| - | > | ||
| - | </ | ||
| - | </ | ||
c/ms/2026/lecture_note_week_04.txt · Last modified: by hkimscil
