gradient_descent
Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
| gradient_descent [2026/03/14 06:05] – [b값 구하기] hkimscil | gradient_descent [2026/03/14 11:21] (current) – [output] hkimscil | ||
|---|---|---|---|
| Line 17: | Line 17: | ||
| library(ggplot2) | library(ggplot2) | ||
| library(ggpmisc) | library(ggpmisc) | ||
| + | |||
| rm(list=ls()) | rm(list=ls()) | ||
| - | # set.seed(191) | + | |
| + | # data 만들기 | ||
| + | set.seed(191) | ||
| nx <- 200 | nx <- 200 | ||
| mx <- 4.5 | mx <- 4.5 | ||
| Line 26: | Line 29: | ||
| slp <- 12 | slp <- 12 | ||
| y <- slp * x + rnorm(nx, 0, slp*sdx*3) | y <- slp * x + rnorm(nx, 0, slp*sdx*3) | ||
| - | |||
| data <- data.frame(x, | data <- data.frame(x, | ||
| + | # data 변인 완성 | ||
| + | # regression summary shows | ||
| + | # a and b | ||
| mo <- lm(y ~ x, data = data) | mo <- lm(y ~ x, data = data) | ||
| summary(mo) | summary(mo) | ||
| Line 37: | Line 42: | ||
| stat_poly_eq(use_label(c(" | stat_poly_eq(use_label(c(" | ||
| theme_classic() | theme_classic() | ||
| - | # set.seed(191) | + | |
| - | # Initialize random betas | + | # 위에서 확인한 |
| - | # 우선 | + | |
| - | # 변화시켜서 이해 | + | |
| b <- summary(mo)$coefficients[2] | b <- summary(mo)$coefficients[2] | ||
| a <- 0 | a <- 0 | ||
| Line 47: | Line 50: | ||
| a.init <- a | a.init <- a | ||
| - | # Predict function: | + | # Predict function: |
| predict <- function(x, a, b){ | predict <- function(x, a, b){ | ||
| return (a + b * x) | return (a + b * x) | ||
| } | } | ||
| - | # And loss function is: | + | # And loss function is: residual 혹은 error 값 |
| residuals <- function(predictions, | residuals <- function(predictions, | ||
| return(y - predictions) | return(y - predictions) | ||
| } | } | ||
| - | # we use sum of square of error which oftentimes become big | + | # we use sum of square of error |
| ssrloss <- function(predictions, | ssrloss <- function(predictions, | ||
| residuals <- (y - predictions) | residuals <- (y - predictions) | ||
| Line 67: | Line 70: | ||
| as <- c() # for as (intercepts) | as <- c() # for as (intercepts) | ||
| + | # a (절편) 후보 값을 -50 에서 50 범위로 0.01씩 증가시켜서 | ||
| + | # for 문에 대입, i로 사용 | ||
| for (i in seq(from = -50, to = 50, by = 0.01)) { | for (i in seq(from = -50, to = 50, by = 0.01)) { | ||
| pred <- predict(x, i, b) | pred <- predict(x, i, b) | ||
| Line 73: | Line 78: | ||
| ssrs <- append(ssrs, | ssrs <- append(ssrs, | ||
| srs <- append(srs, sum(res)) | srs <- append(srs, sum(res)) | ||
| - | as <- append(as, i) | + | as <- append(as, i) # i 값을 a로 사용했기에 as 변인에 기록 |
| } | } | ||
| + | # -50 ~ 50 은 길이 1인 구간이 100개, 각 구간에 0.01이 100개, 시작점 포함: 100 * 100 + 1 = 10001 | ||
| length(ssrs) | length(ssrs) | ||
| length(srs) | length(srs) | ||
| length(as) | length(as) | ||
| - | min(ssrs) | + | min(ssrs) |
| - | min.pos.ssrs <- which(ssrs == min(ssrs)) | + | min.pos.ssrs <- which(ssrs == min(ssrs)) |
| min.pos.ssrs | min.pos.ssrs | ||
| - | print(as[min.pos.ssrs]) | + | print(as[min.pos.ssrs]) # 그 몇번째에 해당하는 a값을 구함 |
| + | # 이 a값이 최소 ssr값을 갖도록 하는 a (소수점 2자리에서 구함) | ||
| summary(mo) | summary(mo) | ||
| - | plot(seq(1, length(ssrs)), ssrs) | + | |
| - | plot(seq(1, length(ssrs)), srs) | + | k <- min(ssrs) |
| + | j <- as[min.pos.ssrs] | ||
| + | plot(seq(1, length(as)), ssrs, type=" | ||
| + | text(4500,2000000, | ||
| tail(ssrs) | tail(ssrs) | ||
| max(ssrs) | max(ssrs) | ||
| Line 96: | Line 107: | ||
| <tabbox ro01> | <tabbox ro01> | ||
| < | < | ||
| + | > # library(tidyverse) | ||
| + | > # library(data.table) | ||
| > library(ggplot2) | > library(ggplot2) | ||
| > library(ggpmisc) | > library(ggpmisc) | ||
| > | > | ||
| > rm(list=ls()) | > rm(list=ls()) | ||
| - | > # set.seed(191) | + | > |
| + | > # data 만들기 | ||
| + | > set.seed(191) | ||
| > nx <- 200 | > nx <- 200 | ||
| > mx <- 4.5 | > mx <- 4.5 | ||
| Line 107: | Line 122: | ||
| > slp <- 12 | > slp <- 12 | ||
| > y <- slp * x + rnorm(nx, 0, slp*sdx*3) | > y <- slp * x + rnorm(nx, 0, slp*sdx*3) | ||
| - | > | ||
| > data <- data.frame(x, | > data <- data.frame(x, | ||
| + | > # data 변인 완성 | ||
| > | > | ||
| + | > # regression summary shows | ||
| + | > # a and b | ||
| > mo <- lm(y ~ x, data = data) | > mo <- lm(y ~ x, data = data) | ||
| > summary(mo) | > summary(mo) | ||
| Line 118: | Line 135: | ||
| Residuals: | Residuals: | ||
| | | ||
| - | -259.314 -59.215 6.683 | + | -245.291 -67.967 -3.722 |
| Coefficients: | Coefficients: | ||
| Estimate Std. Error t value Pr(> | Estimate Std. Error t value Pr(> | ||
| - | (Intercept) | + | (Intercept) |
| - | x 11.888 2.433 4.887 2.11e-06 *** | + | x 12.900 2.870 4.495 1.19e-05 *** |
| --- | --- | ||
| Signif. codes: | Signif. codes: | ||
| - | Residual standard error: | + | Residual standard error: |
| - | Multiple R-squared: | + | Multiple R-squared: |
| - | F-statistic: | + | F-statistic: |
| > | > | ||
| > ggplot(data = data, aes(x = x, y = y)) + | > ggplot(data = data, aes(x = x, y = y)) + | ||
| - | + | + | + |
| - | + | + | + |
| - | + | + | + |
| - | + | + | + |
| - | > # set.seed(191) | + | > |
| - | > # Initialize random betas | + | > # 위에서 확인한 b값을 |
| - | > # 우선 | + | |
| - | > # 변화시켜서 이해 | + | |
| > b <- summary(mo)$coefficients[2] | > b <- summary(mo)$coefficients[2] | ||
| > a <- 0 | > a <- 0 | ||
| Line 147: | Line 162: | ||
| > a.init <- a | > a.init <- a | ||
| > | > | ||
| - | > # Predict function: | + | > # Predict function: |
| > predict <- function(x, a, b){ | > predict <- function(x, a, b){ | ||
| - | + | + | + |
| + } | + } | ||
| > | > | ||
| - | > # And loss function is: | + | > # And loss function is: residual 혹은 error 값 |
| > residuals <- function(predictions, | > residuals <- function(predictions, | ||
| - | + | + | + |
| + } | + } | ||
| > | > | ||
| - | > # we use sum of square of error which oftentimes become big | + | > # we use sum of square of error |
| > ssrloss <- function(predictions, | > ssrloss <- function(predictions, | ||
| - | + | + | + |
| - | + | + | + |
| + } | + } | ||
| > | > | ||
| Line 167: | Line 182: | ||
| > as <- c() # for as (intercepts) | > as <- c() # for as (intercepts) | ||
| > | > | ||
| + | > # a (절편) 후보 값을 -50 에서 50 범위로 0.01씩 증가시켜서 | ||
| + | > # for 문에 대입, i로 사용 | ||
| > for (i in seq(from = -50, to = 50, by = 0.01)) { | > for (i in seq(from = -50, to = 50, by = 0.01)) { | ||
| - | + | + | + |
| - | + | + | + |
| - | + | + | + |
| - | + | + | + |
| - | + | + | + |
| - | + | + | + |
| + } | + } | ||
| + | > # -50 ~ 50 은 길이 1인 구간이 100개, 각 구간에 0.01이 100개, 시작점 포함: 100 * 100 + 1 = 10001 | ||
| > length(ssrs) | > length(ssrs) | ||
| [1] 10001 | [1] 10001 | ||
| Line 182: | Line 200: | ||
| [1] 10001 | [1] 10001 | ||
| > | > | ||
| - | > min(ssrs) | + | > min(ssrs) |
| - | [1] 1553336 | + | [1] 1747011 |
| - | > min.pos.ssrs <- which(ssrs == min(ssrs)) | + | > min.pos.ssrs <- which(ssrs == min(ssrs)) |
| > min.pos.ssrs | > min.pos.ssrs | ||
| - | [1] 5828 | + | [1] 4896 |
| - | > print(as[min.pos.ssrs]) | + | > print(as[min.pos.ssrs]) |
| - | [1] 8.27 | + | [1] -1.05 |
| + | > # 이 a값이 최소 ssr값을 갖도록 하는 a (소수점 2자리에서 구함) | ||
| > summary(mo) | > summary(mo) | ||
| Line 196: | Line 215: | ||
| Residuals: | Residuals: | ||
| | | ||
| - | -259.314 -59.215 6.683 | + | -245.291 -67.967 -3.722 |
| Coefficients: | Coefficients: | ||
| Estimate Std. Error t value Pr(> | Estimate Std. Error t value Pr(> | ||
| - | (Intercept) | + | (Intercept) |
| - | x 11.888 2.433 4.887 2.11e-06 *** | + | x 12.900 2.870 4.495 1.19e-05 *** |
| --- | --- | ||
| Signif. codes: | Signif. codes: | ||
| - | Residual standard error: | + | Residual standard error: |
| - | Multiple R-squared: | + | Multiple R-squared: |
| - | F-statistic: | + | F-statistic: |
| - | > plot(seq(1, length(ssrs)), ssrs) | + | > |
| - | > plot(seq(1, length(ssrs)), srs) | + | > k <- min(ssrs) |
| + | > j <- as[min.pos.ssrs] | ||
| + | > plot(seq(1, length(as)), ssrs, type=" | ||
| + | > text(4500,2000000, | ||
| + | > | ||
| > tail(ssrs) | > tail(ssrs) | ||
| - | [1] 1900842 1901008 1901175 1901342 1901509 1901676 | + | [1] 2267151 2267355 2267559 2267763 2267967 2268171 |
| > max(ssrs) | > max(ssrs) | ||
| - | [1] 2232329 | + | [1] 2268171 |
| > min(ssrs) | > min(ssrs) | ||
| - | [1] 1553336 | + | [1] 1747011 |
| > tail(srs) | > tail(srs) | ||
| - | [1] -8336.735 -8338.735 -8340.735 -8342.735 -8344.735 -8346.735 | + | [1] -10199.41 -10201.41 -10203.41 -10205.41 -10207.41 -10209.41 |
| > max(srs) | > max(srs) | ||
| - | [1] 11653.26 | + | [1] 9790.59 |
| > min(srs) | > min(srs) | ||
| - | [1] -8346.735 | + | [1] -10209.41 |
| - | > | + | |
| > | > | ||
| </ | </ | ||
| </ | </ | ||
| - | {{: | + | {{pasted:20260314-104647.png}} |
| - | {{: | + | |
| - | {{:pasted:20250821-120455.png}} | + | |
| 위 방법은 dumb . . . . . | 위 방법은 dumb . . . . . | ||
| Line 293: | Line 313: | ||
| > # we use sum of square of error which oftentimes become big | > # we use sum of square of error which oftentimes become big | ||
| > msrloss <- function(predictions, | > msrloss <- function(predictions, | ||
| - | + | + | + |
| - | + | + | + |
| + } | + } | ||
| > | > | ||
| Line 302: | Line 322: | ||
| > | > | ||
| > for (i in seq(from = -50, to = 50, by = 0.01)) { | > for (i in seq(from = -50, to = 50, by = 0.01)) { | ||
| - | + | + | + |
| - | + | + | + |
| - | + | + | + |
| - | + | + | + |
| - | + | + | + |
| - | + | + | + |
| + } | + } | ||
| > length(msrs) | > length(msrs) | ||
| Line 317: | Line 337: | ||
| > | > | ||
| > min(msrs) | > min(msrs) | ||
| - | [1] 7766.679 | + | [1] 8735.055 |
| > min.pos.msrs <- which(msrs == min(msrs)) | > min.pos.msrs <- which(msrs == min(msrs)) | ||
| > min.pos.msrs | > min.pos.msrs | ||
| - | [1] 5828 | + | [1] 4896 |
| > print(as[min.pos.msrs]) | > print(as[min.pos.msrs]) | ||
| - | [1] 8.27 | + | [1] -1.05 |
| > summary(mo) | > summary(mo) | ||
| Line 330: | Line 350: | ||
| Residuals: | Residuals: | ||
| | | ||
| - | -259.314 -59.215 6.683 | + | -245.291 -67.967 -3.722 |
| Coefficients: | Coefficients: | ||
| Estimate Std. Error t value Pr(> | Estimate Std. Error t value Pr(> | ||
| - | (Intercept) | + | (Intercept) |
| - | x 11.888 2.433 4.887 2.11e-06 *** | + | x 12.900 2.870 4.495 1.19e-05 *** |
| --- | --- | ||
| Signif. codes: | Signif. codes: | ||
| - | Residual standard error: | + | Residual standard error: |
| - | Multiple R-squared: | + | Multiple R-squared: |
| - | F-statistic: | + | F-statistic: |
| > plot(seq(1, length(msrs)), | > plot(seq(1, length(msrs)), | ||
| > plot(seq(1, length(srs)), | > plot(seq(1, length(srs)), | ||
| > tail(msrs) | > tail(msrs) | ||
| - | [1] 9504.208 9505.041 9505.875 9506.710 9507.544 9508.379 | + | [1] 11335.75 11336.77 11337.79 11338.81 11339.84 11340.86 |
| > max(msrs) | > max(msrs) | ||
| - | [1] 11161.64 | + | [1] 11340.86 |
| > min(msrs) | > min(msrs) | ||
| - | [1] 7766.679 | + | [1] 8735.055 |
| > tail(srs) | > tail(srs) | ||
| - | [1] -41.68368 -41.69368 -41.70368 -41.71368 -41.72368 -41.73368 | + | [1] -50.99705 -51.00705 -51.01705 -51.02705 -51.03705 -51.04705 |
| > max(srs) | > max(srs) | ||
| - | [1] 58.26632 | + | [1] 48.95295 |
| > min(srs) | > min(srs) | ||
| - | [1] -41.73368 | + | [1] -51.04705 |
| - | > | + | > |
| </ | </ | ||
| - | {{: | + | {{pasted:20260314-112129.png}} |
| - | {{:pasted:20250821-121024.png}} | + | |
| ===== b값 구하기 ===== | ===== b값 구하기 ===== | ||
| Line 626: | Line 646: | ||
| - | ====== | + | ====== |
| + | <tabbox rs01> | ||
| < | < | ||
| # the above no gradient | # the above no gradient | ||
| Line 746: | Line 767: | ||
| a | a | ||
| b | b | ||
| - | |||
| </ | </ | ||
| - | ====== R output ===== | + | <tabbox ro01> |
| < | < | ||
| > | > | ||
| Line 1012: | Line 1033: | ||
| {{: | {{: | ||
| {{: | {{: | ||
| + | |||
| + | </ | ||
| + | |||
| ====== Why normalize (scale or make z-score) xi ====== | ====== Why normalize (scale or make z-score) xi ====== | ||
| Line 1032: | Line 1056: | ||
| b & = & \frac{m}{\sigma} \\ | b & = & \frac{m}{\sigma} \\ | ||
| \end{eqnarray*} | \end{eqnarray*} | ||
| - | |||
| - | |||
| - | |||
| - | |||
| - | < | ||
| - | * :gradient descent: | ||
| - | * :gradient descent: | ||
| - | * *:gradient descent: | ||
| - | * :gradient descent: | ||
| - | </ | ||
gradient_descent.1773468311.txt.gz · Last modified: by hkimscil
