Differences

This shows you the differences between two versions of the page.

--- gradient_descent [2026/05/12 21:44] – [rs.01] hkimscil
+++ gradient_descent [2026/05/12 22:02] (current) – [ro01] hkimscil
@@ Line 149: / Line 149: @@
      pos=4)
-#####
-# with mean square error (mse) instead of sse
-b <- summary(mo)$coefficients[2]
-a <- 0
-# we use sum of square of error which oftentimes become big
-mseloss <- function(predictions, y) {
-  residuals <- (y - predictions)
-  return(mean(residuals^2))
-}
-mses <- c() # for sum  of square residuals
-as <- c() # for as (intercepts)
-for (i in seq(from = -40, to = 40, by = 0.01)) {
-  pred <- predict(x, i, b)
-  mse <- mseloss(pred, y)
-  mses <- append(mses, mse)
-  as <- append(as, i)
-}
-length(mses)
-length(as)
-min(mses)
-max(mses)
-min.pos.mses <- which(mses == min(mses))
-min.pos.mses
-print(as[min.pos.mses])
-summary(mo)
-plot(as, mses, type='l', lwd=2)
-abline(v=as[min.pos.mses])
-text(x = as[min.pos.mses], y = median(mses), col='red',
-     labels = paste(" mse = ", round(min(mses),4),
-     "\n is minimum value
- when a =", as[min.pos.mses]),
-     pos=4)
 </code>
@@ Line 381: / Line 344: @@
 +      pos=4)
 >
-> #####
-> # with mean square error (mse) instead of sse
->
-> b <- summary(mo)$coefficients[2]
-> a <- 0
->
-> # we use sum of square of error which oftentimes become big
-> mseloss <- function(predictions, y) {
-+   residuals <- (y - predictions)
-+   return(mean(residuals^2))
-+ }
->
-> mses <- c() # for sum  of square residuals
-> as <- c() # for as (intercepts)
->
-> for (i in seq(from = -40, to = 40, by = 0.01)) {
-+   pred <- predict(x, i, b)
-+   mse <- mseloss(pred, y)
-+   mses <- append(mses, mse)
-+   as <- append(as, i)
-+ }
-> length(mses)
-[1] 8001
-> length(as)
-[1] 8001
->
-> min(mses)
-[1] 0.1097429
-> max(mses)
-[1] 1731.87
-> min.pos.mses <- which(mses == min(mses))
-> min.pos.mses
-[1] 4162
-> print(as[min.pos.mses])
-[1] 1.61
-> summary(mo)
-Call:
-lm(formula = y ~ x, data = sam)
-Residuals:
-     Min       1Q   Median       3Q      Max
--0.92770 -0.20715  0.03368  0.20038  0.90512
-Coefficients:
-            Estimate Std. Error t value Pr(>|t|)
-(Intercept) 1.614428   0.344622   4.685 9.04e-06 ***
-x           0.014476   0.003212   4.508 1.82e-05 ***
----
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-Residual standard error: 0.3346 on 98 degrees of freedom
-Multiple R-squared:  0.1717,	Adjusted R-squared:  0.1633
-F-statistic: 20.32 on 1 and 98 DF,  p-value: 1.818e-05
-> plot(as, mses, type='l', lwd=2)
-> abline(v=as[min.pos.mses])
-> text(x = as[min.pos.mses], y = median(mses), col='red',
-+      labels = paste(" mse = ", round(min(mses),4),
-+      "\n is minimum value
-+  when a =", as[min.pos.mses]),
-+      pos=4)
->
 </code>
 </tabbox>
-{{pasted:20260512-214238.png?500}}
+{{pasted:20260512-214609.png?500}}
-{{pasted:20260512-214257.png?500}}
+{{pasted:20260512-214629.png?500}}
 위의 두번째 그래프는 a값이 -40에서 40까지 0.1 단위로 변할 때의 sse값을 구한 후에 이를 기록하여 그래프로 만든 것이다. 이렇게 구한 sse 값들 중 최소값일 때의 a값을 regression의 a값으로 추정한 것이다. 이렇게 해서 구한 값은 ''summary(mo)''에서의 a 값과 같다.
@@ Line 522: / Line 423: @@
 > for (i in seq(from = -40, to = 40, by = 0.01)) {
 +   pred <- predict(x, i, b)
-+   res <- residuals(pred, y)
 +   mse <- mseloss(pred, y)
 +   mses <- append(mses, mse)
@@ Line 533: / Line 433: @@
 >
 > min(mses)
-[1] 0.1398253
+[1] 0.1097429
 > max(mses)
-[1] 1800.813
+[1] 1731.87
 > min.pos.mses <- which(mses == min(mses))
 > min.pos.mses
-[1] 4244
+[1] 4162
 > print(as[min.pos.mses])
-[1] 2.43
+[1] 1.61
 > summary(mo)
@@ Line 548: / Line 448: @@
 Residuals:
      Min       1Q   Median       3Q      Max
--1.05708 -0.25025 -0.00149  0.17215  0.85818
+-0.92770 -0.20715  0.03368  0.20038  0.90512
 Coefficients:
             Estimate Std. Error t value Pr(>|t|)
-(Intercept) 2.434345   0.426666   5.706 1.23e-07 ***
+(Intercept) 1.614428   0.344622   4.685 9.04e-06 ***
-x           0.007499   0.003962   1.893   0.0613 .
+x           0.014476   0.003212   4.508 1.82e-05 ***
 ---
 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-Residual standard error: 0.3777 on 98 degrees of freedom
+Residual standard error: 0.3346 on 98 degrees of freedom
-Multiple R-squared:  0.03527,	Adjusted R-squared:  0.02542
+Multiple R-squared:  0.1717,	Adjusted R-squared:  0.1633
-F-statistic: 3.582 on 1 and 98 DF,  p-value: 0.06135
+F-statistic: 20.32 on 1 and 98 DF,  p-value: 1.818e-05
 > plot(as, mses, type='l', lwd=2)
 > abline(v=as[min.pos.mses])
-> text(x = as[min.pos.mses], y = median(mses),
+> text(x = as[min.pos.mses], y = median(mses), col='red',
-+      col='red', labels = paste(" mse = ", min(mses), "\n is minimum value when a =", as[min.pos.mses]),
++      labels = paste(" mse = ", round(min(mses),4),
++      "\n is minimum value
++  when a =", as[min.pos.mses]),
 +      pos=4)
+>
 >
 </code>
 </tabbox>
-{{pasted:20260510-235011.png?500}}
+{{pasted:20260512-214657.png?500}}
 ===== b값 구하기 =====
@@ Line 595: / Line 498: @@
 predict <- function(x, a, b){
   return (a + b * x)
-}
-# And loss function is:
-residuals <- function(predictions, y) {
-  return(y - predictions)
 }
@@ Line 613: / Line 511: @@
 for (i in seq(from = -40, to = 40, by = 0.01)) {
   pred <- predict(x, b, i)
-  res <- residuals(pred, y)
   mse <- mseloss(pred, y)
   mses <- append(mses, mse)
@@ Line 627: / Line 524: @@
 abline(v=bs[min.pos.mses])
 text(x = bs[min.pos.mses], y = median(mses), col='red',
-     labels = paste(" mse = ", min(mses),
+     labels = paste(" mse = ", round(min(mses),4),
-                    "\n is minimum value when b =",
+"\n is minimum value
-                    round(bs[min.pos.mses],4)),
+ when b =", round(bs[min.pos.mses],4)),
      pos=4)
 </code>
@@ Line 656: / Line 552: @@
 > predict <- function(x, a, b){
 +   return (a + b * x)
-+ }
->
-> # And loss function is:
-> residuals <- function(predictions, y) {
-+   return(y - predictions)
 + }
 >
@@ Line 674: / Line 565: @@
 > for (i in seq(from = -40, to = 40, by = 0.01)) {
 +   pred <- predict(x, b, i)
-+   res <- residuals(pred, y)
 +   mse <- mseloss(pred, y)
 +   mses <- append(mses, mse)
@@ Line 681: / Line 571: @@
 >
 > min(mses)
-[1] 0.1627399
+[1] 0.1136351
 > max(mses)
-[1] 18573746
+[1] 18442152
 > min.pos.mses <- which(mses == min(mses))
 > print(bs[min.pos.mses])
@@ Line 694: / Line 584: @@
 Residuals:
      Min       1Q   Median       3Q      Max
--1.05708 -0.25025 -0.00149  0.17215  0.85818
+-0.92770 -0.20715  0.03368  0.20038  0.90512
 Coefficients:
             Estimate Std. Error t value Pr(>|t|)
-(Intercept) 2.434345   0.426666   5.706 1.23e-07 ***
+(Intercept) 1.614428   0.344622   4.685 9.04e-06 ***
-x           0.007499   0.003962   1.893   0.0613 .
+x           0.014476   0.003212   4.508 1.82e-05 ***
 ---
 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-Residual standard error: 0.3777 on 98 degrees of freedom
+Residual standard error: 0.3346 on 98 degrees of freedom
-Multiple R-squared:  0.03527,	Adjusted R-squared:  0.02542
+Multiple R-squared:  0.1717,	Adjusted R-squared:  0.1633
-F-statistic: 3.582 on 1 and 98 DF,  p-value: 0.06135
+F-statistic: 20.32 on 1 and 98 DF,  p-value: 1.818e-05
 > plot(bs, mses, type='l', lwd=2)
 > abline(v=bs[min.pos.mses])
 > text(x = bs[min.pos.mses], y = median(mses), col='red',
-+      labels = paste(" mse = ", min(mses),
++      labels = paste(" mse = ", round(min(mses),4),
-+                     "\n is minimum value when b =",
++ "\n is minimum value
-+                     round(bs[min.pos.mses],4)),
++  when b =", round(bs[min.pos.mses],4)),
 +      pos=4)
 >
@@ Line 806: / Line 696: @@
 # 다시 learning rate값을 곱하여 이를 다음의 a, b
 # 값으로 사용한다. . . . 이를 반복한다.
 a <- rnorm(1)
 b <- rnorm(1)
@@ Line 829: / Line 718: @@
 # Train the model with scaled features
-learning.rate = 1e-1
+learning.rate = 0.1
 # Record Loss for each epoch:
 as = c()
 bs = c()
+das = c()
+dbs = c()
+ep <- c()
 mses = c()
 zx <- (x-mean(x))/sd(x)
-nlen <- 50
+nlen <- 75
 for (epoch in 1:nlen) {
   predictions <- predict(zx, a, b)
-  residual <- residuals(predictions, y)
+  #residual <- residuals(predictions, y)
   loss <- mseloss(predictions, y)
   mses <- append(mses, loss)
   grad <- gradient(zx, y, predictions)
+  db <- grad$b
+  da <- grad$a
+  dbs <- append(dbs, db)
+  das <- append(das, da)
+  ep <- append(ep, epoch)
   step.b <- grad$b * learning.rate
   step.a <- grad$a * learning.rate
@@ Line 853: / Line 754: @@
   bs <- append(bs, b)
 }
-mses
-as
+tmp <- data.frame(ep, das, as, dbs, bs, mse)
-bs
+tmp
+# 위의 루프를 이해했는지 체크하기
+a.init
+b.init
+tmp.p <- predict(zx, a.init, b.init)
+# a.init b.init 일 때의 미분 결과
+# 즉, 기울기(b)와 절편(a) 각각에 대한 미분 값
+tmp.g <- gradient(zx, y, tmp.p)
+tmp.g
+# 이 값에 learning rate 값을 곱한 후에
+a.step <- tmp.g$a*learning.rate
+b.step <- tmp.g$b*learning.rate
+# 이를 원래 a값에서 빼준 값을 다음 순서의
+a.init <- a.init-a.step
+b.init <- b.init-b.step
+# a, b값으로 쓰고, 이 때의 미분 값을 구한 후에
+# (루프 안에서), 미분 결과를 다시 learning rate
+# 로 곱해 준 값을 (두번째 단계의) a, b 값에서
+# 빼 준 값을 다음 단계의 a, b값으로 쓰고, 다시
+# . . . . . . 위의 loop문은 이것을 75번 반복한 것
 # scaled
-a
+# 이렇게 해서 구한 제일 마지막 a, b 값은 x 변인 대신에
-b
+# 표준화한 zx 변인을 사용해서 구한 값이므로 이 값을
+# 다시 x 를 사용했을 때의 값으로 환원을 해야 한다 (unscale).
+data.frame(a, b)
 # unscale coefficients to make them comprehensible
@@ Line 865: / Line 790: @@
 # and
 # http://commres.net/wiki/gradient_descent#how_to_unnormalize_unscale_a_and_b
-#
 a =  a - (mean(x) / sd(x)) * b
 b =  b / sd(x)
-a
+cat(" unscaled a:", a, "\n unscaled b:", b, "\n")
-b
+# 아래 lm 결과와 확인
+summary(mo)
-# changes of estimators
+# 아래는 a와 b를 구하는 과정에서 저장했던 temporary
+# a, b 값들을 (as, bs 변인 내의) 모두 unscale한 것
 as <- as - (mean(x) /sd(x)) * bs
 bs <- bs / sd(x)
@@ Line 878: / Line 804: @@
 bs
+# 이것을 그래프로 나타내기 위해서 parameters 라는
+# 변인으로 저장
+# 이하 if 문들은 그래프에 처음 a, b값과 루프 문에서의
+# 그 값들이 변화하는 것을 모두 나타내기 위해서 사용한
+# 것으로 결과와 상관없다.
 parameters <- data.frame(as, bs, mses)
@@ Line 938: / Line 869: @@
   theme_classic() +
   labs(title = 'Gradient descent. blue: start, red: end, green: gradients')
 </code>
 <tabbox ro01>
@@ Line 952: / Line 882: @@
 > # 다시 learning rate값을 곱하여 이를 다음의 a, b
 > # 값으로 사용한다. . . . 이를 반복한다.
 > a <- rnorm(1)
 > b <- rnorm(1)
@@ Line 975: / Line 904: @@
 >
 > # Train the model with scaled features
-> learning.rate = 1e-1
+> learning.rate = 0.1
 >
 > # Record Loss for each epoch:
 > as = c()
 > bs = c()
+> das = c()
+> dbs = c()
+> ep <- c()
+>
 > mses = c()
 > zx <- (x-mean(x))/sd(x)
 >
-> nlen <- 50
+>
+> nlen <- 75
 > for (epoch in 1:nlen) {
 +   predictions <- predict(zx, a, b)
-+   residual <- residuals(predictions, y)
++   #residual <- residuals(predictions, y)
 +   loss <- mseloss(predictions, y)
 +   mses <- append(mses, loss)
 +
 +   grad <- gradient(zx, y, predictions)
++   db <- grad$b
++   da <- grad$a
++
++   dbs <- append(dbs, db)
++   das <- append(das, da)
++   ep <- append(ep, epoch)
++
 +   step.b <- grad$b * learning.rate
 +   step.a <- grad$a * learning.rate
@@ Line 999: / Line 940: @@
 +   bs <- append(bs, b)
 + }
-> mses
+>
- [1] 13.5910678  8.7489402  5.6498620  3.6663771  2.3968985
+> tmp <- data.frame(ep, das, as, dbs, bs, mse)
- [6]  1.5844012  1.0643830  0.7315586  0.5185427  0.3822072
+> tmp
-[11]  0.2949490  0.2391016  0.2033579  0.1804810  0.1658392
+   ep           das         as          dbs        bs      mse
-[16]  0.1564681  0.1504703  0.1466316  0.1441747  0.1426022
+   1 -8.349964e+00 -0.1795060 6.595337e-01 0.4187294 18404982
-[21]  0.1415958  0.1409517  0.1405394  0.1402755  0.1401067
+   2 -6.679971e+00  0.4884911 5.289460e-01 0.3658348 18404982
-[26]  0.1399986  0.1399294  0.1398851  0.1398568  0.1398386
+   3 -5.343977e+00  1.0228888 4.242147e-01 0.3234133 18404982
-[31]  0.1398270  0.1398196  0.1398149  0.1398118  0.1398099
+   4 -4.275181e+00  1.4504069 3.402202e-01 0.2893913 18404982
-[36]  0.1398086  0.1398078  0.1398073  0.1398070  0.1398068
+   5 -3.420145e+00  1.7924215 2.728566e-01 0.2621057 18404982
-[41]  0.1398066  0.1398066  0.1398065  0.1398065  0.1398064
+   6 -2.736116e+00  2.0660331 2.188310e-01 0.2402226 18404982
-[46]  0.1398064  0.1398064  0.1398064  0.1398064  0.1398064
+   7 -2.188893e+00  2.2849224 1.755025e-01 0.2226723 18404982
-> as
+   8 -1.751114e+00  2.4600338 1.407530e-01 0.2085970 18404982
- [1] 0.3157929 0.9003813 1.3680520 1.7421886 2.0414978 2.2809453
+   9 -1.400891e+00  2.6001229 1.128839e-01 0.1973086 18404982
- [7] 2.4725032 2.6257495 2.7483466 2.8464243 2.9248864 2.9876561
+10 -1.120713e+00  2.7121942 9.053288e-02 0.1882553 18404982
-[13] 3.0378718 3.0780445 3.1101825 3.1358930 3.1564614 3.1729161
+ 11 -8.965705e-01  2.8018513 7.260737e-02 0.1809946 18404982
-[19] 3.1860799 3.1966109 3.2050357 3.2117755 3.2171674 3.2214809
+12 -7.172564e-01  2.8735769 5.823111e-02 0.1751715 18404982
-[25] 3.2249317 3.2276923 3.2299008 3.2316677 3.2330811 3.2342119
+13 -5.738051e-01  2.9309574 4.670135e-02 0.1705014 18404982
-[31] 3.2351165 3.2358402 3.2364191 3.2368823 3.2372528 3.2375492
+14 -4.590441e-01  2.9768619 3.745448e-02 0.1667559 18404982
-[37] 3.2377863 3.2379761 3.2381278 3.2382492 3.2383464 3.2384241
+15 -3.672353e-01  3.0135854 3.003849e-02 0.1637521 18404982
-[43] 3.2384862 3.2385360 3.2385758 3.2386076 3.2386330 3.2386534
+ 16 -2.937882e-01  3.0429642 2.409087e-02 0.1613430 18404982
-[49] 3.2386697 3.2386827
+17 -2.350306e-01  3.0664673 1.932088e-02 0.1594109 18404982
-> bs
+18 -1.880245e-01  3.0852697 1.549535e-02 0.1578613 18404982
- [1] 0.32915377 0.27820741 0.23734843 0.20457952 0.17829886
+19 -1.504196e-01  3.1003117 1.242727e-02 0.1566186 18404982
- [6] 0.15722177 0.14031795 0.12676108 0.11588847 0.10716864
+20 -1.203357e-01  3.1123452 9.966668e-03 0.1556220 18404982
-[11] 0.10017533 0.09456670 0.09006858 0.08646109 0.08356788
+ 21 -9.626853e-02  3.1219721 7.993268e-03 0.1548226 18404982
-[16] 0.08124752 0.07938660 0.07789414 0.07669718 0.07573723
+22 -7.701482e-02  3.1296736 6.410601e-03 0.1541816 18404982
-[21] 0.07496734 0.07434989 0.07385470 0.07345756 0.07313905
+23 -6.161186e-02  3.1358348 5.141302e-03 0.1536674 18404982
-[26] 0.07288360 0.07267873 0.07251443 0.07238266 0.07227698
+24 -4.928949e-02  3.1407637 4.123324e-03 0.1532551 18404982
-[31] 0.07219222 0.07212425 0.07206973 0.07202601 0.07199095
+25 -3.943159e-02  3.1447069 3.306906e-03 0.1529244 18404982
-[36] 0.07196282 0.07194027 0.07192218 0.07190768 0.07189604
+ 26 -3.154527e-02  3.1478614 2.652139e-03 0.1526592 18404982
-[41] 0.07188671 0.07187923 0.07187323 0.07186841 0.07186455
+27 -2.523622e-02  3.1503850 2.127015e-03 0.1524465 18404982
-[46] 0.07186146 0.07185897 0.07185698 0.07185539 0.07185410
+28 -2.018897e-02  3.1524039 1.705866e-03 0.1522759 18404982
+29 -1.615118e-02  3.1540190 1.368105e-03 0.1521391 18404982
+30 -1.292094e-02  3.1553111 1.097220e-03 0.1520294 18404982
+ 31 -1.033675e-02  3.1563448 8.799704e-04 0.1519414 18404982
+32 -8.269403e-03  3.1571717 7.057362e-04 0.1518708 18404982
+33 -6.615523e-03  3.1578333 5.660005e-04 0.1518142 18404982
+34 -5.292418e-03  3.1583625 4.539324e-04 0.1517688 18404982
+35 -4.233935e-03  3.1587859 3.640538e-04 0.1517324 18404982
+ 36 -3.387148e-03  3.1591246 2.919711e-04 0.1517032 18404982
+37 -2.709718e-03  3.1593956 2.341608e-04 0.1516798 18404982
+38 -2.167774e-03  3.1596124 1.877970e-04 0.1516610 18404982
+39 -1.734220e-03  3.1597858 1.506132e-04 0.1516460 18404982
+40 -1.387376e-03  3.1599245 1.207918e-04 0.1516339 18404982
+ 41 -1.109901e-03  3.1600355 9.687500e-05 0.1516242 18404982
+42 -8.879204e-04  3.1601243 7.769375e-05 0.1516164 18404982
+43 -7.103363e-04  3.1601954 6.231039e-05 0.1516102 18404982
+44 -5.682691e-04  3.1602522 4.997293e-05 0.1516052 18404982
+45 -4.546153e-04  3.1602977 4.007829e-05 0.1516012 18404982
+ 46 -3.636922e-04  3.1603340 3.214279e-05 0.1515980 18404982
+47 -2.909538e-04  3.1603631 2.577852e-05 0.1515954 18404982
+48 -2.327630e-04  3.1603864 2.067437e-05 0.1515933 18404982
+49 -1.862104e-04  3.1604050 1.658085e-05 0.1515917 18404982
+50 -1.489683e-04  3.1604199 1.329784e-05 0.1515903 18404982
+51 -1.191747e-04  3.1604318 1.066487e-05 0.1515893 18404982
+52 -9.533973e-05  3.1604414 8.553223e-06 0.1515884 18404982
+53 -7.627178e-05  3.1604490 6.859685e-06 0.1515877 18404982
+54 -6.101743e-05  3.1604551 5.501467e-06 0.1515872 18404982
+55 -4.881394e-05  3.1604600 4.412177e-06 0.1515867 18404982
+56 -3.905115e-05  3.1604639 3.538566e-06 0.1515864 18404982
+57 -3.124092e-05  3.1604670 2.837930e-06 0.1515861 18404982
+58 -2.499274e-05  3.1604695 2.276020e-06 0.1515859 18404982
+59 -1.999419e-05  3.1604715 1.825368e-06 0.1515857 18404982
+60 -1.599535e-05  3.1604731 1.463945e-06 0.1515855 18404982
+61 -1.279628e-05  3.1604744 1.174084e-06 0.1515854 18404982
+62 -1.023703e-05  3.1604754 9.416152e-07 0.1515853 18404982
+63 -8.189621e-06  3.1604762 7.551754e-07 0.1515853 18404982
+64 -6.551696e-06  3.1604769 6.056507e-07 0.1515852 18404982
+65 -5.241357e-06  3.1604774 4.857318e-07 0.1515851 18404982
+66 -4.193086e-06  3.1604778 3.895569e-07 0.1515851 18404982
+67 -3.354469e-06  3.1604782 3.124247e-07 0.1515851 18404982
+68 -2.683575e-06  3.1604784 2.505646e-07 0.1515850 18404982
+69 -2.146860e-06  3.1604786 2.009528e-07 0.1515850 18404982
+70 -1.717488e-06  3.1604788 1.611641e-07 0.1515850 18404982
+71 -1.373990e-06  3.1604789 1.292536e-07 0.1515850 18404982
+72 -1.099192e-06  3.1604791 1.036614e-07 0.1515850 18404982
+73 -8.793538e-07  3.1604791 8.313646e-08 0.1515850 18404982
+74 -7.034830e-07  3.1604792 6.667544e-08 0.1515850 18404982
+75 -5.627864e-07  3.1604793 5.347370e-08 0.1515850 18404982
+>
+> # 위의 루프를 이해했는지 체크하기
+> a.init
+[1] -1.014502
+> b.init
+[1] 0.4846828
+> tmp.p <- predict(zx, a.init, b.init)
+> # a.init b.init 일 때의 미분 결과
+> # 즉, 기울기(b)와 절편(a) 각각에 대한 미분 값
+> tmp.g <- gradient(zx, y, tmp.p)
+> tmp.g
+$b
+[1] 0.6595337
+$a
+[1] -8.349964
+>
+> # 이 값에 learning rate 값을 곱한 후에
+> a.step <- tmp.g$a*learning.rate
+> b.step <- tmp.g$b*learning.rate
+>
+> # 이를 원래 a값에서 빼준 값을 다음 순서의
+> a.init <- a.init-a.step
+> b.init <- b.init-b.step
+> # a, b값으로 쓰고, 이 때의 미분 값을 구한 후에
+> # (루프 안에서), 미분 결과를 다시 learning rate
+> # 로 곱해 준 값을 (두번째 단계의) a, b 값에서
+> # 빼 준 값을 다음 단계의 a, b값으로 쓰고, 다시
+> # . . . . . . 위의 loop문은 이것을 75번 반복한 것
 >
 > # scaled
-> a
+> # 이렇게 해서 구한 제일 마지막 a, b 값은 x 변인 대신에
-[1] 3.238683
+> # 표준화한 zx 변인을 사용해서 구한 값이므로 이 값을
-> b
+> # 다시 x 를 사용했을 때의 값으로 환원을 해야 한다 (unscale).
-[1] 0.0718541
+> data.frame(a, b)
+         a        b
+ 3.160479 0.151585
 >
 > # unscale coefficients to make them comprehensible
@@ Line 1042: / Line 1062: @@
 > # and
 > # http://commres.net/wiki/gradient_descent#how_to_unnormalize_unscale_a_and_b
-> #
 > a =  a - (mean(x) / sd(x)) * b
 > b =  b / sd(x)
-> a
+> cat(" unscaled a:", a, "\n unscaled b:", b, "\n")
-[1] 2.434234
+ unscaled a: 1.614428
-> b
+ unscaled b: 0.01447618
-[1] 0.00749967
+> # 아래 lm 결과와 확인
+> summary(mo)
+Call:
+lm(formula = y ~ x, data = sam)
+Residuals:
+     Min       1Q   Median       3Q      Max
+-0.92770 -0.20715  0.03368  0.20038  0.90512
+Coefficients:
+            Estimate Std. Error t value Pr(>|t|)
+(Intercept) 1.614428   0.344622   4.685 9.04e-06 ***
+x           0.014476   0.003212   4.508 1.82e-05 ***
+---
+Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+Residual standard error: 0.3346 on 98 degrees of freedom
+Multiple R-squared:  0.1717,	Adjusted R-squared:  0.1633
+F-statistic: 20.32 on 1 and 98 DF,  p-value: 1.818e-05
 >
-> # changes of estimators
+> # 아래는 a와 b를 구하는 과정에서 저장했던 temporary
+> # a, b 값들을 (as, bs 변인 내의) 모두 unscale한 것
 > as <- as - (mean(x) /sd(x)) * bs
 > bs <- bs / sd(x)
 >
 > as
- [1] -3.36927396 -2.21431158 -1.28920093 -0.54819753  0.04533893
+ [1] -4.45022736 -3.24274555 -2.27568113 -1.50116427 -0.88085678 -0.38405420  0.01383425  0.33250300
- [6]  0.52075655  0.90156258  1.20658590  1.45090812  1.64660934
+ [9]  0.58772512  0.79213308  0.95584412  1.08696106  1.19197340  1.27607853  1.34343904  1.39738872
-[11]  1.80336556  1.92892713  2.02950197  2.11006255  2.17459180
+[17]  1.44059760  1.47520412  1.50292095  1.52511976  1.54289914  1.55713894  1.56854386  1.57767829
-[16]  2.22627998  2.26768248  2.30084615  2.32741050  2.34868878
+[25]  1.58499424  1.59085375  1.59554676  1.59930551  1.60231600  1.60472717  1.60665835  1.60820509
-[21]  2.36573290  2.37938544  2.39032129  2.39908106  2.40609777
+[33]  1.60944392  1.61043613  1.61123083  1.61186734  1.61237714  1.61278545  1.61311249  1.61337442
-[26]  2.41171827  2.41622039  2.41982667  2.42271538  2.42502929
+[41]  1.61358422  1.61375225  1.61388684  1.61399463  1.61408097  1.61415012  1.61420551  1.61424987
-[31]  2.42688279  2.42836748  2.42955676  2.43050941  2.43127250
+[49]  1.61428541  1.61431387  1.61433666  1.61435492  1.61436954  1.61438126  1.61439064  1.61439815
-[36]  2.43188376  2.43237340  2.43276561  2.43307979  2.43333146
+[57]  1.61440417  1.61440899  1.61441285  1.61441594  1.61441842  1.61442041  1.61442199  1.61442327
-[41]  2.43353305  2.43369454  2.43382389  2.43392751  2.43401051
+[65]  1.61442429  1.61442510  1.61442576  1.61442628  1.61442670  1.61442704  1.61442731  1.61442752
-[46]  2.43407700  2.43413026  2.43417292  2.43420710  2.43423447
+[73]  1.61442769  1.61442783  1.61442794
 > bs
- [1] 0.034354957 0.029037503 0.024772905 0.021352697 0.018609691
+ [1] 0.03998814 0.03493677 0.03088558 0.02763651 0.02503077 0.02294096 0.02126493 0.01992076 0.01884273
- [6] 0.016409799 0.014645487 0.013230508 0.012095695 0.011185575
+[10] 0.01797815 0.01728476 0.01672866 0.01628267 0.01592498 0.01563812 0.01540805 0.01522354 0.01507556
-[11] 0.010455658 0.009870265 0.009400780 0.009024253 0.008722279
+[19] 0.01495689 0.01486170 0.01478537 0.01472415 0.01467505 0.01463567 0.01460409 0.01457877 0.01455845
-[16] 0.008480095 0.008285864 0.008130090 0.008005160 0.007904966
+[28] 0.01454216 0.01452910 0.01451862 0.01451021 0.01450348 0.01449807 0.01449373 0.01449026 0.01448747
-[21] 0.007824610 0.007760165 0.007708480 0.007667028 0.007633784
+[37] 0.01448523 0.01448344 0.01448200 0.01448085 0.01447992 0.01447918 0.01447859 0.01447811 0.01447773
-[26] 0.007607122 0.007585740 0.007568591 0.007554837 0.007543807
+[46] 0.01447742 0.01447717 0.01447698 0.01447682 0.01447669 0.01447659 0.01447651 0.01447644 0.01447639
-[31] 0.007534961 0.007527866 0.007522176 0.007517613 0.007513953
+[55] 0.01447635 0.01447631 0.01447629 0.01447626 0.01447625 0.01447623 0.01447622 0.01447621 0.01447621
-[36] 0.007511018 0.007508664 0.007506776 0.007505262 0.007504047
+[64] 0.01447620 0.01447619 0.01447619 0.01447619 0.01447619 0.01447618 0.01447618 0.01447618 0.01447618
-[41] 0.007503073 0.007502292 0.007501666 0.007501164 0.007500761
+[73] 0.01447618 0.01447618 0.01447618
-[46] 0.007500438 0.007500178 0.007499971 0.007499804 0.007499670
 >
+> # 이것을 그래프로 나타내기 위해서 parameters 라는
+> # 변인으로 저장
+> # 이하 if 문들은 그래프에 처음 a, b값과 루프 문에서의
+> # 그 값들이 변화하는 것을 모두 나타내기 위해서 사용한
+> # 것으로 결과와 상관없다.
 > parameters <- data.frame(as, bs, mses)
 >
 > cat(paste0("Intercept: ", a, "\n", "Slope: ", b, "\n"))
-Intercept: 2.43423447454751
+Intercept: 1.61442794384896
-Slope: 0.00749967017274726
+Slope: 0.0144761780722265
 > summary(lm(y~x, data=sam))$coefficients
-               Estimate  Std. Error  t value     Pr(>|t|)
+              Estimate  Std. Error  t value     Pr(>|t|)
-(Intercept) 2.434344703 0.426665789 5.705507 1.233509e-07
+(Intercept) 1.61442839 0.344622352 4.684631 9.041202e-06
-x           0.007499129 0.003962082 1.892724 6.134554e-02
+x           0.01447618 0.003211564 4.507515 1.817763e-05
 >
 > if (as[nlen] > as[1]) {
@@ Line 1105: / Line 1149: @@
 + }
 > max(y)
-[1] 4.090577
+[1] 3.878048
 > min(y)
-[1] 2.185787
+[1] 2.092215
 > y.max
-[1] 4.090577
+[1] 3.878048
 > y.min
-[1] -3.369274
+[1] -4.450227
 >
 > ggplot(sam, aes(x = x, y = y)) +
@@ Line 1137: / Line 1181: @@
 Residuals:
      Min       1Q   Median       3Q      Max
--1.05708 -0.25025 -0.00149  0.17215  0.85818
+-0.92770 -0.20715  0.03368  0.20038  0.90512
 Coefficients:
             Estimate Std. Error t value Pr(>|t|)
-(Intercept) 2.434345   0.426666   5.706 1.23e-07 ***
+(Intercept) 1.614428   0.344622   4.685 9.04e-06 ***
-x           0.007499   0.003962   1.893   0.0613 .
+x           0.014476   0.003212   4.508 1.82e-05 ***
 ---
 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-Residual standard error: 0.3777 on 98 degrees of freedom
+Residual standard error: 0.3346 on 98 degrees of freedom
-Multiple R-squared:  0.03527,	Adjusted R-squared:  0.02542
+Multiple R-squared:  0.1717,	Adjusted R-squared:  0.1633
-F-statistic: 3.582 on 1 and 98 DF,  p-value: 0.06135
+F-statistic: 20.32 on 1 and 98 DF,  p-value: 1.818e-05
 >
 > data.frame(head(as), head(bs))
-     head.as.   head.bs.
+    head.as.   head.bs.
--3.36927396 0.03435496
+-4.4502274 0.03998814
--2.21431158 0.02903750
+-3.2427456 0.03493677
--1.28920093 0.02477290
+-2.2756811 0.03088558
--0.54819753 0.02135270
+-1.5011643 0.02763651
-  0.04533893 0.01860969
+ -0.8808568 0.02503077
-  0.52075655 0.01640980
+ -0.3840542 0.02294096
 > data.frame(tail(as), tail(bs))
-  tail.as.    tail.bs.
+  tail.as.   tail.bs.
- 2.434011 0.007500761
+ 1.614427 0.01447618
- 2.434077 0.007500438
+ 1.614427 0.01447618
- 2.434130 0.007500178
+ 1.614428 0.01447618
- 2.434173 0.007499971
+ 1.614428 0.01447618
- 2.434207 0.007499804
+ 1.614428 0.01447618
- 2.434234 0.007499670
+ 1.614428 0.01447618
 > a
-[1] 2.434234
+[1] 1.614428
 > b
-[1] 0.00749967
+[1] 0.01447618
 >
 > ggplot(sam, aes(x = x, y = y)) +
@@ Line 1181: / Line 1225: @@
 +   theme_classic() +
 +   labs(title = 'Gradient descent. blue: start, red: end, green: gradients')
 >
 </code>
-{{pasted:20260510-234416.png}}
-{{pasted:20260510-234444.png}}
 </tabbox>
-{{pasted:20260506-041735.png?500}}
+{{pasted:20260512-215110.png?500}}
+{{pasted:20260512-215122.png?500}}