Differences

This shows you the differences between two versions of the page.

--- r:logistic_regression_analysis [2023/12/04 17:11] – hkimscil
+++ r:logistic_regression_analysis [2023/12/07 08:00] (current) – [e.g. 1] hkimscil
@@ Line 1: / Line 1: @@
-====== Logistic Regression Analysis ======
+====== e.g. 1 ======
-\begin{align*}
-\displaystyle ln \left( {\frac{p}{(1-p)}} \right) = a + bX
-\end{align*}
-  * p = 변인 X가 A일 확률
+<code>
-  * 1-p = 변인 X가 A가 아닐 확률
+> Logit(Turnover ~ JS, data=td)
-  * ln 은 e를 밑으로 하는 log 를 말한다
-  * $ln \left( {\frac{p}{(1-p)}} \right) $ 을 $\text{logit(p)}$ 로 부른다
-\begin{align*}
+Data Frame:  mydata
-\text{logit(p)} & = ln \left( {\frac{p}{(1-p)}} \right) = a + bX   \\
-\frac{p}{1-p} & = e^{a+bX} \\
-p  & =  e^{a+bX} * (1-p) \\
-p  & =  e^{a+bX} - p * \left(e^{a+bX} \right) \\
-p + p * \left(e^{a+bX} \right) & =  e^{a+bX}  \\
-p * \left(1 + e^{a+bX} \right) & =  e^{a+bX}  \\
-p & =  \frac {e^{a+bX}} { \left(1 + e^{a+bX} \right)} \\
-\end{align*}
-  * 위에서 계수 b값이 충분히 커서 X 가 커지면 p 값은 1로 수렴하고
+Response Variable:   Turnover
-  * b값이 충분히 작아서 X가 아주 작아지면 p 값은 0에 가까이 간다
+Predictor Variable 1:  JS
-즉 p의 그래프는 아래와 같은 그래프의 곡선이다.
+Number of cases (rows) of data:  99
-<code>
+Number of cases retained for analysis:  98
-install.packages("sigmoid")
-library(sigmoid)
-library(ggplot2)
-input <- seq(-5, 5, 0.01)
-df = data.frame(input, logistic(input), Gompertz(input))
-ggplot( df, aes(input, logistic(input)) ) +
-  geom_line(color="red")
-</code>
-{{:r:pasted:20231204-170156.png?500}}
-여기서
-\begin{align*}
-y & = ln(x) \\
-& = log_e {x} \\
-x & = e^{y} \\
-\text{if } \; x = 1, y = 0 \\
-ln(1) = 0
-\end{align*}
+   BASIC ANALYSIS
+-- Estimated Model of Turnover for the Logit of Reference Group Membership
+             Estimate    Std Err  z-value  p-value   Lower 95%   Upper 95%
+(Intercept)   -1.8554     0.6883   -2.695    0.007     -3.2044     -0.5063
+         JS    0.4378     0.1958    2.236    0.025      0.0540      0.8216
+-- Odds Ratios and Confidence Intervals
+             Odds Ratio   Lower 95%   Upper 95%
+(Intercept)      0.1564      0.0406      0.6027
+         JS      1.5492      1.0555      2.2740
+-- Model Fit
+    Null deviance: 131.746 on 97 degrees of freedom
+Residual deviance: 126.341 on 96 degrees of freedom
+AIC: 130.3413
+Number of iterations to convergence: 4
+   ANALYSIS OF RESIDUALS AND INFLUENCE
+Data, Fitted, Residual, Studentized Residual, Dffits, Cook's Distance
+   [sorted by Cook's Distance]
+   [res_rows = 20 out of 98 cases (rows) of data]
+--------------------------------------------------------------------
+     JS Turnover fitted residual rstudent  dffits   cooks
+6.00     quit 0.6838  -0.6838  -1.5688 -0.3725 0.08496
+  1.38     stay 0.2225   0.7775   1.7682  0.2877 0.06241
+5.48     quit 0.6327  -0.6327  -1.4476 -0.2949 0.04889
+5.43     quit 0.6276  -0.6276  -1.4363 -0.2877 0.04618
+1.72     stay 0.2493   0.7507   1.6920  0.2486 0.04353
+1.77     stay 0.2534   0.7466   1.6810  0.2429 0.04117
+1.96     stay 0.2695   0.7305   1.6393  0.2219 0.03314
+  4.96     quit 0.5783  -0.5783  -1.3332 -0.2239 0.02609
+4.88     quit 0.5698  -0.5698  -1.3162 -0.2138 0.02353
+4.66     quit 0.5460  -0.5460  -1.2703 -0.1875 0.01757
+4.65     quit 0.5449  -0.5449  -1.2682 -0.1863 0.01733
+2.52     stay 0.3203   0.6797   1.5199  0.1668 0.01693
+5.59     stay 0.6438   0.3562   0.9554  0.2021 0.01693
+5.48     stay 0.6327   0.3673   0.9731  0.1985 0.01648
+2.56     stay 0.3242   0.6758   1.5115  0.1635 0.01615
+2.57     stay 0.3251   0.6749   1.5095  0.1626 0.01596
+2.65     stay 0.3329   0.6671   1.4929  0.1563 0.01454
+5.04     stay 0.5869   0.4131   1.0457  0.1813 0.01431
+4.46     quit 0.5243  -0.5243  -1.2296 -0.1656 0.01336
+4.43     quit 0.5210  -0.5210  -1.2235 -0.1625 0.01282
+   PREDICTION
+Probability threshold for classification stay: 0.5
+  0: quit
+  1: stay
+Data, Fitted Values, Standard Errors
+   [sorted by fitted value]
+   [pred_all=TRUE to see all intervals displayed]
+--------------------------------------------------------------------
+     JS Turnover label fitted std.err
+0.23     quit     0 0.1475 0.08116
+0.67     quit     0 0.1734 0.08096
+1.05     quit     0 0.1985 0.07904
+1.19     quit     0 0.2084 0.07790
+... for the rows of data where fitted is close to 0.5 ...
+     JS Turnover label fitted std.err
+4.14     stay     0 0.4893 0.06579
+4.15     stay     0 0.4903 0.06609
+4.26     quit     1 0.5024 0.06946
+4.41     stay     1 0.5188 0.07431
+4.43     quit     1 0.5210 0.07497
+... for the last 4 rows of sorted data ...
+     JS Turnover label fitted std.err
+5.48     stay     1 0.6327  0.1090
+5.48     quit     1 0.6327  0.1090
+5.59     stay     1 0.6438  0.1120
+6.00     quit     1 0.6838  0.1215
+--------------------------------------------------------------------
+----------------------------
+Specified confusion matrices
+----------------------------
+Probability threshold for predicting stay: 0.5
+Corresponding cutoff threshold for JS: 4.238
+                 Baseline         Predicted
+---------------------------------------------------
+                Total  %Tot        0      1  %Correct
+---------------------------------------------------
+           1       39  39.8       31      8     20.5
+Turnover   0       59  60.2       49     10     83.1
+---------------------------------------------------
+         Total     98                           58.2
+Accuracy: 58.16
+Sensitivity: 20.51
+Precision: 44.44
+>
+</code>
+====== e.g. 2 ======
 <code>d <- subset(iris, Species == "virginica" | Species == "versicolor")
 head(d)