Skip to content

Commit

Permalink
Fix minor bug
Browse files Browse the repository at this point in the history
  • Loading branch information
egemenzeytinci committed Jan 6, 2021
1 parent 36b798b commit fd40235
Showing 1 changed file with 66 additions and 65 deletions.
131 changes: 66 additions & 65 deletions statistics_with_python_specialization/week_9_and_10.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@
"metadata": {},
"outputs": [],
"source": [
"dat = pd.read_csv('data/autism.csv').dropna()"
"dat = pd.read_csv('data/autism.csv').dropna()\n",
"original = dat.copy()"
]
},
{
Expand Down Expand Up @@ -584,47 +585,47 @@
"==============================================================\n",
"Model: MixedLM Dependent Variable: vsae \n",
"No. Observations: 610 Method: REML \n",
"No. Groups: 158 Scale: 410.7496 \n",
"Min. group size: 1 Log-Likelihood: -2752.2106\n",
"No. Groups: 158 Scale: 84.5319 \n",
"Min. group size: 1 Log-Likelihood: -2427.0905\n",
"Max. group size: 5 Converged: Yes \n",
"Mean group size: 3.9 \n",
"--------------------------------------------------------------\n",
" Coef. Std.Err. z P>|z| [0.025 0.975]\n",
"--------------------------------------------------------------\n",
"Intercept 17.421 1.470 11.848 0.000 14.539 20.303\n",
"C(sicdegp)[T.2] 6.359 1.942 3.274 0.001 2.552 10.166\n",
"C(sicdegp)[T.3] 23.403 2.157 10.852 0.000 19.176 27.630\n",
"age 2.731 0.641 4.257 0.000 1.474 3.988\n",
"age:C(sicdegp)[T.2] 1.188 0.843 1.409 0.159 -0.465 2.840\n",
"age:C(sicdegp)[T.3] 4.555 0.931 4.891 0.000 2.730 6.381\n",
"age Var 9.609 0.112 \n",
"Intercept 2.482 1.271 1.952 0.051 -0.010 4.973\n",
"C(sicdegp)[T.2] -1.293 1.674 -0.773 0.440 -4.574 1.987\n",
"C(sicdegp)[T.3] -4.230 1.862 -2.272 0.023 -7.880 -0.580\n",
"age 2.822 0.470 6.006 0.000 1.901 3.743\n",
"age:C(sicdegp)[T.2] 0.985 0.620 1.589 0.112 -0.230 2.199\n",
"age:C(sicdegp)[T.3] 4.463 0.688 6.482 0.000 3.113 5.812\n",
"age Var 8.198 0.124 \n",
"==============================================================\n",
"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: vsae R-squared: 0.431\n",
"Model: OLS Adj. R-squared: 0.426\n",
"Method: Least Squares F-statistic: 91.38\n",
"Date: Sat, 02 Jan 2021 Prob (F-statistic): 1.48e-71\n",
"Time: 22:43:45 Log-Likelihood: -2783.8\n",
"No. Observations: 610 AIC: 5580.\n",
"Df Residuals: 604 BIC: 5606.\n",
"Dep. Variable: vsae R-squared: 0.470\n",
"Model: OLS Adj. R-squared: 0.465\n",
"Method: Least Squares F-statistic: 107.1\n",
"Date: Wed, 06 Jan 2021 Prob (F-statistic): 7.46e-81\n",
"Time: 10:39:15 Log-Likelihood: -2762.1\n",
"No. Observations: 610 AIC: 5536.\n",
"Df Residuals: 604 BIC: 5563.\n",
"Df Model: 5 \n",
"Covariance Type: nonrobust \n",
"=======================================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"---------------------------------------------------------------------------------------\n",
"Intercept 17.4211 1.692 10.294 0.000 14.097 20.745\n",
"C(sicdegp)[T.2] 6.3593 2.236 2.844 0.005 1.969 10.750\n",
"C(sicdegp)[T.3] 23.4032 2.482 9.428 0.000 18.528 28.278\n",
"age 2.5989 0.459 5.666 0.000 1.698 3.500\n",
"age:C(sicdegp)[T.2] 1.4736 0.599 2.458 0.014 0.296 2.651\n",
"age:C(sicdegp)[T.3] 4.4762 0.662 6.757 0.000 3.175 5.777\n",
"Intercept 2.6342 2.864 0.920 0.358 -2.990 8.259\n",
"C(sicdegp)[T.2] -3.2405 3.781 -0.857 0.392 -10.666 4.185\n",
"C(sicdegp)[T.3] -3.1418 4.256 -0.738 0.461 -11.501 5.217\n",
"age 2.6159 0.416 6.285 0.000 1.799 3.433\n",
"age:C(sicdegp)[T.2] 1.6173 0.545 2.968 0.003 0.547 2.688\n",
"age:C(sicdegp)[T.3] 4.3644 0.607 7.190 0.000 3.172 5.556\n",
"==============================================================================\n",
"Omnibus: 315.218 Durbin-Watson: 1.236\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 2400.509\n",
"Skew: 2.181 Prob(JB): 0.00\n",
"Kurtosis: 11.685 Cond. No. 14.7\n",
"Omnibus: 305.670 Durbin-Watson: 1.288\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 2480.083\n",
"Skew: 2.069 Prob(JB): 0.00\n",
"Kurtosis: 11.969 Cond. No. 48.8\n",
"==============================================================================\n",
"\n",
"Notes:\n",
Expand All @@ -638,13 +639,13 @@
" formula='vsae ~ age * C(sicdegp)', \n",
" groups='childid', \n",
" re_formula='0 + age', \n",
" data=dat\n",
" data=original\n",
")\n",
"\n",
"# ols model - no mixed effects\n",
"ols_mod = sm.OLS.from_formula(\n",
" formula='vsae ~ age * C(sicdegp)',\n",
" data=dat\n",
" data=original\n",
")\n",
"\n",
"mlm_result = mlm_mod.fit()\n",
Expand All @@ -665,11 +666,11 @@
"\n",
"\n",
"- First, fit the model WITH random child effects on the slope of interest, using restricted maximum likelihood estimation\n",
" - -2 REML log-likelihood = 5504.4212\n",
" - -2 REML log-likelihood = 4854.18\n",
"- Next, fit the nested model WITHOUT the random child effects on the slope:\n",
" - -2 REML log-likelihood = 5567.6 (higher value = worse fit!)\n",
" - -2 REML log-likelihood = 5524.20 (higher value = worse fit!)\n",
"- Compute the positive difference in the -2 REML log-likelihood values (“REML criterion”) for the models:\n",
" - Test Statistic (TS) = 5567.6 - 5504.4212 = 63.1788\n",
" - Test Statistic (TS) = 5524.20 - 4854.18 = 670.02\n",
"- Refer the TS to a mixture of chi-square distributions with 1 and 2 DF, and equal weight 0.5:"
]
},
Expand All @@ -682,7 +683,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"The p-value of our significance test is: 9.43689570931383e-16\n"
"The p-value of our significance test is: 0.0\n"
]
}
],
Expand All @@ -691,7 +692,7 @@
"# because the chi-squared distribution with zero degrees of freedom has no \n",
"# mass, we multiply the chi-squared distribution with one degree of freedom by \n",
"# 0.5\n",
"p_val = 0.5 * (1 - chi2.cdf(63.1788, 1)) \n",
"p_val = 0.5 * (1 - chi2.cdf(670.02, 1)) \n",
"print(f'The p-value of our significance test is: {p_val}')"
]
},
Expand Down Expand Up @@ -781,43 +782,43 @@
" <tbody>\n",
" <tr>\n",
" <th>C(sicdegp)[T.2]</th>\n",
" <td>6.36</td>\n",
" <td>2.24</td>\n",
" <td>6.36</td>\n",
" <td>1.94</td>\n",
" <td>-3.24</td>\n",
" <td>3.78</td>\n",
" <td>-1.29</td>\n",
" <td>1.67</td>\n",
" <td>5.64</td>\n",
" <td>2.95</td>\n",
" <td>6.36</td>\n",
" <td>2.88</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C(sicdegp)[T.3]</th>\n",
" <td>23.40</td>\n",
" <td>2.48</td>\n",
" <td>23.40</td>\n",
" <td>2.16</td>\n",
" <td>-3.14</td>\n",
" <td>4.26</td>\n",
" <td>-4.23</td>\n",
" <td>1.86</td>\n",
" <td>22.65</td>\n",
" <td>3.55</td>\n",
" <td>23.40</td>\n",
" <td>3.45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Intercept</th>\n",
" <td>17.42</td>\n",
" <td>1.69</td>\n",
" <td>17.42</td>\n",
" <td>1.47</td>\n",
" <td>2.63</td>\n",
" <td>2.86</td>\n",
" <td>2.48</td>\n",
" <td>1.27</td>\n",
" <td>17.59</td>\n",
" <td>1.99</td>\n",
" <td>17.42</td>\n",
" <td>1.91</td>\n",
" </tr>\n",
" <tr>\n",
" <th>age</th>\n",
" <td>2.60</td>\n",
" <td>0.46</td>\n",
" <td>2.73</td>\n",
" <td>0.64</td>\n",
" <td>2.62</td>\n",
" <td>0.42</td>\n",
" <td>2.82</td>\n",
" <td>0.47</td>\n",
" <td>2.60</td>\n",
" <td>0.51</td>\n",
" <td>2.60</td>\n",
Expand All @@ -827,7 +828,7 @@
" <th>age Var</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.02</td>\n",
" <td>0.10</td>\n",
" <td>0.01</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
Expand All @@ -836,21 +837,21 @@
" </tr>\n",
" <tr>\n",
" <th>age:C(sicdegp)[T.2]</th>\n",
" <td>1.47</td>\n",
" <td>0.60</td>\n",
" <td>1.19</td>\n",
" <td>0.84</td>\n",
" <td>1.62</td>\n",
" <td>0.54</td>\n",
" <td>0.98</td>\n",
" <td>0.62</td>\n",
" <td>1.47</td>\n",
" <td>0.78</td>\n",
" <td>1.47</td>\n",
" <td>0.78</td>\n",
" </tr>\n",
" <tr>\n",
" <th>age:C(sicdegp)[T.3]</th>\n",
" <td>4.48</td>\n",
" <td>0.66</td>\n",
" <td>4.56</td>\n",
" <td>0.93</td>\n",
" <td>4.36</td>\n",
" <td>0.61</td>\n",
" <td>4.46</td>\n",
" <td>0.69</td>\n",
" <td>4.48</td>\n",
" <td>0.88</td>\n",
" <td>4.48</td>\n",
Expand Down Expand Up @@ -1063,16 +1064,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
"SDMVSTRA The correlation between two observations in the same cluster is 0.001\n",
"SDMVSTRA The correlation between two observations in the same cluster is 0.002\n",
"SDMVSTRA The correlation between two observations in the same cluster is 0.002\n",
"SDMVSTRA The correlation between two observations in the same cluster is 0.000\n",
"SDMVSTRA The correlation between two observations in the same cluster is -0.001\n",
"SDMVSTRA The correlation between two observations in the same cluster is 0.003\n",
"SDMVSTRA The correlation between two observations in the same cluster is 0.001\n",
"SDMVSTRA The correlation between two observations in the same cluster is -0.001\n",
"SDMVSTRA The correlation between two observations in the same cluster is -0.002\n",
"SDMVSTRA The correlation between two observations in the same cluster is -0.001\n",
"SDMVSTRA The correlation between two observations in the same cluster is -0.001\n",
"SDMVSTRA The correlation between two observations in the same cluster is -0.001\n"
"SDMVSTRA The correlation between two observations in the same cluster is -0.000\n",
"SDMVSTRA The correlation between two observations in the same cluster is -0.001\n",
"SDMVSTRA The correlation between two observations in the same cluster is 0.001\n",
"SDMVSTRA The correlation between two observations in the same cluster is -0.000\n"
]
}
],
Expand Down

0 comments on commit fd40235

Please sign in to comment.