diff --git a/Victoria/LifeExpectancyLGA.py b/Victoria/LifeExpectancyLGA.py index 4bb741e..72eef5d 100644 --- a/Victoria/LifeExpectancyLGA.py +++ b/Victoria/LifeExpectancyLGA.py @@ -1,9 +1,10 @@ #!/usr/bin/env python # coding: utf-8 -# In[66]: +# In[72]: +#Getting Data for each LGA from pysal.model import spreg from pysal.lib import weights from pysal.explore import esda @@ -22,9 +23,10 @@ print(df) -# In[67]: +# In[73]: +#Getting LGA Shapefile df_1=geopandas.read_file('.../1270055003_lga_2016_aust_shape/LGA_2016_AUST.shp') variable_names=['GP1000'] @@ -32,9 +34,31 @@ print(df_1) -# In[69]: +# In[74]: + + +#Combining the two Dataframes +df_1['LGA_CODE16']=df_1['LGA_CODE16'].astype(int) + + +df_2=df_1.merge(df, left_on='LGA_CODE16', right_on='Code',how='right') +print(df_2) + + +# In[83]: +#Life Expectancy in each LGA +ax_LifeExpectancy=df_2.plot(color='k', alpha=0.5,figsize=(20,10)) +df_2.plot('LifeExpectancy', ax=ax_LifeExpectancy, legend=True) + +plt.show() + + +# In[84]: + + +#Nonspatial Regression. To see how the number of GPs per 1000 population affects Life Expectancy m1 = spreg.OLS( df[['LifeExpectancy']].values, df[variable_names].values, @@ -44,21 +68,13 @@ print(m1.summary) -# In[70]: - - -df_1['LGA_CODE16']=df_1['LGA_CODE16'].astype(int) +# In[76]: -df_2=df_1.merge(df, left_on='LGA_CODE16', right_on='Code',how='right') -print(df_2) - - -# In[47]: - - -df['residual'] = m1.u -medians = df.groupby( +#This shows the differences between the actual and predicted values from nonspatial regression for each LGA in each region +df_NSR=df_2.copy() +df_NSR['residual'] = m1.u +medians = df_NSR.groupby( "Region" ).residual.median().to_frame( 'region_residual' @@ -70,7 +86,7 @@ 'Region', 'residual', ax = ax, - data=df.merge( + data=df_NSR.merge( medians, how='left', left_on='Region', @@ -79,13 +95,20 @@ 'region_residual'), palette='bwr' ) f.autofmt_xdate() + + +ax_NSR_map=df_NSR.plot(color='k', alpha=0.5,figsize=(20,10)) +df_NSR.plot('residual', ax=ax_NSR_map, legend=True) + plt.show() -# In[74]: +# In[77]: -knn = weights.KNN.from_dataframe(df_2, k=10) +#Weighted KNN. From this we can see clusters of LGAs that are over or under predicted +df_KNN=df_2.copy() +knn = weights.KNN.from_dataframe(df_KNN, k=9) lag_residual = weights.spatial_lag.lag_spatial(knn, m1.u) ax = seaborn.regplot( m1.u.flatten(), @@ -100,7 +123,7 @@ outliers = esda.moran.Moran_Local(m1.u, knn, permutations=9999) error_clusters = (outliers.q % 2 == 1) error_clusters &= (outliers.p_sim <= .001) -df_2.assign( +df_KNN.assign( error_clusters = error_clusters, local_I = outliers.Is ).query( @@ -113,37 +136,77 @@ plt.show() -# In[75]: +# In[78]: -m4 = spreg.OLS_Regimes( - df[['LifeExpectancy']].values, - df[variable_names].values, - df['Region'].tolist(), +#Using Queen instead of Weighted KNN +df_Queen=df_2.copy() +QWeight = weights.Queen.from_dataframe(df_Queen) +lag_residual = weights.spatial_lag.lag_spatial(QWeight, m1.u) +ax = seaborn.regplot( + m1.u.flatten(), + lag_residual.flatten(), + line_kws=dict(color='orangered'), + ci=None +) +ax.set_xlabel('Model Residuals - $u$') +ax.set_ylabel('Spatial Lag of Model Residuals - $W u$'); + + +outliers = esda.moran.Moran_Local(m1.u, QWeight, permutations=9999) +error_clusters = (outliers.q % 2 == 1) +error_clusters &= (outliers.p_sim <= .001) +df_Queen.assign( + error_clusters = error_clusters, + local_I = outliers.Is +).query( + "error_clusters" +).sort_values( + 'local_I' +).plot( + 'local_I', cmap='bwr', marker='.' +); +plt.show() + + +# In[79]: + + +#Spatial Heterogeneity +df_SH=df_2.copy() +mSH = spreg.OLS_Regimes( + df_SH[['LifeExpectancy']].values, + df_SH[variable_names].values, + df_SH['Region'].tolist(), constant_regi='many', cols2regi=[False]*len(variable_names), regime_err_sep=False, name_y='LifeExpectancy', name_x=variable_names ) -df_2['predicted']=m4.predy -print(m4.summary) +df_SH['predicted']=mSH.predy +print(mSH.summary) + + +axSH = df_SH.plot(color='k', alpha=0.5, figsize=(20,10)) +df_SH.plot('predicted', ax=axSH, legend=True) +axSH.set_title("Predicted Life Expectancy") +plt.show() -# In[49]: +# In[80]: -f = 'LifeExpectancy ~ ' + '+ '.join(variable_names)+ ' + Region -1' -m3 = sm.ols(f, data=df).fit() -Region_effects = m3.params.filter(like='Region') +#Plotting Regional Fixed Effects +formula = 'LifeExpectancy ~ ' + '+ '.join(variable_names)+ ' + Region -1' +mSHr = sm.ols(formula, data=df_SH).fit() +Region_effects = mSHr.params.filter(like='Region') stripped = Region_effects.index.str.strip('Region[').str.strip(']') Region_effects.index = stripped Region_effects = Region_effects.to_frame('fixed_effect') -ax = df_2.plot( - color='k', alpha=0.5, figsize=(12,6) -) +axSHfe = df_SH.plot(color='k', alpha=0.5, figsize=(20,10)) -df_2.merge( +df_SH.merge( Region_effects, how='left', left_on='Region', @@ -151,48 +214,63 @@ ).dropna( subset=['fixed_effect'] ).plot( - 'fixed_effect', ax=ax + 'fixed_effect', ax=axSHfe, legend=True ) -ax.set_title("Victoria Regional Fixed Effects") +axSHfe.set_title("Victoria Regional Fixed Effects") plt.show() -# In[77]: - - -m=folium.Map(location=[-37.8,144.97],zoom_start=5) -folium.Choropleth(geo_data=df_2,data=df_2,fill_opacity=0.9,columns=['LGA_NAME16','predicted'], key_on="feature.properties.LGA_NAME16",bins=8, name='predicted',legend_name='predicted').add_to(m) +# In[89]: -style_function = lambda x: {'fillColor': '#ffffff', - 'color':'#000000', - 'fillOpacity': 0.1, - 'weight': 0.1} -highlight_function = lambda x: {'fillColor': '#000000', - 'color':'#000000', - 'fillOpacity': 0.50, - 'weight': 0.1} - -pugj = folium.features.GeoJson( - df_2, - style_function=style_function, - control=False, - highlight_function=highlight_function, - tooltip=folium.features.GeoJsonTooltip(fields=['LGA','predicted','LifeExpectancy'])) - - -m.add_child(pugj) -folium.LayerControl().add_to(m) -m.save('LGALife.html') +#Spatial Dependence with WeightedKNN +wx = df_KNN.filter( + like='GP1000' +).apply( + lambda y: weights.spatial_lag.lag_spatial(knn, y) +).rename( + columns=lambda c: 'w_'+c +) +slx_exog = df_KNN[variable_names].join(wx) +mKNNsd = spreg.OLS( + df_KNN[['LifeExpectancy']].values, + slx_exog.values, + name_y='l_LifeExpectancy', + name_x=slx_exog.columns.tolist() +) -# In[ ]: +df_KNN['predicted_SD']=mKNNsd.predy +axKNNsd = df_KNN.plot(color='k', alpha=0.5, figsize=(20,10)) +df_KNN.plot('predicted_SD', ax=axKNNsd, legend=True) +axKNNsd.set_title("Predicted Life Expectancy") +plt.show() +# In[88]: -# In[ ]: +#Spatial Dependence with Queen +wx = df_Queen.filter( + like='GP1000' +).apply( + lambda y: weights.spatial_lag.lag_spatial(QWeight, y) +).rename( + columns=lambda c: 'w_'+c +) +slx_exog = df_Queen[variable_names].join(wx) +mQueensd = spreg.OLS( + df_Queen[['LifeExpectancy']].values, + slx_exog.values, + name_y='l_LifeExpectancy', + name_x=slx_exog.columns.tolist() +) +df_Queen['predicted_SD']=mQueensd.predy +axQueensd = df_Queen.plot(color='k', alpha=0.5, figsize=(20,10)) +df_Queen.plot('predicted_SD', ax=axQueensd, legend=True) +axQueensd.set_title("Predicted Life Expectancy") +plt.show()