diff --git a/docs/conf.py b/docs/conf.py index ddac4fa8..9bab3f7c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -106,8 +106,8 @@ # (Optional) PySAL favicon html_favicon = "_static/images/pysal_favicon.ico" -html_logo = '_static/images/pysal_logo.png' -icon_links_label = 'inequality' +html_logo = "_static/images/pysal_logo.png" +icon_links_label = "inequality" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the diff --git a/docs/user-guide/measure/gini.ipynb b/docs/user-guide/measure/gini.ipynb index f734a896..497213c6 100644 --- a/docs/user-guide/measure/gini.ipynb +++ b/docs/user-guide/measure/gini.ipynb @@ -104,16 +104,16 @@ "\n", "# we will use greedy from mapclassify\n", "# states to ensure contiguous states are of a different color\n", - "sgdf = gdf.sort_values(by='NAME')\n", + "sgdf = gdf.sort_values(by=\"NAME\")\n", "sgdf.reset_index(inplace=True)\n", - "sgdf['label'] = range(1, 33)\n", - "sgdf['greedy'] = mapclassify.greedy(sgdf)\n", + "sgdf[\"label\"] = range(1, 33)\n", + "sgdf[\"greedy\"] = mapclassify.greedy(sgdf)\n", "\n", "\n", "font_size = 9\n", "outside = [9, 29]\n", "oc = [(-103, 17.5), (-95, 22.5)]\n", - "oe = [(-102.55, 17.49),(-95.5, 22.1)] \n", + "oe = [(-102.55, 17.49), (-95.5, 22.1)]\n", "oinfo = zip(outside, oc)\n", "\n", "\n", @@ -121,52 +121,78 @@ "\n", "\n", "import matplotlib.pyplot as plt\n", - "sgdf['centroid'] = sgdf.centroid\n", + "\n", + "sgdf[\"centroid\"] = sgdf.centroid\n", "ax = sgdf.plot(\n", " figsize=(8, 12),\n", " column=\"greedy\",\n", " categorical=True,\n", " cmap=\"Set3\",\n", - " #legend=True,\n", + " # legend=True,\n", " edgecolor=\"w\",\n", ")\n", "\n", "\n", "table = []\n", "for idx, row in sgdf.iterrows():\n", - " centroid = row['centroid']\n", + " centroid = row[\"centroid\"]\n", " table.append(f'{idx+1:2d} {row[\"NAME\"]}')\n", - " if idx+1 not in outside:\n", - " ax.text(centroid.x, centroid.y, str(idx+1), ha='center',\n", - " va='center', fontsize=font_size, color='black')\n", + " if idx + 1 not in outside:\n", + " ax.text(\n", + " centroid.x,\n", + " centroid.y,\n", + " str(idx + 1),\n", + " ha=\"center\",\n", + " va=\"center\",\n", + " fontsize=font_size,\n", + " color=\"black\",\n", + " )\n", "\n", "\n", "i = 0\n", "for out in oinfo:\n", " idx, coords = out\n", - " ax.text(coords[0], coords[1], str(idx), ha='center',\n", - " va='center', fontsize=font_size, color='black')\n", + " ax.text(\n", + " coords[0],\n", + " coords[1],\n", + " str(idx),\n", + " ha=\"center\",\n", + " va=\"center\",\n", + " fontsize=font_size,\n", + " color=\"black\",\n", + " )\n", " start_point = coords\n", - " end_point = sgdf.centroid[idx-1]\n", + " end_point = sgdf.centroid[idx - 1]\n", "\n", - " \n", " start_point = oe[i]\n", " line = LineString([start_point, end_point])\n", - " \n", - " \n", + "\n", " line_gdf = gpd.GeoSeries([line])\n", - " \n", - " \n", - " line_gdf.plot(ax=ax, color='red', linewidth=2)\n", - " i+=1\n", + "\n", + " line_gdf.plot(ax=ax, color=\"red\", linewidth=2)\n", + " i += 1\n", "\n", "for i, label in enumerate(table):\n", " if i < 16:\n", - " ax.text(-120, 20-i*1, label, ha='left',\n", - " va='center', fontsize=font_size, color='black');\n", + " ax.text(\n", + " -120,\n", + " 20 - i * 1,\n", + " label,\n", + " ha=\"left\",\n", + " va=\"center\",\n", + " fontsize=font_size,\n", + " color=\"black\",\n", + " )\n", " else:\n", - " ax.text(-110, 20-(i-16)*1, label, ha='left',\n", - " va='center', fontsize=font_size, color='black');\n", + " ax.text(\n", + " -110,\n", + " 20 - (i - 16) * 1,\n", + " label,\n", + " ha=\"left\",\n", + " va=\"center\",\n", + " fontsize=font_size,\n", + " color=\"black\",\n", + " )\n", "ax.set_axis_off()" ] }, @@ -193,13 +219,19 @@ "fig, axes = plt.subplots(1, 2, figsize=(8, 4)) # Two subplots in one row\n", "\n", "# Left column: Choropleth map\n", - "ax_map = gdf.plot(column=\"PCGDP1940\", k=5, scheme=\"Quantiles\", legend=True,\n", - " legend_kwds={'fmt': \"{:.0f}\"}, ax=axes[0])\n", + "ax_map = gdf.plot(\n", + " column=\"PCGDP1940\",\n", + " k=5,\n", + " scheme=\"Quantiles\",\n", + " legend=True,\n", + " legend_kwds={\"fmt\": \"{:.0f}\"},\n", + " ax=axes[0],\n", + ")\n", "ax_map.set_axis_off()\n", "ax_map.set_title(\"PC GDP 1940\")\n", "\n", "# Right column: Kernel density plot\n", - "sns.kdeplot(data=gdf['PCGDP1940'], ax=axes[1], fill=True, bw_adjust=0.5)\n", + "sns.kdeplot(data=gdf[\"PCGDP1940\"], ax=axes[1], fill=True, bw_adjust=0.5)\n", "axes[1].set_title(\"Kernel Density: PC GDP 1940\")\n", "axes[1].set_xlabel(\"Per Capita GDP\")\n", "axes[1].set_ylabel(\"Density\")\n", @@ -257,7 +289,8 @@ "outputs": [], "source": [ "from inequality.schutz import Schutz\n", - "s = Schutz(gdf, 'PCGDP1940')\n", + "\n", + "s = Schutz(gdf, \"PCGDP1940\")\n", "s.plot()" ] }, @@ -276,7 +309,7 @@ "metadata": {}, "outputs": [], "source": [ - "inequality.gini.Gini(gdf['PCGDP2000']).g" + "inequality.gini.Gini(gdf[\"PCGDP2000\"]).g" ] }, { @@ -285,7 +318,7 @@ "metadata": {}, "outputs": [], "source": [ - "s = Schutz(gdf, 'PCGDP2000')\n", + "s = Schutz(gdf, \"PCGDP2000\")\n", "s.plot()" ] }, @@ -307,7 +340,8 @@ "decades = range(1940, 2010, 10)\n", "ginis = [inequality.gini.Gini(gdf[\"PCGDP%s\" % decade]).g for decade in decades]\n", "import pandas as pd\n", - "ginis_df = pd.DataFrame(data=ginis, columns=['Gini'], index = list(decades))\n", + "\n", + "ginis_df = pd.DataFrame(data=ginis, columns=[\"Gini\"], index=list(decades))\n", "ginis_df.plot();" ] }, @@ -334,7 +368,7 @@ "outputs": [], "source": [ "numpy.random.seed(12345)\n", - "gdf['PCGDP1940r'] = numpy.random.permutation(gdf.PCGDP1940)" + "gdf[\"PCGDP1940r\"] = numpy.random.permutation(gdf.PCGDP1940)" ] }, { @@ -350,26 +384,36 @@ "\n", "fig, axes = plt.subplots(2, 2, figsize=(12, 10))\n", "\n", - "gdf.plot(column='PCGDP1940', ax=axes[0, 0], legend=True, scheme='quantiles',k=4,cmap='viridis',\n", - " legend_kwds={\n", - " \"fmt\": \"{:.0f}\"}\n", - " )\n", - "axes[0, 0].set_title('PCGDP1940')\n", - "axes[0,0].axis('off')\n", - "\n", - "gdf.plot(column='PCGDP1940r', ax=axes[0, 1], legend=True, scheme='quantiles', k=4, cmap='viridis',\n", - " legend_kwds={\n", - " \"fmt\": \"{:.0f}\"}\n", - " )\n", - "axes[0, 1].set_title('PCGDP1940r')\n", - "axes[0,1].axis('off')\n", + "gdf.plot(\n", + " column=\"PCGDP1940\",\n", + " ax=axes[0, 0],\n", + " legend=True,\n", + " scheme=\"quantiles\",\n", + " k=4,\n", + " cmap=\"viridis\",\n", + " legend_kwds={\"fmt\": \"{:.0f}\"},\n", + ")\n", + "axes[0, 0].set_title(\"PCGDP1940\")\n", + "axes[0, 0].axis(\"off\")\n", + "\n", + "gdf.plot(\n", + " column=\"PCGDP1940r\",\n", + " ax=axes[0, 1],\n", + " legend=True,\n", + " scheme=\"quantiles\",\n", + " k=4,\n", + " cmap=\"viridis\",\n", + " legend_kwds={\"fmt\": \"{:.0f}\"},\n", + ")\n", + "axes[0, 1].set_title(\"PCGDP1940r\")\n", + "axes[0, 1].axis(\"off\")\n", "\n", "\n", - "sns.kdeplot(gdf['PCGDP1940'], ax=axes[1, 0], fill=True, color='blue')\n", - "axes[1, 0].set_title('Kernel Density of PCGDP1940')\n", + "sns.kdeplot(gdf[\"PCGDP1940\"], ax=axes[1, 0], fill=True, color=\"blue\")\n", + "axes[1, 0].set_title(\"Kernel Density of PCGDP1940\")\n", "\n", - "sns.kdeplot(gdf['PCGDP1940r'], ax=axes[1, 1], fill=True, color='orange')\n", - "axes[1, 1].set_title('Kernel Density of PCGDP1940r')\n", + "sns.kdeplot(gdf[\"PCGDP1940r\"], ax=axes[1, 1], fill=True, color=\"orange\")\n", + "axes[1, 1].set_title(\"Kernel Density of PCGDP1940r\")\n", "\n", "plt.tight_layout()\n", "\n", @@ -382,7 +426,7 @@ "metadata": {}, "outputs": [], "source": [ - "inequality.gini.Gini(gdf['PCGDP1940']).g == inequality.gini.Gini(gdf['PCGDP1940r']).g" + "inequality.gini.Gini(gdf[\"PCGDP1940\"]).g == inequality.gini.Gini(gdf[\"PCGDP1940r\"]).g" ] }, { @@ -584,8 +628,8 @@ "\n", "# Plot\n", "plt.figure(figsize=(10, 6))\n", - "plt.plot(x_range, kde_values, label='Kernel Density Estimate')\n", - "plt.axvline(x=obs, color='red', linestyle='--', label=f'Value = {obs:.2f}')\n", + "plt.plot(x_range, kde_values, label=\"Kernel Density Estimate\")\n", + "plt.axvline(x=obs, color=\"red\", linestyle=\"--\", label=f\"Value = {obs:.2f}\")\n", "plt.title(\"Gini Spatial Polarization Index 1940\")\n", "plt.xlabel(\"Values\")\n", "plt.ylabel(\"Density\")\n", @@ -645,8 +689,8 @@ "\n", "# Plot\n", "plt.figure(figsize=(10, 6))\n", - "plt.plot(x_range, kde_values, label='Kernel Density Estimate')\n", - "plt.axvline(x=obs, color='red', linestyle='--', label=f'Value = {obs:.2f}')\n", + "plt.plot(x_range, kde_values, label=\"Kernel Density Estimate\")\n", + "plt.axvline(x=obs, color=\"red\", linestyle=\"--\", label=f\"Value = {obs:.2f}\")\n", "plt.title(\"Gini Spatial Polarization Index 1940 (Random)\")\n", "\n", "plt.xlabel(\"Values\")\n", @@ -701,8 +745,6 @@ "gsh.p_sim\n", "\n", "\n", - "\n", - "\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from scipy.stats import gaussian_kde\n", @@ -720,8 +762,8 @@ "\n", "# Plot\n", "plt.figure(figsize=(10, 6))\n", - "plt.plot(x_range, kde_values, label='Kernel Density Estimate')\n", - "plt.axvline(x=obs, color='red', linestyle='--', label=f'Value = {obs}')\n", + "plt.plot(x_range, kde_values, label=\"Kernel Density Estimate\")\n", + "plt.axvline(x=obs, color=\"red\", linestyle=\"--\", label=f\"Value = {obs}\")\n", "plt.title(\"Gini Spatial Polarization 1940 (Hanson)\")\n", "plt.xlabel(\"Values\")\n", "plt.ylabel(\"Density\")\n", @@ -773,8 +815,8 @@ "\n", "# Plot\n", "plt.figure(figsize=(10, 6))\n", - "plt.plot(x_range, kde_values, label='Kernel Density')\n", - "plt.axvline(x=obs, color='red', linestyle='--', label=f'Value = {obs:.2f}')\n", + "plt.plot(x_range, kde_values, label=\"Kernel Density\")\n", + "plt.axvline(x=obs, color=\"red\", linestyle=\"--\", label=f\"Value = {obs:.2f}\")\n", "plt.title(\"Gini Spatial Polarization 1940 (Hanson Random)\")\n", "\n", "plt.xlabel(\"Values\")\n", diff --git a/docs/user-guide/measure/theil.ipynb b/docs/user-guide/measure/theil.ipynb index 5045a33a..73ae0bf0 100644 --- a/docs/user-guide/measure/theil.ipynb +++ b/docs/user-guide/measure/theil.ipynb @@ -178,7 +178,7 @@ } ], "source": [ - "ax = gdf.plot(edgecolor='grey')\n", + "ax = gdf.plot(edgecolor=\"grey\")\n", "ax.set_axis_off()" ] }, @@ -432,8 +432,9 @@ } ], "source": [ - "ax = gdf.plot(column=\"PCGDP1960\", k=5, scheme=\"Quantiles\", \n", - " edgecolor='grey',legend=True)\n", + "ax = gdf.plot(\n", + " column=\"PCGDP1960\", k=5, scheme=\"Quantiles\", edgecolor=\"grey\", legend=True\n", + ")\n", "ax.set_axis_off()\n", "ax.set_title(\"PC GDP 1960\");\n", "# plt.savefig(\"1940.png\")" @@ -549,8 +550,7 @@ } ], "source": [ - "ax = gdf.plot(column=\"HANSON98\", categorical=True,\n", - " edgecolor='grey')\n", + "ax = gdf.plot(column=\"HANSON98\", categorical=True, edgecolor=\"grey\")\n", "ax.set_title(\"Regions\")\n", "ax.set_axis_off()\n", "# plt.savefig(\"regions.png\")" @@ -568,7 +568,7 @@ "outputs": [], "source": [ "numpy.random.seed(12345)\n", - "ts = inequality.theil.TheilD(gdf['PCGDP1960'], regimes)" + "ts = inequality.theil.TheilD(gdf[\"PCGDP1960\"], regimes)" ] }, { @@ -689,6 +689,7 @@ "outputs": [], "source": [ "import numpy as np\n", + "\n", "np.random.seed(10)" ] }, @@ -698,7 +699,7 @@ "metadata": {}, "outputs": [], "source": [ - "ts = inequality.theil.TheilDSim(gdf['PCGDP1960'], regimes, permutations=999)" + "ts = inequality.theil.TheilDSim(gdf[\"PCGDP1960\"], regimes, permutations=999)" ] }, { @@ -760,11 +761,12 @@ ], "source": [ "import matplotlib.pyplot as plt\n", + "\n", "kdeplot = sbn.kdeplot(ts.bg, fill=False, legend=False)\n", "x_vals = kdeplot.lines[0].get_xdata()\n", "y_vals = kdeplot.lines[0].get_ydata()\n", - "plt.fill_between(x_vals, y_vals, where=(x_vals > ts.bg[0]), color='red')\n", - "plt.xlabel('Between Regions Inequality 1960');" + "plt.fill_between(x_vals, y_vals, where=(x_vals > ts.bg[0]), color=\"red\")\n", + "plt.xlabel(\"Between Regions Inequality 1960\");" ] }, { diff --git a/docs/user-guide/measure/wolfson.ipynb b/docs/user-guide/measure/wolfson.ipynb index 885d66bd..d7c29f58 100644 --- a/docs/user-guide/measure/wolfson.ipynb +++ b/docs/user-guide/measure/wolfson.ipynb @@ -30,7 +30,7 @@ "metadata": {}, "outputs": [], "source": [ - "from inequality.wolfson import wolfson\n" + "from inequality.wolfson import wolfson" ] }, { @@ -61,31 +61,33 @@ "income_low_polarization = np.random.normal(loc=50000, scale=15000, size=10000)\n", "\n", "# Distribution 2: High polarization (bimodal distribution)\n", - "income_high_polarization = np.concatenate([\n", - " np.random.normal(loc=30000, scale=5000, size=5000),\n", - " np.random.normal(loc=70000, scale=5000, size=5000)\n", - "])\n", + "income_high_polarization = np.concatenate(\n", + " [\n", + " np.random.normal(loc=30000, scale=5000, size=5000),\n", + " np.random.normal(loc=70000, scale=5000, size=5000),\n", + " ]\n", + ")\n", "\n", "# Plotting the distributions\n", "plt.figure(figsize=(12, 6))\n", "\n", "# Plot for low polarization\n", "plt.subplot(1, 2, 1)\n", - "plt.hist(income_low_polarization, bins=50, color='blue', alpha=0.7)\n", - "plt.title('Low Polarization Income Distribution')\n", - "plt.xlabel('Income')\n", - "plt.ylabel('Frequency')\n", + "plt.hist(income_low_polarization, bins=50, color=\"blue\", alpha=0.7)\n", + "plt.title(\"Low Polarization Income Distribution\")\n", + "plt.xlabel(\"Income\")\n", + "plt.ylabel(\"Frequency\")\n", "\n", "# Plot for high polarization\n", "plt.subplot(1, 2, 2)\n", - "plt.hist(income_high_polarization, bins=50, color='red', alpha=0.7)\n", - "plt.title('High Polarization Income Distribution')\n", - "plt.xlabel('Income')\n", - "plt.ylabel('Frequency')\n", + "plt.hist(income_high_polarization, bins=50, color=\"red\", alpha=0.7)\n", + "plt.title(\"High Polarization Income Distribution\")\n", + "plt.xlabel(\"Income\")\n", + "plt.ylabel(\"Frequency\")\n", "\n", "# Show plots\n", "plt.tight_layout()\n", - "plt.show()\n" + "plt.show()" ] }, { @@ -139,6 +141,7 @@ "source": [ "import libpysal\n", "import geopandas\n", + "\n", "pth = libpysal.examples.get_path(\"mexicojoin.shp\")\n", "gdf = geopandas.read_file(pth)" ] @@ -161,7 +164,7 @@ } ], "source": [ - "gdf.plot(column='PCGDP1960', legend=True);" + "gdf.plot(column=\"PCGDP1960\", legend=True);" ] }, { @@ -237,7 +240,7 @@ } ], "source": [ - "gdf.plot(column='PCGDP2000', legend=True);" + "gdf.plot(column=\"PCGDP2000\", legend=True);" ] }, { @@ -313,7 +316,7 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas " + "import pandas" ] }, { @@ -323,7 +326,7 @@ "metadata": {}, "outputs": [], "source": [ - "ts = pandas.DataFrame(data=years, columns=['year'])" + "ts = pandas.DataFrame(data=years, columns=[\"year\"])" ] }, { @@ -333,8 +336,8 @@ "metadata": {}, "outputs": [], "source": [ - "ts['wolfson'] = w\n", - "ts = ts.set_index('year')" + "ts[\"wolfson\"] = w\n", + "ts = ts.set_index(\"year\")" ] }, { diff --git a/docs/user-guide/viz/pengram.ipynb b/docs/user-guide/viz/pengram.ipynb index 42b8b611..f2bc6cd6 100644 --- a/docs/user-guide/viz/pengram.ipynb +++ b/docs/user-guide/viz/pengram.ipynb @@ -38,7 +38,7 @@ "metadata": {}, "outputs": [], "source": [ - "gdf = gpd.read_file('weighted.shp')" + "gdf = gpd.read_file(\"weighted.shp\")" ] }, { @@ -297,10 +297,10 @@ "metadata": {}, "outputs": [], "source": [ - "col='PCGDP2000'\n", - "weight='p'\n", - "name='NAME'\n", - "figsize=(16,9)" + "col = \"PCGDP2000\"\n", + "weight = \"p\"\n", + "name = \"NAME\"\n", + "figsize = (16, 9)" ] }, { @@ -321,7 +321,7 @@ } ], "source": [ - "p = pen(gdf,col, name)" + "p = pen(gdf, col, name)" ] }, { @@ -358,7 +358,7 @@ } ], "source": [ - "p = pen(gdf,col, name, xticks=False)" + "p = pen(gdf, col, name, xticks=False)" ] }, { @@ -481,7 +481,7 @@ } ], "source": [ - "pengram(gdf, col, name, figsize=figsize, leg_pos='lower left');" + "pengram(gdf, col, name, figsize=figsize, leg_pos=\"lower left\");" ] }, { @@ -502,7 +502,7 @@ } ], "source": [ - "pengram(gdf, col, name, figsize=figsize, leg_pos='lower left', scheme='FisherJenks');" + "pengram(gdf, col, name, figsize=figsize, leg_pos=\"lower left\", scheme=\"FisherJenks\");" ] }, { @@ -523,7 +523,9 @@ } ], "source": [ - "pengram(gdf, col, name, figsize=figsize, leg_pos='lower left', scheme='FisherJenks',k=3);" + "pengram(\n", + " gdf, col, name, figsize=figsize, leg_pos=\"lower left\", scheme=\"FisherJenks\", k=3\n", + ");" ] }, { @@ -555,8 +557,16 @@ } ], "source": [ - "pengram(gdf, col, name, figsize=figsize, leg_pos='lower left', scheme='FisherJenks',k=3,\n", - " query=['Chiapas', 'Campeche']);" + "pengram(\n", + " gdf,\n", + " col,\n", + " name,\n", + " figsize=figsize,\n", + " leg_pos=\"lower left\",\n", + " scheme=\"FisherJenks\",\n", + " k=3,\n", + " query=[\"Chiapas\", \"Campeche\"],\n", + ");" ] }, { @@ -599,13 +609,14 @@ "source": [ "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", - "fig, axes = plt.subplots(2, 2, figsize=(10, 8), gridspec_kw={'height_ratios': [1, 1]})\n", - "ax0, ax1, ax2, ax3 = axes[0,0], axes[0,1], axes[1,0], axes[1,1]\n", "\n", - "sns.kdeplot(data=gdf, x='PCGDP2000', ax=ax0)\n", - "gdf.plot('PCGDP2000', ax=ax1)\n", + "fig, axes = plt.subplots(2, 2, figsize=(10, 8), gridspec_kw={\"height_ratios\": [1, 1]})\n", + "ax0, ax1, ax2, ax3 = axes[0, 0], axes[0, 1], axes[1, 0], axes[1, 1]\n", + "\n", + "sns.kdeplot(data=gdf, x=\"PCGDP2000\", ax=ax0)\n", + "gdf.plot(\"PCGDP2000\", ax=ax1)\n", "pen(gdf, col, name, ax=ax2)\n", - "pengram(gdf, col, name, xticks=False, legend=False, ax=ax3);\n" + "pengram(gdf, col, name, xticks=False, legend=False, ax=ax3);" ] }, { diff --git a/docs/user-guide/viz/schutz.ipynb b/docs/user-guide/viz/schutz.ipynb index 1d813ebd..e42f663c 100644 --- a/docs/user-guide/viz/schutz.ipynb +++ b/docs/user-guide/viz/schutz.ipynb @@ -53,7 +53,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = pd.DataFrame(data=np.array([1000, 2000, 1500, 3000, 2500]), columns=['GDP'])" + "df = pd.DataFrame(data=np.array([1000, 2000, 1500, 3000, 2500]), columns=[\"GDP\"])" ] }, { @@ -63,7 +63,7 @@ "metadata": {}, "outputs": [], "source": [ - "s = Schutz(df, 'GDP')" + "s = Schutz(df, \"GDP\")" ] }, { @@ -286,7 +286,7 @@ "metadata": {}, "outputs": [], "source": [ - "y = np.array([20,50,80,100,100,100,100,120, 150,180])" + "y = np.array([20, 50, 80, 100, 100, 100, 100, 120, 150, 180])" ] }, { @@ -296,7 +296,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = pd.DataFrame(data=y, columns=['GDP'])" + "df = pd.DataFrame(data=y, columns=[\"GDP\"])" ] }, { @@ -306,7 +306,7 @@ "metadata": {}, "outputs": [], "source": [ - "s = Schutz(df, 'GDP')" + "s = Schutz(df, \"GDP\")" ] }, { @@ -595,7 +595,8 @@ "outputs": [], "source": [ "import geopandas as gpd\n", - "gdf = gpd.read_file('weighted.shp')\n" + "\n", + "gdf = gpd.read_file(\"weighted.shp\")" ] }, { @@ -857,7 +858,7 @@ "metadata": {}, "outputs": [], "source": [ - "s1960 = Schutz(gdf, 'PCGDP1960')" + "s1960 = Schutz(gdf, \"PCGDP1960\")" ] }, { @@ -878,7 +879,7 @@ } ], "source": [ - "s1960.plot(xlabel='State Percentile Rank 1960')" + "s1960.plot(xlabel=\"State Percentile Rank 1960\")" ] }, { @@ -888,7 +889,7 @@ "metadata": {}, "outputs": [], "source": [ - "s2000= Schutz(gdf, 'PCGDP2000')" + "s2000 = Schutz(gdf, \"PCGDP2000\")" ] }, { @@ -909,7 +910,7 @@ } ], "source": [ - "s2000.plot(xlabel= 'State Percentile Rank 2000')" + "s2000.plot(xlabel=\"State Percentile Rank 2000\")" ] }, { diff --git a/inequality/__init__.py b/inequality/__init__.py index ffe034cb..662e40a0 100644 --- a/inequality/__init__.py +++ b/inequality/__init__.py @@ -4,7 +4,6 @@ """ - import contextlib from importlib.metadata import PackageNotFoundError, version diff --git a/inequality/atkinson.py b/inequality/atkinson.py index ce49d60f..e4230317 100644 --- a/inequality/atkinson.py +++ b/inequality/atkinson.py @@ -1,4 +1,3 @@ - import numpy as np __all__ = ["Atkinson", "atkinson"] diff --git a/inequality/gini.py b/inequality/gini.py index 4c67a11e..bb27df5c 100644 --- a/inequality/gini.py +++ b/inequality/gini.py @@ -156,10 +156,10 @@ def __init__(self, x, w, permutations=99): self.g = g n = len(x) den = x.mean() * 2 * n**2 - d = g * den # sum of absolute devations SAD + d = g * den # sum of absolute devations SAD wg = self._calc(x, w) # sum of absolute deviations for neighbor pairs - wcg = d - wg # sum of absolution deviations for distant pairs - n_pairs = n * (n-1) / 2 + wcg = d - wg # sum of absolution deviations for distant pairs + n_pairs = n * (n - 1) / 2 n_n_pairs = w.s0 / 2 n_d_pairs = n_pairs - n_n_pairs polarization = (wcg / wg) * (n_n_pairs / n_d_pairs) @@ -179,7 +179,7 @@ def __init__(self, x, w, permutations=99): for perm in range(permutations): numpy.random.shuffle(ids) wcgp[perm] = d - self._calc(x[ids], w) - polar = (wcgp[perm] / (d - wcgp[perm])) + polar = wcgp[perm] / (d - wcgp[perm]) polarization_sim[perm] = polar * _scale above = wcgp >= self.wcg larger = above.sum() diff --git a/inequality/tests/test_interface.py b/inequality/tests/test_interface.py index cd7150f3..87731e14 100644 --- a/inequality/tests/test_interface.py +++ b/inequality/tests/test_interface.py @@ -5,73 +5,113 @@ def test_lorenz_curve_with_array(): - income = np.array([20000, 25000, 27000, 30000, 35000, 45000, 60000, 75000, 80000, 120000]) + income = np.array( + [20000, 25000, 27000, 30000, 35000, 45000, 60000, 75000, 80000, 120000] + ) population, cumulative_income = lorenz_curve(income) - + # Check that both returned arrays have the correct length (n+1) assert len(population) == len(income) + 1 assert len(cumulative_income) == len(income) + 1 - + # Ensure that the Lorenz curve starts at zero assert cumulative_income[0] == 0.0 assert population[0] == 0.0 + def test_lorenz_curve_with_list(): income = [20000, 25000, 27000, 30000, 35000, 45000, 60000, 75000, 80000, 120000] population, cumulative_income = lorenz_curve(income) - + # Check that both returned arrays have the correct length (n+1) assert len(population) == len(income) + 1 assert len(cumulative_income) == len(income) + 1 - + # Ensure that the Lorenz curve starts at zero assert cumulative_income[0] == 0.0 assert population[0] == 0.0 + def test_lorenz_curve_with_dataframe(): - df = pd.DataFrame({'income': [20000, 25000, 27000, 30000, 35000, 45000, 60000, 75000, 80000, 120000]}) - population, cumulative_income = lorenz_curve(df, column='income') - + df = pd.DataFrame( + { + "income": [ + 20000, + 25000, + 27000, + 30000, + 35000, + 45000, + 60000, + 75000, + 80000, + 120000, + ] + } + ) + population, cumulative_income = lorenz_curve(df, column="income") + # Check that both returned arrays have the correct length (n+1) - assert len(population) == len(df['income']) + 1 - assert len(cumulative_income) == len(df['income']) + 1 - + assert len(population) == len(df["income"]) + 1 + assert len(cumulative_income) == len(df["income"]) + 1 + # Ensure that the Lorenz curve starts at zero assert cumulative_income[0] == 0.0 assert population[0] == 0.0 + def test_wolfson_with_array(): - income = np.array([20000, 25000, 27000, 30000, 35000, 45000, 60000, 75000, 80000, 120000]) + income = np.array( + [20000, 25000, 27000, 30000, 35000, 45000, 60000, 75000, 80000, 120000] + ) wolfson_index = wolfson(income) - + # Compare the result to an expected value (based on the example) assert np.isclose(wolfson_index, 0.2013, atol=1e-4) + def test_wolfson_with_list(): income = [20000, 25000, 27000, 30000, 35000, 45000, 60000, 75000, 80000, 120000] wolfson_index = wolfson(income) - + # Compare the result to an expected value (based on the example) assert np.isclose(wolfson_index, 0.2013, atol=1e-4) + def test_wolfson_with_dataframe(): - df = pd.DataFrame({'income': [20000, 25000, 27000, 30000, 35000, 45000, 60000, 75000, 80000, 120000]}) - wolfson_index = wolfson(df, column='income') - + df = pd.DataFrame( + { + "income": [ + 20000, + 25000, + 27000, + 30000, + 35000, + 45000, + 60000, + 75000, + 80000, + 120000, + ] + } + ) + wolfson_index = wolfson(df, column="income") + # Compare the result to an expected value (based on the example) assert np.isclose(wolfson_index, 0.2013, atol=1e-4) + def test_wolfson_with_small_dataset(): income = [6, 6, 8, 8, 10, 10, 12, 12] wolfson_index = wolfson(income) - + # Compare the result to an expected value (based on the example) assert np.isclose(wolfson_index, 0.0833, atol=1e-4) + def test_wolfson_with_even_distribution(): income = [2, 4, 6, 8, 10, 12, 14, 16] wolfson_index = wolfson(income) - + # Compare the result to an expected value (based on the example) assert np.isclose(wolfson_index, 0.1528, atol=1e-4) - diff --git a/inequality/tests/test_wolfson.py b/inequality/tests/test_wolfson.py index b5194057..99fd3ab8 100644 --- a/inequality/tests/test_wolfson.py +++ b/inequality/tests/test_wolfson.py @@ -8,11 +8,11 @@ def test_lorenz_curve(): population, cumulative_income = lorenz_curve(income) # Expected cumulative income values (calculated manually) - expected_cumulative_income = np.array( - [0, 0.06666667, 0.2, 0.4, 0.66666667, 1]) + expected_cumulative_income = np.array([0, 0.06666667, 0.2, 0.4, 0.66666667, 1]) np.testing.assert_almost_equal( - cumulative_income, expected_cumulative_income, decimal=6) + cumulative_income, expected_cumulative_income, decimal=6 + ) # Should include start and end points (0 and 1) assert len(population) == 6 @@ -20,9 +20,9 @@ def test_lorenz_curve(): def test_wolfson(): income = [6, 6, 8, 8, 10, 10, 12, 12] wolfson_idx = wolfson(income) - expected_wolfson_idx = 1/12 + expected_wolfson_idx = 1 / 12 assert np.isclose(wolfson_idx, expected_wolfson_idx, atol=0.01) income = [2, 4, 6, 8, 10, 12, 14, 16] wolfson_idx = wolfson(income) - expected_wolfson_idx = 11/72 + expected_wolfson_idx = 11 / 72 assert np.isclose(wolfson_idx, expected_wolfson_idx, atol=0.01) diff --git a/inequality/theil.py b/inequality/theil.py index 11be0b13..b4bb6048 100644 --- a/inequality/theil.py +++ b/inequality/theil.py @@ -54,7 +54,7 @@ def __init__(self, y, column=None): "The nxT input format is deprecated. In future versions, " "please provide nx1 sequences or a DataFrame with a single column.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) # Old API behavior n = y.shape[0] @@ -64,7 +64,7 @@ def __init__(self, y, column=None): if column is None: raise ValueError("For DataFrame input, `column` must be specified.") y = y[column].values - elif isinstance(y, (list, np.ndarray)): + elif isinstance(y, list | np.ndarray): y = np.asarray(y) else: raise TypeError("Input must be an array, list, or DataFrame.") @@ -118,33 +118,33 @@ def __init__(self, y, partition, column=None, partition_col=None): "The nxT input format is deprecated. In future versions, " "please provide nx1 sequences or a DataFrame with a single column.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) - n = y.shape[0] else: # New API: Handle sequence or DataFrame if isinstance(y, pd.DataFrame): if column is None: raise ValueError("For DataFrame input, `column` must be specified.") y = y[column].values - elif isinstance(y, (list, np.ndarray)): + elif isinstance(y, list | np.ndarray): y = np.asarray(y) else: raise TypeError("Input must be an array, list, or DataFrame.") - n = len(y) # Handle partition similarly if isinstance(partition, pd.DataFrame): if partition_col is None: - raise ValueError("For DataFrame input, `partition_col` must be specified.") + raise ValueError( + "For DataFrame input, `partition_col` must be specified." + ) partition = partition[partition_col].values - elif isinstance(partition, (list, np.ndarray)): + elif isinstance(partition, list | np.ndarray): partition = np.asarray(partition) else: raise TypeError("Partition must be an array, list, or DataFrame.") groups = np.unique(partition) - T = Theil(y).T + t = Theil(y).T ytot = y.sum(axis=0) # Group totals @@ -159,11 +159,13 @@ def __init__(self, y, partition, column=None, partition_col=None): ng.shape = (ng.size,) # ensure ng is 1-d # Between group inequality sg = sg + (sg == 0) # handle case when a partition has 0 for sum - bg = np.multiply(sg, np.log(np.dot(np.diag(len(y) * 1.0 / ng), sg))).sum(axis=0) + bg = np.multiply(sg, + np.log(np.dot(np.diag(len(y) * 1.0 / ng), + sg))).sum(axis=0) - self.T = T + self.T = t self.bg = bg - self.wg = T - bg + self.wg = t - bg class TheilDSim: diff --git a/inequality/utils.py b/inequality/utils.py index d38bedcc..988766f9 100644 --- a/inequality/utils.py +++ b/inequality/utils.py @@ -10,26 +10,32 @@ def wrapper(data, *args, column=None, **kwargs): # If input is a DataFrame, extract the specified column if isinstance(data, pd.DataFrame): if column is None: - raise ValueError("For DataFrame input, 'column' argument must be provided.") + raise ValueError( + "For DataFrame input, 'column' argument must be provided." + ) data = data[column].values # If input is a series, numpy array, or list, no transformation needed - elif isinstance(data, (pd.Series, np.ndarray, list)): + elif isinstance(data, pd.Series | np.ndarray | list): data = np.asarray(data) else: - raise TypeError("Input should be a sequence, numpy array, or pandas DataFrame.") + raise TypeError( + "Input should be a sequence, numpy array, or pandas DataFrame." + ) return func(data, *args, **kwargs) return wrapper + # Example function using the decorator @consistent_input def compute_mean(data): return np.mean(data) + # Usage -df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}) -print(compute_mean(df, column='a')) # Output: 2.5 +df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) +print(compute_mean(df, column="a")) # Output: 2.5 arr = np.array([1, 2, 3, 4]) print(compute_mean(arr)) # Output: 2.5 diff --git a/inequality/wolfson.py b/inequality/wolfson.py index 0fe1afcf..da0779a1 100644 --- a/inequality/wolfson.py +++ b/inequality/wolfson.py @@ -7,6 +7,7 @@ Author: Serge Rey """ + import numpy as np from .gini import Gini @@ -16,38 +17,34 @@ @consistent_input -def lorenz_curve(data, column=None): +def lorenz_curve(data): """ Calculate the Lorenz curve for a given distribution. This function takes an income or wealth distribution as input. The input can be a sequence, a NumPy array, or a Pandas DataFrame. If a DataFrame - is provided, the `column` parameter must be used to specify which column + is provided, the `column` parameter must be used to specify which column contains the income or wealth values. Parameters ---------- - data : array-like, numpy array, or pandas.DataFrame - A sequence, NumPy array, or DataFrame representing the income or + data : array-like or array + A sequence or NumPy array representing the income or wealth distribution. - column : str, optional - The column name to be used when `data` is a pandas DataFrame. Required - if `data` is a DataFrame. Returns ------- tuple - Two numpy arrays: the first represents the cumulative share of the - population, and the second represents the cumulative share of + Two numpy arrays: the first represents the cumulative share of the + population, and the second represents the cumulative share of the income/wealth. Example ------- >>> income = [20000, 25000, 27000, 30000, 35000, 45000, 60000, 75000, 80000, 120000] >>> population, income_share = lorenz_curve(income) - >>> print(population, income_share) - [0. 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1. ] [0. 0.03868472 0.08704062 0.13926499 0.19729207 0.26499033 - 0.35203095 0.46808511 0.6131528 0.76789168 1. ] + >>> print(population[:2], income_share[:2]) + [0. 0.1] [0. 0.03868472] """ sorted_y = np.sort(data) cumulative_y = np.cumsum(sorted_y) @@ -56,36 +53,35 @@ def lorenz_curve(data, column=None): cumulative_population = np.linspace(0, 1, len(data) + 1) return cumulative_population, cumulative_y + @consistent_input -def wolfson(data, column=None): +def wolfson(data): """ Calculate the Wolfson Bipolarization Index for a given income distribution. - This function takes an income distribution and calculates the Wolfson - Bipolarization Index. The input can be a sequence, a NumPy array, or a - Pandas DataFrame. If a DataFrame is provided, the `column` parameter must + This function takes an income distribution and calculates the Wolfson + Bipolarization Index. The input can be a sequence, a NumPy array, or a + Pandas DataFrame. If a DataFrame is provided, the `column` parameter must be used to specify which column contains the income values. - The Wolfson index is constructed from the polarization curve, which is + The Wolfson index is constructed from the polarization curve, which is a rotation and rescaling of the Lorenz curve by the median income: .. math:: W = (2D_{50} - G)\\frac{\\mu}{m} - Where :math:`D_{50} =0.5 - L(0.5)`, :math:`L(0.5)` is the value of the - Lorenz curve at the median, :math:`G` is the Gini index, :math:`\\mu` + Where :math:`D_{50} =0.5 - L(0.5)`, :math:`L(0.5)` is the value of the + Lorenz curve at the median, :math:`G` is the Gini index, :math:`\\mu` is the mean, and :math:`m` is the median. See: :cite:`wolfson1994WhenInequalities`. Parameters ---------- - data : array-like, numpy array, or pandas.DataFrame - A sequence, NumPy array, or DataFrame representing the income distribution. - column : str, optional - The column name to be used when `data` is a pandas DataFrame. Required - if `data` is a DataFrame. + data : array-like or array + A sequence or NumPy array representing the income or + wealth distribution. Returns ------- @@ -110,8 +106,8 @@ def wolfson(data, column=None): y = np.array(data) y_med = np.median(y) ordinate, lc = lorenz_curve(y) - l50 = np.interp(.5, ordinate, lc) - d50 = .5 - l50 + l50 = np.interp(0.5, ordinate, lc) + d50 = 0.5 - l50 rat = y.mean() / y_med g = Gini(y).g w = (2 * d50 - g) * rat