Skip to content

Commit

Permalink
added get nearest postal codes
Browse files Browse the repository at this point in the history
  • Loading branch information
eracle committed Mar 10, 2022
1 parent 223f720 commit 38da878
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 8 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,10 @@ ENV/
.mypy_cache/
_build/
generated/
/.idea/pgeocode.iml
/.idea/inspectionProfiles/Project_Default.xml
/.idea/vcs.xml
/.idea/inspectionProfiles/profiles_settings.xml
/.idea/modules.xml
/.idea/misc.xml
/.idea/workspace.xml
19 changes: 19 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,16 @@ Quickstart
>>> dist.query_postal_code(["75013", "75014", "75015"], ["69006", "69005", "69004"])
array([ 389.15648697, 390.12577967, 390.49857655])
**Nearest Postal Codes:**

.. code:: python
>>> dist = pgeocode.NearestNominatim('it')
>>> dist.inverse_geocoding(lat, long, k=1)
['12345']
>>>
>>> dist.inverse_geocoding(lat, long, k=4)
['12345', '12346', '12347', '12348']
Geocoding format
Expand Down Expand Up @@ -147,3 +157,12 @@ The list of countries available in the GeoNames database, with the corresponding
Andorra (AD), Argentina (AR), American Samoa (AS), Austria (AT), Australia (AU), Åland Islands (AX), Bangladesh (BD), Belgium (BE), Bulgaria (BG), Bermuda (BM), Brazil (BR), Belarus (BY), Canada (CA), Switzerland (CH), Colombia (CO), Costa Rica (CR), Czechia (CZ), Germany (DE), Denmark (DK), Dominican Republic (DO), Algeria (DZ), Spain (ES), Finland (FI), Faroe Islands (FO), France (FR), United Kingdom of Great Britain and Northern Ireland (GB), French Guiana (GF), Guernsey (GG), Greenland (GL), Guadeloupe (GP), Guatemala (GT), Guam (GU), Croatia (HR), Hungary (HU), Ireland (IE), Isle of Man (IM), India (IN), Iceland (IS), Italy (IT), Jersey (JE), Japan (JP), Liechtenstein (LI), Sri Lanka (LK), Lithuania (LT), Luxembourg (LU), Latvia (LV), Monaco (MC), Republic of Moldova (MD), Marshall Islands (MH), The former Yugoslav Republic of Macedonia (MK), Northern Mariana Islands (MP), Martinique (MQ), Malta (MT), Mexico (MX), Malaysia (MY), New Caledonia (NC), Netherlands (NL), Norway (NO), New Zealand (NZ), Philippines (PH), Pakistan (PK), Poland (PL), Saint Pierre and Miquelon (PM), Puerto Rico (PR), Portugal (PT), Réunion (RE), Romania (RO), Russian Federation (RU), Sweden (SE), Slovenia (SI), Svalbard and Jan Mayen Islands (SJ), Slovakia (SK), San Marino (SM), Thailand (TH), Turkey (TR), Ukraine (UA), United States of America (US), Uruguay (UY), Holy See (VA), United States Virgin Islands (VI), Wallis and Futuna Islands (WF), Mayotte (YT), South Africa (ZA)

See `GeoNames database <http://download.geonames.org/export/zip/>`_ for more information.


Tests:
-------------------

.. code::
$ pip install pytest pytest-httpserver
$ pytest
35 changes: 32 additions & 3 deletions pgeocode.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
import os
import urllib.request
import warnings
from collections.abc import Iterable
from io import BytesIO
from typing import Any, Tuple, List
from zipfile import ZipFile

import numpy as np
import pandas as pd
from scipy.spatial import KDTree

__version__ = "0.3.0"

Expand All @@ -26,7 +28,6 @@
"https://symerio.github.io/postal-codes-data/data/geonames/{country}.txt",
]


DATA_FIELDS = [
"country_code",
"postal_code",
Expand Down Expand Up @@ -316,6 +317,34 @@ def query_location(self, name):
pass


class NearestNominatim(Nominatim):
    """Find the postal codes closest to a pair of coordinates.

    A KD-tree is built over the ``latitude`` / ``longitude`` columns of the
    country dataset loaded by the parent class. Neighbours are ranked by
    plain Euclidean distance in degree space, which only approximates the
    geographic (great-circle) distance.

    Parameters
    ----------
    country: str, default='fr'
        country code, forwarded to the parent class constructor
    errors: str, default='ignore'
        accepted for interface compatibility; currently unused
    """

    def __init__(self, country: str = "fr", errors: str = "ignore"):
        super().__init__(country)
        coords = self._data[["latitude", "longitude"]].to_numpy(dtype=float)
        # Rows without usable coordinates cannot be placed in the tree, so
        # only finite points are indexed.
        valid = np.isfinite(coords).all(axis=1)
        # Maps a position inside the tree back to a row position of
        # ``self._data``.
        self._tree_rows = np.flatnonzero(valid)
        self.tree = KDTree(data=coords[valid])

    def inverse_geocoding(self, lat, long, k=1):
        """Return the postal codes of the ``k`` points nearest to a location.

        Parameters
        ----------
        lat: float
            latitude of the query point, in the same unit as the dataset
        long: float
            longitude of the query point, in the same unit as the dataset
        k: int, default=1
            number of nearest neighbours to return

        Returns
        -------
        postal_codes: list
            postal codes ordered from nearest to farthest; shorter than
            ``k`` when the dataset holds fewer than ``k`` usable points
        """
        # ``query`` yields a scalar index for k=1 and an array otherwise.
        idx = np.atleast_1d(self.tree.query(x=(lat, long), k=k)[1])
        # Missing neighbours are reported with the index ``tree.n``.
        idx = idx[idx < self.tree.n]
        # Select by position (not by index label) so that the
        # nearest-first order of the tree query is preserved.
        rows = self._tree_rows[idx]
        return self._data["postal_code"].iloc[rows].to_list()


class GeoDistance(Nominatim):
"""Distance calculation from a city name or a postal code
Expand Down Expand Up @@ -426,8 +455,8 @@ def haversine_distance(x, y):
y_lat = y_rad[:, 0]

a = (
np.sin(dlat / 2.0) ** 2
+ np.cos(x_lat) * np.cos(y_lat) * np.sin(dlon / 2.0) ** 2
np.sin(dlat / 2.0) ** 2
+ np.cos(x_lat) * np.cos(y_lat) * np.sin(dlon / 2.0) ** 2
)

c = 2 * np.arcsin(np.sqrt(a))
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def find_version(*file_paths):
author_email="[email protected]",
py_modules=["pgeocode"],
python_requires=">=3.6",
install_requires=["requests", "numpy", "pandas"],
install_requires=["requests", "numpy", "pandas", "scipy"],
classifiers=[_f for _f in CLASSIFIERS.split("\n") if _f],
license="BSD",
)
25 changes: 21 additions & 4 deletions test_pgeocode.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
# License 3-clause BSD
#
# Authors: Roman Yurchak <[email protected]>
import json
import os
import urllib
import json
from zipfile import ZipFile
from io import BytesIO
from zipfile import ZipFile

import numpy as np
import pandas as pd
import pytest
from numpy.testing import assert_allclose, assert_array_equal

import pgeocode
from pgeocode import GeoDistance, Nominatim, haversine_distance
from pgeocode import GeoDistance, Nominatim, haversine_distance, NearestNominatim
from pgeocode import _open_extract_url


Expand Down Expand Up @@ -129,7 +129,6 @@ def test_nominatim_all_countries(country):


def test_nominatim_distance_postal_code():

gdist = GeoDistance("fr")

dist = gdist.query_postal_code("91120", "91120")
Expand Down Expand Up @@ -180,6 +179,24 @@ def test_haversine_distance():
assert_allclose(d_ref, d_pred, atol=3)


@pytest.mark.parametrize(
    "country, postal_code, location, lat, long",
    [
        ("it", "00155", "Rome", 41.9028, 12.4964),
        ("it", "20129", "Milan", 45.4642, 9.1900),
        ("it", "10149", "Turin", 45.0703, 7.6869),
        ("it", "90151", "Palermo", 38.1157, 13.3615),
    ],
)
def test_inverse_geocoding(country, postal_code, location, lat, long):
    """The single nearest match is a list holding the expected code."""
    # ``location`` only labels the test case; it is not used in the lookup.
    nearest_codes = NearestNominatim(country).inverse_geocoding(
        lat, long, k=1
    )

    assert isinstance(nearest_codes, list)
    assert postal_code in nearest_codes


def test_open_extract_url(httpserver):
download_url = "/fr.txt"

Expand Down

0 comments on commit 38da878

Please sign in to comment.