Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add reverse geocoding implementation with PostGIS #1

Merged
merged 3 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added pg_nearest_city/data/cities_1000_simple.txt.gz
Binary file not shown.
Binary file added pg_nearest_city/data/voronois.wkb.gz
Binary file not shown.
194 changes: 194 additions & 0 deletions pg_nearest_city/reverse_geocoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
import psycopg
import gzip

from dataclasses import dataclass
from typing import Optional
import importlib.resources


from psycopg import sql


@dataclass
class DbConfig:
dbname: str
user: str
password: str
host: str = "localhost"
port: int = 5432

def get_connection_string(self) -> str:
return f"dbname={self.dbname} user={self.user} password={self.password} host={self.host} port={self.port}"


@dataclass
class Location:
city: str
country: str
lat: float
lon: float


class ReverseGeocoder:
def __init__(self, config: DbConfig):
"""Initialize reverse geocoder with database configuration and data directory.

Args:
config: Database connection configuration
data_dir: Directory containing required data files
"""
self.conn_string = config.get_connection_string()

with importlib.resources.path(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting! I have never used importlib this way - it's a pretty neat approach.

Pathlib is another option, that I typically use:

from pathlib import Path
self.cities_file = Path(__file__).parent / "data" / "cities_1000_simple.txt.gz"

but your approach is probably more immune to project directory restructuring 👍

"pg_nearest_city.data", "cities_1000_simple.txt.gz"
) as cities_path:
self.cities_file = cities_path
with importlib.resources.path(
"pg_nearest_city.data", "voronois.wkb.gz"
) as voronoi_path:
self.voronoi_file = voronoi_path

if not self.cities_file.exists():
raise FileNotFoundError(f"Cities file not found: {self.cities_file}")
if not self.voronoi_file.exists():
raise FileNotFoundError(f"Voronoi file not found: {self.voronoi_file}")

def _get_connection(self):
"""Create and return a database connection."""
try:
return psycopg.connect(self.conn_string)
except Exception as e:
raise RuntimeError(f"Failed to connect to database: {str(e)}")

def reverse_geocode(self, lat: float, lon: float) -> Optional[Location]:
"""Find the nearest city to the given coordinates using Voronoi regions.

Args:
lat: Latitude in degrees (-90 to 90)
lon: Longitude in degrees (-180 to 180)

Returns:
Location object if a matching city is found, None otherwise

Raises:
ValueError: If coordinates are out of valid ranges
RuntimeError: If database query fails
"""
# Validate coordinate ranges
if not -90 <= lat <= 90:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice to have this validation 👍

raise ValueError(f"Latitude {lat} is outside valid range [-90, 90]")
if not -180 <= lon <= 180:
raise ValueError(f"Longitude {lon} is outside valid range [-180, 180]")

query = sql.SQL("""
SELECT city, country, lat, lon
FROM geocoding
WHERE ST_Contains(voronoi, ST_SetSRID(ST_MakePoint({}, {}), 4326))
LIMIT 1
""").format(sql.Literal(lon), sql.Literal(lat))

try:
with self._get_connection() as conn:
with conn.cursor() as cur:
cur.execute(query)
result = cur.fetchone()

if not result:
return None

return Location(
city=result[0],
country=result[1],
lat=float(result[2]),
lon=float(result[3]),
)
except Exception as e:
raise RuntimeError(f"Reverse geocoding failed: {str(e)}")

def initialize(self) -> None:
"""Initialize the geocoding database with cities and Voronoi regions."""
try:
with self._get_connection() as conn:
with conn.cursor() as cur:
print("Creating geocoding table...")
self._create_geocoding_table(cur)

print("Importing city data...")
self._import_cities(cur)

print("Processing Voronoi polygons...")
self._import_voronoi_polygons(cur)

print("Creating spatial index...")
self._create_spatial_index(cur)

conn.commit()

# Clean up the voronoi file after successful import
if self.voronoi_file.exists():
self.voronoi_file.unlink()
print("Cleaned up Voronoi file to save disk space")

print("Initialization complete!")
except Exception as e:
raise RuntimeError(f"Database initialization failed: {str(e)}")

def _import_cities(self, cur):
"""Import city data using COPY protocol."""
with cur.copy("COPY geocoding(city, country, lat, lon) FROM STDIN") as copy:
with gzip.open(self.cities_file, "r") as f:
copied_bytes = 0
while data := f.read(8192):
copy.write(data)
copied_bytes += len(data)
print(f"Imported {copied_bytes:,} bytes of city data")

def _create_geocoding_table(self, cur):
"""Create the main table"""
cur.execute("""
CREATE TABLE geocoding (
city varchar,
country varchar,
lat decimal,
lon decimal,
geom geometry(Point,4326) GENERATED ALWAYS AS (ST_SetSRID(ST_MakePoint(lon, lat), 4326)) STORED,
voronoi geometry(Polygon,4326)
);
""")

def _import_voronoi_polygons(self, cur):
"""Import and integrate Voronoi polygons into the main table."""
# First create temporary table for the import
cur.execute("""
CREATE TEMP TABLE voronoi_import (
city text,
country text,
wkb bytea
);
""")

# Import the binary WKB data
with cur.copy("COPY voronoi_import (city, country, wkb) FROM STDIN") as copy:
with gzip.open(self.voronoi_file, "rb") as f:
while data := f.read(8192):
copy.write(data)

# Update main table with Voronoi geometries
cur.execute("""
UPDATE geocoding g
SET voronoi = ST_GeomFromWKB(v.wkb, 4326)
FROM voronoi_import v
WHERE g.city = v.city
AND g.country = v.country;
""")

# Clean up temporary table
cur.execute("DROP TABLE voronoi_import;")

def _create_spatial_index(self, cur):
"""Create a spatial index on the Voronoi polygons for efficient queries."""
cur.execute("""
CREATE INDEX geocoding_voronoi_idx
ON geocoding
USING GIST (voronoi);
""")
6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ version = "0.0.0"
description = "Given a geopoint, find the nearest city using PostGIS (reverse geocode)."
authors = [
{name = "Sam Woodcock", email = "[email protected]"},
{name = "Emir Fabio Cognigni", email = "redacted"},
{name = "Emir Fabio Cognigni"}
]
readme = "README.md"
license = {text = "GPL-3.0-only"}
Expand Down Expand Up @@ -40,6 +40,10 @@ build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["pg_nearest_city"]
include = [
"pg_nearest_city/data/cities_1000_simple.txt",
"pg_nearest_city/data/voronois.wkb",
]

[tool.ruff]
fix = true
Expand Down
24 changes: 24 additions & 0 deletions tests/test_geocoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Test geocoder initialization and data file loading."""

import os
from pg_nearest_city.reverse_geocoder import ReverseGeocoder, DbConfig


def get_test_config():
"""Get database configuration from environment variables or defaults."""
return DbConfig(
dbname=os.getenv("PGNEAREST_TEST_DB", "gisdb"),
user=os.getenv("PGNEAREST_TEST_USER", "postgres"),
password=os.getenv("PGNEAREST_TEST_PASSWORD", "postgres"),
host=os.getenv("PGNEAREST_TEST_HOST", "localhost"),
port=int(os.getenv("PGNEAREST_TEST_PORT", "5432")),
)


def test_geocoder_initialization():
"""Test that geocoder can initialize and find its data files."""
config = get_test_config()
geocoder = ReverseGeocoder(config)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the initialize method needs to be called here potentially, unless we put the call inside the init method, or perhaps a enter method to be used via context manager


assert geocoder.cities_file.exists(), "Cities file should exist"
assert geocoder.voronoi_file.exists(), "Voronoi file should exist"
Loading