Skip to content

Commit

Permalink
Make aggregation field types consistent (#66)
Browse files Browse the repository at this point in the history
* AH-1801 Change key dtypes to match
* AH-1801 update workflow file - Trying fixes from conda-incubator/setup-miniconda#274
* AH-1801 fix pylint errors
  • Loading branch information
wcarthur authored Jul 26, 2023
1 parent b9523b4 commit 2e6cd71
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 19 deletions.
16 changes: 10 additions & 6 deletions .github/workflows/hazimp-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@ jobs:
Hazimp:
name: Test HazImp
runs-on: ubuntu-latest
strategy:
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
defaults:
run:
shell: bash -l {0}

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Cache conda
uses: actions/cache@v2
env:
Expand All @@ -27,13 +30,14 @@ jobs:
path: ~/conda_pkgs_dir
key:
${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{hashFiles('hazimp.yml') }}

- name: Set up environment
uses: conda-incubator/setup-miniconda@v2.0.0
uses: conda-incubator/setup-miniconda@v2
with:
python-version: ${{ matrix.python-version }}
miniforge-variant: Mambaforge
channels: conda-forge,defaults
mamba-version: "*"
channel-priority: true
python-version: ${{ matrix.python-version }}
activate-environment: hazimp
environment-file: hazimp.yml
auto-activate-base: false
Expand Down
28 changes: 15 additions & 13 deletions hazimp/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import geopandas
import pandas as pd
import numpy as np

from hazimp.misc import check_data_type
LOGGER = logging.getLogger(__name__)

# List of possible drivers for output:
Expand Down Expand Up @@ -91,6 +91,19 @@ def choropleth(dframe, boundaries, impactcode, bcode, filename,

left, right = impactcode, bcode

shapes = geopandas.read_file(boundaries)
try:
dtype = check_data_type(shapes[right])
except KeyError:
LOGGER.exception(f"Aggregation boundaries have no attribute '{right}'")
sys.exit(1)

try:
dframe[left] = dframe[left].astype(dtype)
except ValueError:
LOGGER.exception(f"Cannot convert {left} to {dtype}")
sys.exit(1)

aggregate = dframe.groupby(left).agg(fields).reset_index()
aggregate.columns = [
'_'.join(columns).rstrip('_') for columns in aggregate.columns.values
Expand All @@ -111,18 +124,7 @@ def choropleth(dframe, boundaries, impactcode, bcode, filename,
else:
aggregate.set_index(left, inplace=True)

shapes = geopandas.read_file(boundaries)

try:
shapes['key'] = shapes[right].astype(np.int64)
except KeyError:
LOGGER.error(f"{boundaries} does not contain an attribute {right}")
sys.exit(1)
except OverflowError:
LOGGER.error(f"Unable to convert {right} values to ints")
sys.exit(1)

result = shapes.merge(aggregate, left_on='key', right_index=True)
result = shapes.merge(aggregate, left_on=right, right_index=True)

fileext = os.path.splitext(filename)[1].replace('.', '')
try:
Expand Down
15 changes: 15 additions & 0 deletions hazimp/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,3 +454,18 @@ def upload_to_s3_if_applicable(local_path, bucket_name, bucket_key,
if not ignore_exception:
LOGGER.exception("S3 write error: {0}".format(local_path))
raise e


def check_data_type(data):
    """
    Determine the data type of a given attribute from a sample of the data.

    :param data: Sample of the data
    :type data: `pd.Series` or `pd.DataFrame`
    :returns: the numpy ``dtype`` of the first element when it carries one
        (numeric/datetime data), otherwise the Python ``type`` of the first
        element (e.g. ``str`` for object-dtype string columns).
    """
    # Use positional access where available: label-based ``data[0]`` raises
    # KeyError on a Series whose index does not contain the label 0
    # (e.g. after filtering), which is not the intended behaviour here.
    try:
        first = data.iloc[0]
    except AttributeError:
        # Plain sequences (list, ndarray) have no .iloc; fall back to [0].
        first = data[0]

    try:
        dtype = first.dtype
    except AttributeError:
        # Object-dtype elements (e.g. str) carry no .dtype attribute.
        dtype = type(first)

    return dtype

0 comments on commit 2e6cd71

Please sign in to comment.