Skip to content

Commit

Permalink
Bugfix
Browse files Browse the repository at this point in the history
  • Loading branch information
tgrandje committed May 7, 2024
1 parent 5d34e23 commit bbd4b11
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 13 deletions.
25 changes: 16 additions & 9 deletions french_cities/city_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,10 +250,12 @@ def find_city(
# cities (using dep & city) using fuzzy matching
ix = addresses[addresses["candidat_0"].isnull()].index
if len(ix) > 0:
missing = addresses.loc[ix, [dep, "city_cleaned"]]
missing = addresses.loc[ix, [dep, "city_cleaned"]].rename(
{dep: "#dep#"}, axis=1
)
missing = _find_from_fuzzymatch_cities_names(
year, missing, "candidat_missing"
)
).rename({"#dep#": dep}, axis=1)
addresses = addresses.merge(
missing, on=[dep, "city_cleaned"], how="left"
)
Expand Down Expand Up @@ -319,6 +321,7 @@ def list_map(df, columns):
session=session,
rename_candidat=f"candidat_{k+1}",
addresses=addresses,
dep=dep,
)

if type_ban_search == "municipality":
Expand Down Expand Up @@ -348,6 +351,7 @@ def list_map(df, columns):
session=session,
rename_candidat=f"candidat_{k+1}",
addresses=addresses,
dep=dep,
)

# Proceed in two steps to keep best result (in case there are results from
Expand Down Expand Up @@ -555,8 +559,8 @@ def _find_from_fuzzymatch_cities_names(
df = df.reset_index(drop=False)

results = []
for dep in look_for["dep"].unique():
ix1 = look_for[look_for.dep == dep].index
for dep in look_for["#dep#"].unique():
ix1 = look_for[look_for["#dep#"] == dep].index
ix2 = df[df.dep == dep].index
match_ = pd.DataFrame(
cdist(
Expand All @@ -567,15 +571,14 @@ def _find_from_fuzzymatch_cities_names(
),
index=ix1,
columns=df.loc[ix2, "CODE"],
# index=look_for.loc[ix1, "city_cleaned"],
# columns=df.loc[ix2, "TITLE_SHORT"],
).replace(0, np.nan)
# print(match_)

try:
results.append(
pd.Series(
match_.dropna(how="all", axis=1).idxmax(axis=1),
match_.dropna(how="all", axis=1)
.dropna(how="all")
.idxmax(axis=1),
index=ix1,
)
)
Expand Down Expand Up @@ -867,6 +870,7 @@ def _filter_BAN_results(
session: Session,
rename_candidat: str,
addresses: pd.DataFrame,
dep: str = "dep",
fuzzymatch_threshold: int = 80,
ban_score_threshold_city_known: float = 0.6,
ban_score_threshold_city_unknown: float = 0.4,
Expand All @@ -890,6 +894,9 @@ def _filter_BAN_results(
Column to rename the results to.
addresses : pd.DataFrame
Full DataFrame of address to store the kept results into.
dep : str, optional
Field (column) containing the department values. Set to False if
not available. The default is "dep".
fuzzymatch_threshold : int, optional
The fuzzy match score threshold (on city labels) to keep the results.
Default is 80.
Expand All @@ -910,7 +917,7 @@ def _filter_BAN_results(
results_api = find_departements(
results_api, "result_citycode", "result_dep", "insee", session
)
ix = results_api[results_api.dep == results_api.result_dep].index
ix = results_api[results_api[dep] == results_api.result_dep].index
results_api = results_api.loc[ix]

if results_api.empty:
Expand Down
2 changes: 1 addition & 1 deletion french_cities/departement_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ def find_departements_from_names(
process.extractOne(
x,
candidates_keys,
scorer=fuzz.token_set_ratio,
scorer=fuzz.ratio,
score_cutoff=80,
)[0]
]
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "french-cities"
version = "0.3.2"
version = "0.3.3"
description = "Toolbox on french cities: set vintage, find departments, find cities..."
authors = ["thomas.grandjean <[email protected]>"]
license = "GPL-3.0-or-later"
Expand Down
11 changes: 9 additions & 2 deletions tests/test_departement_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,15 @@

input_df2 = pd.DataFrame(
{
"deps": ["Corse sud", "Alpe de Haute-Provence", "Aisne", "Ain"],
"codes": ["2A", "04", "02", "01"],
"deps": [
"Charente-Maritime",
"Seine-et-Marne",
"Corse sud",
"Alpe de Haute-Provence",
"Aisne",
"Ain",
],
"codes": ["17", "77", "2A", "04", "02", "01"],
}
)

Expand Down

0 comments on commit bbd4b11

Please sign in to comment.