From 7bd916f1bd24dc6a34e52ff5a5b459315575ded9 Mon Sep 17 00:00:00 2001 From: Luka Renko <46861689+lukarenko@users.noreply.github.com> Date: Wed, 22 Dec 2021 22:24:59 +0100 Subject: [PATCH 01/15] Get overrides from GSheet to CSV --- requirements.txt | 2 ++ update.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/requirements.txt b/requirements.txt index c5c89c7b..9bf12d68 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,5 @@ pandas==1.3.* openpyxl==3.0.* requests==2.26.* bs4==0.0.1 +sheet2csv==1.0.6 + diff --git a/update.py b/update.py index 05e8eece..51da86ba 100644 --- a/update.py +++ b/update.py @@ -8,6 +8,11 @@ import glob import pandas as pd import subprocess +import sheet2csv + +GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"] +SHEET_OVERRIDES = "1gsIkUsvO-2_atHTsU9UcH2q69Js9PuvskTbtuY3eEWQ" +RANGE_OVERRIDES = "Overrides!A1:AA" type_map = { 'SPLOŠNA DEJAVNOST - SPLOŠNA AMBULANTA': 'gp', @@ -23,6 +28,16 @@ 'NE': 'n' } +def get_overrides(): + filename = "csv/overrides.csv" + print(f"Get overrides from GSheet to {filename}") + try: + sheet2csv.sheet2csv(id=SHEET_OVERRIDES, range=RANGE_OVERRIDES, api_key=GOOGLE_API_KEY, filename=filename) + except Exception as e: + print("Failed to import {}".format(filename)) + raise e + + def convert_to_csv(): doctors = [] for group in ["zdravniki", "zobozdravniki", "ginekologi"]: @@ -214,6 +229,7 @@ def download_zzzs_xlsx_files(): if __name__ == "__main__": + get_overrides() download_zzzs_xlsx_files() get_zzzs_api_data_by_category() get_zzzs_api_data_all() From 826bf81161eac050d22d3662a79536834730268c Mon Sep 17 00:00:00 2001 From: Luka Renko <46861689+lukarenko@users.noreply.github.com> Date: Wed, 22 Dec 2021 23:23:57 +0100 Subject: [PATCH 02/15] Initial join - does not work yet --- update.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/update.py b/update.py index 51da86ba..fe9ea597 100644 --- a/update.py +++ b/update.py @@ -37,6 +37,17 @@ def get_overrides(): print("Failed to import {}".format(filename)) raise e +def append_overrides(): + doctors = pd.read_csv('csv/doctors.csv', index_col=['doctor','type']) + overrides = pd.read_csv('csv/overrides.csv', index_col=['doctor','type']) + + print(doctors) + print(overrides) + + doctors.join(overrides, rsuffix='_override') + + print(doctors) + doctors.to_csv('csv/doctors-overrides.csv') def convert_to_csv(): doctors = [] @@ -230,6 +241,7 @@ def download_zzzs_xlsx_files(): if __name__ == "__main__": get_overrides() + append_overrides() download_zzzs_xlsx_files() get_zzzs_api_data_by_category() get_zzzs_api_data_all() From cfe86c4cf2e5ecbe76d0138dca83015653a7e5fe Mon Sep 17 00:00:00 2001 From: Luka Renko <46861689+lukarenko@users.noreply.github.com> Date: Thu, 23 Dec 2021 06:28:19 +0100 Subject: [PATCH 03/15] Join works, add override_ prefix --- update.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/update.py b/update.py index fe9ea597..a2420440 100644 --- a/update.py +++ b/update.py @@ -38,13 +38,15 @@ def get_overrides(): raise e def append_overrides(): + get_overrides() + doctors = pd.read_csv('csv/doctors.csv', index_col=['doctor','type']) - overrides = pd.read_csv('csv/overrides.csv', index_col=['doctor','type']) + overrides = pd.read_csv('csv/overrides.csv', index_col=['doctor','type']).add_prefix('override_') print(doctors) print(overrides) - doctors.join(overrides, rsuffix='_override') + doctors = doctors.join(overrides) print(doctors) doctors.to_csv('csv/doctors-overrides.csv') @@ -240,12 +242,11 @@ def download_zzzs_xlsx_files(): if __name__ == "__main__": - get_overrides() - append_overrides() download_zzzs_xlsx_files() get_zzzs_api_data_by_category() get_zzzs_api_data_all() convert_to_csv() + append_overrides() geocode_addresses() add_gurs_geodata() add_zzzs_api_data() From de8aa6100a585a27d27c84e2db978ed1c5a579f5 Mon Sep 17 00:00:00 2001 From: Luka Renko <46861689+lukarenko@users.noreply.github.com> Date: Thu, 23 Dec 2021 06:32:00 +0100 Subject: [PATCH 04/15] cleanup --- update.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/update.py b/update.py index a2420440..3303d47f 100644 --- a/update.py +++ b/update.py @@ -28,7 +28,7 @@ 'NE': 'n' } -def get_overrides(): +def append_overrides(): filename = "csv/overrides.csv" print(f"Get overrides from GSheet to {filename}") try: @@ -37,20 +37,15 @@ def get_overrides(): print("Failed to import {}".format(filename)) raise e -def append_overrides(): - get_overrides() - doctors = pd.read_csv('csv/doctors.csv', index_col=['doctor','type']) overrides = pd.read_csv('csv/overrides.csv', index_col=['doctor','type']).add_prefix('override_') - print(doctors) - print(overrides) - doctors = doctors.join(overrides) print(doctors) doctors.to_csv('csv/doctors-overrides.csv') + def convert_to_csv(): doctors = [] for group in ["zdravniki", "zobozdravniki", "ginekologi"]: From 7d4117483b680276bedaa3eee3618230f74e16ce Mon Sep 17 00:00:00 2001 From: Luka Renko <46861689+lukarenko@users.noreply.github.com> Date: Thu, 23 Dec 2021 14:52:41 +0100 Subject: [PATCH 05/15] Extract override addresses for geocoding --- update.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/update.py b/update.py index 3303d47f..e65fa67d 100644 --- a/update.py +++ b/update.py @@ -42,9 +42,11 @@ def append_overrides(): doctors = doctors.join(overrides) - print(doctors) doctors.to_csv('csv/doctors-overrides.csv') + addresses = overrides[['override_address', 'override_post']].reset_index(drop=True).dropna() + addresses.to_csv('gurs/addresses-overrides.csv') + def convert_to_csv(): doctors = [] From 53cfb8816d38d5c2512adec7c0c82ebae01482e2 Mon Sep 17 00:00:00 2001 From: Stefan Baebler Date: Fri, 24 Dec 2021 03:36:26 +0100 Subject: [PATCH 06/15] pass GOOGLE_API_KEY secret --- .github/workflows/update.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/update.yaml b/.github/workflows/update.yaml index d9697609..24b7c292 100644 --- a/.github/workflows/update.yaml +++ b/.github/workflows/update.yaml @@ -54,6 +54,8 @@ jobs: run: pip install -U -r requirements.txt - name: "Run update.py" + env: + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} run: | python update.py From 4cab7e1793a259f58b3a3664c732cd0ecb1ae54b Mon Sep 17 00:00:00 2001 From: Stefan Baebler Date: Fri, 24 Dec 2021 04:05:37 +0100 Subject: [PATCH 07/15] geocoded override addresses --- gurs/addresses-overrides-geocoded.csv | 23 +++++++++++++++++++++++ gurs/addresses-overrides.csv | 23 +++++++++++++++++++++++ update.py | 10 +++++++++- 3 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 gurs/addresses-overrides-geocoded.csv create mode 100644 gurs/addresses-overrides.csv diff --git a/gurs/addresses-overrides-geocoded.csv b/gurs/addresses-overrides-geocoded.csv new file mode 100644 index 00000000..eb931c69 --- /dev/null +++ b/gurs/addresses-overrides-geocoded.csv @@ -0,0 +1,23 @@ +override_post,override_address,lat,lon,street,streetAlt,housenumber,housenumberAppendix,city,cityAlt,municipalityPart,municipality,zipCode,zipName,statisticalRegion +1000 Ljubljana,Rakovniška ulica 4,46.03660,14.52361,Rakovniška ulica,,4,,Ljubljana,,Četrtna skupnost Rudnik,Ljubljana,1000,Ljubljana,Osrednjeslovenska +1000 Ljubljana,Čufarjeva ulica 11,46.05516,14.51021,Čufarjeva ulica,,11,,Ljubljana,,Četrtna skupnost Center,Ljubljana,1000,Ljubljana,Osrednjeslovenska +1218 Komenda,Glavarjeva cesta 61a,46.20453,14.53935,Glavarjeva cesta,,61,A,Komenda,,,Komenda,1218,Komenda,Osrednjeslovenska +1231 Ljubljana-Črnuče,Primožičeva ulica 2,46.10339,14.53177,Primožičeva ulica,,2,,Ljubljana,,Četrtna skupnost Črnuče,Ljubljana,1231,Ljubljana - Črnuče,Osrednjeslovenska +1292 Ig pri Ljubljani,Banija 4,45.95897,14.52720,Banija,,4,,Ig,,,Ig,1292,Ig,Osrednjeslovenska +1311 Turjak,Laporje 30,45.87088,14.61679,Laporje,,30,,Laporje,,,Velike Lašče,1311,Turjak,Osrednjeslovenska +1312 Videm - Dobrepolje,Videm 37 a,45.84819,14.69546,Videm,,37,A,Videm,,Videm,Dobrepolje,1312,Videm - Dobrepolje,Osrednjeslovenska +1354 Horjul,Slovenska cesta 17,46.02461,14.29536,Slovenska cesta,,17,,Horjul,,,Horjul,1354,Horjul,Osrednjeslovenska +1360 Vrhnika,Stara cesta 4a,45.96590,14.29733,Stara cesta,,4,A,Vrhnika,,Vrhnika Center,Vrhnika,1360,Vrhnika,Osrednjeslovenska +2000 Maribor,Kalohova ulica 18,46.55507,15.61747,Kalohova ulica,,18,,Maribor,,Studenci,Maribor,2000,Maribor,Podravska +2000 Maribor,Ljubljanska ulica 5,46.55289,15.64858,Ljubljanska ulica,,5,,Maribor,,Magdalena,Maribor,2000,Maribor,Podravska +2370 Dravograd,Trg 4. julija 9,46.58921,15.02233,Trg 4. julija,,9,,Dravograd,,Dravograd,Dravograd,2370,Dravograd,Koroška +3000 Celje,Gregorčičeva ulica 5,46.23224,15.26227,Gregorčičeva ulica,,5,,Celje,,Dolgo polje,Celje,3000,Celje,Savinjska +3272 Rimske Toplice,Aškerčeva cesta 4,46.12412,15.19462,Aškerčeva cesta,,4,,Rimske Toplice,,Rimske Toplice,Laško,3272,Rimske Toplice,Savinjska +3333 Ljubno ob Savinji,Foršt 28,46.34498,14.83276,Foršt,,28,,Ljubno ob Savinji,,,Ljubno,3333,Ljubno ob Savinji,Savinjska +4000 Kranj,Ljubljanska cesta 24A,46.22457,14.35459,Ljubljanska cesta,,24,A,Kranj,,Stražišče,Kranj,4000,Kranj,Gorenjska +4000 Kranj,Škofjeloška cesta 6,46.22819,14.35325,Škofjeloška cesta,,6,,Kranj,,Stražišče,Kranj,4000,Kranj,Gorenjska +4220 Škofja Loka,Stara cesta 10,46.16618,14.31281,Stara cesta,,10,,Škofja Loka,,Kamnitnik,Škofja Loka,4220,Škofja Loka,Gorenjska +4224 Gorenja vas,Trata 7,46.10233,14.14218,Trata,,7,,Gorenja vas,,Gorenja vas,Gorenja vas-Poljane,4224,Gorenja vas,Gorenjska +4226 Žiri,Trg svobode 9,46.04754,14.11159,Trg svobode,,9,,Žiri,,,Žiri,4226,Žiri,Gorenjska +4228 Železniki,Racovnik 29,46.22219,14.15786,Racovnik,,29,,Železniki,,Železniki,Železniki,4228,Železniki,Gorenjska +5292 Šempeter pri Gorici,Cesta prekomorskih brigad 25,45.93053,13.63792,Cesta Prekomorskih brigad,,25,,Šempeter pri Gorici,,,Šempeter-Vrtojba,5290,Šempeter pri Gorici,Goriška diff --git a/gurs/addresses-overrides.csv b/gurs/addresses-overrides.csv new file mode 100644 index 00000000..6409d47b --- /dev/null +++ b/gurs/addresses-overrides.csv @@ -0,0 +1,23 @@ +override_post,override_address +1000 Ljubljana,Rakovniška ulica 4 +1000 Ljubljana,Čufarjeva ulica 11 +1218 Komenda,Glavarjeva cesta 61a +1231 Ljubljana-Črnuče,Primožičeva ulica 2 +1292 Ig pri Ljubljani,Banija 4 +1311 Turjak,Laporje 30 +1312 Videm - Dobrepolje,Videm 37 a +1354 Horjul,Slovenska cesta 17 +1360 Vrhnika,Stara cesta 4a +2000 Maribor,Kalohova ulica 18 +2000 Maribor,Ljubljanska ulica 5 +2370 Dravograd,Trg 4. julija 9 +3000 Celje,Gregorčičeva ulica 5 +3272 Rimske Toplice,Aškerčeva cesta 4 +3333 Ljubno ob Savinji,Foršt 28 +4000 Kranj,Ljubljanska cesta 24A +4000 Kranj,Škofjeloška cesta 6 +4220 Škofja Loka,Stara cesta 10 +4224 Gorenja vas,Trata 7 +4226 Žiri,Trg svobode 9 +4228 Železniki,Racovnik 29 +5292 Šempeter pri Gorici,Cesta prekomorskih brigad 25 diff --git a/update.py b/update.py index e65fa67d..4e4083c6 100644 --- a/update.py +++ b/update.py @@ -44,9 +44,17 @@ def append_overrides(): doctors.to_csv('csv/doctors-overrides.csv') - addresses = overrides[['override_address', 'override_post']].reset_index(drop=True).dropna() + addresses = overrides[['override_post', 'override_address']].reset_index(drop=True).dropna() + addresses.sort_values(by=['override_post', 'override_address'], inplace=True) + addresses.drop_duplicates(inplace=True) + addresses.set_index(['override_post', 'override_address'], inplace=True) addresses.to_csv('gurs/addresses-overrides.csv') + try: + subprocess.run(["geocodecsv", "-in", "gurs/addresses-overrides.csv", "-out", "gurs/addresses-overrides-geocoded.csv", "-zipCol", "1", "-addressCol", "2", "-appendAll"]) + except FileNotFoundError: + print("geocodecsv not found, skipping.") + def convert_to_csv(): doctors = [] From 295a97ccbf7742563bc2e590af0edadb4ecd1952 Mon Sep 17 00:00:00 2001 From: Luka Renko <46861689+lukarenko@users.noreply.github.com> Date: Fri, 24 Dec 2021 16:10:39 +0100 Subject: [PATCH 08/15] Use _override postfix only for fields that override standard doctors.csv fields --- gurs/addresses-overrides-geocoded.csv | 2 +- gurs/addresses-overrides.csv | 2 +- update.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gurs/addresses-overrides-geocoded.csv b/gurs/addresses-overrides-geocoded.csv index eb931c69..4503e349 100644 --- a/gurs/addresses-overrides-geocoded.csv +++ b/gurs/addresses-overrides-geocoded.csv @@ -1,4 +1,4 @@ -override_post,override_address,lat,lon,street,streetAlt,housenumber,housenumberAppendix,city,cityAlt,municipalityPart,municipality,zipCode,zipName,statisticalRegion +post,address,lat,lon,street,streetAlt,housenumber,housenumberAppendix,city,cityAlt,municipalityPart,municipality,zipCode,zipName,statisticalRegion 1000 Ljubljana,Rakovniška ulica 4,46.03660,14.52361,Rakovniška ulica,,4,,Ljubljana,,Četrtna skupnost Rudnik,Ljubljana,1000,Ljubljana,Osrednjeslovenska 1000 Ljubljana,Čufarjeva ulica 11,46.05516,14.51021,Čufarjeva ulica,,11,,Ljubljana,,Četrtna skupnost Center,Ljubljana,1000,Ljubljana,Osrednjeslovenska 1218 Komenda,Glavarjeva cesta 61a,46.20453,14.53935,Glavarjeva cesta,,61,A,Komenda,,,Komenda,1218,Komenda,Osrednjeslovenska diff --git a/gurs/addresses-overrides.csv b/gurs/addresses-overrides.csv index 6409d47b..bf85ef18 100644 --- a/gurs/addresses-overrides.csv +++ b/gurs/addresses-overrides.csv @@ -1,4 +1,4 @@ -override_post,override_address +post,address 1000 Ljubljana,Rakovniška ulica 4 1000 Ljubljana,Čufarjeva ulica 11 1218 Komenda,Glavarjeva cesta 61a diff --git a/update.py b/update.py index 4e4083c6..e5926126 100644 --- a/update.py +++ b/update.py @@ -38,16 +38,16 @@ def append_overrides(): raise e doctors = pd.read_csv('csv/doctors.csv', index_col=['doctor','type']) - overrides = pd.read_csv('csv/overrides.csv', index_col=['doctor','type']).add_prefix('override_') + overrides = pd.read_csv('csv/overrides.csv', index_col=['doctor','type']) doctors = doctors.join(overrides) doctors.to_csv('csv/doctors-overrides.csv') - addresses = overrides[['override_post', 'override_address']].reset_index(drop=True).dropna() - addresses.sort_values(by=['override_post', 'override_address'], inplace=True) + addresses = overrides[['post', 'address']].reset_index(drop=True).dropna() + addresses.sort_values(by=['post', 'address'], inplace=True) addresses.drop_duplicates(inplace=True) - addresses.set_index(['override_post', 'override_address'], inplace=True) + addresses.set_index(['post', 'address'], inplace=True) addresses.to_csv('gurs/addresses-overrides.csv') try: From d727f36539ff17413aaafd445edf8d4de4be3aac Mon Sep 17 00:00:00 2001 From: Luka Renko <46861689+lukarenko@users.noreply.github.com> Date: Fri, 24 Dec 2021 16:48:56 +0100 Subject: [PATCH 09/15] remove some addresses not needed --- gurs/addresses-overrides-geocoded.csv | 2 -- gurs/addresses-overrides.csv | 2 -- 2 files changed, 4 deletions(-) diff --git a/gurs/addresses-overrides-geocoded.csv b/gurs/addresses-overrides-geocoded.csv index 4503e349..42fd3011 100644 --- a/gurs/addresses-overrides-geocoded.csv +++ b/gurs/addresses-overrides-geocoded.csv @@ -9,8 +9,6 @@ post,address,lat,lon,street,streetAlt,housenumber,housenumberAppendix,city,cityA 1354 Horjul,Slovenska cesta 17,46.02461,14.29536,Slovenska cesta,,17,,Horjul,,,Horjul,1354,Horjul,Osrednjeslovenska 1360 Vrhnika,Stara cesta 4a,45.96590,14.29733,Stara cesta,,4,A,Vrhnika,,Vrhnika Center,Vrhnika,1360,Vrhnika,Osrednjeslovenska 2000 Maribor,Kalohova ulica 18,46.55507,15.61747,Kalohova ulica,,18,,Maribor,,Studenci,Maribor,2000,Maribor,Podravska -2000 Maribor,Ljubljanska ulica 5,46.55289,15.64858,Ljubljanska ulica,,5,,Maribor,,Magdalena,Maribor,2000,Maribor,Podravska -2370 Dravograd,Trg 4. julija 9,46.58921,15.02233,Trg 4. julija,,9,,Dravograd,,Dravograd,Dravograd,2370,Dravograd,Koroška 3000 Celje,Gregorčičeva ulica 5,46.23224,15.26227,Gregorčičeva ulica,,5,,Celje,,Dolgo polje,Celje,3000,Celje,Savinjska 3272 Rimske Toplice,Aškerčeva cesta 4,46.12412,15.19462,Aškerčeva cesta,,4,,Rimske Toplice,,Rimske Toplice,Laško,3272,Rimske Toplice,Savinjska 3333 Ljubno ob Savinji,Foršt 28,46.34498,14.83276,Foršt,,28,,Ljubno ob Savinji,,,Ljubno,3333,Ljubno ob Savinji,Savinjska diff --git a/gurs/addresses-overrides.csv b/gurs/addresses-overrides.csv index bf85ef18..842d5eb9 100644 --- a/gurs/addresses-overrides.csv +++ b/gurs/addresses-overrides.csv @@ -9,8 +9,6 @@ post,address 1354 Horjul,Slovenska cesta 17 1360 Vrhnika,Stara cesta 4a 2000 Maribor,Kalohova ulica 18 -2000 Maribor,Ljubljanska ulica 5 -2370 Dravograd,Trg 4. julija 9 3000 Celje,Gregorčičeva ulica 5 3272 Rimske Toplice,Aškerčeva cesta 4 3333 Ljubno ob Savinji,Foršt 28 From affa1a40d07fab501f893ffda35b8e84a50c490f Mon Sep 17 00:00:00 2001 From: Luka Renko <46861689+lukarenko@users.noreply.github.com> Date: Fri, 24 Dec 2021 22:15:50 +0100 Subject: [PATCH 10/15] Add id_inst to index for overrides match exact doctor by institution (no duplicates) --- update.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/update.py b/update.py index 86ef8a90..b6470c9b 100644 --- a/update.py +++ b/update.py @@ -37,8 +37,8 @@ def append_overrides(): print("Failed to import {}".format(filename)) raise e - doctors = pd.read_csv('csv/doctors.csv', index_col=['doctor','type']) - overrides = pd.read_csv('csv/overrides.csv', index_col=['doctor','type']) + doctors = pd.read_csv('csv/doctors.csv', index_col=['doctor','type','id_inst']) + overrides = pd.read_csv('csv/overrides.csv', index_col=['doctor','type','id_inst']) doctors = doctors.join(overrides) From f9556487dbdf6053c773bcb12122ecae6c7f48a2 Mon Sep 17 00:00:00 2001 From: Luka Renko <46861689+lukarenko@users.noreply.github.com> Date: Fri, 24 Dec 2021 22:16:20 +0100 Subject: [PATCH 11/15] geocoded address --- gurs/addresses-overrides-geocoded.csv | 1 + gurs/addresses-overrides.csv | 1 + 2 files changed, 2 insertions(+) diff --git a/gurs/addresses-overrides-geocoded.csv b/gurs/addresses-overrides-geocoded.csv index 42fd3011..ce3c9642 100644 --- a/gurs/addresses-overrides-geocoded.csv +++ b/gurs/addresses-overrides-geocoded.csv @@ -9,6 +9,7 @@ post,address,lat,lon,street,streetAlt,housenumber,housenumberAppendix,city,cityA 1354 Horjul,Slovenska cesta 17,46.02461,14.29536,Slovenska cesta,,17,,Horjul,,,Horjul,1354,Horjul,Osrednjeslovenska 1360 Vrhnika,Stara cesta 4a,45.96590,14.29733,Stara cesta,,4,A,Vrhnika,,Vrhnika Center,Vrhnika,1360,Vrhnika,Osrednjeslovenska 2000 Maribor,Kalohova ulica 18,46.55507,15.61747,Kalohova ulica,,18,,Maribor,,Studenci,Maribor,2000,Maribor,Podravska +2370 Dravograd,Trg 4. julija 9,46.58921,15.02233,Trg 4. julija,,9,,Dravograd,,Dravograd,Dravograd,2370,Dravograd,Koroška 3000 Celje,Gregorčičeva ulica 5,46.23224,15.26227,Gregorčičeva ulica,,5,,Celje,,Dolgo polje,Celje,3000,Celje,Savinjska 3272 Rimske Toplice,Aškerčeva cesta 4,46.12412,15.19462,Aškerčeva cesta,,4,,Rimske Toplice,,Rimske Toplice,Laško,3272,Rimske Toplice,Savinjska 3333 Ljubno ob Savinji,Foršt 28,46.34498,14.83276,Foršt,,28,,Ljubno ob Savinji,,,Ljubno,3333,Ljubno ob Savinji,Savinjska diff --git a/gurs/addresses-overrides.csv b/gurs/addresses-overrides.csv index 842d5eb9..65a01211 100644 --- a/gurs/addresses-overrides.csv +++ b/gurs/addresses-overrides.csv @@ -9,6 +9,7 @@ post,address 1354 Horjul,Slovenska cesta 17 1360 Vrhnika,Stara cesta 4a 2000 Maribor,Kalohova ulica 18 +2370 Dravograd,Trg 4. julija 9 3000 Celje,Gregorčičeva ulica 5 3272 Rimske Toplice,Aškerčeva cesta 4 3333 Ljubno ob Savinji,Foršt 28 From 7d274e537efe2fb12f08e702e8597581b89c5dc0 Mon Sep 17 00:00:00 2001 From: Luka Renko <46861689+lukarenko@users.noreply.github.com> Date: Fri, 24 Dec 2021 23:30:41 +0100 Subject: [PATCH 12/15] refac geocoding --- update.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/update.py b/update.py index b6470c9b..b4dea35d 100644 --- a/update.py +++ b/update.py @@ -44,16 +44,6 @@ def append_overrides(): doctors.to_csv('csv/doctors-overrides.csv') - addresses = overrides[['post', 'address']].reset_index(drop=True).dropna() - addresses.sort_values(by=['post', 'address'], inplace=True) - addresses.drop_duplicates(inplace=True) - addresses.set_index(['post', 'address'], inplace=True) - addresses.to_csv('gurs/addresses-overrides.csv') - - try: - subprocess.run(["geocodecsv", "-in", "gurs/addresses-overrides.csv", "-out", "gurs/addresses-overrides-geocoded.csv", "-zipCol", "1", "-addressCol", "2", "-appendAll"]) - except FileNotFoundError: - print("geocodecsv not found, skipping.") def convert_to_csv(zzzsid_map): @@ -73,7 +63,7 @@ def convert_to_csv(zzzsid_map): df['city'] = df['city'].str.strip() df['unit'] = df['unit'].str.strip() df['zzzsid'] = df['name'].map(zzzsid_map) - df = df.reindex(['doctor', 'type', 'accepts', 'availability', 'load', 'name', 'address', 'city', 'unit', 'zzzsid'], axis='columns') + df = df.reindex(['doctor', 'type', 'zzzsid', 'accepts', 'availability', 'load', 'name', 'address', 'city', 'unit'], axis='columns') doctors.append(df) doctors = pd.concat(doctors, ignore_index=True) @@ -92,10 +82,9 @@ def convert_to_csv(zzzsid_map): # reindex: doctors.set_index(['doctor','type','id_inst'], inplace=True) - - print(doctors) doctors.to_csv('csv/doctors.csv') + def geocode_addresses(): xlsxAddresses = pd.read_csv('csv/dict-institutions.csv', usecols=['city','address']).rename(columns={'city':'cityZZZS','address':'addressZZZS'}) apiAddresses = pd.read_csv('zzzs/institutions-all.csv', usecols=['posta','naslov']).rename(columns={'posta':'cityZZZS','naslov':'addressZZZS'}) @@ -114,6 +103,17 @@ def geocode_addresses(): except FileNotFoundError: print("geocodecsv not found, skipping.") + addresses = pd.read_csv('csv/doctors-overrides.csv', usecols=['post', 'address']).dropna() + addresses.sort_values(by=['post', 'address'], inplace=True) + addresses.drop_duplicates(inplace=True) + addresses.set_index(['post', 'address'], inplace=True) + addresses.to_csv('gurs/addresses-overrides.csv') + + try: + subprocess.run(["geocodecsv", "-in", "gurs/addresses-overrides.csv", "-out", "gurs/addresses-overrides-geocoded.csv", "-zipCol", "1", "-addressCol", "2", "-appendAll"]) + except FileNotFoundError: + print("geocodecsv not found, skipping.") + def add_gurs_geodata(): institutions = pd.read_csv('csv/dict-institutions.csv', index_col=['id_inst']) From 832e8bb17484fbcc7ace6f19ddb80a94d4b66703 Mon Sep 17 00:00:00 2001 From: Luka Renko <46861689+lukarenko@users.noreply.github.com> Date: Fri, 24 Dec 2021 23:48:08 +0100 Subject: [PATCH 13/15] add geocoded data to doctors-overrides.csv --- update.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/update.py b/update.py index b4dea35d..6238a067 100644 --- a/update.py +++ b/update.py @@ -103,10 +103,10 @@ def geocode_addresses(): except FileNotFoundError: print("geocodecsv not found, skipping.") - addresses = pd.read_csv('csv/doctors-overrides.csv', usecols=['post', 'address']).dropna() - addresses.sort_values(by=['post', 'address'], inplace=True) + addresses = pd.read_csv('csv/doctors-overrides.csv', usecols=['post', 'address']).rename(columns={'post':'postOver','address':'addressOver'}).dropna() + addresses.sort_values(by=['postOver', 'addressOver'], inplace=True) addresses.drop_duplicates(inplace=True) - addresses.set_index(['post', 'address'], inplace=True) + addresses.set_index(['postOver', 'addressOver'], inplace=True) addresses.to_csv('gurs/addresses-overrides.csv') try: @@ -124,9 +124,18 @@ def add_gurs_geodata(): institutions = institutions.merge(dfgeo[['address','post','city','municipalityPart','municipality','lat','lon']], how = 'left', left_on = ['city','address'], right_index=True, suffixes=['_zzzs', '']) institutions.drop(['address_zzzs','city_zzzs'], axis='columns', inplace=True) - institutions.to_csv('csv/dict-institutions.csv') + doctors = pd.read_csv('csv/doctors-overrides.csv', index_col=['doctor', 'type', 'id_inst']) + dfgeo=pd.read_csv('gurs/addresses-overrides-geocoded.csv', index_col=['postOver','addressOver'], dtype=str) + dfgeo.fillna('', inplace=True) + dfgeo['address'] = dfgeo.apply(lambda x: f'{x.street} {x.housenumber}{x.housenumberAppendix}', axis = 1) + dfgeo['post'] = dfgeo.apply(lambda x: f'{x.zipCode} {x.zipName}', axis = 1) + + doctors = doctors.merge(dfgeo[['address','post','city','municipalityPart','municipality','lat','lon']], how = 'left', left_on = ['post','address'], right_index=True, suffixes=['Over', '']) + doctors.drop(['addressOver','postOver'], axis='columns', inplace=True) + doctors.to_csv('csv/doctors-overrides.csv') + def get_zzzs_api_data_all(): # https://api.zzzs.si/covid-sledilnik/0 ... 1600 by pages (of 100 records) From 4c6625a062d0a43b8a4699b5e11c4c4752e6cd29 Mon Sep 17 00:00:00 2001 From: Luka Renko <46861689+lukarenko@users.noreply.github.com> Date: Fri, 24 Dec 2021 23:52:06 +0100 Subject: [PATCH 14/15] minor cleanups --- update.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/update.py b/update.py index 6238a067..a7c12861 100644 --- a/update.py +++ b/update.py @@ -28,24 +28,6 @@ 'NE': 'n' } -def append_overrides(): - filename = "csv/overrides.csv" - print(f"Get overrides from GSheet to {filename}") - try: - sheet2csv.sheet2csv(id=SHEET_OVERRIDES, range=RANGE_OVERRIDES, api_key=GOOGLE_API_KEY, filename=filename) - except Exception as e: - print("Failed to import {}".format(filename)) - raise e - - doctors = pd.read_csv('csv/doctors.csv', index_col=['doctor','type','id_inst']) - overrides = pd.read_csv('csv/overrides.csv', index_col=['doctor','type','id_inst']) - - doctors = doctors.join(overrides) - - doctors.to_csv('csv/doctors-overrides.csv') - - - def convert_to_csv(zzzsid_map): doctors = [] for group in ["zdravniki", "zobozdravniki", "ginekologi"]: @@ -85,6 +67,23 @@ def convert_to_csv(zzzsid_map): doctors.to_csv('csv/doctors.csv') +def append_overrides(): + filename = "csv/overrides.csv" + print(f"Get overrides from GSheet to {filename}") + try: + sheet2csv.sheet2csv(id=SHEET_OVERRIDES, range=RANGE_OVERRIDES, api_key=GOOGLE_API_KEY, filename=filename) + except Exception as e: + print("Failed to import {}".format(filename)) + raise e + + doctors = pd.read_csv('csv/doctors.csv', index_col=['doctor','type','id_inst']) + overrides = pd.read_csv('csv/overrides.csv', index_col=['doctor','type','id_inst']) + + doctors = doctors.join(overrides) + + doctors.to_csv('csv/doctors-overrides.csv') + + def geocode_addresses(): xlsxAddresses = pd.read_csv('csv/dict-institutions.csv', usecols=['city','address']).rename(columns={'city':'cityZZZS','address':'addressZZZS'}) apiAddresses = pd.read_csv('zzzs/institutions-all.csv', usecols=['posta','naslov']).rename(columns={'posta':'cityZZZS','naslov':'addressZZZS'}) @@ -164,6 +163,7 @@ def get_zzzs_api_data_all(): df.sort_values(by=[*df], inplace=True) # sort by all columns df.to_csv('zzzs/institutions-all.csv') + def get_zzzs_api_data_by_category(): # keys for ZZZS API calls, add as needed, see https://www.zzzs.si/zzzs-api/izvajalci-zdravstvenih-storitev/po-dejavnosti/ zzzsApiKeys=[ @@ -282,8 +282,8 @@ def download_zzzs_xlsx_files(): if __name__ == "__main__": download_zzzs_xlsx_files() - zzzsid_map = get_zzzs_api_data_by_category() get_zzzs_api_data_all() + zzzsid_map = get_zzzs_api_data_by_category() convert_to_csv(zzzsid_map) append_overrides() geocode_addresses() From 8fdf918546d19b312fc99830d6b94cee9936f829 Mon Sep 17 00:00:00 2001 From: Luka Renko <46861689+lukarenko@users.noreply.github.com> Date: Fri, 24 Dec 2021 23:53:23 +0100 Subject: [PATCH 15/15] header changes --- gurs/addresses-overrides-geocoded.csv | 2 +- gurs/addresses-overrides.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gurs/addresses-overrides-geocoded.csv b/gurs/addresses-overrides-geocoded.csv index ce3c9642..60277297 100644 --- a/gurs/addresses-overrides-geocoded.csv +++ b/gurs/addresses-overrides-geocoded.csv @@ -1,4 +1,4 @@ -post,address,lat,lon,street,streetAlt,housenumber,housenumberAppendix,city,cityAlt,municipalityPart,municipality,zipCode,zipName,statisticalRegion +postOver,addressOver,lat,lon,street,streetAlt,housenumber,housenumberAppendix,city,cityAlt,municipalityPart,municipality,zipCode,zipName,statisticalRegion 1000 Ljubljana,Rakovniška ulica 4,46.03660,14.52361,Rakovniška ulica,,4,,Ljubljana,,Četrtna skupnost Rudnik,Ljubljana,1000,Ljubljana,Osrednjeslovenska 1000 Ljubljana,Čufarjeva ulica 11,46.05516,14.51021,Čufarjeva ulica,,11,,Ljubljana,,Četrtna skupnost Center,Ljubljana,1000,Ljubljana,Osrednjeslovenska 1218 Komenda,Glavarjeva cesta 61a,46.20453,14.53935,Glavarjeva cesta,,61,A,Komenda,,,Komenda,1218,Komenda,Osrednjeslovenska diff --git a/gurs/addresses-overrides.csv b/gurs/addresses-overrides.csv index 65a01211..da908743 100644 --- a/gurs/addresses-overrides.csv +++ b/gurs/addresses-overrides.csv @@ -1,4 +1,4 @@ -post,address +postOver,addressOver 1000 Ljubljana,Rakovniška ulica 4 1000 Ljubljana,Čufarjeva ulica 11 1218 Komenda,Glavarjeva cesta 61a