diff --git a/.github/workflows/build_pipeline.yml b/.github/workflows/build_pipeline.yml index 02ae4a95..8f857cf2 100644 --- a/.github/workflows/build_pipeline.yml +++ b/.github/workflows/build_pipeline.yml @@ -67,7 +67,7 @@ jobs: aws s3 cp app.zip "s3://$AWS_S3_CODE_BUCKET/$repo_slug.zip" - name: Send build success notification if: success() - uses: rtCamp/action-slack-notify@v2.0.0 + uses: rtCamp/action-slack-notify@v2.2.0 env: SLACK_MESSAGE: ${{ github.repository }} build ${{ github.run_number }} launched by ${{ github.actor }} has succeeded SLACK_TITLE: Build Success @@ -77,7 +77,7 @@ jobs: SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - name: Send build failure notification if: failure() - uses: rtCamp/action-slack-notify@v2.0.0 + uses: rtCamp/action-slack-notify@v2.2.0 env: SLACK_COLOR: '#FF0000' SLACK_MESSAGE: ${{ github.repository }} build ${{ github.run_number }} launched by ${{ github.actor }} has failed diff --git a/.github/workflows/daily-scheduled-ci.yml b/.github/workflows/daily-scheduled-ci.yml index 910a1854..2427110d 100644 --- a/.github/workflows/daily-scheduled-ci.yml +++ b/.github/workflows/daily-scheduled-ci.yml @@ -45,7 +45,7 @@ jobs: - name: Send build success notification if: success() - uses: rtCamp/action-slack-notify@v2.0.0 + uses: rtCamp/action-slack-notify@v2.2.0 env: SLACK_MESSAGE: ${{ github.repository }} Daily scheduled CI Build ${{ github.run_number }} has succeeded SLACK_TITLE: Daily Scheduled CI Build Success @@ -56,7 +56,7 @@ jobs: - name: Send build failure notification if: failure() - uses: rtCamp/action-slack-notify@v2.0.0 + uses: rtCamp/action-slack-notify@v2.2.0 env: SLACK_COLOR: '#FF0000' SLACK_LINK_NAMES: true diff --git a/README.md b/README.md index 1fe607dc..fb41a371 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ ## Overview -This package provides tools to represent and work with a multi-modal transport network with public transport (PT) +GeNet provides tools to represent and work with a multi-modal transport 
network with public transport (PT) services. It is based on [MATSim's](https://www.matsim.org/) representation of such networks. The underlying network available to PT services (roads, railways, but also ferry/flight connections) uses a `networkx.MultiDiGraph` with additional methods for `'links'` which are unique in `genet.Network` (`networkx.MultiDiGraph` accepts multiple diff --git a/genet/schedule_elements.py b/genet/schedule_elements.py index 60871801..2d9bd634 100644 --- a/genet/schedule_elements.py +++ b/genet/schedule_elements.py @@ -3,11 +3,12 @@ import json import logging import os +import math import pkgutil from abc import abstractmethod from collections import defaultdict from copy import deepcopy -from datetime import datetime +from datetime import datetime, timedelta from typing import Union, Dict, List, Set, Tuple import dictdiffer @@ -2948,6 +2949,90 @@ def remove_unused_stops(self): if stops_to_remove: self.remove_stops(stops_to_remove) + def has_trips_with_zero_headways(self): + """ + Checks whether any trips have zero headways and are thus deemed duplicates + :return: bool, True if the schedule contains trips with zero headways + """ + trip_headways_df = self.trips_headways() + zero_headways = trip_headways_df[(trip_headways_df['headway_mins'] == 0)] + return not bool(zero_headways.empty) + + def fix_trips_with_zero_headways(self): + """ + Deletes trips that have zero headways and thus deemed duplicates + :return: + """ + trip_headways_df = self.trips_headways() + zero_headways = trip_headways_df[(trip_headways_df['headway_mins'] == 0)] + + if not zero_headways.empty: + logging.info(f"Found {len(zero_headways)} trips with zero headways. " + f"{len(set(zero_headways['route_id']))} out of {len(set(trip_headways_df['route_id']))} " + f"routes and {len(set(zero_headways['service_id']))} out of " + f"{len(set(trip_headways_df['service_id']))} services are affected. 
" + "These will now be dropped as though they are duplicates of other trips, " + "thus resulting in zero headway between them") + new_trips = trip_headways_df[trip_headways_df['headway_mins'] != 0].drop(['headway_mins', 'headway'], + axis=1) + new_trips_for_affected_routes = new_trips.loc[new_trips['route_id'].isin(set(zero_headways['route_id'])), :] + self.set_trips_dataframe(new_trips_for_affected_routes.copy()) + self.generate_vehicles(overwrite=True) + # check + _trip_headways_df = self.trips_headways() + _zero_headways = _trip_headways_df[(_trip_headways_df['headway_mins'] == 0)] + logging.info(f"Checks after alterations result in {len(_zero_headways)} trips with zero headway") + else: + logging.info("No trips with zero headway found. Nothing to do.") + + def has_infinite_speeds(self): + pt_speeds = self.speed_geodataframe() + pt_speeds_inf = pt_speeds[(pt_speeds['speed'] == math.inf)] + return not bool(pt_speeds_inf.empty) + + def fix_infinite_speeds(self): + df_speeds = self.speed_geodataframe() + df_speeds_inf = df_speeds[(df_speeds['speed'] == math.inf)] + if not df_speeds_inf.empty: + affected_routes = set(df_speeds_inf['route_id']) + logging.info(f"Found {len(affected_routes)} routes with infinite speeds. " + f"{len(set(df_speeds_inf['service_id']))} out of {len(set(df_speeds['service_id']))} " + "services are affected. 
" + "These will now be dropped as though they are duplicates of other trips, " + "thus resulting in zero headway between them") + new_route_attributes_dict = {} + for route_id in affected_routes: + df_route_speeds = df_speeds[df_speeds['route_id'] == route_id] + df_route_speeds['length'] = [1.3 * x.length for x in df_route_speeds['geometry']] + + old_arrival_offsets = self.route(route_id).__dict__['arrival_offsets'] + old_departure_offsets = self.route(route_id).__dict__['departure_offsets'] + updated_arrival_offsets = ['00:00:00'] + updated_departure_offsets = ['00:00:00'] + + avg_speed = df_route_speeds[df_route_speeds['speed'] != math.inf]['speed'].mean() + distances = df_route_speeds['length'].to_list() + + for i in range(1, len(old_arrival_offsets)): + # if the offset is the same as previous (i.e. GTFS error), + # OR if the previous offset got infilled with a value bigger than the current offset + if datetime.strptime(updated_departure_offsets[i - 1], '%H:%M:%S') >= datetime.strptime( + old_arrival_offsets[i], '%H:%M:%S'): + time = round(distances[i - 1] / avg_speed, 0) + previous_offset = datetime.strptime(updated_departure_offsets[i - 1], '%H:%M:%S') + current_offset = previous_offset + timedelta(seconds=time) + str_current_offset = datetime.strftime(current_offset, '%H:%M:%S') + updated_arrival_offsets.append(str_current_offset) + updated_departure_offsets.append(str_current_offset) + else: + updated_arrival_offsets.append(old_arrival_offsets[i]) + updated_departure_offsets.append(old_departure_offsets[i]) + new_route_attributes_dict[route_id] = { + 'arrival_offsets': updated_arrival_offsets, 'departure_offsets': updated_departure_offsets} + self.apply_attributes_to_routes(new_attributes=new_route_attributes_dict) + else: + logging.info("No routes with infinite speeds were found. 
Nothing to do.") + def is_strongly_connected(self): if nx.number_strongly_connected_components(self.graph()) == 1: return True diff --git a/genet/utils/simplification.py b/genet/utils/simplification.py index 90236f0c..e51497b0 100644 --- a/genet/utils/simplification.py +++ b/genet/utils/simplification.py @@ -253,7 +253,7 @@ def simplify_graph(n, no_processes=1): df_routes['route'] = df_routes['route'].apply(lambda x: update_link_ids(x, n.link_simplification_map)) n.schedule.apply_attributes_to_routes(df_routes.T.to_dict()) logging.info("Updated Network Routes") - logging.info("Finished simplifying network") + logging.info("Finished simplifying network") def update_link_ids(old_route, link_mapping): diff --git a/scripts/add_elevation_to_network.py b/scripts/add_elevation_to_network.py index 85be19be..d67558ed 100644 --- a/scripts/add_elevation_to_network.py +++ b/scripts/add_elevation_to_network.py @@ -71,12 +71,15 @@ projection = args['projection'] elevation = args['elevation'] tif_null_value = args['null_value'] - output_dir = args['output_dir'] write_elevation_to_network = args['write_elevation_to_network'] write_slope_to_network = args['write_slope_to_network'] write_slope_to_object_attribute_file = args['write_slope_to_object_attribute_file'] save_dict_to_json = args['save_jsons'] + + output_dir = args['output_dir'] + supporting_outputs = os.path.join(output_dir, 'supporting_outputs') ensure_dir(output_dir) + ensure_dir(supporting_outputs) logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.WARNING) @@ -114,7 +117,7 @@ gdf_nodes = n.to_geodataframe()['nodes'] gdf_nodes = gdf_nodes[['id', 'z', 'geometry']] - save_geodataframe(gdf_nodes.to_crs('epsg:4326'), 'node_elevation', output_dir) + save_geodataframe(gdf_nodes.to_crs('epsg:4326'), 'node_elevation', supporting_outputs) logging.info('Creating slope dictionary for network links') slope_dictionary = n.get_link_slope_dictionary(elevation_dict=elevation_dictionary) @@ -138,7 +141,7 @@ 
df['slope'] = [x['slope'] for x in df['slope_tuple']] df = df[['id', 'slope']] gdf_links = pd.merge(gdf, df, on='id') - save_geodataframe(gdf_links.to_crs('epsg:4326'), 'link_slope', output_dir) + save_geodataframe(gdf_links.to_crs('epsg:4326'), 'link_slope', supporting_outputs) if write_slope_to_object_attribute_file: genet.elevation.write_slope_xml(slope_dictionary, output_dir) diff --git a/scripts/auto_schedule_fixes.py b/scripts/auto_schedule_fixes.py new file mode 100644 index 00000000..3cfa79e4 --- /dev/null +++ b/scripts/auto_schedule_fixes.py @@ -0,0 +1,115 @@ +import math + +import argparse +import logging +import geopandas as gpd + +from genet import read_matsim +from genet.utils.persistence import ensure_dir +from genet.output.geojson import save_geodataframe + + +def write_scaled_vehicles(network, list_of_scales, output_dir): + for i in list_of_scales: + scale = float(i) / 100 + network.schedule.scale_vehicle_capacity(scale, scale, output_dir) + + +def generate_headway_geojson(n, gdf, output_dir, filename_suffix): + headways = n.schedule.headway_stats() + headways = headways.merge(gdf[['route_id', 'geometry']], how='left', on='route_id') + save_geodataframe(gpd.GeoDataFrame(headways).to_crs('epsg:4326'), f'headway_stats_{filename_suffix}', output_dir) + + +def generate_speed_geojson(n, gdf, output_dir, filename_suffix): + speeds = n.schedule.speed_geodataframe() + # fill infinity by large number to show up in visualisations + speeds.loc[speeds['speed'] == math.inf, 'speed'] = 9999 + + speeds = speeds.groupby(['service_id', 'route_id', 'route_name', 'mode']).max()['speed'].reset_index() + speeds = speeds.merge(gdf[['route_id', 'geometry']], how='left', on='route_id') + save_geodataframe(gpd.GeoDataFrame(speeds).to_crs('epsg:4326'), f'max_speeds_{filename_suffix}', output_dir) + + +if __name__ == '__main__': + arg_parser = argparse.ArgumentParser( + description='' + ) + + arg_parser.add_argument('-n', + '--network', + help='Location of the network.xml 
file', + required=True) + + arg_parser.add_argument('-s', + '--schedule', + help='Location of the schedule.xml file', + required=False, + default=None) + + arg_parser.add_argument('-v', + '--vehicles', + help='Location of the vehicles.xml file', + required=False, + default=None) + + arg_parser.add_argument('-p', + '--projection', + help='The projection network is in, eg. "epsg:27700"', + required=True) + + arg_parser.add_argument('-vsc', + '--vehicle_scalings', + help='Comma seperated string of scales for vehicles, e.g. 1,10,25', + required=False, + default=None, + type=str) + + arg_parser.add_argument('-od', + '--output_dir', + help='Output directory for the simplified network', + required=True) + + args = vars(arg_parser.parse_args()) + network = args['network'] + schedule = args['schedule'] + vehicles = args['vehicles'] + projection = args['projection'] + output_dir = args['output_dir'] + scale_list = args['vehicle_scalings'] + ensure_dir(output_dir) + + logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.WARNING) + + logging.info('Reading in network at {}'.format(network)) + n = read_matsim( + path_to_network=network, + epsg=projection, + path_to_schedule=schedule, + path_to_vehicles=vehicles + ) + + gdf = n.schedule_network_routes_geodataframe().to_crs('epsg:4326') + + logging.info("Checking for zero headways") + if n.schedule.has_trips_with_zero_headways(): + generate_headway_geojson(n, gdf, output_dir, 'before') + n.schedule.fix_trips_with_zero_headways() + generate_headway_geojson(n, gdf, output_dir, 'after') + else: + logging.info("No trips with zero headways were found") + + logging.info("Checking for infinite speeds") + if n.schedule.has_infinite_speeds(): + generate_speed_geojson(n, gdf, output_dir, 'before') + n.schedule.fix_infinite_speeds() + generate_speed_geojson(n, gdf, output_dir, 'after') + else: + logging.info("No routes with infinite speeds were found") + + logging.info(f'Saving network in {output_dir}') + 
n.write_to_matsim(output_dir) + if scale_list: + logging.info('Generating scaled vehicles xml.') + scale_list = scale_list.split(",") + write_scaled_vehicles(n, scale_list, output_dir) diff --git a/scripts/intermodal_access_egress_network.py b/scripts/intermodal_access_egress_network.py index da13735e..57e170a4 100644 --- a/scripts/intermodal_access_egress_network.py +++ b/scripts/intermodal_access_egress_network.py @@ -168,7 +168,8 @@ def threshold_reached(d): distance_threshold=distance_threshold ) - # TODO There are multiple links to choose from, for the time being we are not precious about which link is selected. + # TODO There are multiple links to choose from, for the time being we are not precious about which link is + # selected. selected_links = closest_links.reset_index().groupby('index').first() if len(selected_links) != len(df_stops): logging.warning(f'Only {len(selected_links)} out of {len(df_stops)} stops found a link to snap to. ' @@ -193,12 +194,9 @@ def threshold_reached(d): accessible_tag = f'{snap_mode}Accessible' distance_catchment_tag = f'{snap_mode}_distance_catchment_tag' - selected_links[access_link_id_tag] = selected_links['link_id'].apply( - lambda x: {'name': access_link_id_tag, 'class': 'java.lang.String', 'text': x}) - selected_links[accessible_tag] = selected_links.apply( - lambda x: {'name': accessible_tag, 'class': 'java.lang.String', 'text': 'true'}, axis=1) - selected_links[distance_catchment_tag] = selected_links['catchment'].apply( - lambda x: {'name': distance_catchment_tag, 'class': 'java.lang.String', 'text': str(x)}) + selected_links[access_link_id_tag] = selected_links['link_id'] + selected_links[accessible_tag] = 'true' + selected_links[distance_catchment_tag] = selected_links['catchment'].astype(str) new_stops_data = selected_links[[access_link_id_tag, accessible_tag, distance_catchment_tag]].T.to_dict() new_stops_data = {k: {'attributes': v} for k, v in new_stops_data.items()} @@ -210,8 +208,7 @@ def threshold_reached(d): 
# generate the data dictionaries for updating stops data accessible_tag = f'{tele_mode}Accessible' - df_stops[accessible_tag] = df_stops.apply( - lambda x: {'name': accessible_tag, 'class': 'java.lang.String', 'text': 'true'}, axis=1) + df_stops[accessible_tag] = 'true' new_stops_data = df_stops[[accessible_tag]].T.to_dict() new_stops_data = {k: {'attributes': v} for k, v in new_stops_data.items()} diff --git a/scripts/scale_vehicles.py b/scripts/scale_vehicles.py new file mode 100644 index 00000000..a5dec1b7 --- /dev/null +++ b/scripts/scale_vehicles.py @@ -0,0 +1,65 @@ +import argparse +import logging + +from genet import read_matsim_schedule +from genet.utils.persistence import ensure_dir + + +def write_scaled_vehicles(schedule, list_of_scales, output_dir): + for i in list_of_scales: + scale = float(i) / 100 + schedule.scale_vehicle_capacity(scale, scale, output_dir) + + +if __name__ == '__main__': + arg_parser = argparse.ArgumentParser(description='Scale PT Schedule vehicles') + + arg_parser.add_argument('-s', + '--schedule', + help='Location of the schedule.xml file', + required=False, + default=None) + + arg_parser.add_argument('-v', + '--vehicles', + help='Location of the vehicles.xml file', + required=False, + default=None) + + arg_parser.add_argument('-p', + '--projection', + help='The projection network is in, eg. "epsg:27700"', + required=True) + + arg_parser.add_argument('-vsc', + '--vehicle_scalings', + help='Comma separated string of scales for vehicles, e.g. 
1,10,25', + required=True, + default="1,10", + type=str) + + arg_parser.add_argument('-od', + '--output_dir', + help='Output directory for the simplified network', + required=True) + + args = vars(arg_parser.parse_args()) + schedule = args['schedule'] + vehicles = args['vehicles'] + projection = args['projection'] + output_dir = args['output_dir'] + scale_list = args['vehicle_scalings'] + ensure_dir(output_dir) + + logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.WARNING) + + logging.info('Reading in schedule at {}'.format(schedule)) + s = read_matsim_schedule( + path_to_schedule=schedule, + path_to_vehicles=vehicles, + epsg=projection + ) + + logging.info('Generating scaled vehicles xml.') + scale_list = scale_list.split(",") + write_scaled_vehicles(s, scale_list, output_dir) diff --git a/scripts/separate_modes_in_network.py b/scripts/separate_modes_in_network.py index ad86feec..d37e9fba 100644 --- a/scripts/separate_modes_in_network.py +++ b/scripts/separate_modes_in_network.py @@ -6,6 +6,23 @@ from genet import read_matsim from genet.utils.persistence import ensure_dir from genet.output.sanitiser import sanitise_dictionary +from genet.output.geojson import save_geodataframe + + +def sort_modes_and_convert_to_str(modes): + modes = list(modes) + modes.sort() + return ','.join(modes) + + +def generate_modal_network_geojsons(n, modes, output_dir, filename_suffix): + logging.info(f"Generating visual outputs {filename_suffix}") + gdf = n.to_geodataframe()['links'].to_crs('epsg:4326') + for mode in modes: + _gdf = gdf[gdf['modes'].apply(lambda x: mode in x)] + _gdf['modes'] = _gdf['modes'].apply(sort_modes_and_convert_to_str) + save_geodataframe(_gdf, f'mode_{mode}_{filename_suffix}', output_dir) + if __name__ == '__main__': arg_parser = argparse.ArgumentParser( @@ -14,14 +31,14 @@ 'do not come in contact. 
Given a link:' '>>> `n.link("LINK_ID")`' ' `{"id": "LINK_ID", "modes": {"car", "bike"}, "freespeed": 5, ...}`' - + 'The resulting links in the network will be:' '>>> `n.link("LINK_ID")`' ' `{"id": "LINK_ID", "modes": {"car"}, "freespeed": 5, ...}`' '>>> `n.link("bike---LINK_ID")`' ' `{"id": "bike---LINK_ID", "modes": {"bike"}, "freespeed": 5, ...}`' 'the new bike link will assume all the same attributes apart from the "modes".' - + 'In the case when a link already has a single dedicated mode, no updates are made to the link ID, ' 'you can assume that all links that were in the network previously are still there, but their ' 'allowed modes may have changed, so any simulation outputs may not be valid with this new network.' @@ -42,6 +59,13 @@ help='Comma separated modes to split from the network', required=True) + arg_parser.add_argument('-ic', + '--increase_capacity', + help='Sets capacity on detached links to 9999', + required=False, + default=False, + type=bool) + arg_parser.add_argument('-od', '--output_dir', help='Output directory for the simplified network', @@ -50,9 +74,12 @@ args = vars(arg_parser.parse_args()) network = args['network'] projection = args['projection'] - modes = args['modes'].split(',') + modes = set(args['modes'].split(',')) + increase_capacity = args['increase_capacity'] output_dir = args['output_dir'] + supporting_outputs = os.path.join(output_dir, 'supporting_outputs') ensure_dir(output_dir) + ensure_dir(supporting_outputs) logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.WARNING) @@ -63,6 +90,8 @@ ) logging.info(f'Number of links before separating graph: {len(n.link_id_mapping)}') + generate_modal_network_geojsons(n, modes, supporting_outputs, 'before') + for mode in modes: logging.info(f'Splitting links for mode: {mode}') df = n.link_attribute_data_under_key('modes') @@ -73,6 +102,14 @@ new_links = {f'{mode}---{k}': {**n.link(k), **{'modes': {mode}, 'id': f'{mode}---{k}'}} for k in modal_links} 
n.apply_attributes_to_links(update_mode_links) n.add_links(new_links) + if increase_capacity: + logging.info(f'Increasing capacity for link of mode {mode} to 9999') + mode_links = n.extract_links_on_edge_attributes( + {'modes': mode} + ) + df_capacity = n.link_attribute_data_under_keys(['capacity']).loc[mode_links, :] + df_capacity['capacity'] = 9999 + n.apply_attributes_to_links(df_capacity.T.to_dict()) logging.info(f'Number of links after separating graph: {len(n.link_id_mapping)}') @@ -81,13 +118,7 @@ logging.info('Generating validation report') report = n.generate_validation_report() logging.info(f'Graph validation: {report["graph"]["graph_connectivity"]}') - if n.schedule: - logging.info(f'Schedule level validation: {report["schedule"]["schedule_level"]["is_valid_schedule"]}') - logging.info( - f'Schedule vehicle level validation: {report["schedule"]["vehicle_level"]["vehicle_definitions_valid"]}' - ) - logging.info(f'Routing validation: {report["routing"]["services_have_routes_in_the_graph"]}') with open(os.path.join(output_dir, 'validation_report.json'), 'w', encoding='utf-8') as f: json.dump(sanitise_dictionary(report), f, ensure_ascii=False, indent=4) - n.generate_standard_outputs(os.path.join(output_dir, 'standard_outputs')) + generate_modal_network_geojsons(n, modes, supporting_outputs, 'after') diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index e6370ec5..1e1690c8 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -8,11 +8,13 @@ from genet.utils.persistence import ensure_dir from genet.output.sanitiser import sanitise_dictionary -def write_scaled_vehicles(network, list_of_scales,output_dir): + +def write_scaled_vehicles(network, list_of_scales, output_dir): for i in list_of_scales: - scale = float(i)/100 + scale = float(i) / 100 network.schedule.scale_vehicle_capacity(scale, scale, output_dir) + if __name__ == '__main__': arg_parser = argparse.ArgumentParser(description='Simplify a MATSim network by 
removing ' 'intermediate links from paths') @@ -45,7 +47,16 @@ def write_scaled_vehicles(network, list_of_scales,output_dir): required=False, default=1, type=int) - + arg_parser.add_argument('-fc', + '--force_strongly_connected_graph', + help='If True, checks for disconnected subgraphs for modes walk, bike and car. If there is ' + 'more than one strongly connected subgraph, genet connects them with links at closest ' + 'points in the graph. The links used to connect are weighted at 20%% of surrounding ' + 'freespeed and capacity values', + default=False, + type=bool) + arg_parser.add_argument('-vsc', '--vehicle_scalings', help='Comma separated string of scales for vehicles, e.g. 1,10,25', @@ -66,6 +77,7 @@ def write_scaled_vehicles(network, list_of_scales,output_dir): processes = args['processes'] output_dir = args['output_dir'] scale_list = args['vehicle_scalings'] + force_strongly_connected_graph = args['force_strongly_connected_graph'] ensure_dir(output_dir) logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.WARNING) @@ -83,12 +95,24 @@ def write_scaled_vehicles(network, list_of_scales,output_dir): start = time.time() n.simplify(no_processes=processes) end = time.time() + logging.info(f'This took {round((end - start) / 60, 3)} min.') logging.info( f'Simplification resulted in {len(n.link_simplification_map)} links being simplified.') with open(os.path.join(output_dir, 'link_simp_map.json'), 'w', encoding='utf-8') as f: json.dump(n.link_simplification_map, f, ensure_ascii=False, indent=4) + logging.info('Checking for disconnected subgraphs') + start = time.time() + for mode in {'car', 'bike', 'walk'}: + if not n.is_strongly_connected(modes={mode}): + logging.info(f'The graph for {mode} mode is not strongly connected.') + if force_strongly_connected_graph: + logging.info("GeNet will now attempt to add links to connect the graph.") + n.connect_components(modes={mode}, weight=1 / 5) + end = time.time() + logging.info(f'This took {round((end - 
start) / 60, 3)} min.') + n.write_to_matsim(output_dir) if scale_list: @@ -103,11 +127,9 @@ def write_scaled_vehicles(network, list_of_scales,output_dir): logging.info(f'Schedule level validation: {report["schedule"]["schedule_level"]["is_valid_schedule"]}') logging.info( f'Schedule vehicle level validation: {report["schedule"]["vehicle_level"]["vehicle_definitions_valid"]}' - ) + ) logging.info(f'Routing validation: {report["routing"]["services_have_routes_in_the_graph"]}') with open(os.path.join(output_dir, 'validation_report.json'), 'w', encoding='utf-8') as f: json.dump(sanitise_dictionary(report), f, ensure_ascii=False, indent=4) n.generate_standard_outputs(os.path.join(output_dir, 'standard_outputs')) - - logging.info(f'It took {round((end - start)/60, 3)} min to simplify the network.') diff --git a/scripts/squeeze_external_area.py b/scripts/squeeze_external_area.py new file mode 100644 index 00000000..d9b83688 --- /dev/null +++ b/scripts/squeeze_external_area.py @@ -0,0 +1,137 @@ +import os +import argparse +import logging +import geopandas as gpd + +from genet import read_matsim +from genet.utils.persistence import ensure_dir +from genet.output.geojson import save_geodataframe, modal_subset + +if __name__ == '__main__': + arg_parser = argparse.ArgumentParser( + description='Changes `freespeed` and `capacity` values for links **outside** of the given `study_area` ' + 'by given factors. ' + 'To squeeze links within the study area, look at the `squeeze_urban_links.py ' + 'script.' + ) + + arg_parser.add_argument('-n', + '--network', + help='Location of the network.xml file', + required=True) + + arg_parser.add_argument('-p', + '--projection', + help='The projection network is in, eg. 
"epsg:27700"', + required=True) + + arg_parser.add_argument('-sa', + '--study_area', + help='Geojson or shp file that when read into geopandas produces a table with a geometry ' + 'column that describes the area which should be left unaffected by speed and ' + 'capacity factors.', + required=False, + default=None) + + arg_parser.add_argument('-f', + '--freespeed', + help='Factor, e.g. 0.5, to reduce the "freespeed" attribute for the roads external to ' + 'given Study Area in the network. The current value will be multiplied by 0.5 ' + '(in that case). You can also pass 1.5, for example, to increase the value.', + required=False, + type=float, + default=1) + + arg_parser.add_argument('-c', + '--capacity', + help='Factor, e.g. 0.5, to reduce the "capacity" attribute for the roads external to ' + 'given Study Area in the network. The current value will be multiplied by 0.5 ' + '(in that case). You can also pass 1.5, for example, to increase the value.', + required=False, + type=float, + default=1) + + arg_parser.add_argument('-od', + '--output_dir', + help='Output directory for the reprojected network', + required=True) + + args = vars(arg_parser.parse_args()) + network = args['network'] + projection = args['projection'] + study_area = args['study_area'] + freespeed = args['freespeed'] + capacity = args['capacity'] + + output_dir = args['output_dir'] + supporting_outputs = os.path.join(output_dir, 'supporting_outputs') + ensure_dir(output_dir) + ensure_dir(supporting_outputs) + + logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.WARNING) + + logging.info(f'Reading in network at {network}') + n = read_matsim( + path_to_network=network, + epsg=projection + ) + + logging.info(f'Reading in Study Area geometry at {study_area}') + gdf_study_area = gpd.read_file(study_area) + if gdf_study_area.crs != projection: + logging.info( + f'Projecting Study Area geometry from {str(gdf_study_area.crs)} to {projection}, ' + 'to match the network projection') + 
gdf_study_area = gdf_study_area.to_crs(projection) + if gdf_study_area.empty: + raise RuntimeError('The Study Area was not found!!') + + logging.info('Finding links external to the study area') + network_gdf = n.to_geodataframe()['links'] + network_internal = gpd.sjoin(network_gdf, gdf_study_area, how='inner', op='intersects') + external_links = set(network_gdf["id"].astype('str')) - set(network_internal["id"].astype('str')) + + logging.info('Finding car mode links') + car_links = set(n.links_on_modal_condition('car')) + + logging.info('Finding minor road external links') + links_to_squeeze = external_links.intersection(car_links) + logging.info(f'{len(links_to_squeeze)} road links out of all {len(external_links)} external links and a total of ' + f'{len(car_links)} car mode links will be squeezed.') + + logging.info('Generating geojson of external road links') + external_tag_gdf = network_gdf[network_gdf['id'].isin(set(links_to_squeeze))] + save_geodataframe(external_tag_gdf[['id', 'geometry']].to_crs('epsg:4326'), + 'external_network_links', + supporting_outputs) + + # THE SQUEEZE SECTION + + network_gdf = network_gdf.to_crs('epsg:4326') + _gdf = network_gdf[network_gdf.apply(lambda x: modal_subset(x, {'car', 'bus'}), axis=1)] + save_geodataframe(_gdf[['id', 'freespeed', 'geometry']], output_dir=supporting_outputs, + filename='freespeed_before') + save_geodataframe(_gdf[['id', 'capacity', 'geometry']], output_dir=supporting_outputs, + filename='capacity_before') + + network_gdf = network_gdf[network_gdf['id'].isin(links_to_squeeze)] + if freespeed: + logging.info(f'Changing freespeed by {freespeed * 100}%') + network_gdf['freespeed'] = network_gdf['freespeed'] * freespeed + if capacity: + logging.info(f'Changing capacity by {capacity * 100}%') + network_gdf['capacity'] = network_gdf['capacity'] * capacity + + n.apply_attributes_to_links(network_gdf[['id', 'freespeed', 'capacity']].set_index('id').T.to_dict()) + + logging.info('Generating geojson outputs for 
visual validation') + network_gdf = n.to_geodataframe()['links'] + network_gdf = network_gdf.to_crs('epsg:4326') + network_gdf = network_gdf[network_gdf.apply(lambda x: modal_subset(x, {'car', 'bus'}), axis=1)] + save_geodataframe(network_gdf[['id', 'freespeed', 'geometry']], output_dir=supporting_outputs, + filename='freespeed_after') + save_geodataframe(network_gdf[['id', 'capacity', 'geometry']], output_dir=supporting_outputs, + filename='capacity_after') + + logging.info(f"Saving network to {output_dir}") + n.write_to_matsim(output_dir) diff --git a/scripts/squeeze_urban_links.py b/scripts/squeeze_urban_links.py new file mode 100644 index 00000000..cb4565ee --- /dev/null +++ b/scripts/squeeze_urban_links.py @@ -0,0 +1,184 @@ +import os +import argparse +import logging +import geopandas as gpd + +import genet as gn +from genet.utils.persistence import ensure_dir +from genet.output.geojson import save_geodataframe, modal_subset + +if __name__ == '__main__': + arg_parser = argparse.ArgumentParser( + description='Tag minor network links as urban, given geometries: `urban_geometries`. ' + 'Minor links are defined as anything other than: osm way highway tags: motorway, motorway_link, ' + 'trunk, trunk_link, primary, primary_link. ' + 'Urban geometries are passed via geojson input with a specific format, see script arguments ' + 'for description. ' + 'Passing `study_area` subsets the urban geometries and links to be squeezed - only links in the ' + 'study area will be tagged and squeezed. This is useful if your geometries covers a larger area. ' + 'The script then reduces capacity and/or freespeed by a factor of current value on those links. ' + 'To squeeze links outside the study area, look at the `squeeze_external_area.py ' + 'script.' + ) + + arg_parser.add_argument('-n', + '--network', + help='Path to the network.xml file', + required=True) + + arg_parser.add_argument('-p', + '--projection', + help='The projection network is currently in, eg. 
"epsg:27700"', + required=True) + + arg_parser.add_argument('-ug', + '--urban_geometries', + help='Geojson or shp file that when read into geopandas produces a table with columns: ' + '"label" (with at least some of the values in this column being a string: "urban") ' + 'and "geometry" (polygons defining urban areas)', + required=True) + + arg_parser.add_argument('-sa', + '--study_area', + help='Geojson or shp file that when read into geopandas produces a table with columns: ' + '"label" (with at least some of the values in this column being a string: "urban") ' + 'and "geometry" (polygons defining urban areas)', + required=False, + default=None) + + arg_parser.add_argument('-f', + '--freespeed', + help='Factor, e.g. 0.5, to reduce the "freespeed" attribute for the urban non-major roads' + 'in the network. The current value will be multiplied by 0.5 (in that case).' + 'You can also pass 1.5, for example, to increase the value.', + required=False, + type=float, + default=1) + + arg_parser.add_argument('-c', + '--capacity', + help='Factor, e.g. 0.5, to reduce the "capacity" attribute for the urban non-major roads' + 'in the network. The current value will be multiplied by 0.5 (in that case).' 
+ 'You can also pass 1.5, for example, to increase the value.', + required=False, + type=float, + default=1) + + arg_parser.add_argument('-od', + '--output_dir', + help='Output directory for the network', + required=False, + default=None) + + args = vars(arg_parser.parse_args()) + network = args['network'] + projection = args['projection'] + urban_geometries = args['urban_geometries'] + study_area = args['study_area'] + freespeed = args['freespeed'] + capacity = args['capacity'] + + output_dir = args['output_dir'] + supporting_outputs = os.path.join(output_dir, 'supporting_outputs') + ensure_dir(output_dir) + ensure_dir(supporting_outputs) + + logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO) + + logging.info(f'Reading in network at {network}') + n = gn.read_matsim(path_to_network=network, epsg=projection) + + # URBAN TAGGING SECTION + + logging.info(f'Reading in urban geometries at {urban_geometries}') + gdf_urban = gpd.read_file(urban_geometries) + if gdf_urban.crs != projection: + logging.info( + f'Projecting urban geometries from {str(gdf_urban.crs)} to {projection}, to match the network projection') + gdf_urban = gdf_urban.to_crs(projection) + gdf_urban = gdf_urban[gdf_urban['label'] == 'urban'] + if gdf_urban.empty: + raise RuntimeError('No areas labelled "urban" were found!!') + if study_area: + logging.info(f'Reading in Study Area geometries at {study_area}') + gdf_study_area = gpd.read_file(study_area) + if gdf_study_area.crs != projection: + logging.info( + f'Projecting Study Area geometries from {str(gdf_study_area.crs)} to {projection}, to match the network ' + 'projection') + gdf_study_area = gdf_study_area.to_crs(projection) + logging.info(f'Subsetting urban geometries on study area') + gdf_urban = gpd.sjoin(gdf_urban, gdf_study_area, how='inner', op='intersects').drop(columns=['index_right']) + + logging.info('Finding urban links') + network_gdf = n.to_geodataframe()['links'] + network_urban = gpd.sjoin(network_gdf, 
gdf_urban, how='inner', op='intersects').drop(columns=['index_right']) + if study_area: + # subsetting gdf_urban on study area is not enough if it consists of polygons that extend beyond + # but it does make it faster to work with gdf_urban if it was large to begin with + network_urban = gpd.sjoin(network_gdf, gdf_study_area, how='inner', op='intersects') + urban_links = set(network_urban["id"].astype('str')) + + logging.info('Finding major road links') + major_links = set(n.extract_links_on_edge_attributes( + conditions=[ + {'attributes': {'osm:way:highway': 'motorway'}}, + {'attributes': {'osm:way:highway': 'motorway_link'}}, + {'attributes': {'osm:way:highway': 'trunk'}}, + {'attributes': {'osm:way:highway': 'trunk_link'}}, + {'attributes': {'osm:way:highway': 'primary'}}, + {'attributes': {'osm:way:highway': 'primary_link'}} + ], + how=any + )) + logging.info('Finding car mode links') + car_links = set(n.links_on_modal_condition('car')) + + logging.info('Finding minor road urban links') + links_to_tag = (urban_links.intersection(car_links) - major_links) + logging.info(f'{len(links_to_tag)} minor road links out of all {len(urban_links)} urban links and a total of ' + f'{len(car_links)} car mode links will be tagged with the "urban" tag') + + logging.info('Generating geojson of urban road links') + urban_tag_gdf = network_gdf[network_gdf['id'].isin(set(links_to_tag))] + save_geodataframe(urban_tag_gdf[['id', 'geometry']].to_crs('epsg:4326'), + 'urban_network_links', + supporting_outputs) + + logging.info('Applying "urban" tag to links') + n.apply_attributes_to_links( + {link_id: {'attributes': {'urban': 'True'}} for link_id in links_to_tag} + ) + + # THE SQUEEZE SECTION + + links_to_reduce = links_to_tag + + logging.info('Generating geojson outputs for visual validation') + network_gdf = network_gdf.to_crs('epsg:4326') + _gdf = network_gdf[network_gdf.apply(lambda x: modal_subset(x, {'car', 'bus'}), axis=1)] + save_geodataframe(_gdf[['id', 'freespeed', 
'geometry']], output_dir=supporting_outputs, + filename='freespeed_before') + save_geodataframe(_gdf[['id', 'capacity', 'geometry']], output_dir=supporting_outputs, + filename='capacity_before') + + network_gdf = network_gdf[network_gdf['id'].isin(links_to_reduce)] + if freespeed: + logging.info(f'Changing freespeed by {freespeed * 100}%') + network_gdf['freespeed'] = network_gdf['freespeed'] * freespeed + if capacity: + logging.info(f'Changing capacity by {capacity * 100}%') + network_gdf['capacity'] = network_gdf['capacity'] * capacity + + n.apply_attributes_to_links(network_gdf[['id', 'freespeed', 'capacity']].set_index('id').T.to_dict()) + + logging.info('Generating geojson outputs for visual validation') + network_gdf = n.to_geodataframe()['links'].to_crs('epsg:4326') + network_gdf = network_gdf[network_gdf.apply(lambda x: modal_subset(x, {'car', 'bus'}), axis=1)] + save_geodataframe(network_gdf[['id', 'freespeed', 'geometry']], output_dir=supporting_outputs, + filename='freespeed_after') + save_geodataframe(network_gdf[['id', 'capacity', 'geometry']], output_dir=supporting_outputs, + filename='capacity_after') + + logging.info(f'Saving network in {output_dir}') + n.write_to_matsim(output_dir) diff --git a/scripts/validate_network.py b/scripts/validate_network.py index fbb05f0f..c5ee260f 100644 --- a/scripts/validate_network.py +++ b/scripts/validate_network.py @@ -5,6 +5,8 @@ from genet import read_matsim from genet.utils.persistence import ensure_dir +from genet.output.sanitiser import sanitise_dictionary + if __name__ == '__main__': arg_parser = argparse.ArgumentParser(description='Run MATSim specific validation methods on a MATSim network') @@ -69,4 +71,4 @@ logging.info(f'Routing validation: {report["routing"]["services_have_routes_in_the_graph"]}') with open(os.path.join(output_dir, 'validation_report.json'), 'w', encoding='utf-8') as f: - json.dump(report, f, ensure_ascii=False, indent=4) + json.dump(sanitise_dictionary(report), f, ensure_ascii=False, 
indent=4) diff --git a/tests/fixtures.py b/tests/fixtures.py index 5fec8308..ab462d4e 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -3,7 +3,7 @@ import sys from collections import OrderedDict from dataclasses import dataclass - +from datetime import datetime import dictdiffer import pandas as pd import pytest @@ -57,6 +57,30 @@ def assert_logging_warning_caught_with_message_containing(clog, message): return False +def time_somewhat_accurate(t1: str, t2: str, tolerance_s=5): + """ + t1: "HH:MM:SS" + t2: "HH:MM:SS" + tolerance_s: seconds of tolerable difference + + returns: bool + """ + t1 = datetime.strptime(t1, '%H:%M:%S') + t2 = datetime.strptime(t2, '%H:%M:%S') + return abs((t1 - t2).total_seconds()) <= tolerance_s + + +def list_of_times_somewhat_accurate(lt1: list, lt2: list, tolerance_s=5): + """ + lt1: list of times in str "HH:MM:SS" + lt2: list of times in str "HH:MM:SS" + tolerance_s: seconds of tolerable difference + + returns: bool + """ + return all([time_somewhat_accurate(t1, t2, tolerance_s) for t1, t2 in zip(lt1, lt2)]) + + ########################################################### # core data structure examples ########################################################### diff --git a/tests/test_core_schedule.py b/tests/test_core_schedule.py index cbac6032..facd3767 100644 --- a/tests/test_core_schedule.py +++ b/tests/test_core_schedule.py @@ -1524,6 +1524,87 @@ def test_reading_vehicles_after_reading_schedule(): 'passengerCarEquivalents': {'pce': '2.8'}}) +@pytest.fixture() +def schedule_with_zero_headway(): + # zero headway happens when two trips of the same route depart at the same time. 
We deal with it as if the trip + # is duplicated and remove it + route_1 = Route(route_short_name='name', + mode='bus', id='1', + stops=[Stop(id='1', x=4, y=2, epsg='epsg:27700'), Stop(id='2', x=1, y=2, epsg='epsg:27700'), + Stop(id='3', x=3, y=3, epsg='epsg:27700'), Stop(id='4', x=7, y=5, epsg='epsg:27700')], + trips={'trip_id': ['1', '2'], + 'trip_departure_time': ['13:00:00', '13:00:00'], + 'vehicle_id': ['veh_1_bus', 'veh_2_bus']}, + arrival_offsets=['00:00:00', '00:03:00', '00:06:00', '00:13:00'], + departure_offsets=['00:00:00', '00:04:00', '00:07:00', '00:15:00']) + return { + 'schedule': Schedule(epsg='epsg:27700', services=[Service(id='service', routes=[route_1])]), + 'route_id': '1', + 'expected_trips': {'trip_id': ['1'], + 'trip_departure_time': ['13:00:00'], + 'vehicle_id': ['veh_1_bus']}, + 'expected_vehicles': {'veh_1_bus': {'type': 'bus'}} + } + + +def test_recognises_schedule_has_zero_headway_problem(schedule_with_zero_headway): + assert schedule_with_zero_headway['schedule'].has_trips_with_zero_headways() + + +def test_updates_trips_for_route_with_zero_headways(schedule_with_zero_headway): + schedule_with_zero_headway['schedule'].fix_trips_with_zero_headways() + assert schedule_with_zero_headway['schedule'].route(schedule_with_zero_headway['route_id']).trips == \ + schedule_with_zero_headway['expected_trips'] + + +def test_updates_vehicles_for_route_with_zero_headways(schedule_with_zero_headway): + assert schedule_with_zero_headway['schedule'].vehicles != \ + schedule_with_zero_headway['expected_vehicles'] + + schedule_with_zero_headway['schedule'].fix_trips_with_zero_headways() + + assert schedule_with_zero_headway['schedule'].vehicles == \ + schedule_with_zero_headway['expected_vehicles'] + + +@pytest.fixture() +def schedule_with_infinite_speed(): + # infinite speed happens when the departure offset of stop before is the same as the arrival offset at the next stop + # this means a non zero distance has to be covered in zero time. 
+ route_1 = Route(route_short_name='name', + mode='bus', id='1', + stops=[Stop(id='1', x=4, y=2, epsg='epsg:27700'), Stop(id='2', x=1, y=2, epsg='epsg:27700'), + Stop(id='3', x=3, y=3, epsg='epsg:27700'), Stop(id='4', x=7, y=5, epsg='epsg:27700')], + trips={'trip_id': ['1', '2'], + 'trip_departure_time': ['13:00:00', '13:30:00'], + 'vehicle_id': ['veh_1_bus', 'veh_2_bus']}, + arrival_offsets=['00:00:00', '00:03:00', '00:04:00', '00:13:00'], + departure_offsets=['00:00:00', '00:04:00', '00:09:00', '00:15:00']) + return { + 'schedule': Schedule(epsg='epsg:27700', services=[Service(id='service', routes=[route_1])]), + 'route_id': '1', + 'expected_arrival_offsets': ['00:00:00', '00:03:00', '00:06:08', '00:13:00'], + 'expected_departure_offsets': ['00:00:00', '00:04:00', '00:06:08', '00:15:00'] + } + + +def test_recognises_schedule_has_infinite_speed_problem(schedule_with_infinite_speed): + assert schedule_with_infinite_speed['schedule'].has_infinite_speeds() + + +def test_updates_offsets_for_stop_with_infinite_speed(schedule_with_infinite_speed): + schedule_with_infinite_speed['schedule'].fix_infinite_speeds() + assert list_of_times_somewhat_accurate( + schedule_with_infinite_speed['schedule'].route(schedule_with_infinite_speed['route_id']).arrival_offsets, + schedule_with_infinite_speed['expected_arrival_offsets'], + tolerance_s=5 + ) + assert list_of_times_somewhat_accurate( + schedule_with_infinite_speed['schedule'].route(schedule_with_infinite_speed['route_id']).departure_offsets, + schedule_with_infinite_speed['expected_departure_offsets'], + tolerance_s=5 + ) + def test_is_strongly_connected_with_strongly_connected_schedule(strongly_connected_schedule): assert strongly_connected_schedule.is_strongly_connected()