Skip to content

Commit

Permalink
Merge pull request #426 from ActivitySim/develop
Browse files Browse the repository at this point in the history
Publish
  • Loading branch information
bstabler authored Jun 11, 2021
2 parents bcdc7b6 + d962a42 commit 5803923
Show file tree
Hide file tree
Showing 110 changed files with 5,478 additions and 10,891 deletions.
3 changes: 2 additions & 1 deletion activitysim/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# ActivitySim
# See full license in LICENSE.txt.

__version__ = '0.9.9.1'

__version__ = '1.0'
__doc__ = 'Activity-Based Travel Modeling'
4 changes: 3 additions & 1 deletion activitysim/abm/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ def trace_od(settings):

@inject.injectable(cache=True)
def chunk_size(settings):
return int(settings.get('chunk_size', 0) or 0)
_chunk_size = int(settings.get('chunk_size', 0) or 0)

return _chunk_size


@inject.injectable(cache=True)
Expand Down
44 changes: 1 addition & 43 deletions activitysim/abm/models/accessibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,44 +103,6 @@ def compute_accessibilities_for_zones(
return(accessibility_df)


def accessibility_calc_row_size(accessibility_df, land_use_df, assignment_spec, network_los, trace_label):
    """Estimate per-chooser-row element count for chunking the accessibility step.

    Returns the high-water-mark element count from a RowSizeEstimator, or 0 to
    disable row-size-based chunking for THREE_ZONE systems (where TVPB tap-skim
    overhead is not estimated here).
    """

    sizer = chunk.RowSizeEstimator(trace_label)

    # THREE_ZONE systems would add per-row tvpb skim overhead that we do not
    # attempt to estimate - disable row-size calculation instead.
    # (checked first to facilitate tracing of row-size estimation below)
    if network_los.zone_system == los.THREE_ZONE:
        # DISABLE_TVPB_OVERHEAD
        logger.debug("disable calc_row_size for THREE_ZONE with tap skims")
        return 0

    num_zones = len(land_use_df.index)
    num_land_use_columns = len(land_use_df.columns)
    num_od_columns = 2

    # assignment spec has one row per value to assign; count only the unique
    # persistent targets simultaneously resident during spec eval
    # (the eval dict overwrites recurring targets, so duplicates don't add memory)
    def _is_persistent(target):
        return not (assign.is_throwaway(target) or assign.is_temp_scalar(target))

    num_spec_values = sum(1 for target in assignment_spec.target.unique() if _is_persistent(target))

    sizer.add_elements(num_zones * num_od_columns, 'od_df')

    # each od_df row joins to every land_use zone
    sizer.add_elements(num_zones * num_land_use_columns, 'land_use_choosers')

    # assign_variables results computed over the joined land_use
    sizer.add_elements(num_zones * num_spec_values, 'spec_values')

    return sizer.get_hwm()


@inject.step()
def compute_accessibility(land_use, accessibility, network_los, chunk_size, trace_od):

Expand Down Expand Up @@ -178,14 +140,10 @@ def compute_accessibility(land_use, accessibility, network_los, chunk_size, trac

logger.info(f"Running {trace_label} with {len(accessibility_df.index)} orig zones {len(land_use_df)} dest zones")

row_size = \
chunk_size and accessibility_calc_row_size(accessibility_df, land_use_df,
assignment_spec, network_los, trace_label)

accessibilities_list = []

for i, chooser_chunk, chunk_trace_label in \
chunk.adaptive_chunked_choosers(accessibility_df, chunk_size, row_size, trace_label):
chunk.adaptive_chunked_choosers(accessibility_df, chunk_size, trace_label):

accessibilities = \
compute_accessibilities_for_zones(chooser_chunk, land_use_df, assignment_spec,
Expand Down
3 changes: 0 additions & 3 deletions activitysim/abm/models/atwork_subtour_mode_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from .util.mode import run_tour_mode_choice_simulate
from .util import estimation

from activitysim.core.mem import force_garbage_collect
from activitysim.core.util import assign_in_place

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -171,5 +170,3 @@ def atwork_subtour_mode_choice(
label=tracing.extend_trace_label(trace_label, mode_column_name),
slicer='tour_id',
index_label='tour_id')

force_garbage_collect()
3 changes: 3 additions & 0 deletions activitysim/abm/models/auto_ownership.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ def auto_ownership_simulate(households,
estimator.write_coefficients(coefficients_df, model_settings)
estimator.write_choosers(choosers)

log_alt_losers = config.setting('log_alt_losers', False)

choices = simulate.simple_simulate(
choosers=choosers,
spec=model_spec,
Expand All @@ -54,6 +56,7 @@ def auto_ownership_simulate(households,
chunk_size=chunk_size,
trace_label=trace_label,
trace_choice_name='auto_ownership',
log_alt_losers=log_alt_losers,
estimator=estimator)

if estimator:
Expand Down
62 changes: 47 additions & 15 deletions activitysim/abm/models/initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from activitysim.core import inject
from activitysim.core import pipeline
from activitysim.core import expressions
from activitysim.core import chunk
from activitysim.core import mem

from activitysim.core.steps.output import write_data_dictionary
Expand Down Expand Up @@ -39,6 +40,10 @@

def annotate_tables(model_settings, trace_label):

trace_label = tracing.extend_trace_label(trace_label, 'annotate_tables')

chunk.log_rss(trace_label)

annotate_tables = model_settings.get('annotate_tables', [])

if not annotate_tables:
Expand All @@ -53,7 +58,10 @@ def annotate_tables(model_settings, trace_label):

tablename = table_info['tablename']

chunk.log_rss(f"{trace_label}.pre-get_table.{tablename}")

df = inject.get_table(tablename).to_frame()
chunk.log_df(trace_label, tablename, df)

# - rename columns
column_map = table_info.get('column_map', None)
Expand All @@ -75,42 +83,66 @@ def annotate_tables(model_settings, trace_label):
model_settings=annotate,
trace_label=trace_label)

# fixme - narrow?
chunk.log_df(trace_label, tablename, df)

# - write table to pipeline
pipeline.replace_table(tablename, df)

del df
chunk.log_df(trace_label, tablename, None)


@inject.step()
def initialize_landuse():

trace_label = 'initialize_landuse'

model_settings = config.read_model_settings('initialize_landuse.yaml', mandatory=True)
with chunk.chunk_log(trace_label, base=True):

model_settings = config.read_model_settings('initialize_landuse.yaml', mandatory=True)

annotate_tables(model_settings, trace_label)
annotate_tables(model_settings, trace_label)

# instantiate accessibility (must be checkpointed to be used to slice accessibility)
accessibility = pipeline.get_table('accessibility')
# instantiate accessibility (must be checkpointed to be used to slice accessibility)
accessibility = pipeline.get_table('accessibility')
chunk.log_df(trace_label, "accessibility", accessibility)


@inject.step()
def initialize_households():

trace_label = 'initialize_households'

model_settings = config.read_model_settings('initialize_households.yaml', mandatory=True)
annotate_tables(model_settings, trace_label)
with chunk.chunk_log(trace_label, base=True):

# - initialize shadow_pricing size tables after annotating household and person tables
# since these are scaled to model size, they have to be created while single-process
shadow_pricing.add_size_tables()
mem.trace_memory_info(f"initialize_households after shadow_pricing.add_size_tables")
chunk.log_rss(f"{trace_label}.inside-yield")

# - preload person_windows
t0 = tracing.print_elapsed_time()
inject.get_table('person_windows').to_frame()
t0 = tracing.print_elapsed_time("preload person_windows", t0, debug=True)
households = inject.get_table('households').to_frame()
assert not households._is_view
chunk.log_df(trace_label, "households", households)
del households
chunk.log_df(trace_label, "households", None)

persons = inject.get_table('persons').to_frame()
assert not persons._is_view
chunk.log_df(trace_label, "persons", persons)
del persons
chunk.log_df(trace_label, "persons", None)

model_settings = config.read_model_settings('initialize_households.yaml', mandatory=True)
annotate_tables(model_settings, trace_label)

# - initialize shadow_pricing size tables after annotating household and person tables
# since these are scaled to model size, they have to be created while single-process
# this can now be called as a stand alone model step instead, add_size_tables
add_size_tables = model_settings.get('add_size_tables', True)
if add_size_tables:
# warnings.warn(f"Calling add_size_tables from initialize will be removed in the future.", FutureWarning)
shadow_pricing.add_size_tables()

# - preload person_windows
person_windows = inject.get_table('person_windows').to_frame()
chunk.log_df(trace_label, "person_windows", person_windows)


@inject.injectable(cache=True)
Expand Down
60 changes: 14 additions & 46 deletions activitysim/abm/models/initialize_los.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,47 +112,6 @@ def initialize_los(network_los):
np.copyto(data, np.nan)


def initialize_tvpb_calc_row_size(choosers, network_los, trace_label):
    """Estimate per-chooser-row element count for chunking initialize_tvpb.

    (Docstring previously said "trip_purpose" - a copy-paste error; this
    estimator serves the TVPB tap_tap utility precomputation.)

    Parameters
    ----------
    choosers : pandas.DataFrame
        od/uid chooser rows for which tap_tap utilities will be computed.
    network_los : los.Network_LOS
        Provides TVPB_SETTINGS for tour_mode_choice tap_tap utilities.
    trace_label : str

    Returns
    -------
    int
        High-water-mark element count per chooser row.
    """

    sizer = chunk.RowSizeEstimator(trace_label)

    # NOTE: removed a spurious f-prefix on this placeholder-free string literal
    model_settings = \
        network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings')
    attributes_as_columns = \
        network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings.attributes_as_columns', [])

    # chooser columns resident during eval
    sizer.add_elements(len(choosers.columns), 'choosers')

    # scalar attributes injected as chooser columns
    sizer.add_elements(len(attributes_as_columns), 'attributes_as_columns')

    preprocessor_settings = model_settings.get('PREPROCESSOR')
    if preprocessor_settings:

        preprocessor_spec_name = preprocessor_settings.get('SPEC', None)

        # guard against a PREPROCESSOR block with no SPEC key - previously
        # this crashed with AttributeError on None.endswith
        if preprocessor_spec_name:
            if not preprocessor_spec_name.endswith(".csv"):
                preprocessor_spec_name = f'{preprocessor_spec_name}.csv'
            expressions_spec = assign.read_assignment_spec(config.config_file_path(preprocessor_spec_name))

            # one expression_value per preprocessor spec row
            sizer.add_elements(expressions_spec.shape[0], 'preprocessor')

    # one expression_value per spec row
    spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    sizer.add_elements(spec.shape[0], 'expression_values')

    # one utility column per spec alternative
    sizer.add_elements(spec.shape[1], 'utilities')

    row_size = sizer.get_hwm()

    return row_size


def compute_utilities_for_atttribute_tuple(network_los, scalar_attributes, data, chunk_size, trace_label):

# scalar_attributes is a dict of attribute name/value pairs for this combination
Expand All @@ -175,19 +134,25 @@ def compute_utilities_for_atttribute_tuple(network_los, scalar_attributes, data,
# get od skim_offset dataframe with uid index corresponding to scalar_attributes
choosers_df = uid_calculator.get_od_dataframe(scalar_attributes)

row_size = chunk_size and initialize_tvpb_calc_row_size(choosers_df, network_los, trace_label)
# choosers_df is pretty big and was custom made for compute_utilities but we don't need to chunk_log it
# since it is created outside of adaptive_chunked_choosers and so will show up in baseline
assert not chunk.chunk_logging() # otherwise we should chunk_log this

chunk_tag = 'initialize_tvpb' # all attribute_combinations can use same cached data for row_size calc

for i, chooser_chunk, chunk_trace_label \
in chunk.adaptive_chunked_choosers(choosers_df, chunk_size, row_size, trace_label):
in chunk.adaptive_chunked_choosers(choosers_df, chunk_size, trace_label, chunk_tag=chunk_tag):

# we should count choosers_df as chunk overhead since its pretty big and was custom made for compute_utilities
# (call log_df from inside yield loop so it is visible to adaptive_chunked_choosers chunk_log)
chunk.log_df(trace_label, 'choosers_df', choosers_df)
assert chooser_chunk._is_view # otherwise copying it is wasteful
chooser_chunk = chooser_chunk.copy()
chunk.log_df(trace_label, 'attribute_chooser_chunk', chooser_chunk)

# add any attribute columns specified as column attributes in settings (the rest will be scalars in locals_dict)
for attribute_name in attributes_as_columns:
chooser_chunk[attribute_name] = scalar_attributes[attribute_name]

chunk.log_df(trace_label, 'chooser_chunk', chooser_chunk)
chunk.log_df(trace_label, 'attribute_chooser_chunk', chooser_chunk)

utilities_df = \
pathbuilder.compute_utilities(network_los,
Expand All @@ -204,6 +169,9 @@ def compute_utilities_for_atttribute_tuple(network_los, scalar_attributes, data,

data[chooser_chunk.index.values, :] = utilities_df.values

del chooser_chunk
chunk.log_df(trace_label, 'attribute_chooser_chunk', None)

logger.debug(f"{trace_label} updated utilities")


Expand Down
4 changes: 2 additions & 2 deletions activitysim/abm/models/initialize_tours.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ def initialize_tours(network_los, households, persons, trace_hh_id):

tracing.register_traceable_table('tours', tours)

print(f"{len(tours.household_id.unique())} unique household_ids in tours")
print(f"{len(households.index.unique())} unique household_ids in households")
logger.debug(f"{len(tours.household_id.unique())} unique household_ids in tours")
logger.debug(f"{len(households.index.unique())} unique household_ids in households")
assert not tours.index.duplicated().any()

tours_without_persons = ~tours.person_id.isin(persons.index)
Expand Down
4 changes: 2 additions & 2 deletions activitysim/abm/models/joint_tour_participation.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,8 @@ def participants_chooser(probs, choosers, spec, trace_label):
probs = probs[~satisfied]
candidates = candidates[~satisfied]

logger.info('%s iteration %s : %s joint tours satisfied %s remaining' %
(trace_label, iter, num_tours_satisfied_this_iter, num_tours_remaining,))
logger.debug(f"{trace_label} iteration {iter} : "
f"{num_tours_satisfied_this_iter} joint tours satisfied {num_tours_remaining} remaining")

choices = pd.concat(choices_list)
rands = pd.concat(rands_list).reindex(choosers.index)
Expand Down
Loading

0 comments on commit 5803923

Please sign in to comment.