From bfee8874acdad5f87fc08a8809ff541b1fbc9304 Mon Sep 17 00:00:00 2001 From: David Hensle <51132108+dhensle@users.noreply.github.com> Date: Thu, 28 Mar 2024 11:26:05 -0400 Subject: [PATCH] BayDAG Contribution #7: Sampling in EDB for Location Choice (#773) * sampling in EDB for location choice * sampling in EDB for location choice * blacken --- activitysim/abm/models/location_choice.py | 46 ++++++++++++++++++++--- activitysim/core/configuration/logit.py | 9 +++++ 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index 2b28a1738..3017235f6 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -142,15 +142,17 @@ def _location_sample( sample_size = model_settings.SAMPLE_SIZE - if state.settings.disable_destination_sampling or ( - estimator and estimator.want_unsampled_alternatives - ): - # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count + if estimator: + sample_size = model_settings.ESTIMATION_SAMPLE_SIZE logger.info( - "Estimation mode for %s using unsampled alternatives short_circuit_choices" - % (trace_label,) + f"Estimation mode for {trace_label} using sample size of {sample_size}" ) + + if state.settings.disable_destination_sampling: sample_size = 0 + logger.info( + f"SAMPLE_SIZE set to 0 for {trace_label} because disable_destination_sampling is set" + ) locals_d = { "skims": skims, @@ -487,6 +489,38 @@ def run_location_sample( trace_label=trace_label, ) + # adding observed choice to alt set when running in estimation mode + if estimator: + # grabbing survey values + survey_persons = estimation.manager.get_survey_table("persons") + if "school_location" in trace_label: + survey_choices = survey_persons["school_zone_id"].reset_index() + elif ("workplace_location" in trace_label) and ("external" not in trace_label): + survey_choices = survey_persons["workplace_zone_id"].reset_index() + else: + return choices + survey_choices.columns = ["person_id", "alt_dest"] + survey_choices = survey_choices[ + survey_choices["person_id"].isin(choices.index) + & (survey_choices.alt_dest > 0) + ] + # merging survey destination into table if not available + joined_data = survey_choices.merge( + choices, on=["person_id", "alt_dest"], how="left", indicator=True + ) + missing_rows = joined_data[joined_data["_merge"] == "left_only"] + missing_rows["pick_count"] = 1 + if len(missing_rows) > 0: + new_choices = missing_rows[ + ["person_id", "alt_dest", "prob", "pick_count"] + ].set_index("person_id") + choices = choices.append(new_choices, ignore_index=False).sort_index() + # making probability the mean of all other sampled destinations by person + # FIXME is there a better way to do this? Does this even matter for estimation? + choices["prob"] = choices["prob"].fillna( + choices.groupby("person_id")["prob"].transform("mean") + ) + return choices diff --git a/activitysim/core/configuration/logit.py b/activitysim/core/configuration/logit.py index cb6cf0432..a7f507aeb 100644 --- a/activitysim/core/configuration/logit.py +++ b/activitysim/core/configuration/logit.py @@ -198,6 +198,15 @@ class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"): ORIG_ZONE_ID: str | None = None """This setting appears to do nothing...""" + ESTIMATION_SAMPLE_SIZE: int = 0 + """ + The number of alternatives to sample for estimation mode. + If zero, then all alternatives are used. + Truth alternative will be included in the sample. + Larch does not yet support sampling alternatives for estimation, + but this setting is still helpful for estimation mode runtime. + """ + class TourModeComponentSettings(TemplatedLogitComponentSettings, extra="forbid"): MODE_CHOICE_LOGSUM_COLUMN_NAME: str | None = None