From bfee8874acdad5f87fc08a8809ff541b1fbc9304 Mon Sep 17 00:00:00 2001
From: David Hensle <51132108+dhensle@users.noreply.github.com>
Date: Thu, 28 Mar 2024 11:26:05 -0400
Subject: [PATCH] BayDAG Contribution #7: Sampling in EDB for Location Choice
 (#773)

* sampling in EDB for location choice

* sampling in EDB for location choice

* blacken
---
 activitysim/abm/models/location_choice.py | 46 ++++++++++++++++++++---
 activitysim/core/configuration/logit.py   |  9 +++++
 2 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py
index 2b28a1738..3017235f6 100644
--- a/activitysim/abm/models/location_choice.py
+++ b/activitysim/abm/models/location_choice.py
@@ -142,15 +142,17 @@ def _location_sample(
 
     sample_size = model_settings.SAMPLE_SIZE
 
-    if state.settings.disable_destination_sampling or (
-        estimator and estimator.want_unsampled_alternatives
-    ):
-        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
+    if estimator:
+        sample_size = model_settings.ESTIMATION_SAMPLE_SIZE
         logger.info(
-            "Estimation mode for %s using unsampled alternatives short_circuit_choices"
-            % (trace_label,)
+            f"Estimation mode for {trace_label} using sample size of {sample_size}"
         )
+
+    if state.settings.disable_destination_sampling:
         sample_size = 0
+        logger.info(
+            f"SAMPLE_SIZE set to 0 for {trace_label} because disable_destination_sampling is set"
+        )
 
     locals_d = {
         "skims": skims,
@@ -487,6 +489,38 @@ def run_location_sample(
             trace_label=trace_label,
         )
 
+    # adding observed choice to alt set when running in estimation mode
+    if estimator:
+        # grabbing survey values
+        survey_persons = estimation.manager.get_survey_table("persons")
+        if "school_location" in trace_label:
+            survey_choices = survey_persons["school_zone_id"].reset_index()
+        elif ("workplace_location" in trace_label) and ("external" not in trace_label):
+            survey_choices = survey_persons["workplace_zone_id"].reset_index()
+        else:
+            return choices
+        survey_choices.columns = ["person_id", "alt_dest"]
+        survey_choices = survey_choices[
+            survey_choices["person_id"].isin(choices.index)
+            & (survey_choices.alt_dest > 0)
+        ]
+        # left-merge survey destinations onto the sample to find ones that were not drawn
+        joined_data = survey_choices.merge(
+            choices, on=["person_id", "alt_dest"], how="left", indicator=True
+        )
+        missing_rows = joined_data[joined_data["_merge"] == "left_only"]
+        missing_rows["pick_count"] = 1
+        if len(missing_rows) > 0:
+            new_choices = missing_rows[
+                ["person_id", "alt_dest", "prob", "pick_count"]
+            ].set_index("person_id")
+            choices = choices.append(new_choices, ignore_index=False).sort_index()
+            # making probability the mean of all other sampled destinations by person
+            # FIXME is there a better way to do this? Does this even matter for estimation?
+            choices["prob"] = choices["prob"].fillna(
+                choices.groupby("person_id")["prob"].transform("mean")
+            )
+
     return choices
 
 
diff --git a/activitysim/core/configuration/logit.py b/activitysim/core/configuration/logit.py
index cb6cf0432..a7f507aeb 100644
--- a/activitysim/core/configuration/logit.py
+++ b/activitysim/core/configuration/logit.py
@@ -198,6 +198,15 @@ class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"):
     ORIG_ZONE_ID: str | None = None
     """This setting appears to do nothing..."""
 
+    ESTIMATION_SAMPLE_SIZE: int = 0
+    """
+    The number of alternatives to sample for estimation mode.
+    If zero, then all alternatives are used.
+    The observed (survey) alternative will be included in the sample.
+    Larch does not yet support sampling alternatives for estimation,
+    but this setting is still helpful for reducing estimation mode runtime.
+    """
+
 
 class TourModeComponentSettings(TemplatedLogitComponentSettings, extra="forbid"):
     MODE_CHOICE_LOGSUM_COLUMN_NAME: str | None = None