Skip to content

Commit

Permalink
Faster Trip Segmentation Cleanup
Browse files: browse the repository at this point in the history
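Code cleanup as per the comments: motion_df is now stored on the segmentation object (self.motion_df) instead of being passed to has_trip_ended, and the timeseries argument that is no longer needed is dropped from the time-filter has_trip_ended.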
  • Loading branch information
humbleOldSage committed Apr 3, 2024
1 parent 1563e2b commit 89c2387
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 7 deletions.
1 change: 0 additions & 1 deletion emission/analysis/intake/segmentation/trip_segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from builtins import *
from builtins import object
import logging
import pandas as pd

import emission.storage.timeseries.abstract_timeseries as esta
import emission.storage.decorations.place_queries as esdp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def segment_into_trips(self, transition_df, motion_df, timeseries, time_query):
self.filtered_points_df = timeseries.get_data_df("background/filtered_location", time_query)
self.filtered_points_df.loc[:,"valid"] = True
self.transition_df = transition_df
self.motion_df =motion_df
if len(self.transition_df) > 0:
logging.debug("self.transition_df = %s" % self.transition_df[["fmt_time", "transition"]])
else:
Expand Down Expand Up @@ -88,7 +89,7 @@ def segment_into_trips(self, transition_df, motion_df, timeseries, time_query):
# So we reset_index upstream and use it here.
last10Points_df = self.filtered_points_df.iloc[max(idx-self.point_threshold, curr_trip_start_point.idx):idx+1]
lastPoint = self.find_last_valid_point(idx)
if self.has_trip_ended(lastPoint, currPoint, timeseries, motion_df):
if self.has_trip_ended(lastPoint, currPoint, timeseries):
last_trip_end_point = lastPoint
logging.debug("Appending last_trip_end_point %s with index %s " %
(last_trip_end_point, idx-1))
Expand Down Expand Up @@ -144,7 +145,7 @@ def segment_into_trips(self, transition_df, motion_df, timeseries, time_query):
logging.debug("Found %d transitions after last point, not ending trip..." % len(stopped_moving_after_last))
return segmentation_points

def has_trip_ended(self, lastPoint, currPoint, timeseries, motion_df):
def has_trip_ended(self, lastPoint, currPoint, timeseries):
# So we must not have been moving for the last _time filter_
# points. So the trip must have ended
# Since this is a distance filter, we detect that the last
Expand Down Expand Up @@ -180,7 +181,7 @@ def has_trip_ended(self, lastPoint, currPoint, timeseries, motion_df):
# In general, we get multiple locations between each motion activity. If we see a bunch of motion activities
# between two location points, and there is a large gap between the last location and the first
# motion activity as well, let us just assume that there was a restart
ongoing_motion_in_range = eaisr.get_ongoing_motion_in_range(lastPoint.ts, currPoint.ts, motion_df)
ongoing_motion_in_range = eaisr.get_ongoing_motion_in_range(lastPoint.ts, currPoint.ts, self.motion_df)
ongoing_motion_check = len(ongoing_motion_in_range) > 0
if timeDelta > self.time_threshold and not ongoing_motion_check:
logging.debug("lastPoint.ts = %s, currPoint.ts = %s, threshold = %s, large gap = %s, ongoing_motion_in_range = %s, ending trip" %
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def segment_into_trips(self, transition_df,motion_df, timeseries, time_query):
logging.debug("last5MinsTimes.max() = %s, time_threshold = %s" %
(last5MinTimes.max() if len(last5MinTimes) > 0 else np.NaN, self.time_threshold))

if self.has_trip_ended(prevPoint, currPoint, timeseries, last10PointsDistances, last5MinsDistances, last5MinTimes,transition_df, motion_df):
if self.has_trip_ended(prevPoint, currPoint, last10PointsDistances, last5MinsDistances, last5MinTimes,transition_df, motion_df):
(ended_before_this, last_trip_end_point) = self.get_last_trip_end_point(filtered_points_df,
last10Points_df, last5MinsPoints_df)
segmentation_points.append((curr_trip_start_point, last_trip_end_point))
Expand Down Expand Up @@ -199,7 +199,7 @@ def continue_just_ended(self, idx, currPoint, filtered_points_df):
else:
return False

def has_trip_ended(self, prev_point, curr_point, timeseries, last10PointsDistances, last5MinsDistances, last5MinTimes, transition_df, motion_df):
def has_trip_ended(self, prev_point, curr_point, last10PointsDistances, last5MinsDistances, last5MinTimes, transition_df, motion_df):
# Another mismatch between phone and server. Phone stops tracking too soon,
# so the distance is still greater than the threshold at the end of the trip.
# But then the next point is a long time away, so we can split again (similar to a distance filter)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ def is_huge_invalid_ts_offset(filterMethod, lastPoint, currPoint, timeseries,

non_still_motions=motionInRange[~motionInRange['type'].isin(ignore_modes_list) & (motionInRange['confidence'] ==100)]
#logging.debug("non_still_motions = %s" % [(ecwm.MotionTypes(ma["data"]["type"]), ma["data"]["confidence"], ma["data"]["fmt_time"]) for ma in non_still_motions]) logging.debug("non_still_motions = %s" % [(ecwm.MotionTypes(ma["data"]["type"]), ma["data"]["confidence"], ma["data"]["fmt_time"]) for ma in non_still_motions])

logging.debug("non_still_motions = %s" %[(ecwm.MotionTypes(row['type']),row['confidence'],row['fmt_time']) for index,row in non_still_motions.iterrows()])

non_still_motions_rate = len(non_still_motions) / (currPoint.ts - lastPoint.ts)

logging.debug("in is_huge_invalid_ts_offset: len(intermediate_transitions) = %d, non_still_motions = %d, time_diff = %s mins, non_still_motions_rate = %s" % (len(intermediate_transitions), len(non_still_motions), (currPoint.ts - lastPoint.ts)/60, non_still_motions_rate))
Expand Down

0 comments on commit 89c2387

Please sign in to comment.