Skip to content

Commit

Permalink
First commit for basic analysis code.
Browse files Browse the repository at this point in the history
  • Loading branch information
eyyoung24 committed Oct 31, 2019
1 parent c86687c commit 7aa13a2
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 0 deletions.
23 changes: 23 additions & 0 deletions apc_data_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import numpy as np
import matplotlib.pyplot as plt
import os
from apc_plot import APCPlotMixin
from apc_data_process import APCDataProcessMixin

class APCDataAnalysis(APCPlotMixin, APCDataProcessMixin):
    """Analysis entry point combining the data-processing and plotting mixins.

    Constructing an instance records the data directories on the instance
    and, by default, immediately loads and cleans the data via
    ``self.load_data()`` and ``self.clean_data()``.
    """

    def __init__(self, load_data=True, clean_data=True, root_dir=None,
                 clean_data_dir=None):
        """
        General initializer

        Opt Args:
        ---------
        load_data (bool) : If True, call ``self.load_data()`` after the
            directories have been set.
        clean_data (bool) : If True, call ``self.clean_data()`` afterwards.
            This call is made whether or not ``load_data`` was requested.
        root_dir (str) : Directory stored as ``self.root_dir``. Defaults to
            the original author's hard coded raw-data location.
        clean_data_dir (str) : Directory stored as ``self.clean_data_dir``.
            Defaults to the original author's hard coded clean-data
            location.
        """
        # The historical hard coded paths are kept as fallbacks so that
        # existing no-argument construction keeps working unchanged; pass
        # explicit directories to run on any other machine.
        if root_dir is None:
            root_dir = os.path.join(
                '/Users/edwardyoung/Google Drive/',
                'CodeForSF/OpenTransit/OT Raw Data (not in shared drive)')
        if clean_data_dir is None:
            clean_data_dir = os.path.join(
                '/Users/edwardyoung/Google Drive/',
                'CodeForSF/OpenTransit/clean_data')

        self.root_dir = root_dir
        self.clean_data_dir = clean_data_dir

        # The parameters shadow the method names inside this scope, so the
        # methods are always reached through ``self``.
        if load_data:
            self.load_data()

        if clean_data:
            self.clean_data()
50 changes: 50 additions & 0 deletions apc_data_process.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import numpy as np
import os

class APCDataProcessMixin:
    """Mixin that loads the pre-extracted APC arrays and cleans them.

    The host class must define ``self.clean_data_dir`` (the directory
    holding the ``.npy`` files) before ``load_data`` is called. Both methods
    operate on ``self.data``, a dict mapping field name to numpy array.
    """

    # One ``<name>.npy`` file per field; the same name is used as the key
    # in ``self.data``.
    _DATA_FIELDS = (
        'OPEN_DATE_TIME',
        'CLOSE_DATE_TIME',
        'VEH_LAT',
        'VEH_LONG',
        'ONS',
        'OFFS',
        'RUN_ID',
        'ROUTE_ID',
    )

    def load_data(self, sort_by_date=True):
        """
        Load the raw npy files from disk. These have already been extracted
        from the csv file. This preprocessing makes the data reading much
        faster.

        Opt Args:
        ---------
        sort_by_date (bool) : If True, reorder every array by ascending
            ``OPEN_DATE_TIME`` so that all fields stay aligned row by row.
        """
        self.data = {}
        for name in self._DATA_FIELDS:
            path = os.path.join(self.clean_data_dir, name + '.npy')
            self.data[name] = np.load(path)

        if sort_by_date:
            # Compute the ordering once from the open time, then apply the
            # same permutation to every field.
            order = np.argsort(self.data['OPEN_DATE_TIME'])
            for name in self.data:
                self.data[name] = self.data[name][order]

    def clean_data(self):
        """
        Generic function for cleaning data. Currently only cleans out
        lat=0/long=0.

        Rows whose latitude or longitude is exactly 0 are overwritten with
        NaN, in place, in every floating-point array of ``self.data``.
        Arrays of any other dtype are left untouched because they cannot
        represent NaN.
        """
        # Build the mask before modifying anything: VEH_LAT and VEH_LONG are
        # themselves overwritten in the loop below.
        bad_position = np.logical_or(self.data['VEH_LAT'] == 0,
                                     self.data['VEH_LONG'] == 0)

        for values in self.data.values():
            # Decide from the dtype rather than by inspecting the first
            # element: that works for empty arrays and for every float
            # width (float32 scalars are not instances of Python ``float``).
            if np.issubdtype(values.dtype, np.floating):
                values[bad_position] = np.nan


54 changes: 54 additions & 0 deletions apc_plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import numpy as np
import matplotlib.pyplot as plt

class APCPlotMixin:
    """Mixin adding diagnostic plots over ``self.data``.

    The host class must provide ``self.data``, a dict of equally indexed
    arrays with at least the keys ``OPEN_DATE_TIME``, ``VEH_LAT``,
    ``VEH_LONG``, ``ONS``, ``OFFS``, ``RUN_ID`` and ``ROUTE_ID``.
    """

    def plot_lat_lon(self, start_date, end_date, run_id=None, route_id=None):
        """
        Plot position and boarding counts for the records opened strictly
        between ``start_date`` and ``end_date``.

        Two figures are created: a stack of four time series (latitude,
        longitude, ons/offs, cumulative ons minus offs) and a scatter of
        latitude against longitude. The number of selected records is
        printed. Nothing is returned and ``plt.show()`` is not called.

        Args:
        -----
        start_date (datetime) : The beginning of time to plot (exclusive)
        end_date (datetime) : The end of time to plot (exclusive)
        Opt Args:
        ---------
        run_id (int) : If given, keep only rows whose RUN_ID equals it.
        route_id (int) : If given, keep only rows whose ROUTE_ID equals it.
            NOTE(review): the original author was unsure what run and route
            IDs represent in this data set -- confirm before relying on
            these filters.
        """
        open_times = self.data['OPEN_DATE_TIME']

        # Boolean mask of data points between start and end dates
        selected = np.logical_and(open_times > start_date,
                                  open_times < end_date)
        if run_id is not None:
            selected = np.logical_and(selected, self.data['RUN_ID'] == run_id)
        if route_id is not None:
            selected = np.logical_and(selected,
                                      self.data['ROUTE_ID'] == route_id)

        print(f'There are {np.sum(selected)} elements')

        # Slice each series once instead of re-indexing for every plot call.
        times = open_times[selected]
        lat = self.data['VEH_LAT'][selected]
        lon = self.data['VEH_LONG'][selected]
        ons = self.data['ONS'][selected]
        offs = self.data['OFFS'][selected]

        _, ax = plt.subplots(4, sharex=True, figsize=(5,8))
        ax[0].plot(times, lat)
        ax[1].plot(times, lon)
        ax[2].plot(times, ons, label='On')
        ax[2].plot(times, offs, label='Off')
        # nancumsum treats NaN as zero, so a single missing count no longer
        # turns the rest of the running total into NaN.
        ax[3].plot(times, np.nancumsum(ons) - np.nancumsum(offs))

        ax[2].legend(loc='upper right')
        ax[0].set_ylabel('Lat')
        ax[1].set_ylabel('Lon')
        ax[2].set_ylabel('On/Off #')
        ax[3].set_ylabel('Total #')
        ax[3].set_xlabel('Time')

        for tick in ax[3].get_xticklabels():
            tick.set_rotation(90)

        plt.tight_layout()

        plt.figure()
        plt.plot(lat, lon, '.')
        plt.xlabel('Lat')
        plt.ylabel('Long')

0 comments on commit 7aa13a2

Please sign in to comment.