diff --git a/syscore/pandas/pdutils.py b/syscore/pandas/pdutils.py index 5e5491285b..582bc112ca 100755 --- a/syscore/pandas/pdutils.py +++ b/syscore/pandas/pdutils.py @@ -11,7 +11,8 @@ from syscore.constants import named_object, arg_not_supplied DEFAULT_DATE_FORMAT_FOR_CSV = "%Y-%m-%d %H:%M:%S" -EXPECTED_LENGTH_OF_DATE = 19 +EXPECTED_LENGTH_OF_DATE = 19 + def rolling_pairwise_correlation( x: pd.DataFrame, periods: int, min_periods: int = 3 @@ -105,7 +106,9 @@ def pd_readcsv( df = pd.read_csv(filename, skiprows=skiprows, skipfooter=skipfooter) ## Add time index as index - df = add_datetime_index(df=df, date_index_name=date_index_name, date_format=date_format) + df = add_datetime_index( + df=df, date_index_name=date_index_name, date_format=date_format + ) if input_column_mapping is not arg_not_supplied: df = remap_columns_in_pd(df, input_column_mapping) @@ -113,11 +116,18 @@ def pd_readcsv( return df -def add_datetime_index(df: pd.DataFrame, date_index_name: str, date_format: str = DEFAULT_DATE_FORMAT_FOR_CSV, expected_length_of_date: int = EXPECTED_LENGTH_OF_DATE) -> pd.DataFrame: +def add_datetime_index( + df: pd.DataFrame, + date_index_name: str, + date_format: str = DEFAULT_DATE_FORMAT_FOR_CSV, + expected_length_of_date: int = EXPECTED_LENGTH_OF_DATE, +) -> pd.DataFrame: date_index = df[date_index_name] date_index = date_index.astype(str) - def left(x:str, n): + + def left(x: str, n): return x[:n] + date_index = date_index.apply(left, n=EXPECTED_LENGTH_OF_DATE) df.index = pd.to_datetime(date_index, format=date_format).values del df[date_index_name] @@ -125,6 +135,7 @@ def left(x:str, n): return df + def remap_columns_in_pd(df: pd.DataFrame, input_column_mapping: dict) -> pd.DataFrame: """ Returns the bool for columns of slice_data for which we have at least one non nan value diff --git a/sysproduction/backup_parquet_data_to_remote.py b/sysproduction/backup_parquet_data_to_remote.py index 1018d1ce6d..29d482717c 100644 --- a/sysproduction/backup_parquet_data_to_remote.py +++ b/sysproduction/backup_parquet_data_to_remote.py @@ -1,11 +1,7 @@ import os from sysdata.config.production_config import get_production_config -from sysproduction.data.directories import ( - - get_parquet_backup_directory - -) +from sysproduction.data.directories import get_parquet_backup_directory from sysdata.data_blob import dataBlob @@ -18,7 +14,6 @@ def backup_parquet_data_to_remote(): return None - def get_parquet_directory(data): return data.parquet_root_directory @@ -33,6 +28,7 @@ def backup_parquet(self): log.debug("Copying data to backup destination") backup_parquet_data_to_remote_with_data(data) + def backup_parquet_data_to_remote_with_data(data): source_path = get_parquet_directory(data) destination_path = get_parquet_backup_directory() diff --git a/sysproduction/data/directories.py b/sysproduction/data/directories.py index 07907eef5c..6f82add5d5 100644 --- a/sysproduction/data/directories.py +++ b/sysproduction/data/directories.py @@ -7,7 +7,6 @@ production_config = get_production_config() - def get_main_backup_directory(): ans = production_config.get_element("offsystem_backup_directory") return get_resolved_pathname(ans) @@ -19,6 +18,7 @@ def get_csv_backup_directory(): return ans + def get_parquet_backup_directory(): main_backup = get_main_backup_directory() ans = os.path.join(main_backup, "parquet") diff --git a/sysproduction/reporting/data/pricechanges.py b/sysproduction/reporting/data/pricechanges.py index 422733fdf6..01d4cb1f28 100644 --- a/sysproduction/reporting/data/pricechanges.py +++ b/sysproduction/reporting/data/pricechanges.py @@ -209,7 +209,9 @@ def get_percentage_change_from_series_for_period( price_series_for_period = price_series[start_date:end_date] if len(price_series_for_period) == 0: return np.nan - return 100 * ((price_series_for_period.iloc[-1] / price_series_for_period.iloc[0]) - 1) + return 100 * ( + (price_series_for_period.iloc[-1] / price_series_for_period.iloc[0]) - 1 + ) def get_stdev_at_start_date_for_instrument( diff --git a/sysproduction/run_backups.py b/sysproduction/run_backups.py index 7b53cd0a8f..224102daec 100644 --- a/sysproduction/run_backups.py +++ b/sysproduction/run_backups.py @@ -29,7 +29,7 @@ def get_list_of_timer_functions_for_backup(): ("backup_db_to_csv", db_backup_object), ("backup_mongo_data_as_dump", mongodump_backup_object), ("backup_files", statefile_backup_object), - ("backup_parquet", parquet_backup_object) + ("backup_parquet", parquet_backup_object), ] return list_of_timer_names_and_functions