Skip to content

Commit

Permalink
remove old function and update docstring
Browse files Browse the repository at this point in the history
  • Loading branch information
hannahker committed Dec 12, 2024
1 parent d25d070 commit f1d18fd
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 45 deletions.
33 changes: 20 additions & 13 deletions src/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,19 +142,26 @@ def config_pipeline(dataset, test, update, mode, backfill, engine):
def generate_date_series(
start_date, end_date, frequency="D", missing_dates=None, chunk_size=100
):
"""
Generate a sorted list of dates between start and end dates, incorporating missing dates,
partitioned into chunks of specified size.
Parameters:
start_date (str or datetime): Start date in 'YYYY-MM-DD' format if string
end_date (str or datetime): End date in 'YYYY-MM-DD' format if string
frequency (str): 'D' for daily or 'M' for monthly
missing_dates (list): Optional list of dates to include, in 'YYYY-MM-DD' format if strings
chunk_size (int): Maximum number of dates per partition
Returns:
list of lists: List of date chunks, where each chunk is a list of datetime.date objects
"""Generate a sorted list of dates partitioned into chunks.
Parameters
----------
start_date : str or datetime
Start date in 'YYYY-MM-DD' format if string
end_date : str or datetime
End date in 'YYYY-MM-DD' format if string, or None for single date
frequency : str, default='D'
Date frequency, either 'D' for daily or 'M' for monthly
missing_dates : list, optional
Additional dates to include in the series
chunk_size : int, default=100
Maximum number of dates per chunk
Returns
-------
list of list of datetime.date
List of date chunks, where each chunk contains up to chunk_size dates,
sorted in ascending order with duplicates removed
"""
if not end_date:
dates = [start_date]
Expand Down
33 changes: 1 addition & 32 deletions src/utils/general_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import re
from datetime import datetime, timedelta
from datetime import datetime
from typing import List

import pandas as pd
Expand All @@ -9,37 +9,6 @@
from src.utils.cloud_utils import get_container_client


def split_date_range(start_date, end_date):
    """Split a date range into calendar-year chunks if it exceeds 365 days.

    Parameters
    ----------
    start_date : str
        Start date in 'YYYY-MM-DD' format
    end_date : str
        End date in 'YYYY-MM-DD' format

    Returns
    -------
    list of tuple
        Each tuple contains the start and end date for a chunk. When the
        range covers 365 days or fewer, a single tuple holding the inputs
        exactly as passed is returned. Otherwise each chunk is a pair of
        'YYYY-MM-DD' strings that ends on 31 December or on ``end_date``,
        whichever comes first.
    """
    start = pd.to_datetime(start_date)
    end = pd.to_datetime(end_date)

    # Short ranges (this also covers start > end) are returned untouched.
    if (end - start).days <= 365:
        return [(start_date, end_date)]

    date_ranges = []
    # Inclusive comparison: with a strict `<` the last day was dropped
    # whenever end_date landed on 1 January (start == end on the final pass).
    while start <= end:
        year_end = min(pd.Timestamp(year=start.year, month=12, day=31), end)
        date_ranges.append(
            (start.strftime("%Y-%m-%d"), year_end.strftime("%Y-%m-%d"))
        )
        # The next chunk begins the day after the current one ends.
        start = year_end + pd.Timedelta(days=1)

    return date_ranges


def add_months_to_date(date_string, months):
"""
Add or subtract a number of months to/from a given date string.
Expand Down

0 comments on commit f1d18fd

Please sign in to comment.