Skip to content

Commit

Permalink
Add --export-from to allow for generating incremental data
Browse files Browse the repository at this point in the history
This commit adds an `--export-from` flag which allows generating
incremental data. This is useful for generating data every day instead
of having to export the CSV of all years at once.
  • Loading branch information
serramatutu committed May 7, 2024
1 parent 4e7e96c commit ffc58a1
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 20 deletions.
10 changes: 8 additions & 2 deletions jafgen/cli.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import datetime as dt
from typing import Annotated

import typer

from jafgen.simulation import Simulation
from jafgen.time import Day

app = typer.Typer()

Expand All @@ -16,8 +18,12 @@ def run(
str,
typer.Option(help="Optional prefix for the output file names."),
] = "raw",
export_from: Annotated[
dt.datetime,
typer.Option(help="Export data from this date onwards.")
] = Day.EPOCH
) -> None:
"""Run jafgen in CLI mode."""
sim = Simulation(years, pre)
sim = Simulation(years * 365, pre)
sim.run_simulation()
sim.save_results()
sim.save_results(path="./jaffle-data/", start_from=export_from)
7 changes: 7 additions & 0 deletions jafgen/customers/order.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,10 @@ def to_dict(self) -> dict[str, Any]:
"order_total": int(int(self.subtotal * 100) + int(self.tax_paid * 100)),
}

def items_to_dict(self) -> list[dict[str, Any]]:
    """Serialize this order's items as a list of row dicts.

    Returns:
        One dict per entry in ``self.items``, in order, each holding the
        owning order's ``id`` under ``"order_id"`` and the item's ``sku``
        under ``"sku"``.
    """
    rows: list[dict[str, Any]] = []
    for entry in self.items:
        row = {"order_id": self.id, "sku": entry.sku}
        rows.append(row)
    return rows

90 changes: 72 additions & 18 deletions jafgen/simulation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import csv
import datetime as dt
import itertools
import os
from dataclasses import dataclass, field
from typing import Any
import warnings

from rich.progress import track

Expand All @@ -23,14 +27,25 @@
T_3PM = time_from_total_minutes(60 * 15)
T_8PM = time_from_total_minutes(60 * 20)

@dataclass(frozen=True)
class SimulationDayData:
    """Container for the data newly created during one simulated day.

    The dataclass is frozen, so its fields cannot be rebound after
    construction; the lists themselves remain mutable and are appended to
    while the day is being simulated.
    """

    # The simulated day this record belongs to.
    day: Day
    # Customers first seen on this day.
    new_customers: list[Customer] = field(default_factory=list)
    # Orders placed on this day.
    new_orders: list[Order] = field(default_factory=list)
    # Tweets produced on this day.
    new_tweets: list[Tweet] = field(default_factory=list)



class Simulation:

"""Runs a simulation of multiple days of our customers' lives."""

def __init__(self, years: int, prefix: str):
def __init__(self, days: int, prefix: str):
"""Initialize the simulation."""
self.years = years
self.sim_days = days
self.scale = 100
self.prefix = prefix
self.stores = [
Expand Down Expand Up @@ -60,47 +75,86 @@ def __init__(self, years: int, prefix: str):
for store_name, popularity, opened_date, market_size, tax in self.stores
]

self.simulated_days: list[SimulationDayData] = []
self.customers: dict[CustomerId, Customer] = {}
self.orders: list[Order] = []
self.tweets: list[Tweet] = []
self.sim_days = 365 * self.years

def run_simulation(self):
    """Run the simulation, recording the new data produced on each day.

    For every day index in ``range(self.sim_days)`` a fresh
    ``SimulationDayData`` is created, filled with the orders, first-time
    customers and tweets yielded by each market's ``sim_day``, and then
    appended to ``self.simulated_days``. ``self.customers`` is kept as an
    index of every customer seen so far so that a customer is only
    reported as "new" on the day of their first order.
    """
    for i in track(
        range(self.sim_days), description="🥪 Pressing fresh jaffles..."
    ):
        day_data = SimulationDayData(day=Day(i))

        for market in self.markets:
            for order, tweet in market.sim_day(day_data.day):
                if order:
                    day_data.new_orders.append(order)
                    # Only the first order from a customer makes them "new".
                    if order.customer.id not in self.customers:
                        self.customers[order.customer.id] = order.customer
                        day_data.new_customers.append(order.customer)
                if tweet:
                    day_data.new_tweets.append(tweet)

        self.simulated_days.append(day_data)

def save_results(self) -> None:
"""Save the simulated results to `./jaffle-data/[prefix]_[entity].csv`."""
def save_results(self, path: str, start_from: dt.datetime = Day.EPOCH) -> None:
"""Save the simulated results to `path`."""
stock: Stock = Stock()
inventory: Inventory = Inventory()

if start_from < Day.EPOCH:
raise ValueError("Cannot start from day before the EPOCH.")

discard_days = (start_from - Day.EPOCH).days
if discard_days >= self.sim_days:
discard_days = self.sim_days
warnings.warn(
"start_from is after end of simulation. All data will be empty "
"except for slowly changing dimensions."
)

save_days = self.simulated_days[discard_days:]

entities: dict[str, list[dict[str, Any]]] = {
"customers": [customer.to_dict() for customer in self.customers.values()],
"orders": [order.to_dict() for order in self.orders],
"items": [item.to_dict() for order in self.orders for item in order.items],
# new data every day, produce only requested
"customers": [
customer.to_dict()
for day in save_days
for customer in day.new_customers
],
"orders": [
order.to_dict()
for day in save_days
for order in day.new_orders
],
"items": list(itertools.chain.from_iterable([
order.items_to_dict()
for day in save_days
for order in day.new_orders
])),
"tweets": [
tweet.to_dict()
for day in save_days
for tweet in day.new_tweets
],

# slowly changing dimensions, produce the same every time
"stores": [market.store.to_dict() for market in self.markets],
"supplies": stock.to_dict(),
"products": inventory.to_dict(),
"tweets": [tweet.to_dict() for tweet in self.tweets],
}

if not os.path.exists("./jaffle-data"):
os.makedirs("./jaffle-data")
if not os.path.exists(path):
os.makedirs(path)
for entity, data in track(
entities.items(), description="🚚 Delivering jaffles..."
):
if len(data) == 0:
continue

file_path = os.path.join(path, f"{self.prefix}_{entity}.csv")
with open(
f"./jaffle-data/{self.prefix}_{entity}.csv", "w", newline=""
file_path, "w", newline=""
) as file:
writer = csv.DictWriter(file, fieldnames=data[0].keys())
writer.writeheader()
Expand Down

0 comments on commit ffc58a1

Please sign in to comment.