Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --export-from to allow for generating incremental data #118

Open
wants to merge 1 commit into
base: serramatutu/ruff
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions jafgen/cli.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import datetime as dt
from typing import Annotated

import typer

from jafgen.simulation import Simulation
from jafgen.time import Day

app = typer.Typer()

Expand All @@ -16,8 +18,12 @@ def run(
str,
typer.Option(help="Optional prefix for the output file names."),
] = "raw",
export_from: Annotated[
dt.datetime,
typer.Option(help="Export data from this date onwards.")
] = Day.EPOCH
) -> None:
"""Run jafgen in CLI mode."""
sim = Simulation(years, pre)
sim = Simulation(years * 365, pre)
sim.run_simulation()
sim.save_results()
sim.save_results(path="./jaffle-data/", start_from=export_from)
7 changes: 7 additions & 0 deletions jafgen/customers/order.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,10 @@ def to_dict(self) -> dict[str, Any]:
"order_total": int(int(self.subtotal * 100) + int(self.tax_paid * 100)),
}

def items_to_dict(self) -> list[dict[str, Any]]:
    """Serialize this order's items to a list of dicts, one per item.

    Each entry pairs this order's ``id`` (under ``"order_id"``) with the
    ``sku`` of a single item, in the same sequence as ``self.items``.
    An order without items yields an empty list.
    """
    return [{"order_id": self.id, "sku": item.sku} for item in self.items]

90 changes: 72 additions & 18 deletions jafgen/simulation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import csv
import datetime as dt
import itertools
import os
from dataclasses import dataclass, field
from typing import Any
import warnings

from rich.progress import track

Expand All @@ -23,14 +27,25 @@
T_3PM = time_from_total_minutes(60 * 15)
T_8PM = time_from_total_minutes(60 * 20)

@dataclass(frozen=True)
class SimulationDayData:

    """Holds the new data created every day of a simulation.

    One instance is built per simulated day and filled in while that day is
    processed. ``frozen=True`` only blocks rebinding the fields; the lists
    themselves are still appended to in place as the day is simulated.
    """

    # The simulated day this record belongs to.
    day: Day
    # Customers whose id had not been seen by the simulation before this day.
    new_customers: list[Customer] = field(default_factory=list)
    # Orders produced by the markets on this day.
    new_orders: list[Order] = field(default_factory=list)
    # Tweets produced by the markets on this day.
    new_tweets: list[Tweet] = field(default_factory=list)



class Simulation:

"""Runs a simulation of multiple days of our customers' lives."""

def __init__(self, years: int, prefix: str):
def __init__(self, days: int, prefix: str):
"""Initialize the simulation."""
self.years = years
self.sim_days = days
self.scale = 100
self.prefix = prefix
self.stores = [
Expand Down Expand Up @@ -60,47 +75,86 @@ def __init__(self, years: int, prefix: str):
for store_name, popularity, opened_date, market_size, tax in self.stores
]

self.simulated_days: list[SimulationDayData] = []
self.customers: dict[CustomerId, Customer] = {}
self.orders: list[Order] = []
self.tweets: list[Tweet] = []
self.sim_days = 365 * self.years

def run_simulation(self):
"""Run the simulation."""
for i in track(
range(self.sim_days), description="🥪 Pressing fresh jaffles..."
):
day_data = SimulationDayData(day=Day(i))

for market in self.markets:
day = Day(i)
for order, tweet in market.sim_day(day):
for order, tweet in market.sim_day(day_data.day):
if order:
self.orders.append(order)
day_data.new_orders.append(order)
if order.customer.id not in self.customers:
self.customers[order.customer.id] = order.customer
day_data.new_customers.append(order.customer)
if tweet:
self.tweets.append(tweet)
day_data.new_tweets.append(tweet)

self.simulated_days.append(day_data)

def save_results(self) -> None:
"""Save the simulated results to `./jaffle-data/[prefix]_[entity].csv`."""
def save_results(self, path: str, start_from: dt.datetime = Day.EPOCH) -> None:
"""Save the simulated results to `path`."""
stock: Stock = Stock()
inventory: Inventory = Inventory()

if start_from < Day.EPOCH:
raise ValueError("Cannot start from day before the EPOCH.")

discard_days = (start_from - Day.EPOCH).days
if discard_days >= self.sim_days:
discard_days = self.sim_days
warnings.warn(
"start_from is after end of simulation. All data will be empty "
"except for slowly changing dimensions."
)

save_days = self.simulated_days[discard_days:]

entities: dict[str, list[dict[str, Any]]] = {
"customers": [customer.to_dict() for customer in self.customers.values()],
"orders": [order.to_dict() for order in self.orders],
"items": [item.to_dict() for order in self.orders for item in order.items],
# new data every day, produce only requested
"customers": [
customer.to_dict()
for day in save_days
for customer in day.new_customers
],
"orders": [
order.to_dict()
for day in save_days
for order in day.new_orders
],
"items": list(itertools.chain.from_iterable([
order.items_to_dict()
for day in save_days
for order in day.new_orders
])),
"tweets": [
tweet.to_dict()
for day in save_days
for tweet in day.new_tweets
],

# slowly changing dimensions, produce the same every time
"stores": [market.store.to_dict() for market in self.markets],
"supplies": stock.to_dict(),
"products": inventory.to_dict(),
"tweets": [tweet.to_dict() for tweet in self.tweets],
}

if not os.path.exists("./jaffle-data"):
os.makedirs("./jaffle-data")
if not os.path.exists(path):
os.makedirs(path)
for entity, data in track(
entities.items(), description="🚚 Delivering jaffles..."
):
if len(data) == 0:
continue

file_path = os.path.join(path, f"{self.prefix}_{entity}.csv")
with open(
f"./jaffle-data/{self.prefix}_{entity}.csv", "w", newline=""
file_path, "w", newline=""
) as file:
writer = csv.DictWriter(file, fieldnames=data[0].keys())
writer.writeheader()
Expand Down
6 changes: 0 additions & 6 deletions tests/test_days.py

This file was deleted.