def read_nesh_timelines() -> pl.DataFrame:
    """Load the scraped timeline JSON as one row per ski area and season.

    Reads the JSON file at ``NewEnglandSkiHistoryTimelineScraper.JSON_PATH``
    and pivots it so that each distinct value of the ``moment`` column
    becomes its own column holding the matching ``date_iso`` value, keyed by
    ``ski_area_name``, ``season``, ``state`` and ``ski_area_page``.

    The ``opening`` and ``closing`` columns produced by the pivot are cast to
    ``pl.Date`` and exposed as ``opening_date`` and ``closing_date``; the
    uncast originals are dropped.

    Returns:
        A DataFrame with the four key columns, ``opening_date``,
        ``closing_date``, any other columns the pivot produced, and
        ``season_duration``: the number of whole days from ``opening_date``
        to ``closing_date``.
    """
    # NOTE(review): assumes the JSON's "moment" column contains both
    # "opening" and "closing" values; the column selections below fail
    # otherwise -- confirm against the scraper output.
    return (
        pl.read_json(NewEnglandSkiHistoryTimelineScraper.JSON_PATH)
        .pivot(
            index=["ski_area_name", "season", "state", "ski_area_page"],
            on="moment",
            values="date_iso",
            maintain_order=True,
        )
        .with_columns(
            pl.col("opening", "closing").cast(pl.Date).name.suffix("_date"),
        )
        .drop("opening", "closing")
        .with_columns(
            # Polars arithmetic treats a bare string operand as a string
            # literal rather than a column name, so the subtrahend has to be
            # an explicit column expression.
            season_duration=(
                pl.col("closing_date") - pl.col("opening_date")
            ).dt.total_days(),
        )
    )