Skip to content

Commit

Permalink
read_nesh_timelines
Browse files Browse the repository at this point in the history
  • Loading branch information
dhimmel committed Jan 26, 2025
1 parent dd916f7 commit 4fdda11
Showing 1 changed file with 21 additions and 0 deletions.
21 changes: 21 additions & 0 deletions openskistats/nesh/timelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from pathlib import Path
from typing import Any, ClassVar, Literal

import polars as pl
import requests
from bs4 import BeautifulSoup

Expand Down Expand Up @@ -130,3 +131,23 @@ def scrape_all_seasons(cls) -> list[dict[str, Any]]:
json_str = json.dumps(rows, indent=2, ensure_ascii=False)
cls.JSON_PATH.write_text(json_str + "\n")
return rows


def read_nesh_timelines() -> pl.DataFrame:
    """
    Load the scraped timeline JSON as one row per ski area and season.

    Reads the file at `NewEnglandSkiHistoryTimelineScraper.JSON_PATH`,
    pivots the `moment` values into columns populated from `date_iso`,
    and keeps the `opening` and `closing` moments as Date-typed
    `opening_date` and `closing_date` columns.
    `season_duration` is the whole number of days from `opening_date`
    to `closing_date`.

    NOTE(review): the pivot is called without an aggregate function, so this
    presumably expects at most one row per index key and moment -- confirm
    against the scraper output.
    """
    return (
        pl.read_json(NewEnglandSkiHistoryTimelineScraper.JSON_PATH)
        .pivot(
            index=["ski_area_name", "season", "state", "ski_area_page"],
            on="moment",
            values="date_iso",
            maintain_order=True,
        )
        .with_columns(
            pl.col("opening", "closing").cast(pl.Date).name.suffix("_date"),
        )
        .drop("opening", "closing")
        .with_columns(
            # Reference the column explicitly: polars arithmetic treats a bare
            # string operand as a string literal rather than a column name.
            season_duration=(
                pl.col("closing_date") - pl.col("opening_date")
            ).dt.total_days()
        )
    )

0 comments on commit 4fdda11

Please sign in to comment.