Skip to content

Commit

Permalink
Merge pull request #413 from knaaptime/sedaupdate
Browse files Browse the repository at this point in the history
don't use assert in datastore; update SEDA URL for districts
  • Loading branch information
knaaptime authored Jan 22, 2025
2 parents 0a14104 + 6362bf6 commit 52b5306
Showing 1 changed file with 26 additions and 19 deletions.
45 changes: 26 additions & 19 deletions geosnap/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def __dir__(self):
"codebook",
"counties",
"ejscreen",
"ejscreen_codebook"
"ejscreen_codebook",
"lodes_codebook",
"ltdb",
"msa_definitions",
Expand Down Expand Up @@ -208,38 +208,47 @@ def seda(
Subject to your compliance with the terms and conditions set forth in this Agreement, Stanford grants you a revocable, non-exclusive, non-transferable right to access and make use of the Data Sets.
"""
assert accept_eula, (
"You must accept the EULA by passing `accept_eula=True` \n" f"{eula}"
)
assert level in [
if not accept_eula:
raise ValueError(
f"You must accept the EULA by passing `accept_eula=True` \n{eula}"
)
if level not in [
"school",
"geodist",
], "Supported options for the `level` argument are 'school' and 'geodist'"
assert pooling in [
]:
raise ValueError(
"Supported options for the `level` argument are 'school' and 'geodist'"
)
if pooling not in [
"pool",
"long",
"poolsub",
], "`pool` argument must be either 'pool', 'long', or 'poolsub'"
assert (
standardize
in [
"gcs",
"cs",
]
), "`standardize` argument must be either 'cs' for cohort-standardized or 'gcs' for grade-cohort-standardized"
]:
raise ValueError(
"`pool` argument must be either 'pool', 'long', or 'poolsub'"
)
if standardize not in [
"gcs",
"cs",
]:
raise ValueError(
"`standardize` argument must be either 'cs' for cohort-standardized or 'gcs' for grade-cohort-standardized"
)

if pooling == "poolsub":
fn = f"seda_{level}_{pooling}_{standardize}_5.0"
else:
fn = f"seda_{level}_{pooling}_{standardize}_5.0"
if level == "geodist":
fn += "_updated_20240319"
local_path = pathlib.Path(self.data_dir, "seda", f"{fn}.parquet")
remote_path = f"https://stacks.stanford.edu/file/druid:cs829jn7849/{fn}.csv"
msg = (
"Streaming data from SEDA archive at <https://exhibits.stanford.edu/data/catalog/db586ns4974>.\n"
"Use `geosnap.io.store_seda()` to store the data locally for better performance"
)
if level == "school":
assert pooling == "pool", "The school level only supports pooled data"
if level == "school" and not pooling == "pool":
raise ValueError("The school level only supports pooled data")
try:
t = pd.read_parquet(local_path)
except FileNotFoundError:
Expand Down Expand Up @@ -683,5 +692,3 @@ def nlcd_definitions(self):
os.path.dirname(os.path.abspath(__file__)), "io/nlcd_definitions.csv"
)
)


0 comments on commit 52b5306

Please sign in to comment.