Skip to content

Commit

Permalink
Merge pull request #3216 from catalyst-cooperative/dev
Browse files Browse the repository at this point in the history
Switch from dev to main/nightly/stable branch structure.
  • Loading branch information
zaneselvans authored Jan 5, 2024
2 parents 618c20c + e9c205a commit 485bc7b
Show file tree
Hide file tree
Showing 16 changed files with 200 additions and 243 deletions.
2 changes: 1 addition & 1 deletion .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ version: 2
updates:
- package-ecosystem: github-actions
directory: /
target-branch: dev
target-branch: main
schedule:
interval: weekly
25 changes: 11 additions & 14 deletions .github/workflows/build-deploy-pudl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@ on:
tags:
- "v20*"
schedule:
- cron: "0 6 * * 1-5" # Weekdays at midnight on MST
# 6am UTC daily (11pm PDT, 2am EDT)
# But only if there are changes since the last nightly build.
- cron: "0 6 * * *"

env:
GCP_BILLING_PROJECT: ${{ secrets.GCP_BILLING_PROJECT }}
BUILD_REF: ${{ github.ref_name }} # This is changed to dev if running on a schedule
GCE_INSTANCE: pudl-deployment-tag # This is changed to pudl-deployment-dev if running on a schedule
GCE_INSTANCE_ZONE: ${{ secrets.GCE_INSTANCE_ZONE }}
GCS_OUTPUT_BUCKET: gs://nightly-build-outputs.catalyst.coop
GCS_OUTPUT_BUCKET: gs://builds.catalyst.coop

jobs:
build_and_deploy_pudl:
Expand All @@ -22,17 +23,15 @@ jobs:
contents: write
id-token: write
steps:
- name: Use pudl-deployment-dev vm and dev branch if running on a schedule
- name: Use pudl-deployment-dev vm if running on a schedule
if: ${{ (github.event_name == 'schedule') }}
run: |
echo "This action was triggered by a schedule."
echo "GCE_INSTANCE=pudl-deployment-dev" >> $GITHUB_ENV
echo "BUILD_REF=dev" >> $GITHUB_ENV
- name: Checkout Repository
uses: actions/checkout@v4
with:
ref: ${{ env.BUILD_REF }}
fetch-depth: 0

- name: Skip the build if no changes since the last successful nightly build.
Expand All @@ -50,13 +49,12 @@ jobs:
if: ${{ env.SKIP_BUILD != 'true' }}
run: |
echo "NIGHTLY_TAG=nightly-$(date +%Y-%m-%d)" >> $GITHUB_ENV
echo "BUILD_ID=$(date +%Y-%m-%d-%H%M)-$(git rev-parse --short HEAD)-${BUILD_REF}" >> $GITHUB_ENV
echo "BUILD_ID=$(date +%Y-%m-%d-%H%M)-$(git rev-parse --short HEAD)-${{ github.ref_name }}" >> $GITHUB_ENV
- name: Show freshly set envvars
if: ${{ env.SKIP_BUILD != 'true' }}
run: |
echo "GCE_INSTANCE: $GCE_INSTANCE"
echo "BUILD_REF: $BUILD_REF"
echo "NIGHTLY_TAG: $NIGHTLY_TAG"
echo "BUILD_ID: $BUILD_ID"
Expand All @@ -65,7 +63,7 @@ jobs:
run: |
git config user.email "[email protected]"
git config user.name "pudlbot"
git tag -a -m "$NIGHTLY_TAG" $NIGHTLY_TAG $BUILD_REF
git tag -a -m "$NIGHTLY_TAG" $NIGHTLY_TAG ${{ github.ref_name }}
git push origin $NIGHTLY_TAG
- name: Docker Metadata
Expand All @@ -77,7 +75,7 @@ jobs:
flavor: |
latest=auto
tags: |
type=raw,value=${{ env.BUILD_REF}}
type=raw,value=${{ github.ref_name }}
type=ref,event=tag
- name: Set up Docker Buildx
Expand Down Expand Up @@ -121,7 +119,7 @@ jobs:
if: ${{ env.SKIP_BUILD != 'true' }}
env:
DAGSTER_PG_PASSWORD: ${{ secrets.DAGSTER_PG_PASSWORD }}
PUDL_OUTPUT_PATH: ${{ env.GCS_OUTPUT_BUCKET }}/${{ env.BUILD_ID }}
PUDL_GCS_OUTPUT: ${{ env.GCS_OUTPUT_BUCKET }}/${{ env.BUILD_ID }}
run: |-
gcloud compute instances add-metadata "$GCE_INSTANCE" \
--zone "$GCE_INSTANCE_ZONE" \
Expand All @@ -137,8 +135,7 @@ jobs:
--container-arg='' \
--container-arg="bash" \
--container-arg="./docker/gcp_pudl_etl.sh" \
--container-env-file="./docker/.env" \
--container-env BUILD_REF=${{ env.BUILD_REF }} \
--container-env BUILD_REF=${{ github.ref_name }} \
--container-env BUILD_ID=${{ env.BUILD_ID }} \
--container-env NIGHTLY_TAG=${{ env.NIGHTLY_TAG }} \
--container-env GITHUB_ACTION_TRIGGER=${{ github.event_name }} \
Expand All @@ -157,7 +154,7 @@ jobs:
--container-env PUDL_BOT_PAT=${{ secrets.PUDL_BOT_PAT }} \
--container-env ZENODO_SANDBOX_TOKEN_PUBLISH=${{ secrets.ZENODO_SANDBOX_TOKEN_PUBLISH }} \
--container-env PUDL_SETTINGS_YML="/home/mambauser/pudl/src/pudl/package_data/settings/etl_full.yml" \
--container-env PUDL_GCS_OUTPUT=${{ env.PUDL_OUTPUT_PATH }}
--container-env PUDL_GCS_OUTPUT=${{ env.PUDL_GCS_OUTPUT }}
# Start the VM
- name: Start the deploy-pudl-vm
Expand Down
59 changes: 0 additions & 59 deletions .github/workflows/run-etl.yml

This file was deleted.

16 changes: 2 additions & 14 deletions .github/workflows/update-conda-lockfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ on:

# What branch does this action run on?
# - workflow_dispatch: Whatever branch it was run against.
# - schedule: Always the same branch (will be dev or main)
# - schedule: Always runs on main

jobs:
update-conda-lockfile:
Expand All @@ -21,21 +21,9 @@ jobs:
- name: Get today's date
run: |
echo "TODAY=$(date +%Y-%m-%d)" >> $GITHUB_ENV
- name: Set GITHUB_REF for use with workflow_dispatch
if: ${{ (github.event_name == 'workflow_dispatch') }}
run: |
echo "GITHUB_REF="${{ github.ref_name }} >> $GITHUB_ENV
- name: Set GITHUB_REF for use with schedule
if: ${{ (github.event_name == 'schedule') }}
run: |
echo "GITHUB_REF=dev" >> $GITHUB_ENV
- name: Log final value of GITHUB_REF
run: |
echo "Final GITHUB_REF:" ${{ env.GITHUB_REF }}
- uses: actions/checkout@v4
with:
token: ${{ secrets.PUDL_BOT_PAT }}
ref: ${{ env.GITHUB_REF }}
- name: Install Micromamba
uses: mamba-org/setup-micromamba@v1
with:
Expand Down Expand Up @@ -64,5 +52,5 @@ jobs:
labels: dependencies, conda-lock
reviewers: zaneselvans
branch: update-conda-lockfile
base: ${{ env.GITHUB_REF }}
base: ${{ github.ref_name }}
delete-branch: true
11 changes: 0 additions & 11 deletions .github/workflows/zenodo-cache-sync.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ on:
env:
INTERNAL_ZENODO_CACHE_BUCKET: gs://internal-zenodo-cache.catalyst.coop
PUBLIC_ZENODO_CACHE_BUCKET: gs://zenodo-cache.catalyst.coop
GITHUB_REF: ${{ github.ref_name }} # This is changed to dev if running on a schedule
PUDL_OUTPUT: ~/pudl-work/output
PUDL_INPUT: ~/pudl-work/input/

Expand All @@ -29,16 +28,6 @@ jobs:
shell: bash -l {0}

steps:
- name: Use dev branch if running on a schedule
if: ${{ (github.event_name == 'schedule') }}
run: |
echo "This action was triggered by a schedule." && echo "GITHUB_REF=dev" >> $GITHUB_ENV
- name: Log value of github ref
if: ${{ (github.event_name == 'pull_request') }}
run: |
echo "This action was triggered by a pull request." && echo "GITHUB_REF="${{ github.head_ref }} >> $GITHUB_ENV
- name: Checkout desired branch
uses: actions/checkout@v4

Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ ci:
For more information, see https://pre-commit.ci
autofix_prs: true
autoupdate_branch: dev
autoupdate_branch: main
autoupdate_commit_msg: "[pre-commit.ci] pre-commit autoupdate"
autoupdate_schedule: weekly
skip: [unit-tests, nb-output-clear, conda-lock]
Expand Down
36 changes: 18 additions & 18 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ PUDL is comprised of three core components:
to the original inputs. Each of the data inputs may have several different versions
archived, and all are assigned a unique DOI and made available through the REST API.
You can read more about the Raw Data Archives in the
`docs <https://catalystcoop-pudl.readthedocs.io/en/dev/intro.html#raw-data-archives>`__.
`docs <https://catalystcoop-pudl.readthedocs.io/en/nightly/#raw-data-archives>`__.
- **ETL Pipeline**

- The ETL pipeline (this repo) ingests the raw archives, cleans them,
Expand All @@ -77,13 +77,13 @@ PUDL is comprised of three core components:
Python package is embedded with a set of DOIs to indicate which version of the
raw inputs it is meant to process. This process helps ensure that the ETL and its
outputs are replicable. You can read more about the ETL in the
`docs <https://catalystcoop-pudl.readthedocs.io/en/dev/intro.html#the-etl-process>`__.
`docs <https://catalystcoop-pudl.readthedocs.io/en/nightly/#the-etl-process>`__.
- **Data Warehouse**

- The outputs from the ETL, sometimes called "PUDL outputs",
are stored in a data warehouse as a collection of SQLite and Parquet files so that
users can access the data without having to run any code. Learn more about how to
access the data `here <https://catalystcoop-pudl.readthedocs.io/en/dev/data_access.html>`__.
access the data `here <https://catalystcoop-pudl.readthedocs.io/en/nightly/data_access.html>`__.

What data is available?
-----------------------
Expand All @@ -92,24 +92,24 @@ PUDL currently integrates data from:

* **EIA Form 860**: 2001-2022
- `Source Docs <https://www.eia.gov/electricity/data/eia860/>`__
- `PUDL Docs <https://catalystcoop-pudl.readthedocs.io/en/dev/data_sources/eia860.html>`__
- `PUDL Docs <https://catalystcoop-pudl.readthedocs.io/en/nightly/data_sources/eia860.html>`__
* **EIA Form 860m**: 2023-06
- `Source Docs <https://www.eia.gov/electricity/data/eia860m/>`__
* **EIA Form 861**: 2001-2022
- `Source Docs <https://www.eia.gov/electricity/data/eia861/>`__
- `PUDL Docs <https://catalystcoop-pudl.readthedocs.io/en/dev/data_sources/eia861.html>`__
- `PUDL Docs <https://catalystcoop-pudl.readthedocs.io/en/nightly/data_sources/eia861.html>`__
* **EIA Form 923**: 2001-2022
- `Source Docs <https://www.eia.gov/electricity/data/eia923/>`__
- `PUDL Docs <https://catalystcoop-pudl.readthedocs.io/en/dev/data_sources/eia923.html>`__
- `PUDL Docs <https://catalystcoop-pudl.readthedocs.io/en/nightly/data_sources/eia923.html>`__
* **EPA Continuous Emissions Monitoring System (CEMS)**: 1995-2022
- `Source Docs <https://campd.epa.gov/>`__
- `PUDL Docs <https://catalystcoop-pudl.readthedocs.io/en/dev/data_sources/epacems.html>`__
- `PUDL Docs <https://catalystcoop-pudl.readthedocs.io/en/nightly/data_sources/epacems.html>`__
* **FERC Form 1**: 1994-2021
- `Source Docs <https://www.ferc.gov/industries-data/electric/general-information/electric-industry-forms/form-1-electric-utility-annual>`__
- `PUDL Docs <https://catalystcoop-pudl.readthedocs.io/en/dev/data_sources/ferc1.html>`__
- `PUDL Docs <https://catalystcoop-pudl.readthedocs.io/en/nightly/data_sources/ferc1.html>`__
* **FERC Form 714**: 2006-2020
- `Source Docs <https://www.ferc.gov/industries-data/electric/general-information/electric-industry-forms/form-no-714-annual-electric/data>`__
- `PUDL Docs <https://catalystcoop-pudl.readthedocs.io/en/dev/data_sources/ferc714.html>`__
- `PUDL Docs <https://catalystcoop-pudl.readthedocs.io/en/nightly/data_sources/ferc714.html>`__
* **FERC Form 2**: 2021 (raw only)
- `Source Docs <https://www.ferc.gov/industries-data/natural-gas/industry-forms/form-2-2a-3-q-gas-historical-vfp-data>`__
* **FERC Form 6**: 2021 (raw only)
Expand All @@ -135,33 +135,33 @@ How do I access the data?
-------------------------

For details on how to access PUDL data, see the `data access documentation
<https://catalystcoop-pudl.readthedocs.io/en/latest/data_access.html>`__. A quick
<https://catalystcoop-pudl.readthedocs.io/en/nightly/data_access.html>`__. A quick
summary:

* `Datasette <https://catalystcoop-pudl.readthedocs.io/en/latest/data_access.html#-access-datasette>`__
* `Datasette <https://catalystcoop-pudl.readthedocs.io/en/nightly/data_access.html#-access-datasette>`__
provides browsable and queryable data from our nightly builds on the web:
https://data.catalyst.coop
* `Kaggle <https://catalystcoop-pudl.readthedocs.io/en/latest/data_access.html#access-kaggle>`__
* `Kaggle <https://catalystcoop-pudl.readthedocs.io/en/nightly/data_access.html#access-kaggle>`__
provides easy Jupyter notebook access to the PUDL data, updated weekly:
https://www.kaggle.com/datasets/catalystcooperative/pudl-project
* `Zenodo <https://catalystcoop-pudl.readthedocs.io/en/latest/data_access.html#access-zenodo>`__
* `Zenodo <https://catalystcoop-pudl.readthedocs.io/en/nightly/data_access.html#access-zenodo>`__
provides stable long-term access to our versioned data releases with a citeable DOI:
https://doi.org/10.5281/zenodo.3653158
* `Nightly Data Builds <https://catalystcoop-pudl.readthedocs.io/en/latest/data_access.html#access-nightly-builds>`__
* `Nightly Data Builds <https://catalystcoop-pudl.readthedocs.io/en/nightly/data_access.html#access-nightly-builds>`__
push their outputs to the AWS Open Data Registry:
https://registry.opendata.aws/catalyst-cooperative-pudl/
See `the nightly build docs <https://catalystcoop-pudl.readthedocs.io/en/latest/data_access.html#access-nightly-builds>`__
See `the nightly build docs <https://catalystcoop-pudl.readthedocs.io/en/nightly/data_access.html#access-nightly-builds>`__
for direct download links.
* `The PUDL Development Environment <https://catalystcoop-pudl.readthedocs.io/en/latest/dev/dev_setup.html>`__
* `The PUDL Development Environment <https://catalystcoop-pudl.readthedocs.io/en/nightly/dev/dev_setup.html>`__
lets you run the PUDL data processing pipeline locally.

Contributing to PUDL
--------------------

Find PUDL useful? Want to help make it better? There are lots of ways to help!

* Check out our `contribution guide <https://catalystcoop-pudl.readthedocs.io/en/latest/CONTRIBUTING.html>`__
including our `Code of Conduct <https://catalystcoop-pudl.readthedocs.io/en/latest/code_of_conduct.html>`__.
* Check out our `contribution guide <https://catalystcoop-pudl.readthedocs.io/en/nightly/CONTRIBUTING.html>`__
including our `Code of Conduct <https://catalystcoop-pudl.readthedocs.io/en/nightly/code_of_conduct.html>`__.
* You can file a bug report, make a feature request, or ask questions in the
`GitHub issue tracker <https://github.com/catalyst-cooperative/pudl/issues>`__.
* Feel free to fork the project and make a pull request with new code, better
Expand Down
11 changes: 0 additions & 11 deletions docker/.env

This file was deleted.

3 changes: 2 additions & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM mambaorg/micromamba:1.5.5
FROM mambaorg/micromamba:1.5.6

USER root

Expand Down Expand Up @@ -29,6 +29,7 @@ ENV CONTAINER_PUDL_WORKSPACE=${CONTAINER_HOME}/pudl_work
ENV PUDL_INPUT=${CONTAINER_PUDL_WORKSPACE}/input
ENV PUDL_OUTPUT=${CONTAINER_PUDL_WORKSPACE}/output
ENV DAGSTER_HOME=${CONTAINER_PUDL_WORKSPACE}/dagster_home
ENV LOGFILE=${PUDL_OUTPUT}/pudl-etl.log

RUN mkdir -p ${PUDL_INPUT} ${PUDL_OUTPUT} ${DAGSTER_HOME} ${PUDL_REPO}

Expand Down
Loading

0 comments on commit 485bc7b

Please sign in to comment.