Skip to content

build-deploy-pudl #1141

build-deploy-pudl

build-deploy-pudl #1141

---
# Builds the PUDL ETL Docker image, pushes it to Docker Hub, and submits a
# Google Batch job that runs the ETL in that image.
name: build-deploy-pudl
on:
  # Allow the workflow to be started manually.
  workflow_dispatch:
  push:
    # Run for any pushed tag whose name begins with "v20".
    tags:
      - "v20*"
  schedule:
    # 6am UTC daily (11pm PDT, 2am EDT)
    # But only if there are changes since the last nightly build.
    - cron: "0 6 * * *"
# Workflow-level environment, visible to every step of every job.
env:
  # Taken from repository secrets.
  GCP_BILLING_PROJECT: ${{ secrets.GCP_BILLING_PROJECT }}
  # Base GCS location; the per-build output path is <bucket>/<BUILD_ID>.
  GCS_OUTPUT_BUCKET: gs://builds.catalyst.coop
  # File that the generated Google Batch job definition is written to and
  # later submitted from.
  BATCH_JOB_JSON: batch_job.json
jobs:
  # Single job: build the PUDL ETL image, push it to Docker Hub, then submit a
  # Google Batch job that runs the ETL inside that exact image digest.
  #
  # Every step after the skip check is gated on SKIP_BUILD so that a scheduled
  # run with no new commits does nothing except post a "skipped" Slack message.
  build_and_deploy_pudl:
    name: Build Docker image, push to Docker Hub and deploy to Google Batch
    runs-on: ubuntu-latest
    permissions:
      # The "Tag nightly build" step pushes a tag to this repository.
      contents: write
      # The Google auth step uses a workload identity provider (OIDC token).
      id-token: write
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          # Full history: later steps resolve origin/nightly and create tags.
          fetch-depth: 0

      - name: Skip the build if no changes since the last successful nightly build.
        if: ${{ (github.event_name == 'schedule') }}
        run: |
          CURRENT_COMMIT=$(git rev-parse HEAD)
          NIGHTLY_COMMIT=$(git rev-parse origin/nightly)
          if [[ "$CURRENT_COMMIT" == "$NIGHTLY_COMMIT" ]]; then
            echo "::notice::No changes since last successful nightly build. Skipping."
            echo "SKIP_BUILD=true" >> "$GITHUB_ENV"
            exit 0
          fi

      - name: Set action environment variables
        if: ${{ env.SKIP_BUILD != 'true' }}
        run: |
          # Take the timestamp and commit once so that BUILD_ID and BATCH_JOB_ID
          # always share the same prefix, even if the step straddles a minute.
          BUILD_TIMESTAMP=$(date +%Y-%m-%d-%H%M)
          SHORT_SHA=$(git rev-parse --short HEAD)
          {
            # First 10 characters of the timestamp are the YYYY-MM-DD date.
            echo "NIGHTLY_TAG=nightly-${BUILD_TIMESTAMP:0:10}"
            # GITHUB_REF_NAME is read from the environment rather than being
            # interpolated into the script, so a hostile ref name cannot inject
            # shell code.
            echo "BUILD_ID=${BUILD_TIMESTAMP}-${SHORT_SHA}-${GITHUB_REF_NAME}"
            echo "BATCH_JOB_ID=${BUILD_TIMESTAMP}-${SHORT_SHA}"
          } >> "$GITHUB_ENV"

      - name: Show freshly set envvars
        if: ${{ env.SKIP_BUILD != 'true' }}
        run: |
          echo "NIGHTLY_TAG: $NIGHTLY_TAG"
          echo "BUILD_ID: $BUILD_ID"
          echo "BATCH_JOB_ID: $BATCH_JOB_ID"

      # Only scheduled (nightly) runs create and push the dated nightly tag.
      # NOTE(review): the user.email value below looks like a scraper-redacted
      # address; confirm and restore the real bot email.
      - name: Tag nightly build
        if: ${{ (github.event_name == 'schedule') && (env.SKIP_BUILD != 'true') }}
        run: |
          git config user.email "[email protected]"
          git config user.name "pudlbot"
          git tag -a -m "$NIGHTLY_TAG" "$NIGHTLY_TAG" "$GITHUB_REF_NAME"
          git push origin "$NIGHTLY_TAG"

      # Compute the image tags and labels consumed by the build step below.
      - name: Docker Metadata
        id: docker_metadata
        if: ${{ env.SKIP_BUILD != 'true' }}
        uses: docker/metadata-action@v5
        with:
          images: catalystcoop/pudl-etl
          flavor: |
            latest=auto
          tags: |
            type=raw,value=${{ github.ref_name }}
            type=ref,event=tag

      - name: Set up Docker Buildx
        if: ${{ env.SKIP_BUILD != 'true' }}
        uses: docker/setup-buildx-action@v3

      # This workflow declares no pull_request trigger, so the event check here
      # and on `push:` below is always true; it is kept as a guard.
      - name: Login to DockerHub
        if: ${{ (github.event_name != 'pull_request') && (env.SKIP_BUILD != 'true') }}
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # The `digest` output of this step pins the image used by the batch job.
      - name: Build image and push to Docker Hub
        id: docker-build
        if: ${{ env.SKIP_BUILD != 'true' }}
        uses: docker/build-push-action@v6
        with:
          context: .
          file: docker/Dockerfile
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.docker_metadata.outputs.tags }}
          labels: ${{ steps.docker_metadata.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      # Authenticate to Google Cloud as the deploy service account.
      - id: "auth"
        if: ${{ env.SKIP_BUILD != 'true' }}
        uses: "google-github-actions/auth@v2"
        with:
          workload_identity_provider: "projects/345950277072/locations/global/workloadIdentityPools/gh-actions-pool/providers/gh-actions-provider"
          service_account: "deploy-pudl-github-action@catalyst-cooperative-pudl.iam.gserviceaccount.com"

      # Setup gcloud CLI
      - name: Set up Cloud SDK
        if: ${{ env.SKIP_BUILD != 'true' }}
        uses: google-github-actions/setup-gcloud@v2

      # Write the Google Batch job definition that runs the ETL in the image
      # built above.
      # Dagster Postgres connection configured in docker/dagster.yaml - otherwise we get a str for port num
      #
      # All secrets and context values are handed to the script through `env:`
      # and expanded as quoted shell variables. Interpolating `${{ ... }}`
      # directly into the script would let a value containing spaces or shell
      # metacharacters split arguments or inject commands.
      - name: Make GCP Batch config file
        if: ${{ env.SKIP_BUILD != 'true' }}
        env:
          PUDL_GCS_OUTPUT: ${{ env.GCS_OUTPUT_BUCKET }}/${{ env.BUILD_ID }}
          IMAGE_DIGEST: ${{ steps.docker-build.outputs.digest }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          FLY_ACCESS_TOKEN: ${{ secrets.FLY_ACCESS_TOKEN }}
          GCP_BILLING_PROJECT: ${{ secrets.GCP_BILLING_PROJECT }}
          PUDL_BOT_PAT: ${{ secrets.PUDL_BOT_PAT }}
          SLACK_TOKEN: ${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }}
          ZENODO_SANDBOX_TOKEN_PUBLISH: ${{ secrets.ZENODO_SANDBOX_TOKEN_PUBLISH }}
          # Refs starting with "v20" publish to production, all others to sandbox.
          ZENODO_TARGET_ENV: ${{ (startsWith(github.ref_name, 'v20') && 'production') || 'sandbox' }}
          ZENODO_TOKEN_UPLOAD: ${{ secrets.ZENODO_TOKEN_UPLOAD }}
        run: |-
          ./devtools/generate_batch_config.py \
            --container-image "docker.io/catalystcoop/pudl-etl@${IMAGE_DIGEST}" \
            --container-command "micromamba" \
            --container-arg="run" \
            --container-arg="--prefix" \
            --container-arg="/home/mambauser/env" \
            --container-arg="--attach" \
            --container-arg='' \
            --container-arg="bash" \
            --container-arg="./docker/gcp_pudl_etl.sh" \
            --container-env "AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}" \
            --container-env "AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}" \
            --container-env "AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}" \
            --container-env "BUILD_ID=${BUILD_ID}" \
            --container-env "BUILD_REF=${GITHUB_REF_NAME}" \
            --container-env "FLY_ACCESS_TOKEN=${FLY_ACCESS_TOKEN}" \
            --container-env "GCP_BILLING_PROJECT=${GCP_BILLING_PROJECT}" \
            --container-env "GITHUB_ACTION_TRIGGER=${GITHUB_EVENT_NAME}" \
            --container-env "NIGHTLY_TAG=${NIGHTLY_TAG}" \
            --container-env OMP_NUM_THREADS=4 \
            --container-env "PUDL_BOT_PAT=${PUDL_BOT_PAT}" \
            --container-env "PUDL_GCS_OUTPUT=${PUDL_GCS_OUTPUT}" \
            --container-env PUDL_SETTINGS_YML="/home/mambauser/pudl/src/pudl/package_data/settings/etl_full.yml" \
            --container-env "SLACK_TOKEN=${SLACK_TOKEN}" \
            --container-env "ZENODO_SANDBOX_TOKEN_PUBLISH=${ZENODO_SANDBOX_TOKEN_PUBLISH}" \
            --container-env "ZENODO_TARGET_ENV=${ZENODO_TARGET_ENV}" \
            --container-env "ZENODO_TOKEN_UPLOAD=${ZENODO_TOKEN_UPLOAD}" \
            --output "${BATCH_JOB_JSON}"

      # Start the batch job
      - name: Kick off batch job
        if: ${{ env.SKIP_BUILD != 'true' }}
        run: |
          gcloud batch jobs submit "run-etl-${BATCH_JOB_ID}" \
            --config "${BATCH_JOB_JSON}" \
            --location us-west1

      # Runs even when earlier steps failed or were skipped. On a skipped build
      # BUILD_ID was never set, so it renders as an empty string in the message.
      - name: Post to pudl-deployments channel
        if: always()
        id: slack
        uses: slackapi/slack-github-action@v2
        with:
          method: chat.postMessage
          token: ${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }}
          payload: |
            text: "`${{ env.BUILD_ID }}` build-deploy-pudl status: ${{ (env.SKIP_BUILD == 'true') && 'skipped' || job.status }}\n${{ env.GCS_OUTPUT_BUCKET }}/${{ env.BUILD_ID }}"
            channel: "C03FHB9N0PQ"