Skip to content

Add Pagerduty severity for workflows #1

Add Pagerduty severity for workflows

Add Pagerduty severity for workflows #1

Workflow file for this run

name: Reusable Sigstore Prober Workflow

# Reusable workflow: callers invoke it through `workflow_call`, must provide
# the PagerDuty integration key, and may override any of the inputs below.
on:
  workflow_call:
    secrets:
      PAGERDUTY_INTEGRATION_KEY:
        description: 'Integration key for PagerDuty'
        required: true
    inputs:
      rekor_url:
        required: false
        type: string
        default: 'https://rekor.sigstore.dev'
        description: 'Rekor URL'
      fulcio_url:
        required: false
        type: string
        default: 'https://fulcio.sigstore.dev'
        description: 'Fulcio URL'
      oidc_url:
        required: false
        type: string
        default: 'https://oauth2.sigstore.dev/auth'
        description: 'OIDC URL'
      enable_staging:
        required: false
        type: boolean
        # Explicit default: the jobs compare this value against true/false.
        default: false
        description: 'Run the staging flow (staging TUF root, "staging" PagerDuty group) instead of the production flow'
      tuf_repo:
        required: false
        type: string
        default: 'https://tuf-repo-cdn.sigstore.dev'
        description: 'TUF Repo'
      tuf_preprod_repo:
        required: false
        type: string
        default: 'https://tuf-preprod-repo-cdn.sigstore.dev'
        description: 'Preprod TUF Repo'
      tuf_root_path:
        required: false
        type: string
        default: "root.json"
        description: "path to the tuf root"
      tuf_root_url:
        required: false
        type: string
        description: 'URL to download the TUF root from; when empty, tuf_root_path inside the root-signing checkout is used'
      triggerPagerDutyTest:
        description: 'Trigger PagerDuty test message'
        required: false
        type: string
      severity:
        # Folded block scalar: the text contains both quote characters, and a
        # quoted scalar followed by trailing text is a YAML syntax error.
        description: >-
          The perceived severity of the status the event is describing with
          respect to the affected system. This can be "critical", "error",
          "warning" or "info". "critical" and "error" default to High urgency,
          "warning" and "info" to Low.
        required: false
        default: 'error'
        type: string
# Workflow-level GITHUB_TOKEN scopes; a job that declares its own
# `permissions` block replaces these.
permissions:
  id-token: write
  contents: read
jobs:
sigstore-probe:
  # Builds the `prober` binary and runs it with --one-time against the Rekor
  # and Fulcio URLs from the inputs (up to 3 attempts). The result is exposed
  # as the `sigstore_probe` output: "good" or "failure".
  timeout-minutes: 10
  runs-on: ubuntu-latest
  outputs:
    sigstore_probe: ${{ steps.msg.outputs.sigstore_probe }}
  steps:
    - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3

    - id: setup-go
      name: Set up Go
      uses: actions/setup-go@fac708d6674e30b6ba41289acaab6d4b75aa0753 # v3.3.0
      with:
        cache: false
        check-latest: true
        go-version-file: 'prober/hack/toolz/go.mod'

    # Go build and module caches, keyed on OS, Go version, job and go.sum.
    - uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8 # v3.3.1
      with:
        key: ${{ runner.os }}-go-${{ steps.setup-go.outputs.go-version }}-${{ github.job }}-${{ hashFiles('prober/hack/toolz/go.sum') }}
        restore-keys: |
          ${{ runner.os }}-go-${{ steps.setup-go.outputs.go-version }}-${{ github.job }}-
        path: |
          ~/.cache/go-build
          ~/go/pkg/mod

    - name: Install 'prober' from sigstore/scaffolding
      run: |
        make -C prober/ prober
        echo "PATH=$PATH:$PWD/prober/hack/toolz/bin" >> $GITHUB_ENV

    # Make sure rekor is up and we can get root info
    - name: Run prober
      uses: nick-fields/retry@943e742917ac94714d2f408a0e8320f2d1fcafcd # v2.8.3
      env:
        DEBUG: 1
      with:
        command: prober --one-time --rekor-url ${{ inputs.rekor_url }} --fulcio-url ${{ inputs.fulcio_url }}
        max_attempts: 3
        retry_on: error
        retry_wait_seconds: 60
        timeout_minutes: 3

    # Runs whether the earlier steps passed or failed, so the output is
    # always populated.
    - id: msg
      name: Set messages
      if: success() || failure()
      run: |
        if [ "${{ job.status }}" == 'failure' ]; then
          echo "sigstore_probe=failure" >> $GITHUB_OUTPUT
        else
          echo "sigstore_probe=good" >> $GITHUB_OUTPUT
        fi
root-probe:
  # Verifies the TUF repository and the preprod TUF repository against a
  # root: each is checked for validity 2 days ahead, then root.json and
  # targets.json 15 days ahead. The result is exposed as `root_state`.
  runs-on: ubuntu-latest
  timeout-minutes: 10
  outputs:
    root_state: ${{ steps.msg.outputs.root_state }}
  steps:
    - name: Checkout sigstore-probers code
      uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3

    - name: Checkout root-signing to get roots
      uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
      with:
        path: root-signing
        ref: main
        repository: sigstore/root-signing

    - id: setup-go
      uses: actions/setup-go@fac708d6674e30b6ba41289acaab6d4b75aa0753 # v3.3.0
      with:
        cache: false
        check-latest: true
        go-version-file: 'prober/hack/toolz/go.mod'

    - uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8 # v3.3.1
      with:
        key: ${{ runner.os }}-go-${{ steps.setup-go.outputs.go-version }}-${{ github.job }}-${{ hashFiles('prober/hack/toolz/go.sum') }}
        restore-keys: |
          ${{ runner.os }}-go-${{ steps.setup-go.outputs.go-version }}-${{ github.job }}-
        path: |
          ~/.cache/go-build
          ~/go/pkg/mod

    - name: Install 'verify' tool from sigstore/root-signing
      run: |
        make -C prober/ verify
        echo "PATH=$PATH:$PWD/prober/hack/toolz/bin" >> $GITHUB_ENV

    - name: Check expiration
      run: |
        # Use a downloaded root when a URL is given, otherwise the one from
        # the root-signing checkout.
        if [ -n "${{ inputs.tuf_root_url }}" ]; then
          curl -Lo root.json --user-agent "sigstore-prober" ${{ inputs.tuf_root_url }}
          export ROOT_PATH=root.json
        else
          export ROOT_PATH=$GITHUB_WORKSPACE/root-signing/${{ inputs.tuf_root_path }}
        fi
        # First the main TUF repo, then the preprod/staging TUF bucket
        for TUF_REPO in "${{ inputs.tuf_repo }}" "${{ inputs.tuf_preprod_repo }}"
        do
          export EXPIRY=$(date -d '+2 days' '+%s')
          verify repository --repository ${TUF_REPO} --root ${ROOT_PATH} --valid-until ${EXPIRY}
          export EXPIRY=$(date -d '+15 days' '+%s')
          echo "Verifying root valid within 15 days..."
          verify repository --repository ${TUF_REPO} --root ${ROOT_PATH} --valid-until ${EXPIRY} --role root.json --role targets.json
        done

    - id: msg
      name: Set messages
      if: success() || failure()
      run: |
        if [ "${{ job.status }}" == 'failure' ]; then
          echo "root_state=failure" >> $GITHUB_OUTPUT
        else
          echo "root_state=good" >> $GITHUB_OUTPUT
        fi
rekor-fulcio-e2e:
  # End-to-end probe: copies a busybox image into a local registry, then
  # signs, verifies and attests it with cosign against the Rekor, Fulcio and
  # OIDC endpoints from the inputs.
  # Outputs:
  #   rekor_fulcio_e2e - "good" or "failure"
  #   skip_pagerduty   - "true" when the GitHub OIDC availability check failed
  timeout-minutes: 10
  permissions:
    id-token: write
    contents: read
  env:
    COSIGN_YES: "true"
    GIT_HASH: ${{ github.sha }}
    GIT_VERSION: unstable
    GITHUB_RUN_ID: ${{ github.run_id }}
    GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
    IMAGE: localhost:1338/image:${{ github.sha }}-${{ github.run_id }}
    REKOR_SERVER: ${{ inputs.rekor_url }}
  runs-on: ubuntu-latest
  outputs:
    rekor_fulcio_e2e: ${{ steps.msg.outputs.rekor_fulcio_e2e }}
    skip_pagerduty: ${{ steps.set-skip-pagerduty.outputs.skip_pagerduty }}
  steps:
    - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3

    # This server is often down, resulting in a lot of flaky probers
    # If the server is down, and this step fails, we don't alert PagerDuty
    - name: Confirm Github OIDC Server is Available
      id: oidc-available
      continue-on-error: true
      run: |
        # The URL has to be quoted: an unquoted `&` makes the shell run curl
        # in the background and treat `audience=sigstore` as a variable
        # assignment, so the step could never fail.
        # --fail turns HTTP error responses into a non-zero exit status, and
        # the response body is discarded so it does not end up in the logs.
        curl --fail --silent --show-error --output /dev/null \
          -H "Authorization: Bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \
          "${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=sigstore"

    # Since the server is down, we want to ignore the failure in this workflow
    # and skip paging PagerDuty
    - name: Set skip_pagerduty outputs
      id: set-skip-pagerduty
      # The check above uses continue-on-error, so failure() stays false when
      # it fails; the step's recorded outcome has to be inspected instead.
      if: steps.oidc-available.outcome == 'failure'
      run: |
        echo "skip_pagerduty=true" >> $GITHUB_OUTPUT

    - uses: actions/setup-go@fac708d6674e30b6ba41289acaab6d4b75aa0753 # v3.3.0
      id: setup-go
      with:
        go-version-file: 'prober/hack/toolz/go.mod'
        check-latest: true
        cache: false

    - uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8 # v3.3.1
      with:
        path: |
          ~/.cache/go-build
          ~/go/pkg/mod
        key: ${{ runner.os }}-go-${{ steps.setup-go.outputs.go-version }}-${{ github.job }}-${{ hashFiles('prober/hack/toolz/go.sum') }}
        restore-keys: |
          ${{ runner.os }}-go-${{ steps.setup-go.outputs.go-version }}-${{ github.job }}-

    # Install crane / rekor-cli / cosign tools
    - name: Install (crane, rekor-cli, cosign) tools
      run: |
        make -C prober/ crane rekor-cli cosign
        echo "PATH=$PATH:$PWD/prober/hack/toolz/bin" >> $GITHUB_ENV

    # Setup the registry on port 1338
    # NOTE(review): this action follows the moving `main` branch while every
    # other action in this file is pinned to a commit SHA - consider pinning.
    - uses: chainguard-dev/actions/setup-registry@main

    - name: Build and copy a container image
      continue-on-error: true
      run: |
        for i in {1..5}
        do
          if crane cp busybox@sha256:d2b53584f580310186df7a2055ce3ff83cc0df6caacf1e3489bff8cf5d0af5d8 ${IMAGE}; then
            echo "Successfully copied image" && exit 0
          else
            echo "Failed to copy image ${IMAGE}" && sleep 10
          fi
        done
        exit 1

    # START: PREPRODUCTION VERIFICATION
    # TODO: Create a matrix (https://docs.github.com/en/actions/using-jobs/using-a-matrix-for-your-jobs)
    # to reduce duplication
    # Test the preproduction bucket only for prod
    - name: Initialize preprod TUF root
      if: ${{ inputs.enable_staging == false }}
      run: |
        curl -Lo root.json --user-agent "sigstore-prober" ${{ inputs.tuf_preprod_repo }}/root.json
        for i in {1..5}
        do
          if cosign initialize --mirror=${{ inputs.tuf_preprod_repo }} --root=root.json; then
            echo "Successfully initialized" && exit 0
          else
            echo "Failed to initialize" && sleep 10
          fi
        done
        # Every attempt failed: fail the step rather than finishing on the
        # successful `sleep` of the last iteration.
        exit 1

    # Test signing in preproduction
    - name: Sign and verify the image with preprod TUF
      if: ${{ inputs.enable_staging == false }}
      run: |
        cosign sign --yes ${IMAGE} --rekor-url ${{ inputs.rekor_url }} --fulcio-url ${{ inputs.fulcio_url }} --oidc-issuer ${{ inputs.oidc_url }} --oidc-provider github-actions
        cosign verify ${IMAGE} --rekor-url ${{ inputs.rekor_url }} --certificate-oidc-issuer=https://token.actions.githubusercontent.com --certificate-identity-regexp='https://github.com/sigstore/sigstore-probers/.github/workflows/reusable-prober.yml@refs/.*'

    - name: Remove preprod TUF
      if: ${{ inputs.enable_staging == false }}
      run: |
        rm -rf ~/.sigstore
    # END: PREPRODUCTION VERIFICATION

    - name: Initialize prod TUF root
      if: ${{ inputs.enable_staging == false }}
      run: |
        for i in {1..5}
        do
          if cosign initialize; then
            echo "Successfully initialized" && exit 0
          else
            echo "Failed to initialize" && sleep 10
          fi
        done
        # Every attempt failed: fail the step, as the staging variant does.
        exit 1

    - name: Initialize staging TUF root
      if: ${{ inputs.enable_staging }}
      run: |
        curl -Lo root.json ${{ inputs.tuf_root_url }}
        for i in {1..5}
        do
          if cosign initialize --mirror=${{ inputs.tuf_repo }} --root=root.json; then
            echo "Successfully initialized" && exit 0
          else
            echo "Failed to initialize" && sleep 10
          fi
        done
        exit 1

    - name: Sign and verify the image
      run: |
        cosign sign --yes ${IMAGE} --rekor-url ${{ inputs.rekor_url }} --fulcio-url ${{ inputs.fulcio_url }} --oidc-issuer ${{ inputs.oidc_url }} --oidc-provider github-actions
        cosign verify ${IMAGE} --rekor-url ${{ inputs.rekor_url }} --certificate-oidc-issuer=https://token.actions.githubusercontent.com --certificate-identity-regexp='https://github.com/sigstore/sigstore-probers/.github/workflows/reusable-prober.yml@refs/.*'

    - name: Generate and upload attestation
      run: |
        cosign attest --predicate ./prober/attestation.json --type slsaprovenance --rekor-url ${{ inputs.rekor_url }} --fulcio-url ${{ inputs.fulcio_url }} --oidc-issuer ${{ inputs.oidc_url }} ${IMAGE}
        cosign verify-attestation --rekor-url ${{ inputs.rekor_url }} --type=slsaprovenance ${IMAGE} --certificate-oidc-issuer=https://token.actions.githubusercontent.com --certificate-identity-regexp='https://github.com/sigstore/sigstore-probers/.github/workflows/reusable-prober.yml@refs/.*'
        # Get attestation hash
        IMAGE_ATT=$(echo $(cosign triangulate ${IMAGE}) | sed 's/\.sig/\.att/')
        DIGEST=$(crane manifest ${IMAGE_ATT} | jq -r '.layers[-1]'.digest)
        echo "attestation_digest=${DIGEST}" >> $GITHUB_ENV

    - name: Verify attestation contents
      run: |
        ./prober/verify-attestation.sh "${{ env.attestation_digest }}"

    # Runs whether the earlier steps passed or failed; when both lines are
    # written, the later "failure" value is the one that takes effect.
    - name: Set messages
      id: msg
      if: success() || failure()
      run: |
        echo "rekor_fulcio_e2e=good" >> $GITHUB_OUTPUT
        if [ "${{ job.status }}" == 'failure' ]; then echo "rekor_fulcio_e2e=failure" >> $GITHUB_OUTPUT; fi
compute-summary-msg:
  # Produces the two strings passed on to the pager job:
  #   summary - "Test Notification" when triggerPagerDutyTest is "true",
  #             otherwise "Prober Failed"
  #   group   - "staging" when enable_staging is true, otherwise "production"
  runs-on: ubuntu-latest
  outputs:
    group: ${{ steps.msg.outputs.group }}
    summary: ${{ steps.msg.outputs.summary }}
  steps:
    - id: msg
      name: Set messages
      if: success() || failure()
      run: |
        SUMMARY="Prober Failed"
        if [ "${{ inputs.triggerPagerDutyTest }}" == "true" ]; then
          SUMMARY="Test Notification"
        fi
        GROUP=production
        if [ ${{ inputs.enable_staging }} == 'true' ]; then
          GROUP=staging
        fi
        echo "summary=${SUMMARY}" >> $GITHUB_OUTPUT
        echo "group=${GROUP}" >> $GITHUB_OUTPUT
pagerduty-notification:
  # Calls the reusable pager workflow with the summary, group, severity and
  # per-probe results. It runs when a PagerDuty test is requested, or when a
  # needed job failed and rekor-fulcio-e2e did not set skip_pagerduty.
  #
  # `inputs.triggerPagerDutyTest` is the value passed to this reusable
  # workflow, the same one compute-summary-msg reads for the summary text.
  # The `github.event.inputs` check is kept so callers relying on the
  # triggering event's input keep working.
  if: inputs.triggerPagerDutyTest == 'true' || github.event.inputs.triggerPagerDutyTest == 'true' || (failure() && needs.rekor-fulcio-e2e.outputs.skip_pagerduty != 'true')
  needs: [sigstore-probe, root-probe, rekor-fulcio-e2e, compute-summary-msg]
  uses: ./.github/workflows/reusable-pager.yml
  secrets:
    PAGERDUTY_INTEGRATION_KEY: ${{ secrets.PAGERDUTY_INTEGRATION_KEY }}
  with:
    summary: ${{ needs.compute-summary-msg.outputs.summary }}
    component: "health prober"
    group: ${{ needs.compute-summary-msg.outputs.group }}
    severity: ${{ inputs.severity }}
    # JSON object passed as a string, one entry per probe job output.
    details: >
      {
        "Environment": "${{ needs.compute-summary-msg.outputs.group }}",
        "Failure URL": "https://github.com/sigstore/sigstore-probers/actions/runs/${{ github.run_id }}",
        "Commit": "${{ github.sha }}",
        "Prober": "${{ needs.sigstore-probe.outputs.sigstore_probe }}",
        "GCS Root": "${{ needs.root-probe.outputs.root_state }}",
        "Rekor Fulcio E2E Test": "${{ needs.rekor-fulcio-e2e.outputs.rekor_fulcio_e2e }}"
      }
    # JSON array of links passed as a string.
    links: >
      [
        {
          "href": "https://github.com/sigstore/public-good-instance/blob/main/playbooks/alerting/alerts/k8s-api-endpoint-prober.md",
          "text": "Prober Failure Playbook"
        }
      ]