Add Pagerduty severity for workflows #1
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reusable workflow that probes the Sigstore services (Rekor, Fulcio, TUF)
# and hands failures to the PagerDuty notification job defined in this file.
name: Reusable Sigstore Prober Workflow

on:
  workflow_call:
    secrets:
      PAGERDUTY_INTEGRATION_KEY:
        description: 'Integration key for PagerDuty'
        required: true
    inputs:
      # Service endpoints handed to `prober` and `cosign`.
      rekor_url:
        required: false
        type: string
        default: 'https://rekor.sigstore.dev'
        description: 'Rekor URL'
      fulcio_url:
        required: false
        type: string
        default: 'https://fulcio.sigstore.dev'
        description: 'Fulcio URL'
      oidc_url:
        required: false
        type: string
        default: 'https://oauth2.sigstore.dev/auth'
        description: 'OIDC URL'
      # No default is declared. When true, the staging TUF root is initialised
      # and the PagerDuty group is reported as "staging".
      enable_staging:
        required: false
        type: boolean
      tuf_repo:
        required: false
        type: string
        default: 'https://tuf-repo-cdn.sigstore.dev'
        description: 'TUF Repo'
      # Preproduction TUF repository (the description text matches tuf_repo).
      tuf_preprod_repo:
        required: false
        type: string
        default: 'https://tuf-preprod-repo-cdn.sigstore.dev'
        description: 'TUF Repo'
      # Path of the root file inside the sigstore/root-signing checkout; only
      # consulted by the root probe when tuf_root_url is empty.
      tuf_root_path:
        required: false
        type: string
        default: "root.json"
        description: "path to the tuf root"
      # When non-empty, the root is downloaded from this URL instead.
      tuf_root_url:
        required: false
        type: string
      # Compared against the string 'true' by the jobs in this workflow.
      triggerPagerDutyTest:
        description: 'Trigger PagerDuty test message'
        required: false
        type: string
      # Forwarded unchanged to the reusable pager workflow.
      # The description is a single folded scalar: the previous form closed the
      # quoted string halfway through the sentence, which is not valid YAML.
      severity:
        description: >-
          The perceived severity of the status the event is describing with
          respect to the affected system. This can be "critical", "error",
          "warning" or "info". "critical" and "error" default to High urgency,
          "warning" and "info" to Low.
        required: false
        default: 'error'
        type: string
# Workflow-wide token scopes: read-only repository contents, plus the
# ability to request an OIDC ID token.
permissions:
  id-token: write
  contents: read
jobs: | ||
sigstore-probe:
  # Builds the `prober` binary and runs it once against the configured Rekor
  # and Fulcio endpoints. The `sigstore_probe` output is "good" or "failure".
  timeout-minutes: 10
  runs-on: ubuntu-latest
  outputs:
    sigstore_probe: ${{ steps.msg.outputs.sigstore_probe }}
  steps:
    - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9  # v3.5.3

    - id: setup-go
      name: Set up Go
      uses: actions/setup-go@fac708d6674e30b6ba41289acaab6d4b75aa0753  # v3.3.0
      with:
        # setup-go's own cache is off; the actions/cache step below is used.
        cache: false
        check-latest: true
        go-version-file: 'prober/hack/toolz/go.mod'

    - uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8  # v3.3.1
      with:
        key: ${{ runner.os }}-go-${{ steps.setup-go.outputs.go-version }}-${{ github.job }}-${{ hashFiles('prober/hack/toolz/go.sum') }}
        restore-keys: |
          ${{ runner.os }}-go-${{ steps.setup-go.outputs.go-version }}-${{ github.job }}-
        path: |
          ~/.cache/go-build
          ~/go/pkg/mod

    # The second line exposes the freshly built tools to later steps.
    - name: Install 'prober' from sigstore/scaffolding
      run: |
        make -C prober/ prober
        echo "PATH=$PATH:$PWD/prober/hack/toolz/bin" >> $GITHUB_ENV

    # Make sure rekor is up and we can get root info
    - name: Run prober
      uses: nick-fields/retry@943e742917ac94714d2f408a0e8320f2d1fcafcd  # v2.8.3
      env:
        DEBUG: 1
      with:
        command: prober --one-time --rekor-url ${{ inputs.rekor_url }} --fulcio-url ${{ inputs.fulcio_url }}
        retry_on: error
        max_attempts: 3
        retry_wait_seconds: 60
        timeout_minutes: 3

    # Runs on success and on failure so the output is always populated; a
    # later write of the same key overrides the initial "good".
    - id: msg
      name: Set messages
      if: success() || failure()
      run: |
        echo "sigstore_probe=good" >> $GITHUB_OUTPUT
        if [ "${{ job.status }}" == 'failure' ]; then echo "sigstore_probe=failure" >> $GITHUB_OUTPUT; fi
root-probe:
  # Checks that TUF metadata in the production and preprod repositories
  # stays valid for the coming days. The `root_state` output is "good" or
  # "failure".
  runs-on: ubuntu-latest
  timeout-minutes: 10
  outputs:
    root_state: ${{ steps.msg.outputs.root_state }}
  steps:
    - name: Checkout sigstore-probers code
      uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9  # v3.5.3

    # Checked out under ./root-signing so its roots can be read from disk.
    - name: Checkout root-signing to get roots
      uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9  # v3.5.3
      with:
        path: root-signing
        ref: main
        repository: sigstore/root-signing

    - id: setup-go
      uses: actions/setup-go@fac708d6674e30b6ba41289acaab6d4b75aa0753  # v3.3.0
      with:
        cache: false
        check-latest: true
        go-version-file: 'prober/hack/toolz/go.mod'

    - uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8  # v3.3.1
      with:
        key: ${{ runner.os }}-go-${{ steps.setup-go.outputs.go-version }}-${{ github.job }}-${{ hashFiles('prober/hack/toolz/go.sum') }}
        restore-keys: |
          ${{ runner.os }}-go-${{ steps.setup-go.outputs.go-version }}-${{ github.job }}-
        path: |
          ~/.cache/go-build
          ~/go/pkg/mod

    - name: Install 'verify' tool from sigstore/root-signing
      run: |
        make -C prober/ verify
        echo "PATH=$PATH:$PWD/prober/hack/toolz/bin" >> $GITHUB_ENV

    # Root source: the URL input when provided, otherwise the file from the
    # root-signing checkout. Each repository is verified twice: all metadata
    # two days ahead, then root.json/targets.json fifteen days ahead.
    - name: Check expiration
      run: |
        if [ "${{ inputs.tuf_root_url }}" != "" ]; then
          curl -Lo root.json --user-agent "sigstore-prober" ${{ inputs.tuf_root_url }}
          export ROOT_PATH=root.json
        else
          export ROOT_PATH=$GITHUB_WORKSPACE/root-signing/${{ inputs.tuf_root_path }}
        fi
        export EXPIRY=$(date -d '+2 days' '+%s')
        verify repository --repository ${{ inputs.tuf_repo }} --root ${ROOT_PATH} --valid-until ${EXPIRY}
        export EXPIRY=$(date -d '+15 days' '+%s')
        echo "Verifying root valid within 15 days..."
        verify repository --repository ${{ inputs.tuf_repo }} --root ${ROOT_PATH} --valid-until ${EXPIRY} --role root.json --role targets.json
        # For preprod/staging TUF bucket
        export EXPIRY=$(date -d '+2 days' '+%s')
        verify repository --repository ${{ inputs.tuf_preprod_repo }} --root ${ROOT_PATH} --valid-until ${EXPIRY}
        export EXPIRY=$(date -d '+15 days' '+%s')
        echo "Verifying root valid within 15 days..."
        verify repository --repository ${{ inputs.tuf_preprod_repo }} --root ${ROOT_PATH} --valid-until ${EXPIRY} --role root.json --role targets.json

    # Always runs so the output is populated on both success and failure.
    - id: msg
      name: Set messages
      if: success() || failure()
      run: |
        echo "root_state=good" >> $GITHUB_OUTPUT
        if [ "${{ job.status }}" == 'failure' ]; then echo "root_state=failure" >> $GITHUB_OUTPUT; fi
rekor-fulcio-e2e:
  # End-to-end check: copy an image into a local registry, sign and verify
  # it with cosign against the configured Rekor/Fulcio, then create and
  # verify an attestation.
  #
  # Outputs:
  #   rekor_fulcio_e2e - "good" or "failure"
  #   skip_pagerduty   - "true" when the GitHub OIDC availability check failed
  timeout-minutes: 10
  permissions:
    id-token: write
    contents: read
  env:
    COSIGN_YES: "true"
    GIT_HASH: ${{ github.sha }}
    GIT_VERSION: unstable
    GITHUB_RUN_ID: ${{ github.run_id }}
    GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
    IMAGE: localhost:1338/image:${{ github.sha }}-${{ github.run_id }}
    REKOR_SERVER: ${{ inputs.rekor_url }}
  runs-on: ubuntu-latest
  outputs:
    rekor_fulcio_e2e: ${{ steps.msg.outputs.rekor_fulcio_e2e }}
    skip_pagerduty: ${{ steps.set-skip-pagerduty.outputs.skip_pagerduty }}
  steps:
    - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9  # v3.5.3

    # This server is often down, resulting in a lot of flaky probers
    # If the server is down, and this step fails, we don't alert PagerDuty
    #
    # The URL is quoted so `&audience=sigstore` stays part of the query
    # string; unquoted, the shell backgrounds curl and the step can never
    # fail. --fail turns HTTP errors into a non-zero exit, and the response
    # body is discarded because it carries the issued token.
    - name: Confirm Github OIDC Server is Available
      id: oidc-available
      continue-on-error: true
      run: |
        curl --fail --silent --show-error --output /dev/null \
          -H "Authorization: Bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \
          "${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=sigstore"

    # Since the server is down, we want to ignore the failure in this workflow
    # and skip paging PagerDuty
    #
    # `failure()` is never true here because the previous step uses
    # continue-on-error; its `outcome` holds the result before that setting
    # is applied.
    - name: Set skip_pagerduty outputs
      id: set-skip-pagerduty
      if: steps.oidc-available.outcome == 'failure'
      run: |
        echo "skip_pagerduty=true" >> $GITHUB_OUTPUT

    - uses: actions/setup-go@fac708d6674e30b6ba41289acaab6d4b75aa0753  # v3.3.0
      id: setup-go
      with:
        go-version-file: 'prober/hack/toolz/go.mod'
        check-latest: true
        cache: false

    - uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8  # v3.3.1
      with:
        path: |
          ~/.cache/go-build
          ~/go/pkg/mod
        key: ${{ runner.os }}-go-${{ steps.setup-go.outputs.go-version }}-${{ github.job }}-${{ hashFiles('prober/hack/toolz/go.sum') }}
        restore-keys: |
          ${{ runner.os }}-go-${{ steps.setup-go.outputs.go-version }}-${{ github.job }}-

    # Install crane / rekor-cli / cosign tools
    - name: Install (crane, rekor-cli, cosign) tools
      run: |
        make -C prober/ crane rekor-cli cosign
        echo "PATH=$PATH:$PWD/prober/hack/toolz/bin" >> $GITHUB_ENV

    # Setup the registry on port 1338
    # NOTE(review): unlike the other actions in this job, this one tracks a
    # branch instead of a pinned commit.
    - uses: chainguard-dev/actions/setup-registry@main

    - name: Build and copy a container image
      continue-on-error: true
      run: |
        for i in {1..5}
        do
          if crane cp busybox@sha256:d2b53584f580310186df7a2055ce3ff83cc0df6caacf1e3489bff8cf5d0af5d8 ${IMAGE}; then
            echo "Successfully copied image" && exit 0
          else
            echo "Failed to copy image ${IMAGE}" && sleep 10
          fi
        done
        exit 1

    # START: PREPRODUCTION VERIFICATION
    # TODO: Create a matrix (https://docs.github.com/en/actions/using-jobs/using-a-matrix-for-your-jobs)
    # to reduce duplication
    # Test the preproduction bucket only for prod
    - name: Initialize preprod TUF root
      if: ${{ inputs.enable_staging == false }}
      run: |
        curl -Lo root.json --user-agent "sigstore-prober" ${{ inputs.tuf_preprod_repo }}/root.json
        for i in {1..5}
        do
          if cosign initialize --mirror=${{ inputs.tuf_preprod_repo }} --root=root.json; then
            echo "Successfully initialized" && exit 0
          else
            echo "Failed to initialize" && sleep 10
          fi
        done
        # All attempts failed: fail the step, as the staging loop does.
        exit 1

    # Test signing in preproduction
    - name: Sign and verify the image with preprod TUF
      if: ${{ inputs.enable_staging == false }}
      run: |
        cosign sign --yes ${IMAGE} --rekor-url ${{ inputs.rekor_url }} --fulcio-url ${{ inputs.fulcio_url }} --oidc-issuer ${{ inputs.oidc_url }} --oidc-provider github-actions
        cosign verify ${IMAGE} --rekor-url ${{ inputs.rekor_url }} --certificate-oidc-issuer=https://token.actions.githubusercontent.com --certificate-identity-regexp='https://github.com/sigstore/sigstore-probers/.github/workflows/reusable-prober.yml@refs/.*'

    - name: Remove preprod TUF
      if: ${{ inputs.enable_staging == false }}
      run: |
        rm -rf ~/.sigstore
    # END: PREPRODUCTION VERIFICATION

    - name: Initialize prod TUF root
      if: ${{ inputs.enable_staging == false }}
      run: |
        for i in {1..5}
        do
          if cosign initialize; then
            echo "Successfully initialized" && exit 0
          else
            echo "Failed to initialize" && sleep 10
          fi
        done
        # All attempts failed: fail the step, as the staging loop does.
        exit 1

    - name: Initialize staging TUF root
      if: ${{ inputs.enable_staging }}
      run: |
        curl -Lo root.json ${{ inputs.tuf_root_url }}
        for i in {1..5}
        do
          if cosign initialize --mirror=${{ inputs.tuf_repo }} --root=root.json; then
            echo "Successfully initialized" && exit 0
          else
            echo "Failed to initialize" && sleep 10
          fi
        done
        exit 1

    - name: Sign and verify the image
      run: |
        cosign sign --yes ${IMAGE} --rekor-url ${{ inputs.rekor_url }} --fulcio-url ${{ inputs.fulcio_url }} --oidc-issuer ${{ inputs.oidc_url }} --oidc-provider github-actions
        cosign verify ${IMAGE} --rekor-url ${{ inputs.rekor_url }} --certificate-oidc-issuer=https://token.actions.githubusercontent.com --certificate-identity-regexp='https://github.com/sigstore/sigstore-probers/.github/workflows/reusable-prober.yml@refs/.*'

    # The attestation layer digest is exported through GITHUB_ENV for the
    # next step.
    - name: Generate and upload attestation
      run: |
        cosign attest --predicate ./prober/attestation.json --type slsaprovenance --rekor-url ${{ inputs.rekor_url }} --fulcio-url ${{ inputs.fulcio_url }} --oidc-issuer ${{ inputs.oidc_url }} ${IMAGE}
        cosign verify-attestation --rekor-url ${{ inputs.rekor_url }} --type=slsaprovenance ${IMAGE} --certificate-oidc-issuer=https://token.actions.githubusercontent.com --certificate-identity-regexp='https://github.com/sigstore/sigstore-probers/.github/workflows/reusable-prober.yml@refs/.*'
        # Get attestation hash
        IMAGE_ATT=$(echo $(cosign triangulate ${IMAGE}) | sed 's/\.sig/\.att/')
        DIGEST=$(crane manifest ${IMAGE_ATT} | jq -r '.layers[-1].digest')
        echo "attestation_digest=${DIGEST}" >> $GITHUB_ENV

    - name: Verify attestation contents
      run: |
        ./prober/verify-attestation.sh "${{ env.attestation_digest }}"

    # Always runs so the output is populated on both success and failure.
    - name: Set messages
      id: msg
      if: success() || failure()
      run: |
        echo "rekor_fulcio_e2e=good" >> $GITHUB_OUTPUT
        if [ "${{ job.status }}" == 'failure' ]; then echo "rekor_fulcio_e2e=failure" >> $GITHUB_OUTPUT; fi
compute-summary-msg:
  # Derives the PagerDuty summary line and the environment group.
  #   summary - "Test Notification" for a test trigger, else "Prober Failed"
  #   group   - "staging" when enable_staging is true, else "production"
  runs-on: ubuntu-latest
  outputs:
    group: ${{ steps.msg.outputs.group }}
    summary: ${{ steps.msg.outputs.summary }}
  steps:
    - id: msg
      name: Set messages
      if: success() || failure()
      run: |
        summary_text="Prober Failed"
        if [ "${{ inputs.triggerPagerDutyTest }}" == "true" ]; then
          summary_text="Test Notification"
        fi
        echo "summary=${summary_text}" >> $GITHUB_OUTPUT
        echo "group=production" >> $GITHUB_OUTPUT
        if [ ${{ inputs.enable_staging }} == 'true' ]; then
          echo "group=staging" >> $GITHUB_OUTPUT;
        fi
pagerduty-notification:
  # Calls the reusable pager workflow with the result of every probe job.
  #
  # Runs when a PagerDuty test was requested, or when a needed job failed
  # and the e2e job did not flag the failure as a GitHub OIDC outage.
  # The test flag is read from the declared `inputs` context (the same one
  # compute-summary-msg reads) and, for backward compatibility, from the
  # triggering event's own inputs.
  needs: [sigstore-probe, root-probe, rekor-fulcio-e2e, compute-summary-msg]
  if: >-
    inputs.triggerPagerDutyTest == 'true'
    || github.event.inputs.triggerPagerDutyTest == 'true'
    || (failure() && needs.rekor-fulcio-e2e.outputs.skip_pagerduty != 'true')
  uses: ./.github/workflows/reusable-pager.yml
  secrets:
    PAGERDUTY_INTEGRATION_KEY: ${{ secrets.PAGERDUTY_INTEGRATION_KEY }}
  with:
    summary: ${{ needs.compute-summary-msg.outputs.summary }}
    component: "health prober"
    group: ${{ needs.compute-summary-msg.outputs.group }}
    severity: ${{ inputs.severity }}
    # JSON object passed as a string; one entry per probe job output.
    details: >
      {
        "Environment": "${{ needs.compute-summary-msg.outputs.group }}",
        "Failure URL": "https://github.com/sigstore/sigstore-probers/actions/runs/${{ github.run_id }}",
        "Commit": "${{ github.sha }}",
        "Prober": "${{ needs.sigstore-probe.outputs.sigstore_probe }}",
        "GCS Root": "${{ needs.root-probe.outputs.root_state }}",
        "Rekor Fulcio E2E Test": "${{ needs.rekor-fulcio-e2e.outputs.rekor_fulcio_e2e }}"
      }
    # JSON array of links passed as a string.
    links: >
      [
        {
          "href": "https://github.com/sigstore/public-good-instance/blob/main/playbooks/alerting/alerts/k8s-api-endpoint-prober.md",
          "text": "Prober Failure Playbook"
        }
      ]