Skip to content

Commit

Permalink
Merge pull request #27 from DFE-Digital/893-create-postgres-backups-t…
Browse files Browse the repository at this point in the history
…o-azure-storage

[#893]: Create PostgreSQL backups to Azure Storage for snapshot production database
  • Loading branch information
ltello authored Jan 27, 2025
2 parents 640b6d0 + 5e5cbf9 commit e3f3beb
Show file tree
Hide file tree
Showing 11 changed files with 554 additions and 0 deletions.
46 changes: 46 additions & 0 deletions .github/actions/backup-and-restore-snapshot-database/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Composite action: dumps the database reached through the
# cpd-ec2-<environment>-web app with pg_dump (via konduit) and then feeds that
# dump to psql against the s189p01-cpdec2-pd-pg-snapshot server.
name: Backup DB
description: Backup production DB and restore to snapshot DB

inputs:
  environment:
    description: The name of the environment
    required: true
  azure-credentials:
    description: Azure credentials
    required: true

runs:
  using: composite

  steps:
    # Later steps invoke `make` and `bin/konduit.sh` relative to the workspace.
    # Pinned to v4 to match the other workflows in this repository.
    - name: Checkout code
      uses: actions/checkout@v4

    # Pinned to v2 to match the other workflows in this repository.
    - name: Login
      uses: azure/login@v2
      with:
        creds: ${{ inputs.azure-credentials }}

    - uses: DFE-Digital/github-actions/set-kubelogin-environment@master
      with:
        azure-credentials: ${{ inputs.azure-credentials }}

    # NOTE(review): the make target is fixed to "production" regardless of
    # inputs.environment - confirm this is intended.
    - name: Set AKS credentials
      shell: bash
      run: make ci production get-cluster-credentials

    - name: Install kubectl
      uses: DFE-Digital/github-actions/set-kubectl@master

    - name: Install konduit
      shell: bash
      run: make install-konduit

    # Writes a gzip-compressed (--compress=1) plain SQL dump to
    # backup-<environment>.sql.gz in the workspace.
    - name: Backup database
      shell: bash
      run: |
        bin/konduit.sh cpd-ec2-${{ inputs.environment }}-web -- pg_dump -E utf8 --compress=1 --clean --if-exists --no-privileges --no-owner --verbose -f backup-${{ inputs.environment }}.sql.gz

    # -i names the dump written by the previous step; -c is passed because the
    # dump is compressed. The -t 7200 value is presumably a timeout in
    # seconds - TODO confirm against konduit.sh.
    - name: Restore snapshot database
      shell: bash
      run: bin/konduit.sh -d s189p01-cpdec2-pd-pg-snapshot -k s189p01-cpdec2-pd-app-kv -i backup-${{ inputs.environment }}.sql.gz -c -t 7200 cpd-ec2-${{ inputs.environment }}-web -- psql
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Composite action metadata and inputs. Authentication can be supplied either
# as a single JSON credentials blob (azure-credentials) or as the three
# azure-*-id inputs used with OIDC; all four default to the empty string.
name: Restore snapshot database from Azure storage
description: Restore an Azure postgres snapshot database from a daily backup saved in Azure Storage

inputs:
  # --- Where the backup lives -------------------------------------------------
  storage-account:
    description: Name of the Azure storage account that contains the backup
    required: true

  resource-group:
    description: Name of the Azure resource group
    required: true

  backup-file:
    description: Name of the backup file to restore. Must include the .gz extension if the remote file has it.
    required: true

  # --- Where the app runs -----------------------------------------------------
  app-name:
    description: Name of the app deployment
    required: true

  namespace:
    description: Namespace where the app is deployed. Required when role is not cluster admin.
    required: false

  cluster:
    description: Cluster being used. Test or Production.
    required: true

  # --- Azure authentication ---------------------------------------------------
  azure-credentials:
    description: 'A JSON string containing service principal credentials e.g. {"client_id": "x", "client_secret": "x", "subscription_id": "x", "tenant_id": "x"}'
    required: false
    default: ''

  azure-client-id:
    description: Azure service principal or managed identity client ID when using OIDC
    required: false
    default: ''

  azure-subscription-id:
    description: Azure service principal or managed identity subscription ID when using OIDC
    required: false
    default: ''

  azure-tenant-id:
    description: Azure service principal or managed identity tenant ID when using OIDC
    required: false
    default: ''

  # --- Notifications ----------------------------------------------------------
  slack-webhook:
    description: Name of the slack webhook
    required: true

# Steps: log in to Azure, install tooling, resolve the AKS cluster, download
# the requested backup blob from the "database-backup" container and pipe it
# into psql (through konduit) against the snapshot database server.
runs:
  using: composite
  steps:
    - uses: azure/login@v2
      with:
        creds: ${{ inputs.azure-credentials }}
        client-id: ${{ inputs.azure-client-id }}
        tenant-id: ${{ inputs.azure-tenant-id }}
        subscription-id: ${{ inputs.azure-subscription-id }}

    - name: Setup postgres client
      uses: DFE-Digital/github-actions/install-postgres-client@master
      with:
        version: 16

    - name: Install kubectl
      uses: DFE-Digital/github-actions/set-kubectl@master

    - uses: DFE-Digital/github-actions/set-kubelogin-environment@master
      with:
        azure-credentials: ${{ inputs.azure-credentials }}
        azure-client-id: ${{ inputs.azure-client-id }}
        azure-tenant-id: ${{ inputs.azure-tenant-id }}
        azure-subscription-id: ${{ inputs.azure-subscription-id }}

    # Maps the cluster input onto the resource group / AKS name consumed by
    # the "K8 setup" step. Matching is case-sensitive ("test", "production").
    - name: Set up cluster environment variables
      shell: bash
      run: |
        case ${{ inputs.cluster }} in
          test)
            echo "cluster_rg=s189t01-tsc-ts-rg" >> $GITHUB_ENV
            echo "cluster_name=s189t01-tsc-test-aks" >> $GITHUB_ENV
            ;;
          production)
            echo "cluster_rg=s189p01-tsc-pd-rg" >> $GITHUB_ENV
            echo "cluster_name=s189p01-tsc-production-aks" >> $GITHUB_ENV
            ;;
          *)
            echo "unknown cluster"
            # Fail here rather than letting later steps run with empty
            # cluster_rg / cluster_name values.
            exit 1
            ;;
        esac

    - name: K8 setup
      shell: bash
      run: |
        az aks get-credentials --overwrite-existing -g ${{ env.cluster_rg }} -n ${{ env.cluster_name }}
        kubelogin convert-kubeconfig -l spn
        # install konduit
        curl -s https://raw.githubusercontent.com/DFE-Digital/teacher-services-cloud/master/scripts/konduit.sh -o ./konduit.sh
        chmod +x ./konduit.sh

    # The connection string is masked in logs and exported for the az storage
    # command in the next step.
    # NOTE(review): without `-o tsv` the CLI's default JSON output keeps the
    # surrounding double quotes in the value - confirm consumers tolerate that.
    - name: Set Connection String
      shell: bash
      run: |
        STORAGE_CONN_STR=$(az storage account show-connection-string -g ${{ inputs.resource-group }} -n ${{ inputs.storage-account }} --query 'connectionString')
        echo "::add-mask::$STORAGE_CONN_STR"
        echo "AZURE_STORAGE_CONNECTION_STRING=$STORAGE_CONN_STR" >> $GITHUB_ENV

    - name: Download Backup from Azure Storage
      shell: bash
      run: |
        az config set extension.use_dynamic_install=yes_without_prompt
        az config set core.only_show_errors=true
        az storage azcopy blob download --container database-backup \
          --source ${{ inputs.backup-file }} --destination ${{ inputs.backup-file }}

    # -n is only passed when a namespace was supplied, and -c only when `file`
    # reports the downloaded backup as compressed.
    - name: Restore backup to aks env database
      shell: bash
      run: |
        if [[ -n "${{ inputs.namespace }}" ]]; then
          NAMESPACE_ARG="-n ${{ inputs.namespace }}"
        fi
        COMPRESS=$( file --brief ${{ inputs.backup-file }} | grep -ic compressed || true )
        if [[ $COMPRESS -gt 0 ]]; then
          COMPRESS_ARG=-c
        fi
        ./konduit.sh ${NAMESPACE_ARG} -d s189p01-cpdec2-pd-pg-snapshot -k s189p01-cpdec2-pd-app-kv -i ${{ inputs.backup-file }} ${COMPRESS_ARG} -t 7200 -x ${{ inputs.app-name }} -- psql

    - name: Restore Summary
      if: success()
      shell: bash
      run: |
        NOW=$(TZ=Europe/London date +"%F %R")
        echo 'SNAPSHOT DB RESTORE SUCCESSFUL!' >> $GITHUB_STEP_SUMMARY
        echo ' APP: ${{ inputs.app-name }}' >> $GITHUB_STEP_SUMMARY
        echo ' BACKUP FILE RESTORED: ${{ inputs.storage-account }} / database-backup / ${{ inputs.backup-file }}' >> $GITHUB_STEP_SUMMARY
        echo " AT : ${NOW}" >> $GITHUB_STEP_SUMMARY

    - name: Notify Slack channel on job failure
      if: failure()
      uses: rtCamp/action-slack-notify@v2
      env:
        SLACK_USERNAME: CI Deployment
        SLACK_TITLE: Snapshot Database restore failure
        SLACK_MESSAGE: ${{ inputs.app-name }} snapshot restore job failed
        SLACK_WEBHOOK: ${{ inputs.slack-webhook }}
        SLACK_COLOR: failure
        SLACK_FOOTER: Sent from restore-snapshot-database-from-azure-storage action
109 changes: 109 additions & 0 deletions .github/workflows/backup-db.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Backs up an environment's database to Azure storage. Can be started manually
# (workflow_dispatch) or called from another workflow (workflow_call); both
# triggers expose the same three inputs.
name: Backup database to Azure storage

on:
  workflow_dispatch:
    inputs:
      environment:
        description: Environment to backup
        required: true
        default: staging
        type: choice
        options:
          - staging
          - production
      backup-file:
        description: |
          Backup file name (without extension). Default is cpdec2_[env]_adhoc_YYYY-MM-DD. Set it explicitly when backing up a point-in-time (PTR) server. (Optional)
        required: false
        type: string
        default: default
      db-server:
        description: |
          Name of the database server. Default is the live server. When backing up a point-in-time (PTR) server, use the full name of the PTR server. (Optional)
        # Spelled out to mirror the workflow_call definition below.
        type: string
        required: false
  workflow_call:
    inputs:
      environment:
        description: Environment to backup
        required: true
        default: staging
        type: string
      backup-file:
        description: |
          Backup file name (without extension). Default is cpdec2_[env]_adhoc_YYYY-MM-DD. Set it explicitly when backing up a point-in-time (PTR) server. (Optional)
        required: false
        type: string
        default: default
      db-server:
        description: |
          Name of the database server. Default is the live server. When backing up a point-in-time (PTR) server, use the full name of the PTR server. (Optional)
        type: string
        required: false

# Shared by every job; used to build Azure resource names and the app name.
env:
  SERVICE_NAME: cpd-ec2
  SERVICE_SHORT: cpdec2
  TF_VARS_PATH: config/terraform/application/config

jobs:
  # Resolves Azure resource names for the chosen environment, derives the
  # backup file name, reads the Slack webhook from Key Vault and delegates the
  # actual dump/upload to the shared backup-postgres action.
  backup:
    name: Backup database
    runs-on: ubuntu-latest
    environment:
      name: ${{ inputs.environment || 'production' }}
    env:
      DEPLOY_ENV: ${{ inputs.environment || 'production' }}
      # NOTE(review): both triggers default backup-file to "default", so the
      # 'schedule' fallback only applies when the input is empty - confirm
      # scheduled callers get the file name they expect.
      BACKUP_FILE: ${{ inputs.backup-file || 'schedule' }}

    steps:
      - uses: actions/checkout@v4

      - uses: azure/login@v2
        with:
          creds: ${{ secrets.AZURE_CREDENTIALS }}

      - name: Set Azure environment variables
        run: |
          # Load environment-specific configuration
          source config/global_config/${DEPLOY_ENV}.sh
          tf_vars_file=${TF_VARS_PATH}/${DEPLOY_ENV}.tfvars.json
          # Set Azure environment variables
          echo "CLUSTER=$(jq -r '.cluster' ${tf_vars_file})" >> $GITHUB_ENV
          echo "RESOURCE_GROUP_NAME=${AZURE_RESOURCE_PREFIX}-${SERVICE_SHORT}-${CONFIG_SHORT}-rg" >> $GITHUB_ENV
          echo "STORAGE_ACCOUNT_NAME=${AZURE_RESOURCE_PREFIX}${SERVICE_SHORT}dbbkp${CONFIG_SHORT}sa" >> $GITHUB_ENV
          echo "DB_SERVER=${AZURE_RESOURCE_PREFIX}-${SERVICE_SHORT}-${CONFIG_SHORT}-pg" >> $GITHUB_ENV
          echo "KEYVAULT_NAME=${AZURE_RESOURCE_PREFIX}-${SERVICE_SHORT}-${CONFIG_SHORT}-inf-kv" >> $GITHUB_ENV
          # Sourced variables vanish when this step's shell exits; persist
          # CONFIG_SHORT because the file-name step below needs it.
          echo "CONFIG_SHORT=${CONFIG_SHORT}" >> $GITHUB_ENV

      # "schedule" and "default" are sentinels replaced by a dated name; any
      # other value is used verbatim.
      - name: Generate the backup file name
        run: |
          TODAY=$(date +"%F")
          if [ "${BACKUP_FILE}" == "schedule" ]; then
            BACKUP_FILE=${SERVICE_SHORT}_${CONFIG_SHORT}_${TODAY}
          elif [ "${BACKUP_FILE}" == "default" ]; then
            BACKUP_FILE=${SERVICE_SHORT}_${CONFIG_SHORT}_adhoc_${TODAY}
          fi
          echo "BACKUP_FILE=${BACKUP_FILE}" >> $GITHUB_ENV

      # The webhook is masked before being exposed as a step output.
      - name: Fetch secrets from key vault
        uses: azure/CLI@v2
        id: key-vault-secrets
        with:
          inlineScript: |
            SLACK_WEBHOOK=$(az keyvault secret show --name "SLACK-WEBHOOK" --vault-name ${KEYVAULT_NAME} --query "value" -o tsv)
            echo "::add-mask::$SLACK_WEBHOOK"
            echo "SLACK_WEBHOOK=$SLACK_WEBHOOK" >> $GITHUB_OUTPUT

      # Note that ".sql" is appended to BACKUP_FILE here, so callers should
      # pass the name without an extension.
      - name: Backup ${{ env.DEPLOY_ENV }} postgres
        uses: DFE-Digital/github-actions/backup-postgres@master
        with:
          storage-account: ${{ env.STORAGE_ACCOUNT_NAME }}
          resource-group: ${{ env.RESOURCE_GROUP_NAME }}
          app-name: ${{ env.SERVICE_NAME }}-${{ env.DEPLOY_ENV }}-web
          cluster: ${{ env.CLUSTER }}
          azure-credentials: ${{ secrets.AZURE_CREDENTIALS }}
          backup-file: ${{ env.BACKUP_FILE }}.sql
          db-server-name: ${{ inputs.db-server }}
          slack-webhook: ${{ steps.key-vault-secrets.outputs.SLACK_WEBHOOK }}
11 changes: 11 additions & 0 deletions .github/workflows/nightly-backup-of-production-db.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Calls the reusable backup workflow for production once a day.
name: Nightly backup production database and upload it to Azure Storage

on:
  schedule:
    # 00:15 UTC
    - cron: '15 0 * * *'

jobs:
  backup-and-store:
    uses: ./.github/workflows/backup-db.yml
    with:
      environment: production
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Backs up production to Azure Storage, then restores the snapshot database
# from that backup. Runs daily and can also be started manually.
name: Nightly backup production database, upload it to Azure Storage and restore the snapshot database from it

on:
  workflow_dispatch:
  schedule:
    - cron: "15 4 * * *" # 04:15 UTC

jobs:
  # NOTE(review): backup-db.yml documents backup-file as "without extension"
  # and appends ".sql" itself, so this value likely yields
  # "backup-production.sql.sql" - confirm the restore workflow expects that.
  backup-and-store:
    uses: ./.github/workflows/backup-db.yml
    with:
      environment: production
      backup-file: backup-production.sql

  restore-from-storage:
    # Jobs run in parallel unless ordered; the restore must wait for the
    # backup it reads to be uploaded.
    needs: backup-and-store
    uses: ./.github/workflows/restore-snapshot-db-from-azure-storage.yml
    with:
      environment: production
      backup-file: backup-production.sql
30 changes: 30 additions & 0 deletions .github/workflows/nightly_copy_snapshot_db_from_production.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Runs the local backup-and-restore-snapshot-database composite action, either
# on demand for a chosen environment or nightly (production by default).
name: Restore Snapshot DB from production DB

on:
  workflow_dispatch:
    inputs:
      environment:
        description: GitHub environment to backup and restore
        type: choice
        default: production
        options:
          - staging
          - production
        required: true

  # NOTE(review): the backup-and-restore-from-storage workflow uses this same
  # cron expression - confirm both are meant to run at the same time.
  schedule:
    - cron: "15 4 * * *" # 04:15 UTC

jobs:
  backup-and-restore-production:
    # ubuntu-20.04 hosted runners have been retired; use the same runner label
    # as backup-db.yml.
    runs-on: ubuntu-latest
    environment:
      # Scheduled runs carry no inputs, hence the 'production' fallback.
      name: ${{ inputs.environment || 'production' }}
    steps:
      # Required so the local composite action path below can be resolved.
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Backup and restore snapshot
        uses: ./.github/actions/backup-and-restore-snapshot-database
        with:
          environment: ${{ inputs.environment || 'production' }}
          azure-credentials: ${{ secrets.AZURE_CREDENTIALS }}
Loading

0 comments on commit e3f3beb

Please sign in to comment.