Skip to content

Restore databases

Restore databases #303

name: Restore databases
on:
workflow_dispatch:
inputs:
backup_from:
description: 'Backup from:'
required: true
options:
- prod
- staging
- test
default: 'staging'
type: choice
restore_to:
description: 'Restore to:'
required: true
options:
- staging
- test
- demo1
- demo2
- demo3
default: 'test'
type: choice
backup_age_limit_mins:
description: 'Run ONLY if last backup file is older than mins:'
required: true
default: 60
type: number
restore_ignore_backup_age:
description: 'Force restore, ignoring backup age:'
required: true
default: 'false'
type: boolean
databases:
description: 'Databases to backup and restore:'
required: true
default: '[\"prime_data_hub_candidate\",\"prime_data_hub\"]'
type: string
defaults:
run:
working-directory: prime-router
jobs:
pre_job:
name: "Set Build Environment"
runs-on: ubuntu-latest
outputs:
env_name: ${{ env.ENV_NAME }}
sink_env_name: ${{ env.SINK_ENV_NAME }}
restore_ignore_backup_age: ${{ env.RESTORE_IGNORE_BACKUP_AGE }}
backup_age_limit_mins: ${{ env.BACKUP_AGE_LIMIT_MINS }}
databases: ${{ env.DATABASES }}
source_resource_group: ${{ env.SOURCE_RESOURCE_GROUP }}
sink_resource_group: ${{ env.SINK_RESOURCE_GROUP }}
source_db_server: ${{ env.SOURCE_DB_SERVER }}
sink_db_server: ${{ env.SINK_DB_SERVER }}
sink_backup_storage: ${{ env.SINK_BACKUP_STORAGE }}
steps:
- name: Check out changes
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938
- name: Set env variables
shell: bash
run: |
echo "event_name: ${{ github.event_name }}"
if [[ "${{ github.event_name }}" == 'workflow_dispatch' ]]; then
echo "ENV_NAME=${{ github.event.inputs.backup_from }}" >> $GITHUB_ENV
echo "SINK_ENV_NAME=${{ github.event.inputs.restore_to }}" >> $GITHUB_ENV
echo "RESTORE_IGNORE_BACKUP_AGE=${{ github.event.inputs.restore_ignore_backup_age }}" >> $GITHUB_ENV
echo "BACKUP_AGE_LIMIT_MINS=${{ github.event.inputs.backup_age_limit_mins }}" >> $GITHUB_ENV
echo "DATABASES=${{ github.event.inputs.databases }}" >> $GITHUB_ENV
else
echo "ENV_NAME=staging" >> $GITHUB_ENV
echo "SINK_ENV_NAME=test" >> $GITHUB_ENV
echo "RESTORE_IGNORE_BACKUP_AGE=false" >> $GITHUB_ENV
echo "BACKUP_AGE_LIMIT_MINS=1440" >> $GITHUB_ENV
echo "DATABASES=[\"prime_data_hub_candidate\"]" >> $GITHUB_ENV
fi
- name: Set backup and restore variables
shell: bash
run: |
echo "SOURCE_RESOURCE_GROUP=prime-data-hub-${{ env.ENV_NAME }}" >> $GITHUB_ENV
echo "SINK_RESOURCE_GROUP=prime-data-hub-${{ env.SINK_ENV_NAME }}" >> $GITHUB_ENV
echo "SOURCE_DB_SERVER=pdh${{ env.ENV_NAME }}-pgsql" >> $GITHUB_ENV
echo "SINK_DB_SERVER=pdh${{ env.SINK_ENV_NAME }}-pgsql" >> $GITHUB_ENV
echo "SINK_BACKUP_STORAGE=pdh${{ env.SINK_ENV_NAME }}terraform" >> $GITHUB_ENV
# Login to Azure
- uses: azure/login@a65d910e8af852a8061c627c456678983e180302
with:
creds: ${{ secrets.SERVICE_PRINCIPAL_CREDS }}
# Restores will fail if sink server is not pre-expanded
- name: Increase sink server size if necessary
uses: ./.github/actions/retry
with:
timeout_minutes: 20
max_attempts: 4
retry_wait_seconds: 120
command: |
source_size=$(az postgres server show --name ${{ env.SOURCE_DB_SERVER }} --resource-group ${{ env.SOURCE_RESOURCE_GROUP }} \
--query 'storageProfile.storageMb' -o tsv)
sink_size=$(az postgres server show --name ${{ env.SINK_DB_SERVER }} --resource-group ${{ env.SINK_RESOURCE_GROUP }} \
--query 'storageProfile.storageMb' -o tsv)
replica_exists=$(az postgres server list -g ${{ env.SINK_RESOURCE_GROUP }} --query '[].name | join(`, `, @) | contains(@, `replica`)')
if [[ ${source_size} -gt ${sink_size} ]]; then
if [ ${replica_exists} = true ]; then
az postgres server update --name ${{ env.SINK_DB_SERVER }}-replica -g ${{ env.SINK_RESOURCE_GROUP }} \
--storage-size $source_size
fi
az postgres server update --name ${{ env.SINK_DB_SERVER }} -g ${{ env.SINK_RESOURCE_GROUP }} \
--storage-size $source_size
fi
shell: bash
backup_source:
name: "Backup source: ${{ needs.pre_job.outputs.env_name }} - ${{ matrix.db }}"
needs:
- pre_job
# 22.04 error: mount error: cifs filesystem not supported by the system
runs-on: ubuntu-20.04
strategy:
max-parallel: 1
matrix:
db: ${{ fromJson(needs.pre_job.outputs.databases) }}
environment: ${{ needs.pre_job.outputs.env_name }}
env:
env_name: ${{ needs.pre_job.outputs.env_name }}
sink_backup_storage: ${{ needs.pre_job.outputs.sink_backup_storage }}
source_resource_group: ${{ needs.pre_job.outputs.source_resource_group }}
source_db_server: ${{ needs.pre_job.outputs.source_db_server }}
outputs:
backup_age_valid: ${{ env.BACKUP_AGE_VALID }}
steps:
- name: Check out changes
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938
- uses: azure/login@a65d910e8af852a8061c627c456678983e180302
with:
creds: ${{ secrets.SERVICE_PRINCIPAL_CREDS }}
- name: Get public ip
shell: bash
run: echo "PUBLIC_IP=$(dig +short myip.opendns.com @resolver1.opendns.com)" >> $GITHUB_ENV
# Only create a backup if last run is greater than age limit
- name: Confirm last backup age valid
shell: bash
run: |
backup_exists=$(az storage directory exists --account-name ${{ env.sink_backup_storage }} --name ${{ env.env_name }}_${{ matrix.db }} \
--share-name dbbackups --query 'exists' -o tsv --only-show-errors)
if [ $backup_exists == "false" ]; then
lastModified="1970-01-01"
else
lastModified=$(az storage directory show --name ${{ env.env_name }}_${{ matrix.db }} --share-name dbbackups \
--account-name ${{ env.sink_backup_storage }} --query 'properties.lastModified' --only-show-errors -o tsv)
fi
currentDate=$(date +%s)
lastModified=$(date +%s -d $lastModified)
backupAgeMins=$(( ( $currentDate - $lastModified ) / 60 ))
if [[ ${backupAgeMins} -gt ${{ needs.pre_job.outputs.backup_age_limit_mins }} ]]; then echo "BACKUP_AGE_VALID=true" >> $GITHUB_ENV; fi
# Allow runner db access
- name: Add db access
if: env.BACKUP_AGE_VALID == 'true'
id: add-db-access
shell: bash
run: |
az postgres server update -g ${{ env.source_resource_group }} -n ${{ env.source_db_server }} --public-network-access "Enabled"
sleep 10;
az postgres server firewall-rule create -g ${{ env.source_resource_group }} -s ${{ env.source_db_server }} -n github_actions_runner \
--start-ip-address ${{ env.PUBLIC_IP }} --end-ip-address ${{ env.PUBLIC_IP }}
# Mount Azure file share to store backup file
- name: Mount file share
if: env.BACKUP_AGE_VALID == 'true'
id: mount_share
uses: ./.github/actions/mount-share
with:
storage-account-resource-group: ${{ needs.pre_job.outputs.sink_resource_group }}
storage-account-name: ${{ env.sink_backup_storage }}
file-share-name: "dbbackups"
- name: Backup db
if: env.BACKUP_AGE_VALID == 'true'
uses: ./.github/actions/db-backup
with:
pass: ${{ secrets[format('POSTGRESQL_{0}_PWD', env.env_name)] }}
host: ${{ env.source_db_server }}.postgres.database.azure.com
port: 5432
user: prime
database: ${{ matrix.db }}
file-path: ${{ steps.mount_share.outputs.mount-path }}/${{ env.env_name }}_${{ matrix.db }}
options: -Z2 -j 16 -Fd
# Remove runner db access
- name: Remove db access
if: always() && steps.add-db-access.outcome == 'success'
shell: bash
run: |
az postgres server firewall-rule delete -g ${{ env.source_resource_group }} -s ${{ env.source_db_server }} -n github_actions_runner --yes
sleep 10;
az postgres server update -g ${{ env.source_resource_group }} -n ${{ env.source_db_server }} --public-network-access "Disabled"
restore_sink:
name: "Restore sink: ${{ needs.pre_job.outputs.sink_env_name }} - ${{ matrix.db }}"
needs:
- pre_job
- backup_source
# Restore only if backup age is less than limit, backup age is ignored, and the restore destination is either staging or test
if: |
(needs.pre_job.outputs.restore_ignore_backup_age == 'true' || needs.backup_source.outputs.backup_age_valid != 'true') &&
(needs.pre_job.outputs.sink_env_name != 'prod')
# 22.04 error: mount error: cifs filesystem not supported by the system
runs-on: ubuntu-20.04
strategy:
max-parallel: 1
matrix:
db: ${{ fromJson(needs.pre_job.outputs.databases) }}
environment: ${{ needs.pre_job.outputs.sink_env_name }}
env:
sink_env_name: ${{ needs.pre_job.outputs.sink_env_name }}
sink_resource_group: ${{ needs.pre_job.outputs.sink_resource_group }}
sink_db_server: ${{ needs.pre_job.outputs.sink_db_server }}
steps:
- name: Check out changes
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938
- uses: azure/login@a65d910e8af852a8061c627c456678983e180302
with:
creds: ${{ secrets.SERVICE_PRINCIPAL_CREDS }}
- name: Get public ip
shell: bash
run: echo "PUBLIC_IP=$(dig +short myip.opendns.com @resolver1.opendns.com)" >> $GITHUB_ENV
# Allow runner db access
- name: Add db access
id: add-db-access
uses: ./.github/actions/retry
with:
timeout_minutes: 120
max_attempts: 6
retry_wait_seconds: 30
command: |
az postgres server update -g ${{ env.sink_resource_group }} -n ${{ env.sink_db_server }} --public-network-access "Enabled"
sleep 30;
az postgres server firewall-rule create -g ${{ env.sink_resource_group }} -s ${{ env.sink_db_server }} -n github_actions_runner \
--start-ip-address ${{ env.PUBLIC_IP }} --end-ip-address ${{ env.PUBLIC_IP }}
az postgres server firewall-rule show -g ${{ env.sink_resource_group }} -s ${{ env.sink_db_server }} -n github_actions_runner
shell: bash
# Mount Azure file share to retrieve backup file
- name: Mount file share
id: mount_share
uses: ./.github/actions/mount-share
with:
storage-account-resource-group: ${{ env.sink_resource_group }}
storage-account-name: ${{ needs.pre_job.outputs.sink_backup_storage }}
file-share-name: "dbbackups"
- name: Restore db
uses: ./.github/actions/db-restore
with:
pass: ${{ secrets[format('POSTGRESQL_{0}_PWD', env.sink_env_name)] }}
host: ${{ env.sink_db_server }}
port: 5432
user: prime
database: ${{ matrix.db }}
file-path: ${{ steps.mount_share.outputs.mount-path }}/${{ needs.pre_job.outputs.env_name }}_${{ matrix.db }}
resource-group: ${{ env.sink_resource_group }}
options: -j 32 -Fd -O
- name: Clean sink db
if: |
needs.pre_job.outputs.sink_env_name != 'prod' &&
(${{ matrix.db }} == 'prime_data_hub' || ${{ matrix.db }} == 'prime_data_hub_candidate')
uses: ./.github/actions/db-query
with:
pass: ${{ secrets[format('POSTGRESQL_{0}_PWD', env.sink_env_name)] }}
host: ${{ env.sink_db_server }}
port: 5432
user: prime
database: ${{ matrix.db }}
output-file: clean_output_${{ matrix.db }}.html
options: -H
query: |
\echo <h1>${{ matrix.db }} data cleanup</h1>
\echo <h2><code>setting</code> table changes:</h2>
\echo
UPDATE
setting
SET
values = values || '{"transport": {"host": "10.0.1.100", "port": "22", "type": "SFTP", "filePath": "./staging", "credentialName": null}}'::jsonb
WHERE
type = 'RECEIVER'
AND values -> 'transport' ->> 'type' = 'SFTP';
\echo <hr/>
SELECT
relname, n_tup_ins - n_tup_del as rowcount
FROM pg_stat_all_tables
WHERE schemaname NOT IN ('pg_catalog', 'information_schema') AND
schemaname !~ '^pg_toast'
ORDER BY relname;
- name: Upload clean db result
if: |
needs.pre_job.outputs.sink_env_name != 'prod' &&
(${{ matrix.db }} == 'prime_data_hub' || ${{ matrix.db }} == 'prime_data_hub_candidate')
uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874
with:
name: clean_output_${{ matrix.db }}
path: clean_output_${{ matrix.db }}.html
retention-days: 7
# Remove runner db access
- name: Remove db access
if: always() && steps.add-db-access.outcome == 'success'
continue-on-error: true
shell: bash
run: |
az postgres server firewall-rule delete -g ${{ env.sink_resource_group }} -s ${{ env.sink_db_server }} -n github_actions_runner --yes;
sleep 10;
az postgres server update -g ${{ env.sink_resource_group }} -n ${{ env.sink_db_server }} --public-network-access "Disabled";
sleep 30;