From 666fb261b62069ba99b6ab275f61ae6abc4e3b59 Mon Sep 17 00:00:00 2001
From: Arve Knudsen
Date: Tue, 5 Jan 2016 11:31:29 +0100
Subject: [PATCH] Write periodic backup scheduler

---
 .editorconfig                              | 10 ++++
 Dockerfile                                 | 11 ++--
 rethinkdb/backup.py => backup-rethinkdb.py | 16 +++--
 cron.py                                    | 14 -----
 crontab                                    |  1 -
 schedule-rethinkdb-backup.py               | 79 ++++++++++++++++++++++
 6 files changed, 104 insertions(+), 27 deletions(-)
 create mode 100644 .editorconfig
 rename rethinkdb/backup.py => backup-rethinkdb.py (75%)
 delete mode 100644 cron.py
 delete mode 100644 crontab
 create mode 100755 schedule-rethinkdb-backup.py

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..09136eb
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,10 @@
+root = true
+
+[*]
+indent_style = space
+end_of_line = lf
+insert_final_newline = true
+max_line_length = 79
+
+[*.py]
+indent_size = 4
diff --git a/Dockerfile b/Dockerfile
index fbaade5..d4ccabb 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,19 +1,18 @@
 FROM debian
 MAINTAINER "MuzHack"
 
-RUN apt-get update && apt-get install -y cron python3 python3-pip lsb-release wget
+RUN apt-get update && apt-get install -y python3 python3-pip lsb-release wget
 RUN echo "deb http://download.rethinkdb.com/apt `lsb_release -cs` main" | tee /etc/apt/sources.list.d/rethinkdb.list
 RUN wget -qO- https://download.rethinkdb.com/apt/pubkey.gpg | apt-key add -
 RUN apt-get update && apt-get install -y rethinkdb
 RUN apt-get clean -y && apt-get autoclean -y && apt-get autoremove -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 
 WORKDIR /app
 
-COPY ./crontab /etc/cron.d/
-COPY ./requirements.txt /app/
-RUN mkdir -p /app/rethinkdb
-COPY ./rethinkdb/backup.py /app/rethinkdb/
+COPY ./requirements.txt /app/
 RUN pip3 install -r requirements.txt
 RUN rm -rf requirements.txt
 
-CMD python3 cron.py
+COPY ./schedule-rethinkdb-backup.py /app/
+
+CMD python3 schedule-rethinkdb-backup.py
diff --git a/rethinkdb/backup.py b/backup-rethinkdb.py
similarity index 75%
rename from rethinkdb/backup.py
rename to backup-rethinkdb.py
index 13dc62b..7d965c8 100755
--- a/rethinkdb/backup.py
+++ b/backup-rethinkdb.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+"""Script to back up RethinkDB either locally or remotely to S3."""
 import subprocess
 import argparse
 import os.path
@@ -24,10 +25,11 @@ def _get_environment_value(key):
     return value
 
 
-parser = argparse.ArgumentParser(description='Back up local RethinkDB instance')
+parser = argparse.ArgumentParser(
+    description='Back up local RethinkDB instance')
 parser.add_argument('--s3-bucket', default=None, help='Specify S3 bucket')
 parser.add_argument('--remove', action='store_true', default=False,
-    help='Remove backup archive when done?')
+                    help='Remove backup archive when done?')
 args = parser.parse_args()
 
 date_time_str = datetime.utcnow().strftime('%Y-%m-%dT%H:%M')
@@ -37,16 +39,18 @@ def _get_environment_value(key):
 command = ['rethinkdb', 'dump', '-f', filename]
 auth_key = os.environ.get('RETHINKDB_AUTH_KEY')
 if auth_key:
-    command.extend(['-a', auth_key,])
+    command.extend(['-a', auth_key, ])
 _info('Backing up database to {}...'.format(filename))
 subprocess.check_call(command, stdout=subprocess.PIPE)
 
 if args.s3_bucket:
-    _info('Uploading \'{}\' to S3 bucket \'{}\'...'.format(filename, args.s3_bucket))
+    _info('Uploading \'{}\' to S3 bucket \'{}\'...'.format(filename,
+                                                           args.s3_bucket))
     access_key_id = _get_environment_value('AWS_ACCESS_KEY_ID')
     secret = _get_environment_value('AWS_SECRET_ACCESS_KEY')
-    s3_client = boto3.client('s3', region_name='eu-central-1', aws_access_key_id=access_key_id,
-        aws_secret_access_key=secret)
+    s3_client = boto3.client('s3', region_name='eu-central-1',
+                             aws_access_key_id=access_key_id,
+                             aws_secret_access_key=secret)
     s3_client.upload_file(filename, args.s3_bucket, filename)
 
     # TODO: Implement deleting backups that are older than 100 days
diff --git a/cron.py b/cron.py
deleted file mode 100644
index 78be180..0000000
--- a/cron.py
+++ /dev/null
@@ -1,14 +0,0 @@
-import subprocess
-import os
-
-
-crontab_filename = '/etc/cron.d/crontab'
-with open(crontab_filename, 'rb') as f:
-    crontab = f.readlines()
-for k in [k for k in os.environ if k.startswith('CRONVAR_')]:
-    cron_vars.append((k, os.environ[k],))
-
-crontab = cron_vars + ['',] + crontab
-with open(crontab_filename, 'wb') as f:
-    f.write('\n'.join(crontab))
-subprocess.check_call(['crond', '-L', '15',])
diff --git a/crontab b/crontab
deleted file mode 100644
index 53e87b3..0000000
--- a/crontab
+++ /dev/null
@@ -1 +0,0 @@
-30 18 * * * python3 /app/rethinkdb/backup.py --s3-bucket $S3_BUCKET --remove
diff --git a/schedule-rethinkdb-backup.py b/schedule-rethinkdb-backup.py
new file mode 100755
index 0000000..68fb767
--- /dev/null
+++ b/schedule-rethinkdb-backup.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+"""Script to schedule RethinkDB backup."""
+import asyncio
+import subprocess
+from datetime import datetime, date, timedelta
+import sys
+import logging
+import contextlib
+
+
+def _configure_logging():
+    logging.getLogger().setLevel(logging.WARNING)
+    logger = logging.getLogger('app')
+    logger.setLevel(logging.DEBUG)
+
+    ch = logging.StreamHandler()
+    formatter = logging.Formatter(
+        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    ch.setFormatter(formatter)
+    logger.addHandler(ch)
+
+    return logger
+
+
+_logger = _configure_logging()
+
+
+def _schedule_backup(loop):
+    """Schedule the next backup on the given event loop.
+
+    The backup is due at 18:00 local time: later today if that moment is
+    still ahead, otherwise tomorrow.
+    """
+    hour = 18
+
+    # Derive the target from a single clock reading, so "now" and "today"
+    # cannot disagree when this runs right around midnight.
+    now = datetime.now()
+    desired_time = now.replace(hour=hour, minute=0, second=0, microsecond=0)
+    if now >= desired_time:
+        _logger.debug(
+            'Delaying until {} next day since we\'re already at {}'.format(
+                hour, now.hour))
+        desired_time = desired_time + timedelta(days=1)
+    else:
+        _logger.debug('Delaying until {} same day'.format(hour))
+    # Round the delay up to a whole second. Truncating it (timedelta.seconds
+    # drops the microseconds) wakes the loop just before the target, and the
+    # re-schedule done by _backup() then computes a zero delay, so backups
+    # run back-to-back until the clock reaches the target.
+    desired_seconds = int((desired_time - now).total_seconds()) + 1
+
+    _logger.debug(
+        'Delaying for {} second(s) before backup'.format(desired_seconds))
+    loop.call_later(desired_seconds, _backup, loop)
+
+
+def _backup(loop):
+    """Perform actual backup."""
+    now = datetime.now()
+    _logger.info('Backing up at {}...'.format(
+        now.strftime('%Y-%m-%d %H:%M:%S')))
+    _logger.info('Backed up successfully!')
+    _logger.debug('Scheduling next backup')
+    _schedule_backup(loop)
+
+
+def _main():
+    with contextlib.closing(asyncio.get_event_loop()) as loop:
+        _schedule_backup(loop)
+        try:
+            loop.run_forever()
+        except KeyboardInterrupt:
+            _logger.info('Interrupted')
+            sys.exit(0)
+
+
+if __name__ == '__main__':
+    _main()