diff --git a/Dockerfile b/Dockerfile
index 2d2e30365..3cf8b9ffc 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,8 @@
 FROM kobotoolbox/kobocat_base:latest
 
 ENV KOBOCAT_SRC_DIR=/srv/src/kobocat \
-    BACKUPS_DIR=/srv/backups
+    BACKUPS_DIR=/srv/backups \
+    KOBOCAT_LOGS_DIR=/srv/logs
 
 # Install post-base-image `apt` additions from `apt_requirements.txt`, if modified.
 COPY ./apt_requirements.txt "${KOBOCAT_TMP_DIR}/current_apt_requirements.txt"
@@ -37,7 +38,9 @@ RUN mkdir -p /etc/service/uwsgi && \
     cp "${KOBOCAT_SRC_DIR}/docker/sync_static.sh" /etc/my_init.d/11_sync_static.bash && \
     mkdir -p "${KOBOCAT_SRC_DIR}/emails/" && \
     chown -R wsgi "${KOBOCAT_SRC_DIR}/emails/" && \
-    mkdir -p "${BACKUPS_DIR}"
+    mkdir -p "${BACKUPS_DIR}" && \
+    mkdir -p "${KOBOCAT_LOGS_DIR}" && \
+    chown -R wsgi "${KOBOCAT_LOGS_DIR}"
 
 RUN echo "db:*:*:kobo:kobo" > /root/.pgpass && \
     chmod 600 /root/.pgpass
@@ -46,10 +49,6 @@ RUN echo "db:*:*:kobo:kobo" > /root/.pgpass && \
 
 RUN echo 'source /etc/profile' >> /root/.bashrc
 
-
-# FIXME: Remove.
-VOLUME ["${KOBOCAT_SRC_DIR}", "${KOBOCAT_SRC_DIR}/media", "/srv/src/kobocat-template"]
-
 WORKDIR "${KOBOCAT_SRC_DIR}"
 
 EXPOSE 8000
diff --git a/docker/backup_media.bash b/docker/backup_media.bash
index aea16a329..7f99169e2 100755
--- a/docker/backup_media.bash
+++ b/docker/backup_media.bash
@@ -3,6 +3,9 @@ set -e
 source /etc/profile
 
 KOBOCAT_MEDIA_URL="${KOBOCAT_MEDIA_URL:-media}"
+BACKUPS_DIR="${BACKUPS_DIR:-/srv/backups}"
+
+mkdir -p "${BACKUPS_DIR}"
 
 timestamp="$(date +%Y.%m.%d.%H_%M)"
 backup_filename="kobocat_media__${timestamp}.tar"
diff --git a/docker/init.bash b/docker/init.bash
index f4b81c5a2..8f9c77dda 100755
--- a/docker/init.bash
+++ b/docker/init.bash
@@ -24,6 +24,12 @@ else
     echo "KoBoCAT media automatic backup schedule: ${KOBOCAT_MEDIA_BACKUP_SCHEDULE}"
 fi
 
+rm -rf /etc/profile.d/pydev_debugger.bash.sh
+if [[ -d /srv/pydev_orig && ! -z "${KOBOCAT_PATH_FROM_ECLIPSE_TO_PYTHON_PAIRS}" ]]; then
+    echo 'Enabling PyDev remote debugging.'
+    "${KOBOCAT_SRC_DIR}/docker/setup_pydev.bash"
+fi
+
 echo 'KoBoCAT initialization complete.'
 
 cd $oldpwd
diff --git a/docker/kobocat.ini b/docker/kobocat.ini
index 25ed60e2e..691d20c9d 100644
--- a/docker/kobocat.ini
+++ b/docker/kobocat.ini
@@ -1,29 +1,18 @@
 [uwsgi]
 
 # directory structure
-chdir = /srv/src/kobocat
+chdir = $(KOBOCAT_SRC_DIR)
 module = onadata.apps.main.wsgi
-
-# virtualenvs
-#home = /home/ubuntu/.virtualenvs/kc
-#envdir = /home/ubuntu/env/kc_envdir
+logto = $(KOBOCAT_LOGS_DIR)/uwsgi.log
 
 # process related settings
 master = true
 processes = 2
-#
-#socket = /home/ubuntu/sockets/kobocat.sock
-#chmod-socket = 666
-#vacuum = true
-
 socket = 0.0.0.0:8000
-#http-socket = 0.0.0.0:8000
 buffer-size = 32768
 harakiri = 120
 uid = wsgi
 gid = wsgi
 die-on-term = true
-
-# uwsgi --socket /home/ubuntu/sockets/kobocat.sock --wsgi-file=/home/ubuntu/src/kobocat/onadata/apps/main/wsgi.py --chmod-socket=666 --chdir=/home/ubuntu/src/kobocat --home=/home/
diff --git a/docker/run_celery.bash b/docker/run_celery.bash
index 958de51d6..831892418 100755
--- a/docker/run_celery.bash
+++ b/docker/run_celery.bash
@@ -3,11 +3,7 @@ set -e
 
 source /etc/profile
 
-CELERYD_TASK_SOFT_TIME_LIMIT="${CELERYD_TASK_SOFT_TIME_LIMIT:-$((15*60))}"
-# Give tasks 1 minute for exception handling and cleanup before killing timed out Celery processes.
-CELERYD_TASK_TIME_LIMIT="${CELERYD_TASK_TIME_LIMIT:-$((${CELERYD_TASK_SOFT_TIME_LIMIT}+60))}"
-
-CELERYD_OPTIONS="--beat --loglevel=DEBUG --soft-time-limit=${CELERYD_TASK_SOFT_TIME_LIMIT} --time-limit=${CELERYD_TASK_TIME_LIMIT} --maxtasksperchild=5"
+CELERYD_OPTIONS="-Ofair --beat --loglevel=DEBUG"
 
 cd "${KOBOCAT_SRC_DIR}"
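Note on the `run_celery.bash` change: the removed `CELERYD_*` variables enforced a 15-minute soft limit plus a one-minute grace period before the hard kill, while the new `-Ofair` flag addresses long tasks hogging prefetched work. If per-task limits are ever wanted again, they can be declared on the task itself — a minimal sketch, with hypothetical task and helper names that are not part of this patch:

```python
# Sketch only: per-task equivalent of the removed --soft-time-limit and
# --time-limit worker flags. Task and helper names are hypothetical.
from celery import shared_task
from celery.exceptions import SoftTimeLimitExceeded


@shared_task(soft_time_limit=15 * 60, time_limit=16 * 60)
def long_running_export():
    try:
        run_export()  # hypothetical long-running job
    except SoftTimeLimitExceeded:
        # The one-minute gap before the hard limit leaves room for cleanup.
        remove_partial_output()  # hypothetical
```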
diff --git a/docker/setup_pydev.bash b/docker/setup_pydev.bash
new file mode 100755
index 000000000..5689a1858
--- /dev/null
+++ b/docker/setup_pydev.bash
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+set -e
+
+if [[ ! -d /srv/pydev_orig ]]; then
+    echo 'Directory `/srv/pydev_orig` must exist to use PyDev debugger (see `kobo-docker/docker-compose.yml`).'
+    exit 1
+fi
+
+cp -a /srv/pydev_orig /srv/pydev
+
+if [[ -z "${KOBOCAT_PATH_FROM_ECLIPSE_TO_PYTHON_PAIRS}" ]]; then
+    echo '`KOBOCAT_PATH_FROM_ECLIPSE_TO_PYTHON_PAIRS` must be set to use the PyDev debugger (see `kobo-docker/envfiles/kobocat.txt`).'
+    exit 1
+fi
+
+echo 'Setting up PyDev remote debugger path mappings.'
+
+# Set up the `PATHS_FROM_ECLIPSE_TO_PYTHON` variable from the environment per
+# https://github.com/fabioz/PyDev.Debugger/blob/master/pydevd_file_utils.py.
+find_string='PATHS_FROM_ECLIPSE_TO_PYTHON = []'
+replace_string="\
+import os\n\
+path_map_pair_strings = os.environ['KOBOCAT_PATH_FROM_ECLIPSE_TO_PYTHON_PAIRS'].split('|')\n\
+PATHS_FROM_ECLIPSE_TO_PYTHON = [tuple([pair_element.strip() for pair_element in pair_string.split('->')]) for pair_string in path_map_pair_strings]\n\
+"
+escaped_find_string="$(echo "${find_string}" | sed -e 's/[]\/$*.^|[]/\\&/g')"
+escaped_replace_string=$(echo "${replace_string}" | sed -e '/\\n/b; s/[]\/$*.^|[]/\\&/g')
+
+sed -i "s/${escaped_find_string}/${escaped_replace_string}/" /srv/pydev/pydevd_file_utils.py
+
+echo 'Adding `PYTHONPATH` modifications to profile.'
+echo 'export PYTHONPATH=${PYTHONPATH}:/srv/pydev' > /etc/profile.d/pydev_debugger.bash.sh
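For readers who don't want to trace the `sed` escaping above: the Python injected into `pydevd_file_utils.py` splits the environment variable on `|` into mappings, and each mapping on `->` into an (Eclipse path, container path) tuple. A standalone equivalent, with an example value that is not part of the patch:

```python
# Standalone equivalent of the code setup_pydev.bash injects into
# pydevd_file_utils.py.
import os

# Example value only; the real one comes from kobo-docker/envfiles/kobocat.txt.
os.environ['KOBOCAT_PATH_FROM_ECLIPSE_TO_PYTHON_PAIRS'] = \
    '/home/me/kobocat -> /srv/src/kobocat'

path_map_pair_strings = \
    os.environ['KOBOCAT_PATH_FROM_ECLIPSE_TO_PYTHON_PAIRS'].split('|')
PATHS_FROM_ECLIPSE_TO_PYTHON = [
    tuple([pair_element.strip() for pair_element in pair_string.split('->')])
    for pair_string in path_map_pair_strings
]
print(PATHS_FROM_ECLIPSE_TO_PYTHON)
# [('/home/me/kobocat', '/srv/src/kobocat')]
```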
diff --git a/fabfile/__init__.py b/fabfile/__init__.py
new file mode 100644
index 000000000..dbf45b634
--- /dev/null
+++ b/fabfile/__init__.py
@@ -0,0 +1,3 @@
+from .docker import deploy
+from .legacy import deploy as deploy_legacy
+from .legacy import deploy_ref as deploy_ref_legacy
diff --git a/fabfile/docker.py b/fabfile/docker.py
new file mode 100644
index 000000000..ede690d6d
--- /dev/null
+++ b/fabfile/docker.py
@@ -0,0 +1,91 @@
+import json
+import os
+
+from fabric.api import cd, env, run, sudo
+from fabric.contrib import files
+
+
+SERVICE_NAME = 'kobocat'
+GIT_REPO = 'https://github.com/kobotoolbox/{}.git'.format(SERVICE_NAME)
+CONTAINER_SRC_DIR_ENV_VAR = '{}_SRC_DIR'.format(SERVICE_NAME.upper())
+UPDATE_STATIC_FILE = '{}/LAST_UPDATE.txt'.format(SERVICE_NAME)
+# These must be defined in deployments.json
+REQUIRED_SETTINGS = (
+    'build_root',  # Temporary location for cloning repo; deleted at end
+    'docker_config_path',  # Location must house `docker-compose.yml`
+    'static_path'  # `UPDATE_STATIC_FILE` will be written here
+)
+
+DEPLOYMENTS = {}
+IMPORTED_DEPLOYMENTS = {}
+deployments_file = os.environ.get('DEPLOYMENTS_JSON', 'deployments.json')
+if os.path.exists(deployments_file):
+    with open(deployments_file, 'r') as f:
+        IMPORTED_DEPLOYMENTS = json.load(f)
+else:
+    raise Exception("Cannot find {}".format(deployments_file))
+
+
+def run_no_pty(*args, **kwargs):
+    # Avoids control characters being returned in the output
+    kwargs['pty'] = False
+    return run(*args, **kwargs)
+
+
+def sudo_no_pty(*args, **kwargs):
+    # Avoids control characters being returned in the output
+    kwargs['pty'] = False
+    return sudo(*args, **kwargs)
+
+
+def setup_env(deployment_name):
+    deployment = DEPLOYMENTS.get(deployment_name, {})
+
+    if deployment_name in IMPORTED_DEPLOYMENTS:
+        deployment.update(IMPORTED_DEPLOYMENTS[deployment_name])
+
+    env.update(deployment)
+
+    for required_setting in REQUIRED_SETTINGS:
+        if required_setting not in env:
+            raise Exception('Please define {} in {} and try again'.format(
+                required_setting, deployments_file))
+
+
+def deploy(deployment_name, branch='master'):
+    setup_env(deployment_name)
+    build_dir = os.path.join(env.build_root, SERVICE_NAME)
+    with cd(build_dir):
+        # Start from scratch
+        run("find -delete")
+        # Shallow clone the requested branch to a temporary directory
+        run("git clone --quiet --depth=1 --branch='{}' '{}' .".format(
+            branch, GIT_REPO))
+        # Note which commit is at the tip of the cloned branch
+        cloned_commit = run_no_pty("git show --no-patch")
+    with cd(env.docker_config_path):
+        # Build the image
+        run("docker-compose build '{}'".format(SERVICE_NAME))
+        # Run the new image
+        run("docker-compose stop '{}'".format(SERVICE_NAME))
+        run("docker-compose rm -f '{}'".format(SERVICE_NAME))
+        # Don't specify a service name to avoid "Cannot link to a non running
+        # container"
+        run("docker-compose up -d")
+        running_commit = run_no_pty(
+            "docker exec $(docker-compose ps -q '{service}') bash -c '"
+            "cd \"${src_dir_var}\" && git show --no-patch'".format(
+                service=SERVICE_NAME,
+                src_dir_var=CONTAINER_SRC_DIR_ENV_VAR
+            )
+        )
+    with cd(env.static_path):
+        # Write the date and running commit to a publicly-accessible file
+        sudo("(date; echo) > '{}'".format(UPDATE_STATIC_FILE))
+        files.append(UPDATE_STATIC_FILE, running_commit, use_sudo=True)
+    if running_commit != cloned_commit:
+        raise Exception(
+            'The running commit does not match the tip of the cloned '
+            'branch! Make sure docker-compose.yml is set to build from '
+            '{}'.format(build_dir)
+        )
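`fabfile/docker.py` reads its targets from `deployments.json` (or the file named by `DEPLOYMENTS_JSON`), and `setup_env()` rejects any entry missing the three `REQUIRED_SETTINGS`. A hypothetical entry — every value below is illustrative, and `host_string` is just the standard Fabric connection setting rather than something this patch requires:

```python
# Hypothetical deployments.json contents; the three commented keys are the
# REQUIRED_SETTINGS that setup_env() checks for.
import json

example_deployments = {
    'kc_production': {
        'host_string': 'deployer@kc.example.org',  # ordinary Fabric env setting
        'build_root': '/tmp/fabric-builds',        # temporary clone location
        'docker_config_path': '/srv/kobo-docker',  # must hold docker-compose.yml
        'static_path': '/srv/www/static',          # LAST_UPDATE.txt lands here
    }
}
print(json.dumps(example_deployments, indent=4))
```

With that file in place, the new task would be invoked as something like `fab deploy:kc_production,branch=release` (assuming Fabric 1.x argument syntax), with `deploy_legacy` and `deploy_ref_legacy` still available for the old deployment path.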
diff --git a/fabfile.py b/fabfile/legacy.py
similarity index 62%
rename from fabfile.py
rename to fabfile/legacy.py
index d26ced56a..220992ebb 100644
--- a/fabfile.py
+++ b/fabfile/legacy.py
@@ -1,14 +1,9 @@
-import glob
+import json
 import os
-from subprocess import check_call
 import sys
-import json
-import re
-import requests
-from fabric.api import cd, env, prefix, run as run_
-from fabric.contrib import files
-from fabric.operations import put
+from fabric.api import cd, env, prefix, run
+
 
 DEPLOYMENTS = {}
@@ -19,13 +14,10 @@ DEPLOYMENTS.update(imported_deployments)
 
 
-def run(*args, **kwargs):
-    '''
-    Workaround for mangled output that's returned after sourcing
-    $NVM_DIR/nvm.sh
-    '''
+def run_no_pty(*args, **kwargs):
+    # Avoids control characters being returned in the output
     kwargs['pty'] = False
-    return run_(*args, **kwargs)
+    return run(*args, **kwargs)
 
 
 def kobo_workon(_virtualenv_name):
@@ -45,7 +37,7 @@ def check_key_filename(deployment_configs):
             deployment_configs['key_filename']
         )
         if not os.path.exists(deployment_configs['key_filename']):
-            exit_with_error("Cannot find required permissions file: %s" %
+            exit_with_error("Cannot find required SSH key file: %s" %
                             deployment_configs['key_filename'])
@@ -95,7 +87,8 @@ def deploy_ref(deployment_name, ref):
     with cd(env.kc_path):
         run("git fetch origin")
         # Make sure we're not moving to an older codebase
-        git_output = run('git rev-list {}..HEAD --count 2>&1'.format(ref))
+        git_output = run_no_pty(
+            'git rev-list {}..HEAD --count 2>&1'.format(ref))
         if int(git_output) > 0:
             raise Exception("The server's HEAD is already in front of the "
                             "commit to be deployed.")
@@ -103,7 +96,7 @@
         # detached. Perhaps consider using `git reset`.
         run('git checkout {}'.format(ref))
         # Report if the working directory is unclean.
-        git_output = run('git status --porcelain')
+        git_output = run_no_pty('git status --porcelain')
         if len(git_output):
             run('git status')
             print('WARNING: The working directory is unclean. See above.')
@@ -145,55 +138,3 @@
 
 def deploy(deployment_name, branch='master'):
     deploy_ref(deployment_name, 'origin/{}'.format(branch))
-
-
-def deploy_passing(deployment_name, branch='master'):
-    ''' Deploy the latest code on the given branch that's
-    been marked passing by Travis CI. '''
-    print 'Asking Travis CI for the hash of the latest passing commit...'
-    desired_commit = get_last_successfully_built_commit(branch)
-    print 'Found passing commit {} for branch {}!'.format(desired_commit,
-                                                          branch)
-    deploy_ref(deployment_name, desired_commit)
-
-
-def get_last_successfully_built_commit(branch):
-    ''' Returns the hash of the latest successfully built commit
-    on the given branch according to Travis CI. '''
-
-    API_ENDPOINT='https://api.travis-ci.org/'
-    REPO_SLUG='kobotoolbox/kobocat'
-    COMMON_HEADERS={'accept': 'application/vnd.travis-ci.2+json'}
-
-    ''' Travis only lets us specify `number`, `after_number`, and `event_type`.
-    It'd be great to filter by state and branch, but it seems we can't
-    (http://docs.travis-ci.com/api/?http#builds).
-    '''
-
-    request = requests.get(
-        '{}repos/{}/builds'.format(API_ENDPOINT, REPO_SLUG),
-        headers=COMMON_HEADERS
-    )
-    if request.status_code != 200:
-        raise Exception('Travis returned unexpected code {}.'.format(
-            request.status_code
-        ))
-    response = json.loads(request.text)
-
-    builds = response['builds']
-    commits = {commit['id']: commit for commit in response['commits']}
-
-    for build in builds:
-        if build['state'] != 'passed' or build['pull_request']:
-            # No interest in non-passing builds or PRs
-            continue
-        commit = commits[build['commit_id']]
-        if commit['branch'] == branch:
-            # Assumes the builds are in descending chronological order
-            if re.match('^[0-9a-f]+$', commit['sha']) is None:
-                raise Exception('Travis returned the invalid SHA {}.'.format(
-                    commit['sha']))
-            return commit['sha']
-
-    raise Exception("Couldn't find a passing build for the branch {}. "
-                    "This could be due to pagination, in which case this code "
-                    "must be made more robust!".format(branch))
diff --git a/onadata/apps/export/views.py b/onadata/apps/export/views.py
index ac901357b..ebce38388 100644
--- a/onadata/apps/export/views.py
+++ b/onadata/apps/export/views.py
@@ -52,7 +52,9 @@ def build_formpack(username, id_string):
 
 
 def build_export(request, username, id_string):
-    hierarchy_in_labels = request.REQUEST.get('hierarchy_in_labels', None)
+    hierarchy_in_labels = request.REQUEST.get(
+        'hierarchy_in_labels', ''
+    ).lower() == 'true'
     group_sep = request.REQUEST.get('groupsep', '/')
     lang = request.REQUEST.get('lang', None)
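The `build_export` change is a behavior fix, not a refactor: the old code passed the raw query-string value along, so any non-empty value — including the literal string `false` — read as enabled wherever it was treated as a boolean. A quick sketch of the difference, assuming the value was consumed for its truthiness:

```python
# Sketch: why the explicit == 'true' comparison matters. `params` stands in
# for request.REQUEST.
def old_value(params):
    return params.get('hierarchy_in_labels', None)

def new_value(params):
    return params.get('hierarchy_in_labels', '').lower() == 'true'

params = {'hierarchy_in_labels': 'false'}
print(bool(old_value(params)))  # True  -- 'false' is a non-empty string
print(new_value(params))        # False -- as the user intended
```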
diff --git a/onadata/apps/logger/migrations/0002_attachment_filename_length.py b/onadata/apps/logger/migrations/0002_attachment_filename_length.py
new file mode 100644
index 000000000..77a5e3103
--- /dev/null
+++ b/onadata/apps/logger/migrations/0002_attachment_filename_length.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import onadata.apps.logger.models.attachment
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('logger', '0001_initial'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='attachment',
+            name='media_file',
+            field=models.FileField(max_length=380, upload_to=onadata.apps.logger.models.attachment.upload_to),
+        ),
+    ]
diff --git a/onadata/apps/logger/models/attachment.py b/onadata/apps/logger/models/attachment.py
index 60c209f9c..443c321fe 100644
--- a/onadata/apps/logger/models/attachment.py
+++ b/onadata/apps/logger/models/attachment.py
@@ -7,16 +7,20 @@
 from instance import Instance
 
 
-def upload_to(instance, filename):
+def upload_to(attachment, filename):
+    instance = attachment.instance
+    xform = instance.xform
     return os.path.join(
-        instance.instance.xform.user.username,
+        xform.user.username,
         'attachments',
+        xform.uuid or 'form',
+        instance.uuid or 'instance',
         os.path.split(filename)[1])
 
 
 class Attachment(models.Model):
     instance = models.ForeignKey(Instance, related_name="attachments")
-    media_file = models.FileField(upload_to=upload_to)
+    media_file = models.FileField(upload_to=upload_to, max_length=380)
     mimetype = models.CharField(
         max_length=50, null=False, blank=True, default='')
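With the new `upload_to`, attachments are sharded by form and instance UUID instead of all landing in one per-user directory; that is also why the migration widens `media_file` to `max_length=380`, since the stored value is the full relative path, not just the filename. A sketch of the resulting path using stand-in objects — the real arguments are Django model instances:

```python
# Stand-in objects to show the path the new upload_to produces.
import os


class Stub(object):
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)


def upload_to(attachment, filename):  # copied from the patch above
    instance = attachment.instance
    xform = instance.xform
    return os.path.join(
        xform.user.username,
        'attachments',
        xform.uuid or 'form',
        instance.uuid or 'instance',
        os.path.split(filename)[1])


attachment = Stub(instance=Stub(
    xform=Stub(user=Stub(username='bob'), uuid='a1b2c3'),
    uuid='d4e5f6',
))
print(upload_to(attachment, '/tmp/photo.jpg'))
# bob/attachments/a1b2c3/d4e5f6/photo.jpg
```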
diff --git a/onadata/apps/logger/tasks.py b/onadata/apps/logger/tasks.py
index 734ebacf2..87eb5f2e8 100644
--- a/onadata/apps/logger/tasks.py
+++ b/onadata/apps/logger/tasks.py
@@ -12,3 +12,109 @@ def fix_root_node_names(minimum_instance_id):
     )
 
 ###### END ISSUE 242 FIX ######
+
+import csv
+import datetime
+import pytz
+import zipfile
+from io import BytesIO
+from django.contrib.auth.models import User
+from django.core.files.storage import get_storage_class
+from .models import Instance, XForm
+
+@shared_task
+def generate_stats_zip(output_filename):
+    REPORTS = {
+        'instances.csv': {
+            'model': Instance,
+            'date_field': 'date_created'
+        },
+        'xforms.csv': {
+            'model': XForm,
+            'date_field': 'date_created'
+        },
+        'users.csv': {
+            'model': User,
+            'date_field': 'date_joined'
+        }
+    }
+
+    def first_day_of_next_month(any_date):
+        return datetime.date(
+            year=any_date.year if any_date.month < 12 else any_date.year + 1,
+            month=any_date.month + 1 if any_date.month < 12 else 1,
+            day=1
+        )
+
+    def first_day_of_previous_month(any_date):
+        return datetime.date(
+            year=any_date.year if any_date.month > 1 else any_date.year - 1,
+            month=any_date.month - 1 if any_date.month > 1 else 12,
+            day=1
+        )
+
+    def utc_midnight(any_date):
+        return datetime.datetime(
+            year=any_date.year,
+            month=any_date.month,
+            day=any_date.day,
+            tzinfo=pytz.UTC
+        )
+
+    def list_created_by_month(model, date_field):
+        today = datetime.date.today()
+        # Just start at January 1 of the previous year. Going back to the
+        # oldest object would be great, but it's too slow right now. Django
+        # 1.10 will provide a more efficient way:
+        # https://docs.djangoproject.com/en/1.10/ref/models/database-functions/#trunc
+        first_date = datetime.date(
+            year=today.year - 1,
+            month=1,
+            day=1
+        )
+        # We *ASSUME* that primary keys increase chronologically!
+        last_object = model.objects.order_by('pk').last()
+        last_date = first_day_of_next_month(getattr(last_object, date_field))
+        year_month_count = []
+        while last_date > first_date:
+            this_start_date = first_day_of_previous_month(last_date)
+            this_end_date = last_date
+            criteria = {
+                '{}__gte'.format(date_field): utc_midnight(this_start_date),
+                '{}__lt'.format(date_field): utc_midnight(this_end_date)
+            }
+            objects_this_month = model.objects.filter(**criteria).count()
+            year_month_count.append((
+                this_start_date.year,
+                this_start_date.month,
+                objects_this_month
+            ))
+            last_date = this_start_date
+        return year_month_count
+
+    default_storage = get_storage_class()()
+
+    with default_storage.open(output_filename, 'wb') as output_file:
+        zip_file = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
+
+        for filename, report_settings in REPORTS.iteritems():
+            model_name_plural = report_settings[
+                'model']._meta.verbose_name_plural
+            fieldnames = [
+                'Year',
+                'Month',
+                'New {}'.format(model_name_plural.capitalize()),
+                'NOTE: Records created prior to January 1 of last '
+                'year are NOT included in this report!'
+            ]
+            data = list_created_by_month(
+                report_settings['model'], report_settings['date_field'])
+            csv_io = BytesIO()
+            writer = csv.DictWriter(csv_io, fieldnames=fieldnames)
+            writer.writeheader()
+            for row in data:
+                writer.writerow(dict(zip(fieldnames, row)))
+            zip_file.writestr(filename, csv_io.getvalue())
+            csv_io.close()
+
+        zip_file.close()
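`generate_stats_zip` moves the report building out of the request cycle: the caller picks a storage path, queues the task, and later fetches the zip from default storage once the worker has written it. A minimal sketch of that flow — the filename is illustrative, and `superuser_stats` in the views change below constructs its own:

```python
# Sketch: queue the task, then read the result back from default storage.
from django.core.files.storage import get_storage_class

from onadata.apps.logger.tasks import generate_stats_zip

filename = 'admin/superuser_stats/example-host_2016-01-01_42.zip'  # illustrative
generate_stats_zip.delay(filename)

# Later (e.g. in a download view), once the worker has finished:
storage = get_storage_class()()
if storage.exists(filename):
    with storage.open(filename) as f:
        zip_bytes = f.read()
```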
diff --git a/onadata/apps/logger/views.py b/onadata/apps/logger/views.py
index 67e3cea63..969ca7a96 100644
--- a/onadata/apps/logger/views.py
+++ b/onadata/apps/logger/views.py
@@ -3,10 +3,7 @@
 import json
 import os
 import tempfile
-import csv
 import re
-import zipfile
-from io import BytesIO
 
 import pytz
 from django.contrib.auth.decorators import login_required, user_passes_test
@@ -23,6 +20,7 @@
     HttpResponseRedirect,
     HttpResponseServerError,
     StreamingHttpResponse,
+    Http404,
 )
 from django.shortcuts import get_object_or_404
 from django.shortcuts import render
@@ -34,7 +32,7 @@
 from django.views.decorators.http import require_http_methods
 from django.views.decorators.csrf import csrf_exempt
 from django_digest import HttpDigestAuthenticator
-from pyxform import survey_from
+from pyxform import Survey
 from pyxform.spss import survey_to_spss_label_zip
 from wsgiref.util import FileWrapper
@@ -65,6 +63,7 @@
 )
 from onadata.libs.utils.viewer_tools import _get_form_url
 from ...koboform.pyxform_utils import convert_csv_to_xls
+from .tasks import generate_stats_zip
 
 IO_ERROR_STRINGS = [
     'request data read error',
@@ -464,7 +463,7 @@ def download_spss_labels(request, username, form_id_string):
     except:
         return HttpResponseServerError('Error retrieving XLSForm.')
 
-    survey= survey_from.xls(filelike_obj=xlsform_io)
+    survey= Survey.from_xls(filelike_obj=xlsform_io)
     zip_filename= '{}_spss_labels.zip'.format(xform.id_string)
     zip_io= survey_to_spss_label_zip(survey, xform.id_string)
@@ -764,90 +763,38 @@ def ziggy_submissions(request, username):
 
 @user_passes_test(lambda u: u.is_superuser)
 def superuser_stats(request, username):
-    REPORTS = {
-        'instances.csv': {
-            'model': Instance,
-            'date_field': 'date_created'
-        },
-        'xforms.csv': {
-            'model': XForm,
-            'date_field': 'date_created'
-        },
-        'users.csv': {
-            'model': User,
-            'date_field': 'date_joined'
-        }
-    }
-
-    def first_day_of_next_month(any_day):
-        return datetime_module.date(
-            year=any_day.year if any_day.month < 12 else any_day.year + 1,
-            month=any_day.month + 1 if any_day.month < 12 else 1,
-            day=1
-        )
-
-    def first_day_of_previous_month(any_day):
-        return datetime_module.date(
-            year=any_day.year if any_day.month > 1 else any_day.year - 1,
-            month=any_day.month - 1 if any_day.month > 1 else 12,
-            day=1
-        )
+    base_filename = '{}_{}_{}.zip'.format(
+        re.sub('[^a-zA-Z0-9]', '-', request.META['HTTP_HOST']),
+        datetime_module.date.today(),
+        datetime_module.datetime.now().microsecond
+    )
+    filename = os.path.join(
+        request.user.username,
+        'superuser_stats',
+        base_filename
+    )
+    generate_stats_zip.delay(filename)
+    template_ish = (
+        '