diff --git a/Dockerfile b/Dockerfile
index 2d2e30365..3cf8b9ffc 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,8 @@
 FROM kobotoolbox/kobocat_base:latest
 
 ENV KOBOCAT_SRC_DIR=/srv/src/kobocat \
-    BACKUPS_DIR=/srv/backups
+    BACKUPS_DIR=/srv/backups \
+    KOBOCAT_LOGS_DIR=/srv/logs
 
 # Install post-base-image `apt` additions from `apt_requirements.txt`, if modified.
 COPY ./apt_requirements.txt "${KOBOCAT_TMP_DIR}/current_apt_requirements.txt"
@@ -37,7 +38,9 @@ RUN mkdir -p /etc/service/uwsgi && \
     cp "${KOBOCAT_SRC_DIR}/docker/sync_static.sh" /etc/my_init.d/11_sync_static.bash && \
     mkdir -p "${KOBOCAT_SRC_DIR}/emails/" && \
     chown -R wsgi "${KOBOCAT_SRC_DIR}/emails/" && \
-    mkdir -p "${BACKUPS_DIR}"
+    mkdir -p "${BACKUPS_DIR}" && \
+    mkdir -p "${KOBOCAT_LOGS_DIR}" && \
+    chown -R wsgi "${KOBOCAT_LOGS_DIR}"
 
 RUN echo "db:*:*:kobo:kobo" > /root/.pgpass && \
     chmod 600 /root/.pgpass
@@ -46,10 +49,6 @@ RUN echo "db:*:*:kobo:kobo" > /root/.pgpass && \
 
 RUN echo 'source /etc/profile' >> /root/.bashrc
 
-
-# FIXME: Remove.
-VOLUME ["${KOBOCAT_SRC_DIR}", "${KOBOCAT_SRC_DIR}/media", "/srv/src/kobocat-template"]
-
 WORKDIR "${KOBOCAT_SRC_DIR}"
 
 EXPOSE 8000
diff --git a/docker/backup_media.bash b/docker/backup_media.bash
index aea16a329..7f99169e2 100755
--- a/docker/backup_media.bash
+++ b/docker/backup_media.bash
@@ -3,6 +3,9 @@ set -e
 source /etc/profile
 
 KOBOCAT_MEDIA_URL="${KOBOCAT_MEDIA_URL:-media}"
+BACKUPS_DIR="${BACKUPS_DIR:-/srv/backups}"
+
+mkdir -p "${BACKUPS_DIR}"
 
 timestamp="$(date +%Y.%m.%d.%H_%M)"
 backup_filename="kobocat_media__${timestamp}.tar"
diff --git a/docker/init.bash b/docker/init.bash
index f4b81c5a2..8f9c77dda 100755
--- a/docker/init.bash
+++ b/docker/init.bash
@@ -24,6 +24,12 @@ else
     echo "KoBoCAT media automatic backup schedule: ${KOBOCAT_MEDIA_BACKUP_SCHEDULE}"
 fi
 
+rm -rf /etc/profile.d/pydev_debugger.bash.sh
+if [[ -d /srv/pydev_orig && ! -z "${KOBOCAT_PATH_FROM_ECLIPSE_TO_PYTHON_PAIRS}" ]]; then
+    echo 'Enabling PyDev remote debugging.'
+    "${KOBOCAT_SRC_DIR}/docker/setup_pydev.bash"
+fi
+
 echo 'KoBoCAT initialization complete.'
 
 cd $oldpwd
diff --git a/docker/kobocat.ini b/docker/kobocat.ini
index 25ed60e2e..691d20c9d 100644
--- a/docker/kobocat.ini
+++ b/docker/kobocat.ini
@@ -1,29 +1,18 @@
 [uwsgi]
 
 # directory structure
-chdir = /srv/src/kobocat
+chdir = $(KOBOCAT_SRC_DIR)
 module = onadata.apps.main.wsgi
-
-# virtualenvs
-#home = /home/ubuntu/.virtualenvs/kc
-#envdir = /home/ubuntu/env/kc_envdir
+logto = $(KOBOCAT_LOGS_DIR)/uwsgi.log
 
 # process related settings
 master = true
 processes = 2
-#
-#socket = /home/ubuntu/sockets/kobocat.sock
-#chmod-socket = 666
-#vacuum = true
-
 socket = 0.0.0.0:8000
-#http-socket = 0.0.0.0:8000
 buffer-size = 32768
 harakiri = 120
 uid = wsgi
 gid = wsgi
 die-on-term = true
-
-# uwsgi --socket /home/ubuntu/sockets/kobocat.sock --wsgi-file=/home/ubuntu/src/kobocat/onadata/apps/main/wsgi.py --chmod-socket=666 --chdir=/home/ubuntu/src/kobocat --home=/home/
diff --git a/docker/run_celery.bash b/docker/run_celery.bash
index 958de51d6..831892418 100755
--- a/docker/run_celery.bash
+++ b/docker/run_celery.bash
@@ -3,11 +3,7 @@ set -e
 
 source /etc/profile
 
-CELERYD_TASK_SOFT_TIME_LIMIT="${CELERYD_TASK_SOFT_TIME_LIMIT:-$((15*60))}"
-# Give tasks 1 minute for exception handling and cleanup before killing timed out Celery processes.
-CELERYD_TASK_TIME_LIMIT="${CELERYD_TASK_TIME_LIMIT:-$((${CELERYD_TASK_SOFT_TIME_LIMIT}+60))}"
-
-CELERYD_OPTIONS="--beat --loglevel=DEBUG --soft-time-limit=${CELERYD_TASK_SOFT_TIME_LIMIT} --time-limit=${CELERYD_TASK_TIME_LIMIT} --maxtasksperchild=5"
+CELERYD_OPTIONS="-Ofair --beat --loglevel=DEBUG"
 
 cd "${KOBOCAT_SRC_DIR}"
 
diff --git a/docker/setup_pydev.bash b/docker/setup_pydev.bash
new file mode 100755
index 000000000..5689a1858
--- /dev/null
+++ b/docker/setup_pydev.bash
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+set -e
+
+if [[ ! -d /srv/pydev_orig ]]; then
+    echo 'Directory `/srv/pydev_orig` must exist to use PyDev debugger (see `kobo-docker/docker-compose.yml`).'
+    exit 1
+fi
+
+cp -a /srv/pydev_orig /srv/pydev
+
+if [[ -z "${KOBOCAT_PATH_FROM_ECLIPSE_TO_PYTHON_PAIRS}" ]]; then
+    echo '`KOBOCAT_PATH_FROM_ECLIPSE_TO_PYTHON_PAIRS` must be set to use the PyDev debugger (see `kobo-docker/envfiles/kobocat.txt`).'
+    exit 1
+fi
+
+echo 'Setting up PyDev remote debugger path mappings.'
+
+# Set up the `PATHS_FROM_ECLIPSE_TO_PYTHON` variable from the environment per
+# https://github.com/fabioz/PyDev.Debugger/blob/master/pydevd_file_utils.py.
+find_string='PATHS_FROM_ECLIPSE_TO_PYTHON = []'
+replace_string="\
+import os\n\
+path_map_pair_strings = os.environ['KOBOCAT_PATH_FROM_ECLIPSE_TO_PYTHON_PAIRS'].split('|')\n\
+PATHS_FROM_ECLIPSE_TO_PYTHON = [tuple([pair_element.strip() for pair_element in pair_string.split('->')]) for pair_string in path_map_pair_strings]\n\
+"
+escaped_find_string="$(echo "${find_string}" | sed -e 's/[]\/$*.^|[]/\\&/g')"
+escaped_replace_string=$(echo "${replace_string}" | sed -e '/\\n/b; s/[]\/$*.^|[]/\\&/g')
+
+sed -i "s/${escaped_find_string}/${escaped_replace_string}/" /srv/pydev/pydevd_file_utils.py
+
+echo 'Adding `PYTHONPATH` modifications to profile.'
+echo 'export PYTHONPATH=${PYTHONPATH}:/srv/pydev' > /etc/profile.d/pydev_debugger.bash.sh
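The sed edit above splices a small parser into pydevd_file_utils.py so PyDev can translate workstation paths into container paths. A minimal sketch of what the injected snippet does, using a hypothetical mapping value (pairs are separated by '|', and the two sides of each pair by '->'):

    import os

    # Hypothetical value of the variable set in kobo-docker/envfiles/kobocat.txt.
    os.environ['KOBOCAT_PATH_FROM_ECLIPSE_TO_PYTHON_PAIRS'] = (
        '/home/dev/workspace/kobocat -> /srv/src/kobocat')

    # Mirrors the replace_string injected into pydevd_file_utils.py above.
    path_map_pair_strings = os.environ[
        'KOBOCAT_PATH_FROM_ECLIPSE_TO_PYTHON_PAIRS'].split('|')
    PATHS_FROM_ECLIPSE_TO_PYTHON = [
        tuple([pair_element.strip() for pair_element in pair_string.split('->')])
        for pair_string in path_map_pair_strings]

    print(PATHS_FROM_ECLIPSE_TO_PYTHON)
    # [('/home/dev/workspace/kobocat', '/srv/src/kobocat')]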
diff --git a/fabfile/__init__.py b/fabfile/__init__.py
new file mode 100644
index 000000000..dbf45b634
--- /dev/null
+++ b/fabfile/__init__.py
@@ -0,0 +1,3 @@
+from .docker import deploy
+from .legacy import deploy as deploy_legacy
+from .legacy import deploy_ref as deploy_ref_legacy
diff --git a/fabfile/docker.py b/fabfile/docker.py
new file mode 100644
index 000000000..ede690d6d
--- /dev/null
+++ b/fabfile/docker.py
@@ -0,0 +1,91 @@
+import json
+import os
+
+from fabric.api import cd, env, run, sudo
+from fabric.contrib import files
+
+
+SERVICE_NAME = 'kobocat'
+GIT_REPO = 'https://github.com/kobotoolbox/{}.git'.format(SERVICE_NAME)
+CONTAINER_SRC_DIR_ENV_VAR = '{}_SRC_DIR'.format(SERVICE_NAME.upper())
+UPDATE_STATIC_FILE = '{}/LAST_UPDATE.txt'.format(SERVICE_NAME)
+# These must be defined in deployments.json
+REQUIRED_SETTINGS = (
+    'build_root', # Temporary location for cloning repo; deleted at end
+    'docker_config_path', # Location must house `docker-compose.yml`
+    'static_path' # `UPDATE_STATIC_FILE` will be written here
+)
+
+DEPLOYMENTS = {}
+IMPORTED_DEPLOYMENTS = {}
+deployments_file = os.environ.get('DEPLOYMENTS_JSON', 'deployments.json')
+if os.path.exists(deployments_file):
+    with open(deployments_file, 'r') as f:
+        IMPORTED_DEPLOYMENTS = json.load(f)
+else:
+    raise Exception("Cannot find {}".format(deployments_file))
+
+
+def run_no_pty(*args, **kwargs):
+    # Avoids control characters being returned in the output
+    kwargs['pty'] = False
+    return run(*args, **kwargs)
+
+
+def sudo_no_pty(*args, **kwargs):
+    # Avoids control characters being returned in the output
+    kwargs['pty'] = False
+    return sudo(*args, **kwargs)
+
+
+def setup_env(deployment_name):
+    deployment = DEPLOYMENTS.get(deployment_name, {})
+
+    if deployment_name in IMPORTED_DEPLOYMENTS:
+        deployment.update(IMPORTED_DEPLOYMENTS[deployment_name])
+
+    env.update(deployment)
+
+    for required_setting in REQUIRED_SETTINGS:
+        if required_setting not in env:
+            raise Exception('Please define {} in {} and try again'.format(
+                required_setting, deployments_file))
+
+
+def deploy(deployment_name, branch='master'):
+    setup_env(deployment_name)
+    build_dir = os.path.join(env.build_root, SERVICE_NAME)
+    with cd(build_dir):
+        # Start from scratch
+        run("find -delete")
+        # Shallow clone the requested branch to a temporary directory
+        run("git clone --quiet --depth=1 --branch='{}' '{}' .".format(
+            branch, GIT_REPO))
+        # Note which commit is at the tip of the cloned branch
+        cloned_commit = run_no_pty("git show --no-patch")
+    with cd(env.docker_config_path):
+        # Build the image
+        run("docker-compose build '{}'".format(SERVICE_NAME))
+        # Run the new image
+        run("docker-compose stop '{}'".format(SERVICE_NAME))
+        run("docker-compose rm -f '{}'".format(SERVICE_NAME))
+        # Don't specify a service name to avoid "Cannot link to a non running
+        # container"
+        run("docker-compose up -d")
+        running_commit = run_no_pty(
+            "docker exec $(docker-compose ps -q '{service}') bash -c '"
+            "cd \"${src_dir_var}\" && git show --no-patch'".format(
+                service=SERVICE_NAME,
+                src_dir_var=CONTAINER_SRC_DIR_ENV_VAR
+            )
+        )
+    with cd(env.static_path):
+        # Write the date and running commit to a publicly-accessible file
+        sudo("(date; echo) > '{}'".format(UPDATE_STATIC_FILE))
+        files.append(UPDATE_STATIC_FILE, running_commit, use_sudo=True)
+    if running_commit != cloned_commit:
+        raise Exception(
+            'The running commit does not match the tip of the cloned '
+            'branch! Make sure docker-compose.yml is set to build from '
+            '{}'.format(build_dir)
+        )
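fabfile/docker.py aborts unless every key in REQUIRED_SETTINGS is present for the chosen deployment. A hypothetical deployments.json entry, shown as the dict json.load() returns (host_string is the standard Fabric connection setting; all values are illustrative):

    IMPORTED_DEPLOYMENTS = {
        'kc-demo': {  # the deployment_name passed to deploy()
            'host_string': 'ubuntu@kc.example.org',  # ordinary Fabric env setting
            'build_root': '/tmp/builds',  # repo is cloned beneath this
            'docker_config_path': '/home/ubuntu/kobo-docker',  # holds docker-compose.yml
            'static_path': '/var/www/html',  # LAST_UPDATE.txt is written here
        }
    }

With an entry like that in place, a deploy would be invoked as, e.g., fab deploy:kc-demo,branch=master.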
diff --git a/fabfile.py b/fabfile/legacy.py
similarity index 62%
rename from fabfile.py
rename to fabfile/legacy.py
index d26ced56a..220992ebb 100644
--- a/fabfile.py
+++ b/fabfile/legacy.py
@@ -1,14 +1,9 @@
-import glob
+import json
 import os
-from subprocess import check_call
 import sys
-import json
-import re
 
-import requests
-from fabric.api import cd, env, prefix, run as run_
-from fabric.contrib import files
-from fabric.operations import put
+from fabric.api import cd, env, prefix, run
+
 
 DEPLOYMENTS = {}
@@ -19,13 +14,10 @@
 DEPLOYMENTS.update(imported_deployments)
 
 
-def run(*args, **kwargs):
-    '''
-    Workaround for mangled output that's returned after sourcing
-    $NVM_DIR/nvm.sh
-    '''
+def run_no_pty(*args, **kwargs):
+    # Avoids control characters being returned in the output
     kwargs['pty'] = False
-    return run_(*args, **kwargs)
+    return run(*args, **kwargs)
 
 
 def kobo_workon(_virtualenv_name):
@@ -45,7 +37,7 @@ def check_key_filename(deployment_configs):
             deployment_configs['key_filename']
         )
         if not os.path.exists(deployment_configs['key_filename']):
-            exit_with_error("Cannot find required permissions file: %s" %
+            exit_with_error("Cannot find required SSH key file: %s" %
                             deployment_configs['key_filename'])
 
 
@@ -95,7 +87,8 @@ def deploy_ref(deployment_name, ref):
     with cd(env.kc_path):
         run("git fetch origin")
         # Make sure we're not moving to an older codebase
-        git_output = run('git rev-list {}..HEAD --count 2>&1'.format(ref))
+        git_output = run_no_pty(
+            'git rev-list {}..HEAD --count 2>&1'.format(ref))
         if int(git_output) > 0:
             raise Exception("The server's HEAD is already in front of the "
                             "commit to be deployed.")
@@ -103,7 +96,7 @@ def deploy_ref(deployment_name, ref):
         # detached. Perhaps consider using `git reset`.
         run('git checkout {}'.format(ref))
         # Report if the working directory is unclean.
-        git_output = run('git status --porcelain')
+        git_output = run_no_pty('git status --porcelain')
         if len(git_output):
             run('git status')
             print('WARNING: The working directory is unclean. See above.')
@@ -145,55 +138,3 @@ def deploy_ref(deployment_name, ref):
 
 def deploy(deployment_name, branch='master'):
     deploy_ref(deployment_name, 'origin/{}'.format(branch))
-
-
-def deploy_passing(deployment_name, branch='master'):
-    ''' Deploy the latest code on the given branch that's
-    been marked passing by Travis CI. '''
-    print 'Asking Travis CI for the hash of the latest passing commit...'
-    desired_commit = get_last_successfully_built_commit(branch)
-    print 'Found passing commit {} for branch {}!'.format(desired_commit,
-                                                          branch)
-    deploy_ref(deployment_name, desired_commit)
-
-
-def get_last_successfully_built_commit(branch):
-    ''' Returns the hash of the latest successfully built commit
-    on the given branch according to Travis CI. '''
-
-    API_ENDPOINT='https://api.travis-ci.org/'
-    REPO_SLUG='kobotoolbox/kobocat'
-    COMMON_HEADERS={'accept': 'application/vnd.travis-ci.2+json'}
-
-    ''' Travis only lets us specify `number`, `after_number`, and `event_type`.
-    It'd be great to filter by state and branch, but it seems we can't
-    (http://docs.travis-ci.com/api/?http#builds). '''
-
-    request = requests.get(
-        '{}repos/{}/builds'.format(API_ENDPOINT, REPO_SLUG),
-        headers=COMMON_HEADERS
-    )
-    if request.status_code != 200:
-        raise Exception('Travis returned unexpected code {}.'.format(
-            request.status_code
-        ))
-    response = json.loads(request.text)
-
-    builds = response['builds']
-    commits = {commit['id']: commit for commit in response['commits']}
-
-    for build in builds:
-        if build['state'] != 'passed' or build['pull_request']:
-            # No interest in non-passing builds or PRs
-            continue
-        commit = commits[build['commit_id']]
-        if commit['branch'] == branch:
-            # Assumes the builds are in descending chronological order
-            if re.match('^[0-9a-f]+$', commit['sha']) is None:
-                raise Exception('Travis returned the invalid SHA {}.'.format(
-                    commit['sha']))
-            return commit['sha']
-
-    raise Exception("Couldn't find a passing build for the branch {}. "
-                    "This could be due to pagination, in which case this code "
-                    "must be made more robust!".format(branch))
diff --git a/onadata/apps/export/views.py b/onadata/apps/export/views.py
index ac901357b..ebce38388 100644
--- a/onadata/apps/export/views.py
+++ b/onadata/apps/export/views.py
@@ -52,7 +52,9 @@ def build_formpack(username, id_string):
 
 
 def build_export(request, username, id_string):
-    hierarchy_in_labels = request.REQUEST.get('hierarchy_in_labels', None)
+    hierarchy_in_labels = request.REQUEST.get(
+        'hierarchy_in_labels', ''
+    ).lower() == 'true'
     group_sep = request.REQUEST.get('groupsep', '/')
     lang = request.REQUEST.get('lang', None)
 
diff --git a/onadata/apps/logger/migrations/0002_attachment_filename_length.py b/onadata/apps/logger/migrations/0002_attachment_filename_length.py
new file mode 100644
index 000000000..77a5e3103
--- /dev/null
+++ b/onadata/apps/logger/migrations/0002_attachment_filename_length.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import onadata.apps.logger.models.attachment
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('logger', '0001_initial'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='attachment',
+            name='media_file',
+            field=models.FileField(max_length=380, upload_to=onadata.apps.logger.models.attachment.upload_to),
+        ),
+    ]
diff --git a/onadata/apps/logger/models/attachment.py b/onadata/apps/logger/models/attachment.py
index 60c209f9c..443c321fe 100644
--- a/onadata/apps/logger/models/attachment.py
+++ b/onadata/apps/logger/models/attachment.py
@@ -7,16 +7,20 @@
 from instance import Instance
 
 
-def upload_to(instance, filename):
+def upload_to(attachment, filename):
+    instance = attachment.instance
+    xform = instance.xform
     return os.path.join(
-        instance.instance.xform.user.username,
+        xform.user.username,
         'attachments',
+        xform.uuid or 'form',
+        instance.uuid or 'instance',
         os.path.split(filename)[1])
 
 
 class Attachment(models.Model):
     instance = models.ForeignKey(Instance, related_name="attachments")
-    media_file = models.FileField(upload_to=upload_to)
+    media_file = models.FileField(upload_to=upload_to, max_length=380)
     mimetype = models.CharField(
         max_length=50, null=False, blank=True, default='')
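The reworked upload_to namespaces each attachment under its form's and instance's UUIDs, which is why the accompanying migration raises media_file's max_length to 380. A sketch of the resulting storage layout, with hypothetical names:

    import os

    # Mirrors the new upload_to(): username/attachments/<xform uuid>/<instance uuid>/<basename>
    def example_upload_path(username, xform_uuid, instance_uuid, filename):
        return os.path.join(
            username,
            'attachments',
            xform_uuid or 'form',  # fallback when the form has no uuid
            instance_uuid or 'instance',  # fallback when the instance has no uuid
            os.path.split(filename)[1])

    print(example_upload_path('bob', 'a0b1c2d3', 'e4f5a6b7', '/tmp/photo.jpg'))
    # bob/attachments/a0b1c2d3/e4f5a6b7/photo.jpg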
diff --git a/onadata/apps/logger/tasks.py b/onadata/apps/logger/tasks.py
index 734ebacf2..87eb5f2e8 100644
--- a/onadata/apps/logger/tasks.py
+++ b/onadata/apps/logger/tasks.py
@@ -12,3 +12,109 @@ def fix_root_node_names(minimum_instance_id):
     )
 
 ###### END ISSUE 242 FIX ######
+
+import csv
+import datetime
+import pytz
+import zipfile
+from io import BytesIO
+from django.contrib.auth.models import User
+from django.core.files.storage import get_storage_class
+from .models import Instance, XForm
+
+
+@shared_task
+def generate_stats_zip(output_filename):
+    REPORTS = {
+        'instances.csv': {
+            'model': Instance,
+            'date_field': 'date_created'
+        },
+        'xforms.csv': {
+            'model': XForm,
+            'date_field': 'date_created'
+        },
+        'users.csv': {
+            'model': User,
+            'date_field': 'date_joined'
+        }
+    }
+
+    def first_day_of_next_month(any_date):
+        return datetime.date(
+            year=any_date.year if any_date.month < 12 else any_date.year + 1,
+            month=any_date.month + 1 if any_date.month < 12 else 1,
+            day=1
+        )
+
+    def first_day_of_previous_month(any_date):
+        return datetime.date(
+            year=any_date.year if any_date.month > 1 else any_date.year - 1,
+            month=any_date.month - 1 if any_date.month > 1 else 12,
+            day=1
+        )
+
+    def utc_midnight(any_date):
+        return datetime.datetime(
+            year=any_date.year,
+            month=any_date.month,
+            day=any_date.day,
+            tzinfo=pytz.UTC
+        )
+
+    def list_created_by_month(model, date_field):
+        today = datetime.date.today()
+        # Just start at January 1 of the previous year. Going back to the
+        # oldest object would be great, but it's too slow right now. Django
+        # 1.10 will provide a more efficient way:
+        # https://docs.djangoproject.com/en/1.10/ref/models/database-functions/#trunc
+        first_date = datetime.date(
+            year=today.year - 1,
+            month=1,
+            day=1
+        )
+        # We *ASSUME* that primary keys increase chronologically!
+        last_object = model.objects.order_by('pk').last()
+        last_date = first_day_of_next_month(getattr(last_object, date_field))
+        year_month_count = []
+        while last_date > first_date:
+            this_start_date = first_day_of_previous_month(last_date)
+            this_end_date = last_date
+            criteria = {
+                '{}__gte'.format(date_field): utc_midnight(this_start_date),
+                '{}__lt'.format(date_field): utc_midnight(this_end_date)
+            }
+            objects_this_month = model.objects.filter(**criteria).count()
+            year_month_count.append((
+                this_start_date.year,
+                this_start_date.month,
+                objects_this_month
+            ))
+            last_date = this_start_date
+        return year_month_count
+
+    default_storage = get_storage_class()()
+
+    with default_storage.open(output_filename, 'wb') as output_file:
+        zip_file = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
+
+        for filename, report_settings in REPORTS.iteritems():
+            model_name_plural = report_settings[
+                'model']._meta.verbose_name_plural
+            fieldnames = [
+                'Year',
+                'Month',
+                'New {}'.format(model_name_plural.capitalize()),
+                'NOTE: Records created prior to January 1 of last '
+                'year are NOT included in this report!'
+            ]
+            data = list_created_by_month(
+                report_settings['model'], report_settings['date_field'])
+            csv_io = BytesIO()
+            writer = csv.DictWriter(csv_io, fieldnames=fieldnames)
+            writer.writeheader()
+            for row in data:
+                writer.writerow(dict(zip(fieldnames, row)))
+            zip_file.writestr(filename, csv_io.getvalue())
+            csv_io.close()
+
+        zip_file.close()
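generate_stats_zip counts new objects in half-open monthly windows, walking backwards from the month after the newest object down to January 1 of last year; utc_midnight() pins both window edges to UTC so no record falls into two windows. A quick illustration of the boundary helper on hypothetical dates:

    import datetime

    def first_day_of_next_month(any_date):
        return datetime.date(
            year=any_date.year if any_date.month < 12 else any_date.year + 1,
            month=any_date.month + 1 if any_date.month < 12 else 1,
            day=1)

    # December rolls over into the next year; other months simply increment.
    print(first_day_of_next_month(datetime.date(2016, 12, 15)))  # 2017-01-01
    print(first_day_of_next_month(datetime.date(2016, 3, 31)))   # 2016-04-01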
diff --git a/onadata/apps/logger/views.py b/onadata/apps/logger/views.py
index 67e3cea63..969ca7a96 100644
--- a/onadata/apps/logger/views.py
+++ b/onadata/apps/logger/views.py
@@ -3,10 +3,7 @@
 import json
 import os
 import tempfile
-import csv
 import re
-import zipfile
-from io import BytesIO
 
 import pytz
 from django.contrib.auth.decorators import login_required, user_passes_test
@@ -23,6 +20,7 @@
     HttpResponseRedirect,
     HttpResponseServerError,
     StreamingHttpResponse,
+    Http404,
 )
 from django.shortcuts import get_object_or_404
 from django.shortcuts import render
@@ -34,7 +32,7 @@
 from django.views.decorators.http import require_http_methods
 from django.views.decorators.csrf import csrf_exempt
 from django_digest import HttpDigestAuthenticator
-from pyxform import survey_from
+from pyxform import Survey
 from pyxform.spss import survey_to_spss_label_zip
 from wsgiref.util import FileWrapper
 
@@ -65,6 +63,7 @@
 )
 from onadata.libs.utils.viewer_tools import _get_form_url
 from ...koboform.pyxform_utils import convert_csv_to_xls
+from .tasks import generate_stats_zip
 
 IO_ERROR_STRINGS = [
     'request data read error',
@@ -464,7 +463,7 @@ def download_spss_labels(request, username, form_id_string):
     except:
         return HttpResponseServerError('Error retrieving XLSForm.')
 
-    survey= survey_from.xls(filelike_obj=xlsform_io)
+    survey= Survey.from_xls(filelike_obj=xlsform_io)
     zip_filename= '{}_spss_labels.zip'.format(xform.id_string)
     zip_io= survey_to_spss_label_zip(survey, xform.id_string)
 
@@ -764,90 +763,38 @@ def ziggy_submissions(request, username):
 
 @user_passes_test(lambda u: u.is_superuser)
 def superuser_stats(request, username):
-    REPORTS = {
-        'instances.csv': {
-            'model': Instance,
-            'date_field': 'date_created'
-        },
-        'xforms.csv': {
-            'model': XForm,
-            'date_field': 'date_created'
-        },
-        'users.csv': {
-            'model': User,
-            'date_field': 'date_joined'
-        }
-    }
-
-    def first_day_of_next_month(any_day):
-        return datetime_module.date(
-            year=any_day.year if any_day.month < 12 else any_day.year + 1,
-            month=any_day.month + 1 if any_day.month < 12 else 1,
-            day=1
-        )
-
-    def first_day_of_previous_month(any_day):
-        return datetime_module.date(
-            year=any_day.year if any_day.month > 1 else any_day.year - 1,
-            month=any_day.month - 1 if any_day.month > 1 else 12,
-            day=1
+    base_filename = '{}_{}_{}.zip'.format(
+        re.sub('[^a-zA-Z0-9]', '-', request.META['HTTP_HOST']),
+        datetime_module.date.today(),
+        datetime_module.datetime.now().microsecond
+    )
+    filename = os.path.join(
+        request.user.username,
+        'superuser_stats',
+        base_filename
     )
+    generate_stats_zip.delay(filename)
+    template_ish = (
+        '<html><head><title>Hello, superuser.</title></head>'
+        '<body>Your report is being generated. Once finished, it will be '
+        'available at <a href="{0}">{0}</a>. If you receive a 404, please '
+        'refresh your browser periodically until your request succeeds.'
+        '</body></html>'
+    ).format(base_filename)
+    return HttpResponse(template_ish)
 
-    def list_created_by_month(model, date_field):
-        today = datetime_module.date.today()
-        # Just start at January 1 of the previous year. Going back to the
-        # oldest object would be great, but it's too slow right now. Django
-        # 1.10 will provide a more efficient way:
-        # https://docs.djangoproject.com/en/1.10/ref/models/database-functions/#trunc
-        first_date = datetime_module.date(
-            year=today.year - 1,
-            month=1,
-            day=1
-        )
-        last_object = model.objects.last()
-        last_date = first_day_of_next_month(getattr(last_object, date_field))
-        year_month_count = []
-        while last_date > first_date:
-            this_start_date = first_day_of_previous_month(last_date)
-            this_end_date = last_date
-            criteria = {
-                '{}__gte'.format(date_field): this_start_date,
-                '{}__lt'.format(date_field): this_end_date
-            }
-            objects_this_month = model.objects.filter(**criteria).count()
-            year_month_count.append((
-                this_start_date.year,
-                this_start_date.month,
-                objects_this_month
-            ))
-            last_date = this_start_date
-        return year_month_count
-
-    response = HttpResponse(content_type='application/zip')
-    response['Content-Disposition'] = 'attachment;filename="{}_{}.zip"'.format(
-        re.sub('[^a-zA-Z0-9]', '-', request.META['HTTP_HOST']),
-        datetime_module.date.today()
+
+@user_passes_test(lambda u: u.is_superuser)
+def retrieve_superuser_stats(request, username, base_filename):
+    filename = os.path.join(
+        request.user.username,
+        'superuser_stats',
+        base_filename
     )
-    zip_file = zipfile.ZipFile(response, 'w', zipfile.ZIP_DEFLATED)
-
-    for filename, report_settings in REPORTS.iteritems():
-        model_name_plural = report_settings['model']._meta.verbose_name_plural
-        fieldnames = [
-            'Year',
-            'Month',
-            'New {}'.format(model_name_plural.capitalize()),
-            'NOTE: Records created prior to January 1 of last '
-            'year are NOT included in this report!'
-        ]
-        data = list_created_by_month(
-            report_settings['model'], report_settings['date_field'])
-        csv_io = BytesIO()
-        writer = csv.DictWriter(csv_io, fieldnames=fieldnames)
-        writer.writeheader()
-        for row in data:
-            writer.writerow(dict(zip(fieldnames, row)))
-        zip_file.writestr(filename, csv_io.getvalue())
-        csv_io.close()
-
-    zip_file.close()
-    return response
+    default_storage = get_storage_class()()
+    if not default_storage.exists(filename):
+        raise Http404
+    with default_storage.open(filename) as f:
+        response = StreamingHttpResponse(f, content_type='application/zip')
+        response['Content-Disposition'] = 'attachment;filename="{}"'.format(
+            base_filename)
+        return response
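Report generation is now asynchronous: superuser_stats enqueues the Celery task and immediately returns a page naming the ZIP, and retrieve_superuser_stats serves that file from default storage once it exists (404 until then). A hypothetical client-side polling loop (host and filename are examples):

    import time

    import requests

    session = requests.Session()  # assumed to already carry a superuser login
    base = 'https://kc.example.org/someuser/superuser_stats'

    session.get(base + '/')  # enqueues generate_stats_zip via .delay()

    base_filename = 'kc-example-org_2017-01-01_123456.zip'  # named in the response
    while True:
        response = session.get('{}/{}'.format(base, base_filename))
        if response.status_code != 404:
            break
        time.sleep(30)  # the ZIP has not been written to storage yet

    with open(base_filename, 'wb') as f:
        f.write(response.content)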
diff --git a/onadata/apps/logger/xform_instance_parser.py b/onadata/apps/logger/xform_instance_parser.py
index 004ed53d8..937660b86 100644
--- a/onadata/apps/logger/xform_instance_parser.py
+++ b/onadata/apps/logger/xform_instance_parser.py
@@ -1,4 +1,5 @@
 import re
+import logging
 import dateutil.parser
 from xml.dom import minidom, Node
 from django.utils.encoding import smart_unicode, smart_str
@@ -272,7 +273,12 @@ class XFormInstanceParser(object):
 
     def __init__(self, xml_str, data_dictionary):
         self.dd = data_dictionary
-        self.parse(xml_str)
+        try:
+            self.parse(xml_str)
+        except Exception as err:
+            logger = logging.getLogger("console_logger")
+            logger.error(
+                "Failed to parse instance '%s'" % xml_str, exc_info=True)
 
     def parse(self, xml_str):
         self._xml_obj = clean_and_parse_xml(xml_str)
@@ -315,7 +321,6 @@ def _set_attributes(self):
         try:
             assert key not in self._attributes
         except AssertionError:
-            import logging
             logger = logging.getLogger("console_logger")
             logger.debug("Skipping duplicate attribute: %s"
                          " with value %s" % (key, value))
diff --git a/onadata/apps/main/urls.py b/onadata/apps/main/urls.py
index 8b291819c..0a56e5574 100644
--- a/onadata/apps/main/urls.py
+++ b/onadata/apps/main/urls.py
@@ -278,7 +278,11 @@
     # Statistics for superusers. The username is irrelevant, but leave it as
     # the first part of the path to avoid collisions
     url(r"^(?P<username>[^/]+)/superuser_stats/$",
-        'onadata.apps.logger.views.superuser_stats'))
+        'onadata.apps.logger.views.superuser_stats'),
+    url(r"^(?P<username>[^/]+)/superuser_stats/(?P<base_filename>[^/]+)$",
+        'onadata.apps.logger.views.retrieve_superuser_stats'),
+
+)
 
 urlpatterns += patterns('django.contrib.staticfiles.views',
                         url(r'^static/(?P<path>.*)$', 'serve'))
diff --git a/onadata/apps/restservice/services/generic_json.py b/onadata/apps/restservice/services/generic_json.py
index 26199de8b..f31ce553c 100644
--- a/onadata/apps/restservice/services/generic_json.py
+++ b/onadata/apps/restservice/services/generic_json.py
@@ -1,6 +1,7 @@
-import httplib2
 import json
 
+import requests
+
 from onadata.apps.restservice.RestServiceInterface import RestServiceInterface
 
 
@@ -11,7 +12,4 @@ class ServiceDefinition(RestServiceInterface):
     def send(self, url, parsed_instance):
         post_data = json.dumps(parsed_instance.to_dict_for_mongo())
         headers = {"Content-Type": "application/json"}
-        http = httplib2.Http()
-        resp, content = http.request(uri=url, method='POST',
-                                     headers=headers,
-                                     body=post_data)
+        requests.post(url, headers=headers, data=post_data)
diff --git a/onadata/apps/survey_report/views.py b/onadata/apps/survey_report/views.py
index 2c4ae0cc8..f9b8be8dc 100644
--- a/onadata/apps/survey_report/views.py
+++ b/onadata/apps/survey_report/views.py
@@ -55,14 +55,16 @@ def build_formpack(username, id_string):
 
 
 def build_export_context(request, username, id_string):
-    hierarchy_in_labels = request.REQUEST.get('hierarchy_in_labels', None)
+    hierarchy_in_labels = request.REQUEST.get(
+        'hierarchy_in_labels', ''
+    ).lower() == 'true'
     group_sep = request.REQUEST.get('group_sep', '/')
 
     user, xform, formpack = build_formpack(username, id_string)
-    
+
     translations = formpack.available_translations
     lang = request.REQUEST.get('lang', None) or next(iter(translations), None)
-    
+
     options = {'versions': 'v1',
                'group_sep': group_sep,
                'lang': lang,
@@ -70,14 +72,14 @@ def build_export_context(request, username, id_string):
                'copy_fields': ('_id', '_uuid', '_submission_time'),
                'force_index': True
                }
-    
+
     return {
         'username': username,
         'id_string': id_string,
        'languages': translations,
         'headers_lang': lang,
         'formpack': formpack,
-        'xform': xform, 
+        'xform': xform,
         'group_sep': group_sep,
         'lang': lang,
         'hierarchy_in_labels': hierarchy_in_labels,
@@ -106,15 +108,11 @@ def export_menu(request, username, id_string):
     req = request.REQUEST
     export_type = req.get('type', None)
     if export_type:
-        lang = req.get('lang', None)
-        hierarchy_in_labels = req.get('hierarchy_in_labels')
-        group_sep = req.get('group_sep', '/')
-
         q = QueryDict('', mutable=True)
         q['lang'] = req.get('lang')
         q['hierarchy_in_labels'] = req.get('hierarchy_in_labels')
         q['group_sep'] = req.get('group_sep', '/')
-        
+
         if export_type == "xlsx":
             url = reverse('formpack_xlsx_export', args=(username, id_string))
             return redirect(url + '?' + q.urlencode())
@@ -130,7 +128,7 @@ def export_menu(request, username, id_string):
 
 def autoreport_menu(request, username, id_string):
     user, xform, form_pack = build_formpack(username, id_string)
-    
+
     # exclude fields in repeat group
     split_by_fields = form_pack.get_fields_for_versions(data_types="select_one")
@@ -196,7 +194,7 @@ def html_export(request, username, id_string):
         data = [("v1", page.object_list)]
 
     context = build_export_context(request, username, id_string)
-    
+
     context.update({
         'page': page,
         'table': [],
@@ -207,7 +205,7 @@ def html_export(request, username, id_string):
     sections = list(export.labels.items())
     section, labels = sections[0]
     id_index = labels.index('_id')
-    
+
     # generator dublicating the "_id" to allow to make a link to each
     # submission
     def make_table(submissions):
@@ -228,9 +226,9 @@ def auto_report(request, username, id_string):
     user, xform, formpack = build_formpack(username, id_string)
 
     report = formpack.autoreport()
-    
+
     limit = int(request.REQUEST.get('limit', 20))
-    split_by = request.REQUEST.get('split_by') or None 
+    split_by = request.REQUEST.get('split_by') or None
     fields = [field.name for field in formpack.get_fields_for_versions()]
     paginator = Paginator(fields, limit, request=request)
 
@@ -265,12 +263,12 @@ def auto_report(request, username, id_string):
         ctx['stats'] = report.get_stats(data, page.object_list, lang, split_by)
 
     if split_by:
-        
+
         return render(request, 'survey_report/auto_report_split_by.html',
                       ctx)
 
     return render(request, 'survey_report/auto_report.html', ctx)
-    
-    
+
+
 @readable_xform_required
 def view_one_submission(request, username, id_string, submission):
@@ -279,7 +277,7 @@ def view_one_submission(request, username, id_string, submission):
     instances = list(instances)
     if not instances:
         raise Http404('Unable to find this submission')
-    
+
     context = {
         'title': id_string
     }
diff --git a/onadata/apps/viewer/models/parsed_instance.py b/onadata/apps/viewer/models/parsed_instance.py
index 89e882460..85c6bea8f 100644
--- a/onadata/apps/viewer/models/parsed_instance.py
+++ b/onadata/apps/viewer/models/parsed_instance.py
@@ -1,6 +1,7 @@
 import base64
 import datetime
 import json
+import logging
 import re
 
 from bson import json_util, ObjectId
@@ -17,7 +18,6 @@
 from onadata.libs.utils.common_tags import ID, UUID, ATTACHMENTS, GEOLOCATION,\
     SUBMISSION_TIME, MONGO_STRFTIME, BAMBOO_DATASET_ID, DELETEDAT, TAGS,\
     NOTES, SUBMITTED_BY
-
 from onadata.libs.utils.decorators import apply_form_field_names
 from onadata.libs.utils.model_tools import queryset_iterator
 
@@ -90,7 +90,7 @@ def update_mongo_instance(record):
     try:
         return xform_instances.save(record)
     except Exception:
-        # todo: mail admins about the exception
+        logging.getLogger().warning('Submission could not be saved to Mongo.', exc_info=True)
         pass
@@ -226,6 +226,9 @@ def query_mongo_minimal(
         if start < 0 or limit < 0:
             raise ValueError(_("Invalid start/limit params"))
 
+        if limit > cls.DEFAULT_LIMIT:
+            limit = cls.DEFAULT_LIMIT
+
         cursor.skip(start).limit(limit)
 
         if type(sort) == dict and len(sort) == 1:
             sort_key = sort.keys()[0]
diff --git a/onadata/apps/viewer/views.py b/onadata/apps/viewer/views.py
index fd64b150c..dfd9f8309 100644
--- a/onadata/apps/viewer/views.py
+++ b/onadata/apps/viewer/views.py
@@ -1,5 +1,8 @@
 import json
 import os
+import re
+import logging
+
 from datetime import datetime
 from tempfile import NamedTemporaryFile
 from time import strftime, strptime
@@ -45,6 +48,8 @@
 from xls_writer import XlsWriter
 from onadata.libs.utils.chart_tools import build_chart_data
 
+media_file_logger = logging.getLogger('media_files')
+
 
 def _set_submission_time_to_query(query, request):
     query[SUBMISSION_TIME] = {}
@@ -721,18 +726,32 @@ def attachment_url(request, size='medium'):
     media_file = request.GET.get('media_file')
     # TODO: how to make sure we have the right media file,
     # this assumes duplicates are the same file
-    result = Attachment.objects.filter(media_file=media_file)[0:1]
+    if media_file:
+        mtch = re.search('^([^\/]+)/attachments(/[^\/]+)$', media_file)
+        if mtch:
+            # in cases where the media_file url created by instance.html's
+            # _attachment_url function is in the wrong format, this will
+            # match attachments with the correct owner and the same file name
+            (username, filename) = mtch.groups()
+            result = Attachment.objects.filter(**{
+                'instance__xform__user__username': username,
+            }).filter(**{
+                'media_file__endswith': filename,
+            })[0:1]
+        else:
+            # search for media_file with exact matching name
+            result = Attachment.objects.filter(media_file=media_file)[0:1]
     if result.count() == 0:
+        media_file_logger.info('attachment not found')
         return HttpResponseNotFound(_(u'Attachment not found'))
     attachment = result[0]
     if not attachment.mimetype.startswith('image'):
         return redirect(attachment.media_file.url)
+
     try:
         media_url = image_url(attachment, size)
     except:
-        # TODO: log this somewhere
-        # image not found, 404, S3ResponseError timeouts
-        pass
+        media_file_logger.error('could not get thumbnail for image', exc_info=True)
     else:
         if media_url:
             return redirect(media_url)
diff --git a/onadata/libs/serializers/data_serializer.py b/onadata/libs/serializers/data_serializer.py
index 72d6cca88..fb19107ce 100644
--- a/onadata/libs/serializers/data_serializer.py
+++ b/onadata/libs/serializers/data_serializer.py
@@ -30,6 +30,8 @@ def to_representation(self, obj):
             ParsedInstance.USERFORM_ID:
             u'%s_%s' % (obj.user.username, obj.id_string)
         }
+        limit = query_params.get('limit', False)
+        start = query_params.get('start', False)
 
         try:
             query.update(json.loads(query_params.get('query', '{}')))
@@ -42,6 +44,12 @@ def to_representation(self, obj):
             'fields': query_params.get('fields'),
             'sort': query_params.get('sort')
         }
+        if limit:
+            query_kwargs['limit'] = int(limit)
+
+        if start:
+            query_kwargs['start'] = int(start)
+
         cursor = ParsedInstance.query_mongo_minimal(**query_kwargs)
         return list(cursor)
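Together with the new DEFAULT_LIMIT clamp in query_mongo_minimal, these serializer changes let clients page through submissions explicitly instead of fetching everything at once. A hypothetical request against the data API (host, form pk, and token are illustrative):

    import requests

    response = requests.get(
        'https://kc.example.org/api/v1/data/22',  # hypothetical form pk
        params={'start': 200, 'limit': 100},  # forwarded to query_mongo_minimal
        headers={'Authorization': 'Token 0123abcd'})
    submissions = response.json()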
diff --git a/onadata/settings/common.py b/onadata/settings/common.py
index f75d9049f..02eb851d6 100644
--- a/onadata/settings/common.py
+++ b/onadata/settings/common.py
@@ -57,7 +57,7 @@
 
 ugettext = lambda s: s
 
-SITE_ID = 1
+SITE_ID = os.environ.get('DJANGO_SITE_ID', '1')
 
 # If you set this to False, Django will make some optimizations so as not
 # to load the internationalization machinery.
diff --git a/onadata/settings/kc_environ.py b/onadata/settings/kc_environ.py
index 86d94af54..a8a33adf5 100644
--- a/onadata/settings/kc_environ.py
+++ b/onadata/settings/kc_environ.py
@@ -161,8 +161,10 @@
 
     # Set the `server_name` attribute. See https://docs.sentry.io/hosted/clients/python/advanced/
     server_name = os.environ.get('RAVEN_SERVER_NAME')
-    server_name = server_name or os.environ.get('KOBOCAT_PUBLIC_SUBDOMAIN', '') + \
-        os.environ.get('PUBLIC_DOMAIN_NAME', '')
+    server_name = server_name or '.'.join(filter(None, (
+        os.environ.get('KOBOCAT_PUBLIC_SUBDOMAIN', None),
+        os.environ.get('PUBLIC_DOMAIN_NAME', None)
+    )))
     if server_name:
         RAVEN_CONFIG.update({'name': server_name})
 
diff --git a/requirements/base.pip b/requirements/base.pip
index a03f9c072..062b0b251 100644
--- a/requirements/base.pip
+++ b/requirements/base.pip
@@ -1,7 +1,7 @@
 # pybamboo has deps in common but on newer versions.
 # on top so those will be overwritten
 pybamboo==0.5.8.1
-pytz==2014.7
+pytz==2016.10
 Django>=1.8,<1.9
 dj-database-url==0.4.0
 
@@ -21,9 +21,8 @@ lxml==3.4.0
 #-e git+https://github.com/onaio/pyxform.git@onaio#egg=pyxform
 # kobo fork supports csvs with utf, character escaping, etc.
 #-e git+https://github.com/kobotoolbox/pyxform.git#egg=pyxform-dev
-# Use customized upstream master to gain select_one_from_file
+django-reversion==2.0.8
 git+https://github.com/kimetrica/pyxform.git@bdm#egg=pyxform-dev
-django-reversion==1.8.4
 xlrd==0.9.3
 xlwt==0.7.5
 openpyxl==2.0.5
@@ -34,6 +33,7 @@ librabbitmq==1.5.2
 amqp>1.4,<2.0
 django-nose==1.4.2
 python-digest==1.7
+raven==5.32.0
 -e git+https://github.com/dimagi/django-digest@0eb1c921329dd187c343b61acfbec4e98450136e#egg=django_digest
 -e git+https://github.com/onaio/python-json2xlsclient.git@5a39387752d819cb6387f75569dbea9a5288aa6f#egg=python_json2xlsclient
diff --git a/requirements/s3.pip b/requirements/s3.pip
index c5ea489b1..ce22c014b 100644
--- a/requirements/s3.pip
+++ b/requirements/s3.pip
@@ -1,2 +1,2 @@
-boto==2.1.1
-django-storages==1.1.4
+boto==2.46.1
+django-storages==1.5.2