From 62dbed0d4f7ce192de803af0c0348cd3d139d0f0 Mon Sep 17 00:00:00 2001 From: Troy Sankey Date: Fri, 12 Jul 2019 13:20:59 -0400 Subject: [PATCH 01/13] Python 2/3 compatibility for finance tasks This does not change any infrastructure to actually run anything under python 3, it only represents the results of my testing of the finance unit tests and acceptance tests under python 3 and modernizing the code to become both python 2 and 3 compatible. --- .isort.cfg | 1 + .travis.yml | 32 ++++++- Makefile | 44 +++++++-- edx/analytics/tasks/common/bigquery_load.py | 10 ++- edx/analytics/tasks/common/mapreduce.py | 8 +- edx/analytics/tasks/common/mysql_load.py | 2 +- edx/analytics/tasks/common/pathutil.py | 8 +- edx/analytics/tasks/common/sqoop.py | 9 +- .../tasks/common/tests/test_sqoop.py | 6 +- edx/analytics/tasks/common/vertica_load.py | 6 +- .../tasks/export/data_obfuscation.py | 6 +- .../tasks/export/events_obfuscation.py | 4 +- edx/analytics/tasks/export/obfuscation.py | 4 +- edx/analytics/tasks/insights/calendar_task.py | 9 +- .../tasks/insights/database_imports.py | 9 +- .../tasks/insights/tests/test_answer_dist.py | 16 ++-- .../insights/tests/test_course_blocks.py | 18 ++-- .../tasks/insights/tests/test_course_list.py | 5 +- .../tasks/insights/tests/test_enrollments.py | 36 ++++---- .../tasks/insights/tests/test_video.py | 6 +- edx/analytics/tasks/insights/video.py | 7 +- edx/analytics/tasks/launchers/local.py | 14 ++- edx/analytics/tasks/launchers/remote.py | 4 +- .../monitor/tests/test_overall_events.py | 2 +- .../monitor/tests/test_total_events_report.py | 4 +- .../tasks/tests/acceptance/__init__.py | 14 +-- .../tasks/tests/acceptance/services/task.py | 12 +-- .../tests/acceptance/test_database_export.py | 10 ++- .../tests/acceptance/test_database_import.py | 10 +-- .../acceptance/test_enrollment_validation.py | 4 +- .../tests/acceptance/test_enrollments.py | 2 +- edx/analytics/tasks/tools/obfuscate_eval.py | 15 ++-- edx/analytics/tasks/tools/s3util.py | 5 +- edx/analytics/tasks/util/csv_util.py | 10 ++- edx/analytics/tasks/util/eventlog.py | 11 ++- edx/analytics/tasks/util/fast_json.py | 36 ++++++++ edx/analytics/tasks/util/hive.py | 1 + edx/analytics/tasks/util/id_codec.py | 15 +++- edx/analytics/tasks/util/obfuscate_util.py | 2 +- edx/analytics/tasks/util/opaque_key_util.py | 4 +- edx/analytics/tasks/util/overwrite.py | 2 + edx/analytics/tasks/util/record.py | 2 +- edx/analytics/tasks/util/retry.py | 1 + edx/analytics/tasks/util/s3_util.py | 5 +- edx/analytics/tasks/util/tests/config.py | 4 +- .../tasks/util/tests/opaque_key_mixins.py | 6 +- edx/analytics/tasks/util/tests/target.py | 12 ++- edx/analytics/tasks/util/tests/test_hive.py | 1 + .../tasks/util/tests/test_id_codec.py | 18 ++-- .../tasks/util/tests/test_opaque_key_util.py | 11 ++- edx/analytics/tasks/util/tests/test_record.py | 4 +- edx/analytics/tasks/util/tests/test_retry.py | 7 +- .../tasks/util/tests/test_s3_util.py | 6 +- edx/analytics/tasks/util/tests/test_url.py | 1 + edx/analytics/tasks/util/url.py | 18 +++- .../warehouse/financial/affiliate_window.py | 4 +- .../tasks/warehouse/financial/cybersource.py | 3 +- .../financial/ed_services_financial_report.py | 2 + .../tasks/warehouse/financial/fees.py | 3 +- .../warehouse/financial/finance_reports.py | 2 + .../warehouse/financial/orders_import.py | 1 + .../tasks/warehouse/financial/payment.py | 1 + .../tasks/warehouse/financial/paypal.py | 40 +++++---- .../tasks/warehouse/financial/reconcile.py | 57 ++++++++---- .../warehouse/financial/tests/test_paypal.py | 47 
+++++++--- .../financial/tests/test_reconcile.py | 8 +- .../warehouse/lms_courseware_link_clicked.py | 5 +- .../warehouse/tests/test_course_subjects.py | 6 +- requirements/base.in | 3 +- requirements/base.txt | 14 ++- requirements/default.in | 14 +-- requirements/default.txt | 46 +++++----- requirements/docs.txt | 67 +++++++------- requirements/extra.txt | 8 ++ requirements/pip-tools.txt | 2 +- requirements/pip.txt | 2 +- requirements/test.in | 2 +- requirements/test.txt | 73 +++++++-------- share/task.yml | 89 +++++++++++-------- 79 files changed, 648 insertions(+), 370 deletions(-) create mode 100644 edx/analytics/tasks/util/fast_json.py diff --git a/.isort.cfg b/.isort.cfg index 241a19da9e..bdac169f2c 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -1,3 +1,4 @@ [settings] line_length=120 multi_line_output=5 +known_future_library=future diff --git a/.travis.yml b/.travis.yml index b22537b18f..bc0ad497bd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,6 +18,33 @@ env: - secure: NLqmm18NpV3JRwD4CaugXm5cMWgxjdOA88xRFocmmVrduv0QT9JxBZFGebLYmFQOoKNJ23hz6g3EHe1aWfhLYnr1iUYerrwIriSI1wzuqbXJBRN6gO2n3YW+IfG83OLMZkOIMswT8MEdT3JPWVJL3bsocjHp8bYhRCt1KTCMJjY= - secure: aG8l39jaLFWXB5CEOOAR9mJTT3GnqxCl/oFM/7NvTZCBoSWIPIztpFhSAkRE9xSIiKUKXakZcL5H349NLC28jdlHPVsNAaKKt2YNhB6MjmePihp3RPwZGn8c/SjslwY7DPVUKMdWsI7AVNJBH8ab30OPxKwXFAMOiJJza206CYQ= +# TODO: re-introduce the coverage test. +matrix: + # Mark travis build as finished before jobs under allow_failures complete. + fast_finish: true + + include: + # Standard unit tests. + - name: "Python 2.7 Unit Tests" + env: TEST_SUITE=test-docker + + # Python 3 whitelisted and full unit test jobs. Once python 3 support is + # complete, delete the whitelist job and remove the full job from + # allow_failures. + - name: "Python 3.x Whitelisted Unit Tests" + env: TEST_SUITE=test-docker-py3-whitelist + - name: "Python 3.x FULL Unit Tests" + env: TEST_SUITE=test-docker-py3 + + - name: "Quality Tests" + env: TEST_SUITE=quality-docker + + # Names of jobs (defined above) that cannot fail the travis build even if + # they fail. + allow_failures: + - name: "Python 3.x FULL Unit Tests" + - name: "Quality Tests" # This is here because isort is a hot mess right now. + # Do NOT install Python requirements. # Doing so is a waste of time since they won't be used. install: true @@ -37,10 +64,7 @@ before_install: # Ensure we have a place to store coverage output - mkdir -p coverage -script: - - make test-docker - - make quality-docker - - make coverage-docker +script: make $TEST_SUITE after_success: - pip install --upgrade codecov diff --git a/Makefile b/Makefile index 46488d5bf1..9ac03d1b6d 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ uninstall: pip install -r requirements/pip.txt - while pip uninstall -y edx.analytics.tasks; do true; done + pip uninstall -y edx.analytics.tasks python setup.py clean install: requirements uninstall @@ -28,7 +28,7 @@ docker-shell: system-requirements: ifeq (,$(wildcard /usr/bin/yum)) # This is not great, we can't use these libraries on slave nodes using this method. 
-	sudo apt-get install -y -q libmysqlclient-dev libpq-dev python-dev libffi-dev libssl-dev libxml2-dev libxslt1-dev
+	sudo apt-get install -y -q libmysqlclient-dev libpq-dev python-dev python3-dev libffi-dev libssl-dev libxml2-dev libxslt1-dev
 else
 	sudo yum install -y -q postgresql-devel libffi-devel
 endif
@@ -56,20 +56,48 @@ upgrade: ## update the requirements/*.txt files with the latest packages satisfy
 	CUSTOM_COMPILE_COMMAND="make upgrade" pip-compile --upgrade -o requirements/docs.txt requirements/docs.in
 	CUSTOM_COMPILE_COMMAND="make upgrade" pip-compile --upgrade -o requirements/test.txt requirements/test.in
 
-test-docker-local:
-	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make develop-local test-local
-
+# Entry point for running python 2 unit tests in CI.
 test-docker:
-	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make reset-virtualenv test-requirements develop-local test-local
+	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make system-requirements reset-virtualenv test-requirements develop-local test-local
 
+# Entry point for running python 3 unit tests in CI.
 test-docker-py3:
-	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make reset-virtualenv-py3 test-requirements develop-local test-local
+	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make system-requirements reset-virtualenv-py3 test-requirements develop-local test-local
+
+# Entry point for running python 3 unit tests in CI. Only invokes a subset
+# (whitelist) of unit tests which are known to pass under python 3.
+test-docker-py3-whitelist:
+	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make system-requirements reset-virtualenv-py3 test-requirements develop-local test-local-py3-whitelist
 
 test-local:
 	# TODO: when we have better coverage, modify this to actually fail when coverage is too low.
 	rm -rf .coverage
 	LUIGI_CONFIG_PATH='config/test.cfg' python -m coverage run --rcfile=./.coveragerc -m nose --with-xunit --xunit-file=unittests.xml -A 'not acceptance'
 
+# Special test-local target specifically for running a whitelist of tests which
+# are known to pass under python 3.
+test-local-py3-whitelist:
+	# TODO: when we have better coverage, modify this to actually fail when coverage is too low.
+ rm -rf .coverage + LUIGI_CONFIG_PATH='config/test.cfg' python -m coverage run --rcfile=./.coveragerc -m nose --with-xunit --xunit-file=unittests.xml -A 'not acceptance' \ + edx.analytics.tasks.enterprise.tests \ + edx.analytics.tasks.insights.tests.test_database_imports \ + edx.analytics.tasks.insights.tests.test_grades \ + edx.analytics.tasks.monitor.tests.test_overall_events \ + edx.analytics.tasks.tests \ + edx.analytics.tasks.util.tests.helpers \ + edx.analytics.tasks.util.tests.opaque_key_mixins \ + edx.analytics.tasks.util.tests.test_decorators \ + edx.analytics.tasks.util.tests.test_geolocation \ + edx.analytics.tasks.util.tests.test_hive \ + edx.analytics.tasks.util.tests.test_retry \ + edx.analytics.tasks.util.tests.test_s3_util \ + edx.analytics.tasks.util.tests.test_url \ + edx.analytics.tasks.warehouse.financial.tests \ + edx.analytics.tasks.warehouse.tests.test_internal_reporting_active_users \ + edx.analytics.tasks.warehouse.tests.test_internal_reporting_database \ + edx.analytics.tasks.warehouse.tests.test_run_vertica_sql_scripts + test: test-requirements develop test-local test-acceptance: test-requirements @@ -98,7 +126,7 @@ quality-docker-local: docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make develop-local quality-local quality-docker: - docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make reset-virtualenv test-requirements develop-local quality-local + docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make system-requirements reset-virtualenv test-requirements develop-local quality-local coverage-docker: docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest coverage xml diff --git a/edx/analytics/tasks/common/bigquery_load.py b/edx/analytics/tasks/common/bigquery_load.py index 27a9f4f762..a05de7cc42 100644 --- a/edx/analytics/tasks/common/bigquery_load.py +++ b/edx/analytics/tasks/common/bigquery_load.py @@ -1,10 +1,14 @@ +from __future__ import absolute_import +from future.standard_library import install_aliases +install_aliases() + import json import logging import os import subprocess import tempfile import time -import urlparse +from urllib.parse import urlparse import luigi @@ -216,7 +220,7 @@ def field_delimiter(self): @property def null_marker(self): - return '\N' + return r'\N' @property def quote_character(self): @@ -262,7 +266,7 @@ def init_copy(self, client): self.output().clear_marker_table() def _get_destination_from_source(self, source_path): - parsed_url = urlparse.urlparse(source_path) + parsed_url = urlparse(source_path) destination_path = url_path_join('gs://{}'.format(parsed_url.netloc), parsed_url.path) return destination_path diff --git a/edx/analytics/tasks/common/mapreduce.py b/edx/analytics/tasks/common/mapreduce.py index 071ee8eeea..19682280f1 100644 --- a/edx/analytics/tasks/common/mapreduce.py +++ b/edx/analytics/tasks/common/mapreduce.py @@ -7,7 +7,7 @@ import logging import logging.config import os -import StringIO +from io import StringIO from hashlib import md5 import luigi @@ -183,7 +183,7 @@ class EmulatedMapReduceJobRunner(luigi.contrib.hadoop.JobRunner): """ def group(self, input): - output = StringIO.StringIO() + output = StringIO() lines = [] for i, line in enumerate(input): parts = line.rstrip('\n').split('\t') @@ -197,7 +197,7 @@ def group(self, input): 
def run_job(self, job): job.init_hadoop() job.init_mapper() - map_output = StringIO.StringIO() + map_output = StringIO() input_targets = luigi.task.flatten(job.input_hadoop()) for input_target in input_targets: # if file is a directory, then assume that it's Hadoop output, @@ -232,7 +232,7 @@ def run_job(self, job): try: reduce_output = job.output().open('w') except Exception: - reduce_output = StringIO.StringIO() + reduce_output = StringIO() try: job._run_reducer(reduce_input, reduce_output) diff --git a/edx/analytics/tasks/common/mysql_load.py b/edx/analytics/tasks/common/mysql_load.py index 39fb14440c..f719f5321d 100644 --- a/edx/analytics/tasks/common/mysql_load.py +++ b/edx/analytics/tasks/common/mysql_load.py @@ -422,7 +422,7 @@ def coerce_for_mysql_connect(input): return input # Hive indicates a null value with the string "\N" # We represent an infinite value with the string "inf", MySQL has no such representation so we use NULL - if input in ('None', '\\N', 'inf', '-inf'): + if input in ('None', r'\N', 'inf', '-inf'): return None if isinstance(input, str): return input.decode('utf-8') diff --git a/edx/analytics/tasks/common/pathutil.py b/edx/analytics/tasks/common/pathutil.py index ee6cacd6eb..a87585c63b 100644 --- a/edx/analytics/tasks/common/pathutil.py +++ b/edx/analytics/tasks/common/pathutil.py @@ -268,12 +268,12 @@ def get_event_and_date_string(self, line): """Default mapper implementation, that always outputs the log line, but with a configurable key.""" event = eventlog.parse_json_event(line) if event is None: - self.incr_counter('Event', 'Discard Unparseable Event', 1) + self.incr_counter(u'Event', u'Discard Unparseable Event', 1) return None event_time = self.get_event_time(event) if not event_time: - self.incr_counter('Event', 'Discard Missing Time Field', 1) + self.incr_counter(u'Event', u'Discard Missing Time Field', 1) return None # Don't use strptime to parse the date, it is extremely slow @@ -283,7 +283,7 @@ def get_event_and_date_string(self, line): date_string = event_time.split("T")[0] if date_string < self.lower_bound_date_string or date_string >= self.upper_bound_date_string: - # Slow: self.incr_counter('Event', 'Discard Outside Date Interval', 1) + # Slow: self.incr_counter(u'Event', u'Discard Outside Date Interval', 1) return None return event, date_string @@ -307,5 +307,5 @@ def get_map_input_file(self): return os.environ['map_input_file'] except KeyError: log.warn('mapreduce_map_input_file not defined in os.environ, unable to determine input file path') - self.incr_counter('Event', 'Missing map_input_file', 1) + self.incr_counter(u'Event', u'Missing map_input_file', 1) return '' diff --git a/edx/analytics/tasks/common/sqoop.py b/edx/analytics/tasks/common/sqoop.py index 44a854bc40..2091a1cc0e 100644 --- a/edx/analytics/tasks/common/sqoop.py +++ b/edx/analytics/tasks/common/sqoop.py @@ -1,6 +1,8 @@ """ Gather data using Sqoop table dumps run on RDBMS databases. """ +from __future__ import absolute_import + import datetime import json import logging @@ -296,7 +298,12 @@ def run_job(self, job): metadata['end_time'] = datetime.datetime.utcnow().isoformat() try: with job.metadata_output().open('w') as metadata_file: - json.dump(metadata, metadata_file) + # Under python 2, json.dumps() will return ascii-only bytes, so .encode('utf-8') + # is a no-op. Under python 3, json.dumps() will return ascii-only unicode, so + # .encode('utf-8') will return bytes, thus normalizing the output to bytes + # across all python versions. 
+ metadata_file.write(json.dumps(metadata).encode('utf-8')) + metadata_file.flush() except Exception: log.exception("Unable to dump metadata information.") pass diff --git a/edx/analytics/tasks/common/tests/test_sqoop.py b/edx/analytics/tasks/common/tests/test_sqoop.py index 1c8d50128d..27de9daa1d 100644 --- a/edx/analytics/tasks/common/tests/test_sqoop.py +++ b/edx/analytics/tasks/common/tests/test_sqoop.py @@ -197,12 +197,12 @@ def test_connect_with_columns(self): self.assertEquals(arglist[-3], 'column1,column2') def test_connect_with_null_string(self): - self.create_and_run_mysql_task(null_string='\\\\N') + self.create_and_run_mysql_task(null_string=r'\\N') arglist = self.get_call_args_after_run() self.assertEquals(arglist[-6], '--null-string') - self.assertEquals(arglist[-5], '\\\\N') + self.assertEquals(arglist[-5], r'\\N') self.assertEquals(arglist[-4], '--null-non-string') - self.assertEquals(arglist[-3], '\\\\N') + self.assertEquals(arglist[-3], r'\\N') def test_connect_with_fields_terminations(self): self.create_and_run_mysql_task(fields_terminated_by='\x01') diff --git a/edx/analytics/tasks/common/vertica_load.py b/edx/analytics/tasks/common/vertica_load.py index 4626b81b4d..c2086d30b0 100644 --- a/edx/analytics/tasks/common/vertica_load.py +++ b/edx/analytics/tasks/common/vertica_load.py @@ -1,6 +1,7 @@ """ Support for loading data into an HP Vertica database. """ +from __future__ import absolute_import import logging import traceback @@ -12,6 +13,7 @@ from edx.analytics.tasks.util.overwrite import OverwriteOutputMixin from edx.analytics.tasks.util.url import ExternalURL from edx.analytics.tasks.util.vertica_target import CredentialFileVerticaTarget +import six log = logging.getLogger(__name__) @@ -416,7 +418,7 @@ def copy_delimiter(self): @property def copy_null_sequence(self): """The null sequence in the data to be copied. Default is Hive NULL (\\N)""" - return "'\\N'" + return r"'\N'" @property def copy_enclosed_by(self): @@ -437,7 +439,7 @@ def copy_escape_spec(self): def copy_data_table_from_target(self, cursor): """Performs the copy query from the insert source.""" - if isinstance(self.columns[0], basestring): + if isinstance(self.columns[0], six.string_types): column_names = ','.join([name for name in self.columns]) elif len(self.columns[0]) == 2: column_names = ','.join([name for name, _type in self.columns]) diff --git a/edx/analytics/tasks/export/data_obfuscation.py b/edx/analytics/tasks/export/data_obfuscation.py index 6d9261c44a..bfc0414e80 100644 --- a/edx/analytics/tasks/export/data_obfuscation.py +++ b/edx/analytics/tasks/export/data_obfuscation.py @@ -8,11 +8,11 @@ import tempfile import xml.etree.ElementTree -import cjson import luigi import yaml import edx.analytics.tasks.util.opaque_key_util as opaque_key_util +from edx.analytics.tasks.util.fast_json import FastJson from edx.analytics.tasks.common.pathutil import PathSetTask from edx.analytics.tasks.util.file_util import copy_file_to_file, read_config_file from edx.analytics.tasks.util.obfuscate_util import ( @@ -194,7 +194,7 @@ def filter_row(self, row): if state_str == 'NULL': updated_state_dict = {} else: - state_dict = cjson.decode(state_str, all_unicode=True) + state_dict = FastJson.loads(state_str) # Traverse the dictionary, looking for entries that need to be scrubbed. 
updated_state_dict = self.obfuscator.obfuscate_structure(state_dict, u"state", user_info) except Exception: # pylint: disable=broad-except @@ -204,7 +204,7 @@ def filter_row(self, row): if updated_state_dict is not None: # Can't reset values, so update original fields. - updated_state = cjson.encode(updated_state_dict).replace('\\', '\\\\') + updated_state = FastJson.dumps(updated_state_dict).replace('\\', '\\\\') row[4] = updated_state if self.obfuscator.is_logging_enabled(): log.info(u"Obfuscated state for user_id '%s' module_id '%s'", user_id, row[2]) diff --git a/edx/analytics/tasks/export/events_obfuscation.py b/edx/analytics/tasks/export/events_obfuscation.py index 1eb1000ff4..edb0632fa0 100644 --- a/edx/analytics/tasks/export/events_obfuscation.py +++ b/edx/analytics/tasks/export/events_obfuscation.py @@ -6,10 +6,10 @@ import re from collections import defaultdict, namedtuple -import cjson import luigi.date_interval import edx.analytics.tasks.util.opaque_key_util as opaque_key_util +from edx.analytics.tasks.util.fast_json import FastJson from edx.analytics.tasks.common.mapreduce import MapReduceJobTaskMixin, MultiOutputMapReduceJobTask from edx.analytics.tasks.common.pathutil import PathSetTask from edx.analytics.tasks.util import eventlog @@ -328,7 +328,7 @@ def _obfuscate_event(self, event): # Re-encode payload as a json string if it originally was one. # (This test works because we throw away string values that didn't parse as JSON.) if isinstance(event.get('event'), basestring): - event['event'] = cjson.encode(event_data) + event['event'] = FastJson.dumps(event_data) else: event['event'] = event_data diff --git a/edx/analytics/tasks/export/obfuscation.py b/edx/analytics/tasks/export/obfuscation.py index 0a189caffd..2a3a653e94 100644 --- a/edx/analytics/tasks/export/obfuscation.py +++ b/edx/analytics/tasks/export/obfuscation.py @@ -1,11 +1,13 @@ """Tasks to obfuscate course data for RDX.""" +from future.standard_library import install_aliases +install_aliases() import errno import json import logging import os import tarfile -import urlparse +from urllib.parse import urlparse import luigi diff --git a/edx/analytics/tasks/insights/calendar_task.py b/edx/analytics/tasks/insights/calendar_task.py index 0f5ce6fc01..1ac4811562 100644 --- a/edx/analytics/tasks/insights/calendar_task.py +++ b/edx/analytics/tasks/insights/calendar_task.py @@ -1,8 +1,11 @@ -"""A canonical calendar that can be joined with other tables to provide information about dates.""" - +""" +A canonical calendar that can be joined with other tables to provide information about dates. +""" +from __future__ import absolute_import import logging from datetime import timedelta +import six import luigi.configuration @@ -63,7 +66,7 @@ def run(self): (week.sunday() + timedelta(1)).isoformat(), iso_weekday ) - output_file.write('\t'.join([unicode(v).encode('utf8') for v in column_values]) + '\n') + output_file.write(b'\t'.join([six.text_type(v).encode('utf-8') for v in column_values]) + b'\n') class CalendarTableTask(CalendarDownstreamMixin, HiveTableTask): diff --git a/edx/analytics/tasks/insights/database_imports.py b/edx/analytics/tasks/insights/database_imports.py index fd6a7801f3..2896d674c6 100644 --- a/edx/analytics/tasks/insights/database_imports.py +++ b/edx/analytics/tasks/insights/database_imports.py @@ -1,6 +1,8 @@ """ Import data from external RDBMS databases into Hive. 
""" +from __future__ import absolute_import + import datetime import logging import textwrap @@ -91,7 +93,7 @@ def partition(self): def partition_location(self): """Provides location of Hive database table's partition data.""" # The actual folder name where the data is stored is expected to be in the format = - partition_name = '='.join(self.partition.items()[0]) + partition_name = '='.join(list(self.partition.items())[0]) # Make sure that input path ends with a slash, to indicate a directory. # (This is necessary for S3 paths that are output from Hadoop jobs.) return url_path_join(self.table_location, partition_name + '/') @@ -169,9 +171,8 @@ def requires(self): database=self.database, # Hive expects NULL to be represented by the string "\N" in the data. You have to pass in "\\N" to sqoop # since it uses that string directly in the generated Java code, so "\\N" actually looks like "\N" to the - # Java code. In order to get "\\N" onto the command line we have to use another set of escapes to tell the - # python code to pass through the "\" character. - null_string='\\\\N', + # Java code. In order to write "\\N" in python, we use a raw string prefix `r`. + null_string=r'\\N', # It's unclear why, but this setting prevents us from correctly substituting nulls with \N. mysql_delimiters=False, # This is a string that is interpreted as an octal number, so it is equivalent to the character Ctrl-A diff --git a/edx/analytics/tasks/insights/tests/test_answer_dist.py b/edx/analytics/tasks/insights/tests/test_answer_dist.py index b3691e233b..56088622a1 100644 --- a/edx/analytics/tasks/insights/tests/test_answer_dist.py +++ b/edx/analytics/tasks/insights/tests/test_answer_dist.py @@ -2,12 +2,13 @@ Tests for tasks that calculate answer distributions. """ +from __future__ import absolute_import import hashlib import json import math import os import shutil -import StringIO +from io import BytesIO import tempfile from unittest import TestCase @@ -20,6 +21,7 @@ ) from edx.analytics.tasks.util.tests.config import OPTION_REMOVED, with_luigi_config from edx.analytics.tasks.util.tests.opaque_key_mixins import InitializeLegacyKeysMixin, InitializeOpaqueKeysMixin +import six class ProblemCheckEventBaseTest(MapperTestMixin, ReducerTestMixin, TestCase): @@ -73,7 +75,7 @@ def _create_event_data_dict(self, **kwargs): @staticmethod def _update_with_kwargs(data_dict, **kwargs): """Updates a dict from kwargs only if it modifies a top-level value.""" - for key, value in kwargs.iteritems(): + for key, value in six.iteritems(kwargs): if key in data_dict: data_dict[key] = value @@ -317,7 +319,7 @@ def insert_answer_data(submission, attempt_category): submission: dictionary of all responses submitted at once for a user attempt_category: a string that is 'first' for a user's first submission and 'last' otherwise """ - for answer_id, submission_data in submission.iteritems(): + for answer_id, submission_data in six.iteritems(submission): answer_id_data = { "answer": submission_data['answer'], "problem_display_name": None, @@ -830,7 +832,7 @@ def _load_metadata(self, **kwargs): } } metadata_dict[self.answer_id].update(**kwargs) - answer_metadata = StringIO.StringIO(json.dumps(metadata_dict)) + answer_metadata = BytesIO(json.dumps(metadata_dict).encode('utf-8')) self.task.load_answer_metadata(answer_metadata) def test_non_submission_choice_with_metadata(self): @@ -944,7 +946,7 @@ def test_reduce_multiple_values(self): # To test sorting, the first sample is made to sort after the # second sample. 
- column_values_2 = [(k, unicode(k) + u'\u2603') for k in field_names] + column_values_2 = [(k, six.text_type(k) + u'\u2603') for k in field_names] column_values_2[3] = (column_values_2[3][0], 10) column_values_1 = list(column_values_2) column_values_1[4] = (column_values_1[4][0], u'ZZZZZZZZZZZ') @@ -958,9 +960,9 @@ def test_reduce_multiple_values(self): self.assertEquals(mock_output_file.write.mock_calls[0], call(expected_header_string)) # Confirm that the second sample appears before the first. - expected_row_1 = ','.join(unicode(v[1]).encode('utf8') for v in column_values_2) + '\r\n' + expected_row_1 = b','.join(six.text_type(v[1]).encode('utf8') for v in column_values_2) + b'\r\n' self.assertEquals(mock_output_file.write.mock_calls[1], call(expected_row_1)) - expected_row_2 = ','.join(unicode(v[1]).encode('utf8') for v in column_values_1) + '\r\n' + expected_row_2 = b','.join(six.text_type(v[1]).encode('utf8') for v in column_values_1) + b'\r\n' self.assertEquals(mock_output_file.write.mock_calls[2], call(expected_row_2)) def test_output_path_for_legacy_key(self): diff --git a/edx/analytics/tasks/insights/tests/test_course_blocks.py b/edx/analytics/tasks/insights/tests/test_course_blocks.py index 191172a573..93f39ad176 100644 --- a/edx/analytics/tasks/insights/tests/test_course_blocks.py +++ b/edx/analytics/tasks/insights/tests/test_course_blocks.py @@ -1,4 +1,6 @@ """Test course blocks tasks.""" +from future.standard_library import install_aliases +install_aliases() import json import logging @@ -6,7 +8,7 @@ import shutil import tempfile from unittest import TestCase -from urllib import urlencode +from urllib.parse import urlencode import httpretty from ddt import data, ddt, unpack @@ -53,7 +55,7 @@ def create_input_file(self, course_ids): """Create a tab-separated file containing the given course_ids.""" with open(self.input_file, 'w') as output: for course_id in course_ids: - output.write("\t".join([course_id, 'Name', 'Org', 'Number', 'http://'] + ['\\N'] * 9)) + output.write("\t".join([course_id, 'Name', 'Org', 'Number', 'http://'] + [r'\N'] * 9)) output.write("\r\n") def cleanup(self, dirname): @@ -174,18 +176,18 @@ class CourseBlocksApiDataReducerTaskTest(CourseBlocksTestMixin, ReducerTestMixin # data tuple fields are given in this order: # (block_id,block_type,display_name,is_root,is_orphan,is_dag,parent_block_id,course_path,sort_idx) @data( - ((('abc', 'course', 'ABC', '1', '0', '0', '\\N', '', '0'),), False), - ((('abc', 'course', 'ABC', '1', '0', '0', '\\N', '', '0'),), True), - ((('abc', 'block', 'ABC', '1', '0', '0', '\\N', '', '0'), + ((('abc', 'course', 'ABC', '1', '0', '0', r'\N', '', '0'),), False), + ((('abc', 'course', 'ABC', '1', '0', '0', r'\N', '', '0'),), True), + ((('abc', 'block', 'ABC', '1', '0', '0', r'\N', '', '0'), ('def', 'block', 'DEF', '0', '0', '0', 'abc', 'ABC', '1'), ('jkl', 'block', 'JKL', '0', '0', '1', 'def', 'ABC / DEF', '2'), ('vwx', 'block', 'VWX', '0', '0', '0', 'jkl', 'ABC / DEF / JKL', '3'), ('mno', 'block', 'MNO', '0', '0', '0', 'def', 'ABC / DEF', '4'), ('pqr', 'block', 'PQR', '0', '0', '0', 'mno', 'ABC / DEF / MNO', '5'), ('stu', 'block', 'STU', '0', '0', '0', 'abc', 'ABC', '6'), - ('ghi', 'block', 'GHI', '0', '1', '0', '\\N', '(Deleted block :)', '8')), False), - ((('ghi', 'block', 'GHI', '0', '1', '0', '\\N', '(Deleted block :)', '-1'), - ('abc', 'block', 'ABC', '1', '0', '0', '\\N', '', '0'), + ('ghi', 'block', 'GHI', '0', '1', '0', r'\N', '(Deleted block :)', '8')), False), + ((('ghi', 'block', 'GHI', '0', '1', '0', r'\N', '(Deleted 
block :)', '-1'), + ('abc', 'block', 'ABC', '1', '0', '0', r'\N', '', '0'), ('def', 'block', 'DEF', '0', '0', '0', 'abc', 'ABC', '1'), ('jkl', 'block', 'JKL', '0', '0', '1', 'def', 'ABC / DEF', '2'), ('vwx', 'block', 'VWX', '0', '0', '0', 'jkl', 'ABC / DEF / JKL', '3'), diff --git a/edx/analytics/tasks/insights/tests/test_course_list.py b/edx/analytics/tasks/insights/tests/test_course_list.py index 549b879463..0f879c97c4 100644 --- a/edx/analytics/tasks/insights/tests/test_course_list.py +++ b/edx/analytics/tasks/insights/tests/test_course_list.py @@ -1,4 +1,7 @@ """Test course list tasks.""" +from future.standard_library import install_aliases +install_aliases() + import json import logging import os @@ -6,7 +9,7 @@ import tempfile from datetime import datetime from unittest import TestCase -from urllib import urlencode +from urllib.parse import urlencode import httpretty from ddt import data, ddt, unpack diff --git a/edx/analytics/tasks/insights/tests/test_enrollments.py b/edx/analytics/tasks/insights/tests/test_enrollments.py index 06e1d893c0..25bea2125c 100644 --- a/edx/analytics/tasks/insights/tests/test_enrollments.py +++ b/edx/analytics/tasks/insights/tests/test_enrollments.py @@ -432,8 +432,8 @@ def test_no_events(self): def test_single_enrollment(self): inputs = [('2013-01-01T00:00:01', ACTIVATED, 'honor'), ] - expected = ((self.course_id, self.user_id, 'honor', '1', 'honor', '2013-01-01 00:00:01.000000', '\\N', '\\N', - '\\N', '2013-01-02 00:00:00.000000'),) + expected = ((self.course_id, self.user_id, 'honor', '1', 'honor', '2013-01-01 00:00:01.000000', r'\N', r'\N', + r'\N', '2013-01-02 00:00:00.000000'),) self._check_output_complete_tuple(inputs, expected) def create_enrollment_task(self, interval='2013-01-01'): @@ -461,7 +461,7 @@ def test_normal_multiple_event_sequence(self): ('2013-01-01T00:00:04', DEACTIVATED, 'honor'), ] expected = ((self.course_id, self.user_id, 'honor', '0', 'honor', '2013-01-01 00:00:01.000000', - '2013-01-01 00:00:04.000000', '\\N', '\\N', '2013-01-02 00:00:00.000000'),) + '2013-01-01 00:00:04.000000', r'\N', r'\N', '2013-01-02 00:00:00.000000'),) self._check_output_complete_tuple(inputs, expected) def test_ignore_conflicting_activate_after_mode_change(self): @@ -471,7 +471,7 @@ def test_ignore_conflicting_activate_after_mode_change(self): ('2013-01-01T00:00:03', ACTIVATED, 'honor'), ] expected = ((self.course_id, self.user_id, 'verified', '1', 'honor', '2013-01-01 00:00:01.000000', - '\\N', '2013-01-01 00:00:02.000000', '\\N', '2013-01-02 00:00:00.000000'),) + r'\N', '2013-01-01 00:00:02.000000', r'\N', '2013-01-02 00:00:00.000000'),) self._check_output_complete_tuple(inputs, expected) def test_ignore_redundant_unenroll_events(self): @@ -482,7 +482,7 @@ def test_ignore_redundant_unenroll_events(self): ('2013-01-01T00:00:04', ACTIVATED, 'honor'), ] expected = ((self.course_id, self.user_id, 'honor', '1', 'honor', '2013-01-01 00:00:01.000000', - '2013-01-01 00:00:02.000000', '\\N', '\\N', '2013-01-02 00:00:00.000000'),) + '2013-01-01 00:00:02.000000', r'\N', r'\N', '2013-01-02 00:00:00.000000'),) self._check_output_complete_tuple(inputs, expected) def test_events_out_of_order(self): @@ -494,7 +494,7 @@ def test_events_out_of_order(self): ('2013-01-01T00:00:02', DEACTIVATED, 'honor'), ] expected = ((self.course_id, self.user_id, 'honor', '0', 'honor', '2013-01-01 00:00:01.000000', - '2013-01-01 00:00:04.000000', '\\N', '\\N', '2013-01-02 00:00:00.000000'),) + '2013-01-01 00:00:04.000000', r'\N', r'\N', '2013-01-02 00:00:00.000000'),) 
self._check_output_complete_tuple(inputs, expected) def test_ignore_redundant_enroll_events(self): @@ -504,8 +504,8 @@ def test_ignore_redundant_enroll_events(self): ('2013-01-01T00:00:03', ACTIVATED, 'honor'), ('2013-01-01T00:00:04', ACTIVATED, 'honor'), ] - expected = ((self.course_id, self.user_id, 'honor', '1', 'honor', '2013-01-01 00:00:01.000000', '\\N', '\\N', - '\\N', '2013-01-02 00:00:00.000000'),) + expected = ((self.course_id, self.user_id, 'honor', '1', 'honor', '2013-01-01 00:00:01.000000', r'\N', r'\N', + r'\N', '2013-01-02 00:00:00.000000'),) self._check_output_complete_tuple(inputs, expected) def test_ignore_mode_change_on_redundant_enroll_events(self): @@ -513,8 +513,8 @@ def test_ignore_mode_change_on_redundant_enroll_events(self): ('2013-01-01T00:00:01', ACTIVATED, 'honor'), ('2013-01-01T00:00:02', ACTIVATED, 'verified'), ] - expected = ((self.course_id, self.user_id, 'honor', '1', 'honor', '2013-01-01 00:00:01.000000', '\\N', '\\N', - '\\N', '2013-01-02 00:00:00.000000'),) + expected = ((self.course_id, self.user_id, 'honor', '1', 'honor', '2013-01-01 00:00:01.000000', r'\N', r'\N', + r'\N', '2013-01-02 00:00:00.000000'),) self._check_output_complete_tuple(inputs, expected) def test_ignore_mode_change_while_deactivated(self): @@ -524,7 +524,7 @@ def test_ignore_mode_change_while_deactivated(self): ('2013-01-01T00:00:03', MODE_CHANGED, 'verified'), ] expected = ((self.course_id, self.user_id, 'honor', '0', 'honor', '2013-01-01 00:00:01.000000', - '2013-01-01 00:00:02.000000', '\\N', '\\N', '2013-01-02 00:00:00.000000'),) + '2013-01-01 00:00:02.000000', r'\N', r'\N', '2013-01-02 00:00:00.000000'),) self._check_output_complete_tuple(inputs, expected) def test_mode_change_via_activation_events(self): @@ -534,7 +534,7 @@ def test_mode_change_via_activation_events(self): ('2013-01-01T00:00:03', ACTIVATED, 'verified'), ] expected = ((self.course_id, self.user_id, 'verified', '1', 'honor', '2013-01-01 00:00:01.000000', - '2013-01-01 00:00:02.000000', '2013-01-01 00:00:03.000000', '\\N', '2013-01-02 00:00:00.000000'),) + '2013-01-01 00:00:02.000000', '2013-01-01 00:00:03.000000', r'\N', '2013-01-02 00:00:00.000000'),) self._check_output_complete_tuple(inputs, expected) def test_ignore_different_mode_on_unenroll_event(self): @@ -543,7 +543,7 @@ def test_ignore_different_mode_on_unenroll_event(self): ('2013-01-01T00:00:02', DEACTIVATED, 'verified'), ] expected = ((self.course_id, self.user_id, 'honor', '0', 'honor', '2013-01-01 00:00:01.000000', - '2013-01-01 00:00:02.000000', '\\N', '\\N', '2013-01-02 00:00:00.000000'),) + '2013-01-01 00:00:02.000000', r'\N', r'\N', '2013-01-02 00:00:00.000000'),) self._check_output_complete_tuple(inputs, expected) def test_normal_explicit_mode_change(self): @@ -552,8 +552,8 @@ def test_normal_explicit_mode_change(self): ('2013-01-01T00:00:01', ACTIVATED, 'honor'), ('2013-01-02T00:00:02', MODE_CHANGED, 'verified') ] - expected = ((self.course_id, self.user_id, 'verified', '1', 'honor', '2013-01-01 00:00:01.000000', '\\N', - '2013-01-02 00:00:02.000000', '\\N', '2013-01-03 00:00:00.000000'),) + expected = ((self.course_id, self.user_id, 'verified', '1', 'honor', '2013-01-01 00:00:01.000000', r'\N', + '2013-01-02 00:00:02.000000', r'\N', '2013-01-03 00:00:00.000000'),) self._check_output_complete_tuple(inputs, expected) def test_multiple_mode_change_events(self): @@ -565,7 +565,7 @@ def test_multiple_mode_change_events(self): ('2013-01-02T00:00:04', MODE_CHANGED, 'audit'), ('2013-01-02T00:00:05', MODE_CHANGED, 'credit') ] - expected = 
((self.course_id, self.user_id, 'credit', '1', 'honor', '2013-01-01 00:00:01.000000', '\\N', + expected = ((self.course_id, self.user_id, 'credit', '1', 'honor', '2013-01-01 00:00:01.000000', r'\N', '2013-01-02 00:00:02.000000', '2013-01-02 00:00:05.000000', '2013-01-03 00:00:00.000000'),) self._check_output_complete_tuple(inputs, expected) self._check_output_complete_tuple(inputs, expected) @@ -579,7 +579,7 @@ def test_capture_first_verified_time(self): ('2013-01-01T00:00:04', ACTIVATED, 'verified') ] expected = ((self.course_id, self.user_id, 'verified', '1', 'honor', '2013-01-01 00:00:01.000000', - '2013-01-01 00:00:03.000000', '2013-01-01 00:00:02.000000', '\\N', '2013-01-02 00:00:00.000000'),) + '2013-01-01 00:00:03.000000', '2013-01-01 00:00:02.000000', r'\N', '2013-01-02 00:00:00.000000'),) self._check_output_complete_tuple(inputs, expected) def test_capture_first_credit_time(self): @@ -591,7 +591,7 @@ def test_capture_first_credit_time(self): ('2013-01-01T00:00:04', ACTIVATED, 'credit') ] expected = ((self.course_id, self.user_id, 'credit', '1', 'honor', '2013-01-01 00:00:01.000000', - '2013-01-01 00:00:03.000000', '\\N', '2013-01-01 00:00:02.000000', '2013-01-02 00:00:00.000000'),) + '2013-01-01 00:00:03.000000', r'\N', '2013-01-01 00:00:02.000000', '2013-01-02 00:00:00.000000'),) self._check_output_complete_tuple(inputs, expected) diff --git a/edx/analytics/tasks/insights/tests/test_video.py b/edx/analytics/tasks/insights/tests/test_video.py index a1e2f4d02b..80f2ff2476 100644 --- a/edx/analytics/tasks/insights/tests/test_video.py +++ b/edx/analytics/tasks/insights/tests/test_video.py @@ -397,8 +397,8 @@ def setUp(self): super(UserVideoViewingTaskReducerTest, self).setUp() self.user_id = 10 self.reduce_key = (self.user_id, self.COURSE_ID, self.VIDEO_MODULE_ID) - patcher = patch('edx.analytics.tasks.insights.video.urllib') - self.mock_urllib = patcher.start() + patcher = patch('edx.analytics.tasks.insights.video.urlopen') + self.mock_urlopen = patcher.start() self.addCleanup(patcher.stop) def test_simple_viewing(self): @@ -634,7 +634,7 @@ def prepare_youtube_api_mock_raw(self, response_string): mock_response = MagicMock(spec=file) mock_response.code = 200 mock_response.read.side_effect = [response_string, ''] - self.mock_urllib.urlopen.return_value = mock_response + self.mock_urlopen.return_value = mock_response def test_pause_after_end_of_video(self): self.prepare_youtube_api_mock('PT1M2S') diff --git a/edx/analytics/tasks/insights/video.py b/edx/analytics/tasks/insights/video.py index 2636caa11b..26b9705178 100644 --- a/edx/analytics/tasks/insights/video.py +++ b/edx/analytics/tasks/insights/video.py @@ -1,11 +1,14 @@ """Tasks for aggregating statistics about video viewing.""" +from future.standard_library import install_aliases +install_aliases() + import datetime import json import logging import math import re import textwrap -import urllib +from urllib.request import urlopen from collections import namedtuple import ciso8601 @@ -475,7 +478,7 @@ def get_video_duration(self, youtube_id): video_url = "https://www.googleapis.com/youtube/v3/videos?id={0}&part=contentDetails&key={1}".format( youtube_id, self.api_key ) - video_file = urllib.urlopen(video_url) + video_file = urlopen(video_url) content = json.load(video_file) items = content.get('items', []) if len(items) > 0: diff --git a/edx/analytics/tasks/launchers/local.py b/edx/analytics/tasks/launchers/local.py index 90b854c69f..a61d179c0a 100644 --- a/edx/analytics/tasks/launchers/local.py +++ 
b/edx/analytics/tasks/launchers/local.py @@ -20,7 +20,6 @@ import certifi import chardet import ciso8601 -import cjson import filechunkio import idna import luigi @@ -34,6 +33,11 @@ import six import stevedore import urllib3 +cjson, ujson = None, None +try: + import ujson +except ImportError: + import cjson import edx.analytics.tasks @@ -93,7 +97,7 @@ def main(): # Tell luigi what dependencies to pass to the Hadoop nodes: # - edx.analytics.tasks is used to load the pipeline code, since we cannot trust all will be loaded automatically. # - boto is used for all direct interactions with s3. - # - cjson is used for all parsing event logs. + # - cjson/ujson is used for all parsing event logs. # - filechunkio is used for multipart uploads of large files to s3. # - opaque_keys is used to interpret serialized course_ids # - opaque_keys extensions: ccx_keys @@ -101,7 +105,11 @@ def main(): # - requests has several dependencies: # - chardet, urllib3, certifi, idna luigi.contrib.hadoop.attach(edx.analytics.tasks) - luigi.contrib.hadoop.attach(boto, cjson, filechunkio, opaque_keys, bson, stevedore, six, ciso8601, chardet, urllib3, certifi, idna, requests, pytz) + if cjson: + luigi.contrib.hadoop.attach(cjson) + if ujson: + luigi.contrib.hadoop.attach(ujson) + luigi.contrib.hadoop.attach(boto, filechunkio, opaque_keys, bson, stevedore, six, ciso8601, chardet, urllib3, certifi, idna, requests, pytz) if configuration.getboolean('ccx', 'enabled', default=False): import ccx_keys diff --git a/edx/analytics/tasks/launchers/remote.py b/edx/analytics/tasks/launchers/remote.py index f392494c7f..e62b90383a 100755 --- a/edx/analytics/tasks/launchers/remote.py +++ b/edx/analytics/tasks/launchers/remote.py @@ -1,5 +1,7 @@ #!/usr/bin/env python """Execute tasks on a remote EMR cluster.""" +from future.standard_library import install_aliases +install_aliases() import argparse import json @@ -8,7 +10,7 @@ import sys import uuid from subprocess import PIPE, Popen -from urlparse import parse_qsl, urlparse +from urllib.parse import urlparse, parse_qsl STATIC_FILES_PATH = os.path.join(sys.prefix, 'share', 'edx.analytics.tasks') EC2_INVENTORY_PATH = os.path.join(STATIC_FILES_PATH, 'ec2.py') diff --git a/edx/analytics/tasks/monitor/tests/test_overall_events.py b/edx/analytics/tasks/monitor/tests/test_overall_events.py index f0fde7bec3..4795c11550 100644 --- a/edx/analytics/tasks/monitor/tests/test_overall_events.py +++ b/edx/analytics/tasks/monitor/tests/test_overall_events.py @@ -2,7 +2,7 @@ import json import sys -from StringIO import StringIO +from io import StringIO from unittest import TestCase from edx.analytics.tasks.common.tests.map_reduce_mixins import MapperTestMixin, ReducerTestMixin diff --git a/edx/analytics/tasks/monitor/tests/test_total_events_report.py b/edx/analytics/tasks/monitor/tests/test_total_events_report.py index 5eb87a49c8..17510fd8ef 100644 --- a/edx/analytics/tasks/monitor/tests/test_total_events_report.py +++ b/edx/analytics/tasks/monitor/tests/test_total_events_report.py @@ -6,7 +6,7 @@ import shutil import tempfile import textwrap -from StringIO import StringIO +from io import BytesIO from unittest import TestCase import pandas @@ -86,7 +86,7 @@ def reformat(string): task.run() data = output_target.buffer.read() - result = pandas.read_csv(StringIO(data), + result = pandas.read_csv(BytesIO(data), na_values=['-'], index_col=False, header=None, diff --git a/edx/analytics/tasks/tests/acceptance/__init__.py b/edx/analytics/tasks/tests/acceptance/__init__.py index c1db23c20d..50b450f19f 100644 --- 
a/edx/analytics/tasks/tests/acceptance/__init__.py +++ b/edx/analytics/tasks/tests/acceptance/__init__.py @@ -1,3 +1,5 @@ +from __future__ import print_function + import csv import hashlib import json @@ -374,19 +376,19 @@ def assert_data_frames_equal(data, expected): assert_frame_equal(data, expected) except AssertionError: pandas.set_option('display.max_columns', None) - print '----- The report generated this data: -----' - print data - print '----- vs expected: -----' - print expected + print('----- The report generated this data: -----') + print(data) + print('----- vs expected: -----') + print(expected) if data.shape != expected.shape: - print "Data shapes differ." + print("Data shapes differ.") else: for index, _series in data.iterrows(): # Try to print a more helpful/localized difference message: try: assert_series_equal(data.iloc[index, :], expected.iloc[index, :]) except AssertionError: - print "First differing row: {index}".format(index=index) + print("First differing row: {index}".format(index=index)) raise @staticmethod diff --git a/edx/analytics/tasks/tests/acceptance/services/task.py b/edx/analytics/tasks/tests/acceptance/services/task.py index 15a1abb774..361b8d93eb 100644 --- a/edx/analytics/tasks/tests/acceptance/services/task.py +++ b/edx/analytics/tasks/tests/acceptance/services/task.py @@ -1,11 +1,13 @@ +from __future__ import absolute_import -import ConfigParser +import six.moves.configparser import logging import os import sys import tempfile from edx.analytics.tasks.tests.acceptance.services import shell +import six log = logging.getLogger(__name__) @@ -31,7 +33,7 @@ def __init__(self, config, task_config_override, identifier): def launch(self, task_args, config_override=None): self.delete_existing_logs() - config_parser = ConfigParser.ConfigParser() + config_parser = six.moves.configparser.ConfigParser() config_parser.read(os.environ['LUIGI_CONFIG_PATH']) self.override_config(config_parser, self.default_config_override) if config_override: @@ -103,18 +105,18 @@ def delete_existing_logs(self): pass def override_config(self, config_parser, overrides): - for section_name, section in overrides.iteritems(): + for section_name, section in six.iteritems(overrides): if not config_parser.has_section(section_name): config_parser.add_section(section_name) - for key, value in section.iteritems(): + for key, value in six.iteritems(section): config_parser.set(section_name, key, value) def write_logs_to_standard_streams(self): if not self.log_path: return - for filename, output_file in self.logs.iteritems(): + for filename, output_file in six.iteritems(self.logs): try: with open(os.path.join(self.log_path, filename), 'r') as src_file: while True: diff --git a/edx/analytics/tasks/tests/acceptance/test_database_export.py b/edx/analytics/tasks/tests/acceptance/test_database_export.py index 489a6cb5fa..f20e389332 100644 --- a/edx/analytics/tasks/tests/acceptance/test_database_export.py +++ b/edx/analytics/tasks/tests/acceptance/test_database_export.py @@ -3,14 +3,17 @@ validate user visible outputs. 
""" +from future.standard_library import install_aliases +install_aliases() import datetime import logging import os import shutil +import stat import tempfile import textwrap -import urlparse +from urllib.parse import urlparse import gnupg @@ -56,7 +59,10 @@ def create_temporary_directories(self): for dir_path in [self.external_files_dir, self.working_dir, self.validation_dir, self.gpg_dir]: os.makedirs(dir_path) - os.chmod(self.gpg_dir, 0700) + os.chmod( + self.gpg_dir, + stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR, # equivalent to "0700" in Unix chmod. + ) # The exporter expects this directory to already exist. os.makedirs(os.path.join(self.working_dir, 'course-data')) diff --git a/edx/analytics/tasks/tests/acceptance/test_database_import.py b/edx/analytics/tasks/tests/acceptance/test_database_import.py index e9197008a0..7639e9eef6 100644 --- a/edx/analytics/tasks/tests/acceptance/test_database_import.py +++ b/edx/analytics/tasks/tests/acceptance/test_database_import.py @@ -50,7 +50,7 @@ def test_import_from_mysql(self): )) def map_null_to_hive_null(row): - return ['\\N' if x == 'NULL' else x for x in row] + return [r'\N' if x == 'NULL' else x for x in row] output_rows = [x.split('\t') for x in hive_output.splitlines() if '\t' in x] output_rows = map(map_null_to_hive_null, output_rows) @@ -59,16 +59,16 @@ def map_null_to_hive_null(row): expected_rows = [ [ - '1', '1', 'edX/Open_DemoX/edx_demo_course', '\\N', 'version-1', 'grading-policy-1', + '1', '1', 'edX/Open_DemoX/edx_demo_course', r'\N', 'version-1', 'grading-policy-1', '0.7', 'C', '2017-01-31 00:05:00', '2017-02-01 00:00:00', '2017-02-01 00:00:00', ], [ - '2', '2', 'edX/Open_DemoX/edx_demo_course', '\\N', 'version-1', 'grading-policy-1', + '2', '2', 'edX/Open_DemoX/edx_demo_course', r'\N', 'version-1', 'grading-policy-1', '0.8', 'B', '2017-01-31 00:05:00', '2017-02-01 00:00:00', '2017-02-01 00:00:00', ], [ - '3', '3', 'edX/Open_DemoX/edx_demo_course', '\\N', 'version-1', 'grading-policy-1', - '0.2', 'Fail', '\\N', '2017-02-01 00:00:00', '2017-02-01 00:00:00', + '3', '3', 'edX/Open_DemoX/edx_demo_course', r'\N', 'version-1', 'grading-policy-1', + '0.2', 'Fail', r'\N', '2017-02-01 00:00:00', '2017-02-01 00:00:00', ], ] diff --git a/edx/analytics/tasks/tests/acceptance/test_enrollment_validation.py b/edx/analytics/tasks/tests/acceptance/test_enrollment_validation.py index 4cdcfceb16..b9d04379a6 100644 --- a/edx/analytics/tasks/tests/acceptance/test_enrollment_validation.py +++ b/edx/analytics/tasks/tests/acceptance/test_enrollment_validation.py @@ -4,7 +4,7 @@ import gzip import json import logging -import StringIO +from io import StringIO from collections import defaultdict from edx.analytics.tasks.tests.acceptance import AcceptanceTestCase, as_list_param @@ -108,7 +108,7 @@ def check_synthetic_events(self, output_dir): histogram = defaultdict(int) # int() returns 0 for output in outputs: # Read S3 file into a buffer, since the S3 file doesn't support seek() and tell(). 
- gzip_output = StringIO.StringIO() + gzip_output = StringIO() with output.open('r') as event_file: gzip_output.write(event_file.read()) gzip_output.seek(0) diff --git a/edx/analytics/tasks/tests/acceptance/test_enrollments.py b/edx/analytics/tasks/tests/acceptance/test_enrollments.py index b053f7299f..dfc6414273 100644 --- a/edx/analytics/tasks/tests/acceptance/test_enrollments.py +++ b/edx/analytics/tasks/tests/acceptance/test_enrollments.py @@ -5,7 +5,7 @@ import datetime import logging import os -from cStringIO import StringIO +from io import StringIO import pandas from ddt import data, ddt diff --git a/edx/analytics/tasks/tools/obfuscate_eval.py b/edx/analytics/tasks/tools/obfuscate_eval.py index 513c3d8c19..d31a3cdb1d 100644 --- a/edx/analytics/tasks/tools/obfuscate_eval.py +++ b/edx/analytics/tasks/tools/obfuscate_eval.py @@ -14,6 +14,7 @@ read from the same directory as the database dump being analyzed. """ +from __future__ import print_function import argparse import errno @@ -24,11 +25,11 @@ import os import sys from collections import defaultdict, namedtuple -from cStringIO import StringIO +from io import StringIO -import cjson from pyinstrument import Profiler +from edx.analytics.tasks.util.fast_json import FastJson from edx.analytics.tasks.common.pathutil import PathSetTask from edx.analytics.tasks.util import eventlog from edx.analytics.tasks.util.obfuscate_util import Obfuscator, backslash_decode_value, backslash_encode_value @@ -378,12 +379,12 @@ def obfuscate_event_entry(self, line): log.info(u"Obfuscated %s event with event_type = '%s'", event_source, event_type) if event_json_decoded: - # TODO: should really use cjson, if that were originally used for decoding the json. + # TODO: should really use FastJson, if that were originally used for decoding the json. updated_event_data = json.dumps(updated_event_data) event['event'] = updated_event_data - # TODO: should really use cjson, if that were originally used for decoding the json. + # TODO: should really use FastJson, if that were originally used for decoding the json. return json.dumps(event) def obfuscate_courseware_file(self, input_filepath, output_dir): @@ -440,7 +441,7 @@ def obfuscate_courseware_entry(self, line, user_profile): # is not escaped in the same way. In particular, we will not decode and encode it. state_str = record.state.replace('\\\\', '\\') try: - state_dict = cjson.decode(state_str, all_unicode=True) + state_dict = FastJson.loads(state_str) except Exception as exc: log.exception(u"Unable to parse state as JSON for record %s: type = %s, state = %r", record.id, type(state_str), state_str) return line @@ -539,7 +540,7 @@ def obfuscate_forum_entry(self, line, user_profile): # are also different, as to when \u notation is used for a character as # opposed to a utf8 encoding of the character. 
try: - entry = cjson.decode(line, all_unicode=True) + entry = FastJson.loads(line) except ValueError as exc: log.error("Failed to parse json for line: %r", line) return "" @@ -698,7 +699,7 @@ def main(): finally: if profiler: profiler.stop() - print >>sys.stderr, profiler.output_text(unicode=True, color=True) + print(profiler.output_text(unicode=True, color=True), file=sys.stderr) if __name__ == '__main__': diff --git a/edx/analytics/tasks/tools/s3util.py b/edx/analytics/tasks/tools/s3util.py index 3fc7286818..6452bc0bde 100644 --- a/edx/analytics/tasks/tools/s3util.py +++ b/edx/analytics/tasks/tools/s3util.py @@ -1,4 +1,5 @@ """Command-line utility for using (and testing) s3 utility methods.""" +from __future__ import print_function import argparse import os @@ -14,7 +15,7 @@ def list_s3_files(source_url, patterns): for bucket, root, path in generate_s3_sources(s3_conn, source_url, patterns): source = join_as_s3_url(bucket, root, path) src_key = get_s3_key(s3_conn, source) - print "%10d %s" % (src_key.size if src_key is not None else -1, path) + print("%10d %s" % (src_key.size if src_key is not None else -1, path)) def get_s3_files(source_url, dest_root, patterns): @@ -28,7 +29,7 @@ def get_s3_files(source_url, dest_root, patterns): if src_key is not None: src_key.get_contents_to_filename(destination) else: - print "No key for source " + source + print("No key for source " + source) def main(): diff --git a/edx/analytics/tasks/util/csv_util.py b/edx/analytics/tasks/util/csv_util.py index 408082b0eb..539106eb84 100644 --- a/edx/analytics/tasks/util/csv_util.py +++ b/edx/analytics/tasks/util/csv_util.py @@ -1,9 +1,11 @@ """ Simple CSV utilities. """ +from __future__ import absolute_import import csv -from StringIO import StringIO +from io import BytesIO +import six class MySQLDumpDialect(csv.Dialect): @@ -47,21 +49,21 @@ class MySQLExportDialect(MySQLPipeDialect): 'mysqlexport': MySQLExportDialect } -for dialect_name, dialect_class in DIALECTS.iteritems(): +for dialect_name, dialect_class in six.iteritems(DIALECTS): csv.register_dialect(dialect_name, dialect_class) def parse_line(line, dialect='excel'): """Parse one line of CSV in the dialect specified.""" # csv.reader requires an iterable per row, so we wrap the line in a list - parsed = csv.reader([line], dialect=dialect).next() + parsed = next(csv.reader([line], dialect=dialect)) return parsed def to_csv_line(row, dialect='excel'): """Return a CSV line by joining the values in row in the dialect specified.""" - output = StringIO() + output = BytesIO() csv.writer(output, dialect=dialect).writerow(row) output.seek(0) diff --git a/edx/analytics/tasks/util/eventlog.py b/edx/analytics/tasks/util/eventlog.py index 90b24301b8..29b18272b6 100644 --- a/edx/analytics/tasks/util/eventlog.py +++ b/edx/analytics/tasks/util/eventlog.py @@ -4,9 +4,8 @@ import logging import re -import cjson - import edx.analytics.tasks.util.opaque_key_util as opaque_key_util +from edx.analytics.tasks.util.fast_json import FastJson log = logging.getLogger(__name__) @@ -15,14 +14,14 @@ def decode_json(line): """Wrapper to decode JSON string in an implementation-independent way.""" - # TODO: Verify correctness of cjson - return cjson.decode(line) + # TODO: Verify correctness of FastJson + return FastJson.loads(line) def encode_json(obj): """Wrapper to re-encode JSON string in an implementation-independent way.""" - # TODO: Verify correctness of cjson - return cjson.encode(obj) + # TODO: Verify correctness of FastJson + return FastJson.dumps(obj) def parse_json_event(line, 
nested=False): diff --git a/edx/analytics/tasks/util/fast_json.py b/edx/analytics/tasks/util/fast_json.py new file mode 100644 index 0000000000..7f91f4b112 --- /dev/null +++ b/edx/analytics/tasks/util/fast_json.py @@ -0,0 +1,36 @@ +""" +Provide an abstraction layer for fast json implementations across python 2 and 3. +""" +try: + import ujson + ujson_found = True +except ImportError: + import cjson + ujson_found = False + +class FastJson(object): + """ + Abstraction layer on top of cjson (python 2 only) and ujson (python 3 only). + """ + @staticmethod + def dumps(obj): + """ + Dump/encode the Python object into a JSON message. + """ + if ujson_found: + return ujson.dumps(obj) + else: + return cjson.encode(obj) + + @staticmethod + def loads(msg): + """ + Load/decode the JSON message and return a Python object. + + All strings in the decoded object will be unicode strings! This + matches the behavior of python's built-in json library. + """ + if ujson_found: + return ujson.loads(msg) + else: + return cjson.decode(msg, all_unicode=True) diff --git a/edx/analytics/tasks/util/hive.py b/edx/analytics/tasks/util/hive.py index cbe434ac7d..3c15bb6660 100644 --- a/edx/analytics/tasks/util/hive.py +++ b/edx/analytics/tasks/util/hive.py @@ -1,4 +1,5 @@ """Various helper utilities that are commonly used when working with Hive""" +from __future__ import absolute_import import logging import textwrap diff --git a/edx/analytics/tasks/util/id_codec.py b/edx/analytics/tasks/util/id_codec.py index 79ba02233c..fca48da4e6 100644 --- a/edx/analytics/tasks/util/id_codec.py +++ b/edx/analytics/tasks/util/id_codec.py @@ -1,10 +1,13 @@ """Various helper utilities to calculate reversible one-to-one mappings of sensitive ids.""" +from __future__ import absolute_import import base64 import logging import random import luigi +from six.moves import map +from six.moves import range try: import numpy as np @@ -15,13 +18,17 @@ def encode_id(scope, id_type, id_value): - """Encode a scope-type-value tuple into a single ID string.""" - return base64.b32encode('|'.join([scope, id_type, id_value])) + """ + Encode a scope-type-value tuple into a single ID string. + + All inputs must be bytestrings (or `str` in python 2). + """ + return base64.b32encode(b'|'.join([scope, id_type, id_value])) def decode_id(encoded_id): """Decode an ID string back to the original scope-type-value tuple.""" - scope, id_type, id_value = base64.b32decode(encoded_id).split('|') + scope, id_type, id_value = base64.b32decode(encoded_id).split(b'|') return scope, id_type, id_value @@ -49,7 +56,7 @@ def random_permutation_matrix(self, seed, matrix_dim): """Return a random permutation matrix of dimension matrix_dim using seed.""" rng = random.Random(seed) # Decide where each bit goes. - mapping = range(matrix_dim) + mapping = list(range(matrix_dim)) rng.shuffle(mapping) # Then make a matrix that does that. permutation = np.zeros((matrix_dim, matrix_dim), dtype=int) diff --git a/edx/analytics/tasks/util/obfuscate_util.py b/edx/analytics/tasks/util/obfuscate_util.py index 0d6e7f1942..03a369add9 100644 --- a/edx/analytics/tasks/util/obfuscate_util.py +++ b/edx/analytics/tasks/util/obfuscate_util.py @@ -434,7 +434,7 @@ def find_name_context(text, log_context=DEFAULT_LOG_CONTEXT): # Find phone numbers. 
PHONE_CONTEXT = re.compile( - r'(\bphone:|\bp:|b\c:|\bcall me\b|\(home\)|\(cell\)|my phone|phone number)', + r'(\bphone:|\bp:|\bc:|\bcall me\b|\(home\)|\(cell\)|my phone|phone number)', re.IGNORECASE, ) diff --git a/edx/analytics/tasks/util/opaque_key_util.py b/edx/analytics/tasks/util/opaque_key_util.py index 7904384017..fccc45ef1d 100644 --- a/edx/analytics/tasks/util/opaque_key_util.py +++ b/edx/analytics/tasks/util/opaque_key_util.py @@ -1,4 +1,5 @@ """Utility functions that wrap opaque_keys in useful ways.""" +from __future__ import absolute_import import logging import re @@ -6,6 +7,7 @@ from opaque_keys import InvalidKeyError from opaque_keys.edx.keys import CourseKey from opaque_keys.edx.locator import CourseLocator +import six log = logging.getLogger(__name__) @@ -84,7 +86,7 @@ def get_filename_safe_course_id(course_id, replacement_char='_'): # The safest characters are A-Z, a-z, 0-9, , and . # We represent the first four with \w. # TODO: Once we support courses with unicode characters, we will need to revisit this. - return re.sub(r'[^\w\.\-]', unicode(replacement_char), filename) + return re.sub(r'[^\w\.\-]', six.text_type(replacement_char), filename) def get_course_key_from_url(url): diff --git a/edx/analytics/tasks/util/overwrite.py b/edx/analytics/tasks/util/overwrite.py index 68a264a9a1..c336b974fb 100644 --- a/edx/analytics/tasks/util/overwrite.py +++ b/edx/analytics/tasks/util/overwrite.py @@ -1,6 +1,8 @@ """ Provide support for overwriting existing output files. """ +from __future__ import absolute_import + import logging import luigi diff --git a/edx/analytics/tasks/util/record.py b/edx/analytics/tasks/util/record.py index 7250a23864..e16b5d92d7 100644 --- a/edx/analytics/tasks/util/record.py +++ b/edx/analytics/tasks/util/record.py @@ -18,7 +18,7 @@ bigquery_available = False # pylint: disable=invalid-name -DEFAULT_NULL_VALUE = '\\N' # This is the default string used by Hive to represent a NULL value. +DEFAULT_NULL_VALUE = r'\N' # This is the default string used by Hive to represent a NULL value. log = logging.getLogger(__name__) diff --git a/edx/analytics/tasks/util/retry.py b/edx/analytics/tasks/util/retry.py index d66c8c16a4..62c21e8a0d 100644 --- a/edx/analytics/tasks/util/retry.py +++ b/edx/analytics/tasks/util/retry.py @@ -1,4 +1,5 @@ """Utility decorator for retrying functions that fail.""" +from __future__ import absolute_import import logging import time diff --git a/edx/analytics/tasks/util/s3_util.py b/edx/analytics/tasks/util/s3_util.py index 280820a269..7814b0ca35 100644 --- a/edx/analytics/tasks/util/s3_util.py +++ b/edx/analytics/tasks/util/s3_util.py @@ -1,11 +1,14 @@ """ Utility methods for interacting with S3 via boto. 
""" +from future.standard_library import install_aliases +install_aliases() + import logging import os import time from fnmatch import fnmatch -from urlparse import urlparse, urlunparse +from urllib.parse import urlparse, urlunparse from luigi.contrib.hdfs.format import Plain from luigi.contrib.hdfs.target import HdfsTarget diff --git a/edx/analytics/tasks/util/tests/config.py b/edx/analytics/tasks/util/tests/config.py index c7eb87d2c1..6f1885d6fa 100644 --- a/edx/analytics/tasks/util/tests/config.py +++ b/edx/analytics/tasks/util/tests/config.py @@ -1,8 +1,10 @@ """Support modifying luigi configuration settings in tests.""" +from __future__ import absolute_import from functools import wraps from luigi.configuration import LuigiConfigParser +import six def with_luigi_config(*decorator_args): @@ -65,7 +67,7 @@ def modify_config(section, option, value): new_instance.set(section, option, str(value)) # Support the single override case: @with_luigi_config('section', 'option', 'value') - if isinstance(decorator_args[0], basestring): + if isinstance(decorator_args[0], six.string_types): section, option, value = decorator_args modify_config(section, option, value) else: diff --git a/edx/analytics/tasks/util/tests/opaque_key_mixins.py b/edx/analytics/tasks/util/tests/opaque_key_mixins.py index be874e34d7..be70dafe8b 100644 --- a/edx/analytics/tasks/util/tests/opaque_key_mixins.py +++ b/edx/analytics/tasks/util/tests/opaque_key_mixins.py @@ -1,6 +1,8 @@ """Mixin classes for providing opaque or legacy key values.""" +from __future__ import absolute_import from opaque_keys.edx.locator import CourseLocator +import six class InitializeOpaqueKeysMixin(object): @@ -9,13 +11,13 @@ class InitializeOpaqueKeysMixin(object): def initialize_ids(self): """Define set of id values for use in tests.""" course_key = CourseLocator(org=u'FooX\u00e9', course='1.23x', run='2013_Spring') - self.course_id = unicode(course_key) + self.course_id = six.text_type(course_key) self.encoded_course_id = self.course_id.encode('utf8') self.org_id = course_key.org self.encoded_org_id = self.org_id.encode('utf8') block_id = "9cee77a606ea4c1aa5440e0ea5d0f618" - self.problem_id = unicode(course_key.make_usage_key("problem", block_id)) + self.problem_id = six.text_type(course_key.make_usage_key("problem", block_id)) self.encoded_problem_id = self.problem_id.encode('utf8') self.answer_id = "{block_id}_2_1".format(block_id=block_id) self.second_answer_id = "{block_id}_3_1".format(block_id=block_id) diff --git a/edx/analytics/tasks/util/tests/target.py b/edx/analytics/tasks/util/tests/target.py index 5f6dfd4ca6..e237cd7826 100644 --- a/edx/analytics/tasks/util/tests/target.py +++ b/edx/analytics/tasks/util/tests/target.py @@ -1,14 +1,18 @@ """ Emulates a luigi target, storing all data in memory. """ +from __future__ import absolute_import from contextlib import contextmanager -from StringIO import StringIO +from io import BytesIO +import six class FakeTarget(object): - """Fake Luigi-like target that saves data in memory, using a StringIO buffer.""" - def __init__(self, path=None, value=''): + """ + Fake Luigi-like target that saves data in memory, using a BytesIO buffer. 
+ """ + def __init__(self, path=None, value=b''): self.value = value self.path = path @@ -18,7 +22,7 @@ def value(self): @value.setter def value(self, value): - self.buffer = StringIO(value) + self.buffer = BytesIO(value) # Rewind the buffer head so the value can be read self.buffer.seek(0) diff --git a/edx/analytics/tasks/util/tests/test_hive.py b/edx/analytics/tasks/util/tests/test_hive.py index 081f7c861a..0ac67ec2d7 100644 --- a/edx/analytics/tasks/util/tests/test_hive.py +++ b/edx/analytics/tasks/util/tests/test_hive.py @@ -1,4 +1,5 @@ """Tests for some hive related utilities""" +from __future__ import absolute_import from datetime import date from unittest import TestCase diff --git a/edx/analytics/tasks/util/tests/test_id_codec.py b/edx/analytics/tasks/util/tests/test_id_codec.py index 1fa942c5de..9155ffce95 100644 --- a/edx/analytics/tasks/util/tests/test_id_codec.py +++ b/edx/analytics/tasks/util/tests/test_id_codec.py @@ -1,15 +1,17 @@ """ Tests for encoding/decoding id values. """ +from __future__ import absolute_import + from unittest import TestCase from ddt import data, ddt import edx.analytics.tasks.util.id_codec as id_codec -SCOPE = "Arbitrary Scope" -TYPE = "Arbitrary Type" -VALUE = "Arbitrary Value" +SCOPE = b"Arbitrary Scope" +TYPE = b"Arbitrary Type" +VALUE = b"Arbitrary Value" @ddt @@ -17,15 +19,15 @@ class EncodeDecodeIdTest(TestCase): """Test that encoding works in round-trip.""" @data( - '', + b'', + b'test', u'\ufffd'.encode('utf8'), u'\u00e9'.encode('utf8'), - u'test', ) def test_round_trip(self, suffix): - encoded_id = id_codec.encode_id(SCOPE + suffix, TYPE + suffix, VALUE + suffix) - decoded = id_codec.decode_id(encoded_id) - self.assertEquals((SCOPE + suffix, TYPE + suffix, VALUE + suffix), decoded) + input_id = (SCOPE + suffix, TYPE + suffix, VALUE + suffix) + decoded_id = id_codec.decode_id(id_codec.encode_id(*input_id)) + self.assertEquals(input_id, decoded_id) class PermutationGeneratorTest(TestCase): diff --git a/edx/analytics/tasks/util/tests/test_opaque_key_util.py b/edx/analytics/tasks/util/tests/test_opaque_key_util.py index 2bc25fa5f5..862b69c834 100644 --- a/edx/analytics/tasks/util/tests/test_opaque_key_util.py +++ b/edx/analytics/tasks/util/tests/test_opaque_key_util.py @@ -1,6 +1,8 @@ """ Tests for utilities that parse event logs. 
""" +from __future__ import absolute_import + from unittest import TestCase from ccx_keys.locator import CCXLocator @@ -8,14 +10,15 @@ from opaque_keys.edx.locator import CourseLocator import edx.analytics.tasks.util.opaque_key_util as opaque_key_util +import six -VALID_COURSE_ID = unicode(CourseLocator(org='org', course='course_id', run='course_run')) +VALID_COURSE_ID = six.text_type(CourseLocator(org='org', course='course_id', run='course_run')) VALID_LEGACY_COURSE_ID = "org/course_id/course_run" INVALID_LEGACY_COURSE_ID = "org:course_id:course_run" INVALID_NONASCII_LEGACY_COURSE_ID = u"org/course\ufffd_id/course_run" VALID_NONASCII_LEGACY_COURSE_ID = u"org/cours\u00e9_id/course_run" -VALID_CCX_COURSE_ID = unicode(CCXLocator(org='org', course='course_id', run='course_run', ccx='13')) -COURSE_ID_WITH_COLONS = unicode(CourseLocator(org='org', course='course:id', run='course:run')) +VALID_CCX_COURSE_ID = six.text_type(CCXLocator(org='org', course='course_id', run='course_run', ccx='13')) +COURSE_ID_WITH_COLONS = six.text_type(CourseLocator(org='org', course='course:id', run='course:run')) @ddt @@ -97,7 +100,7 @@ def test_get_filename_with_default_separator(self, course_id, expected_filename, def test_get_course_key_from_url(self, course_id): url = u"https://courses.edx.org/courses/{course_id}/stuff".format(course_id=course_id) course_key = opaque_key_util.get_course_key_from_url(url) - self.assertEquals(unicode(course_key), course_id) + self.assertEquals(six.text_type(course_key), course_id) @data( INVALID_LEGACY_COURSE_ID, diff --git a/edx/analytics/tasks/util/tests/test_record.py b/edx/analytics/tasks/util/tests/test_record.py index 1daaff573a..cbf89408d1 100644 --- a/edx/analytics/tasks/util/tests/test_record.py +++ b/edx/analytics/tasks/util/tests/test_record.py @@ -134,7 +134,7 @@ def test_to_string_tuple_nulls(self): test_record = SampleStruct(None, 0, None) self.assertEqual( test_record.to_string_tuple(), - ('\\N', '0', '\\N') + (r'\N', '0', r'\N') ) def test_to_string_tuple_custom_nulls(self): @@ -152,7 +152,7 @@ def test_from_string_tuple(self): self.assertEqual(test_record.date, datetime.date(2015, 11, 1)) def test_from_string_tuple_nulls(self): - string_tuple = ('\\N', '0', '2015-11-01') + string_tuple = (r'\N', '0', '2015-11-01') test_record = SampleStruct.from_string_tuple(string_tuple) self.assertEqual(test_record.name, None) self.assertEqual(test_record.index, 0) diff --git a/edx/analytics/tasks/util/tests/test_retry.py b/edx/analytics/tasks/util/tests/test_retry.py index df28799dbf..198d320718 100644 --- a/edx/analytics/tasks/util/tests/test_retry.py +++ b/edx/analytics/tasks/util/tests/test_retry.py @@ -1,5 +1,8 @@ """Test the retry decorator""" +from __future__ import absolute_import + +import six from datetime import datetime, timedelta from unittest import TestCase @@ -73,7 +76,7 @@ def some_func(): some_func() self.assertEqual(self.func_call_counter, 4) - self.assertItemsEqual(self.mock_sleep.mock_calls, [call(0.5), call(1), call(2)]) + six.assertCountEqual(self, self.mock_sleep.mock_calls, [call(0.5), call(1), call(2)]) def test_different_base_delay(self): @@ -85,7 +88,7 @@ def some_func(): raise Exception('error') some_func() - self.assertItemsEqual(self.mock_sleep.mock_calls, [call(1), call(2), call(4), call(8)]) + six.assertCountEqual(self, self.mock_sleep.mock_calls, [call(1), call(2), call(4), call(8)]) def test_fatal_exception(self): diff --git a/edx/analytics/tasks/util/tests/test_s3_util.py b/edx/analytics/tasks/util/tests/test_s3_util.py index 
68e4c39120..c6504ca656 100644 --- a/edx/analytics/tasks/util/tests/test_s3_util.py +++ b/edx/analytics/tasks/util/tests/test_s3_util.py @@ -1,10 +1,10 @@ """ Tests for S3-related utility functionality. """ -from __future__ import print_function +from __future__ import absolute_import, print_function +import six from unittest import TestCase - from ddt import data, ddt, unpack from mock import MagicMock @@ -33,7 +33,7 @@ def _make_s3_generator(self, bucket_name, root, path_info, patterns): s3_bucket = MagicMock() s3_conn.get_bucket = MagicMock(return_value=s3_bucket) target_list = [self._make_key("{root}/{path}".format(root=root, path=path), size) - for path, size in path_info.iteritems()] + for path, size in six.iteritems(path_info)] s3_bucket.list = MagicMock(return_value=target_list) print([(k.key, k.size) for k in target_list]) diff --git a/edx/analytics/tasks/util/tests/test_url.py b/edx/analytics/tasks/util/tests/test_url.py index e036dcd17b..eca1298e87 100644 --- a/edx/analytics/tasks/util/tests/test_url.py +++ b/edx/analytics/tasks/util/tests/test_url.py @@ -1,4 +1,5 @@ """Tests for URL-related functionality.""" +from __future__ import absolute_import from unittest import TestCase diff --git a/edx/analytics/tasks/util/url.py b/edx/analytics/tasks/util/url.py index 1d70af0391..04ae6e8e3f 100644 --- a/edx/analytics/tasks/util/url.py +++ b/edx/analytics/tasks/util/url.py @@ -9,11 +9,14 @@ hdfs://some/directory/ """ from __future__ import absolute_import +from future.standard_library import install_aliases +install_aliases() import logging import os import time -import urlparse +from urllib.parse import urlparse, urlunparse +import six import luigi import luigi.configuration @@ -124,7 +127,7 @@ def open(self, mode='r'): def get_target_class_from_url(url, marker=False): """Returns a luigi target class based on the url scheme""" - parsed_url = urlparse.urlparse(url) + parsed_url = urlparse(url) if marker: target_class = URL_SCHEME_TO_MARKER_TARGET_CLASS.get(parsed_url.scheme, DEFAULT_MARKER_TARGET_CLASS) @@ -174,6 +177,13 @@ def url_path_join(url, *extra_path): Returns: The URL with the path component joined with `extra_path` argument. """ - (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url) + (scheme, netloc, path, params, query, fragment) = urlparse(url) joined_path = os.path.join(path, *extra_path) - return urlparse.urlunparse((scheme, netloc, joined_path, params, query, fragment)) + return urlunparse(( + six.text_type(scheme), + six.text_type(netloc), + six.text_type(joined_path), + six.text_type(params), + six.text_type(query), + six.text_type(fragment), + )) diff --git a/edx/analytics/tasks/warehouse/financial/affiliate_window.py b/edx/analytics/tasks/warehouse/financial/affiliate_window.py index 1af075c8ae..5bd6526f2a 100644 --- a/edx/analytics/tasks/warehouse/financial/affiliate_window.py +++ b/edx/analytics/tasks/warehouse/financial/affiliate_window.py @@ -1,6 +1,8 @@ """ Tasks to support pulling Affiliate Window reports from their REST API to the data warehouse. 
""" +from __future__ import absolute_import, print_function + import csv import datetime import json @@ -251,7 +253,7 @@ def run(self): json.dumps(row) ] - result = [col if col is not None else '\N' for col in result] + result = [col if col is not None else r'\N' for col in result] writer.writerow(result) def output(self): diff --git a/edx/analytics/tasks/warehouse/financial/cybersource.py b/edx/analytics/tasks/warehouse/financial/cybersource.py index 80d9eb5363..ae625a2acd 100644 --- a/edx/analytics/tasks/warehouse/financial/cybersource.py +++ b/edx/analytics/tasks/warehouse/financial/cybersource.py @@ -1,4 +1,5 @@ """Collect information about payments from third-party sources for financial reporting.""" +from __future__ import absolute_import import csv import datetime @@ -178,7 +179,7 @@ def run(self): row['currency'], row['amount'], # Transaction fee - '\\N', + r'\N', TRANSACTION_TYPE_MAP[row['transaction_type']], # We currently only process credit card transactions with Cybersource 'credit_card', diff --git a/edx/analytics/tasks/warehouse/financial/ed_services_financial_report.py b/edx/analytics/tasks/warehouse/financial/ed_services_financial_report.py index a14086e4fb..2e66ff74df 100644 --- a/edx/analytics/tasks/warehouse/financial/ed_services_financial_report.py +++ b/edx/analytics/tasks/warehouse/financial/ed_services_financial_report.py @@ -1,4 +1,6 @@ """Generates a financial report to be delivered to our good friends in Ed Services.""" +from __future__ import absolute_import + import luigi from edx.analytics.tasks.common.mapreduce import MapReduceJobTaskMixin diff --git a/edx/analytics/tasks/warehouse/financial/fees.py b/edx/analytics/tasks/warehouse/financial/fees.py index ab2530828d..61ea719f1d 100644 --- a/edx/analytics/tasks/warehouse/financial/fees.py +++ b/edx/analytics/tasks/warehouse/financial/fees.py @@ -1,8 +1,9 @@ """ Tasks associated with pulling and storing financial fees related data. 
""" -import logging +from __future__ import absolute_import +import logging import luigi from edx.analytics.tasks.common.vertica_load import VerticaCopyTask diff --git a/edx/analytics/tasks/warehouse/financial/finance_reports.py b/edx/analytics/tasks/warehouse/financial/finance_reports.py index ef8bb5b21e..6af1bb4a59 100644 --- a/edx/analytics/tasks/warehouse/financial/finance_reports.py +++ b/edx/analytics/tasks/warehouse/financial/finance_reports.py @@ -1,4 +1,6 @@ """Provide entry-point for generating finance reports.""" +from __future__ import absolute_import + import luigi from edx.analytics.tasks.common.mapreduce import MapReduceJobTaskMixin diff --git a/edx/analytics/tasks/warehouse/financial/orders_import.py b/edx/analytics/tasks/warehouse/financial/orders_import.py index 2f8ba06a19..c723525d05 100644 --- a/edx/analytics/tasks/warehouse/financial/orders_import.py +++ b/edx/analytics/tasks/warehouse/financial/orders_import.py @@ -1,4 +1,5 @@ """Import Orders: Shopping Cart Tables from the LMS, Orders from Otto.""" +from __future__ import absolute_import import luigi diff --git a/edx/analytics/tasks/warehouse/financial/payment.py b/edx/analytics/tasks/warehouse/financial/payment.py index 5f98c2a436..19f43debce 100644 --- a/edx/analytics/tasks/warehouse/financial/payment.py +++ b/edx/analytics/tasks/warehouse/financial/payment.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import luigi diff --git a/edx/analytics/tasks/warehouse/financial/paypal.py b/edx/analytics/tasks/warehouse/financial/paypal.py index 3d54fdf536..5531f39373 100644 --- a/edx/analytics/tasks/warehouse/financial/paypal.py +++ b/edx/analytics/tasks/warehouse/financial/paypal.py @@ -5,6 +5,7 @@ https://developer.paypal.com/docs/classic/payflow/reporting/ """ +from __future__ import absolute_import import datetime import logging @@ -12,7 +13,7 @@ import time import xml.etree.cElementTree as ET from collections import OrderedDict, namedtuple -from cStringIO import StringIO +from io import BytesIO from decimal import Decimal import luigi @@ -24,6 +25,8 @@ from edx.analytics.tasks.util.hive import WarehouseMixin from edx.analytics.tasks.util.overwrite import OverwriteOutputMixin from edx.analytics.tasks.util.url import ExternalURL, get_target_from_url, url_path_join +import six +from six.moves import range log = logging.getLogger(__name__) @@ -180,7 +183,10 @@ class PaypalMalformedResponseError(PaypalError): def __init__(self, message, root_node=None): with_tree = message if root_node: - with_tree = message + ':' + ET.tostring(root_node, encoding='UTF-8', method='xml') + with_tree = u'{}:{}'.format( + message, + ET.tostring(root_node, encoding='UTF-8', method='xml').decode('utf-8'), + ) super(PaypalMalformedResponseError, self).__init__(with_tree) @@ -216,11 +222,11 @@ def create_request_document(self): self.append_request_node(root_node) # NOTE: we have to use this API to get the XML declaration, it is suboptimal that we have to construct a - # StringIO buffer to write to. + # BytesIO buffer to write to. 
tree = ET.ElementTree(root_node) - string_buffer = StringIO() - tree.write(string_buffer, encoding='UTF-8', xml_declaration=True) - return string_buffer.getvalue() + bytes_buffer = BytesIO() + tree.write(bytes_buffer, encoding='UTF-8', xml_declaration=True) + return bytes_buffer.getvalue() def append_authentication_node(self, root_node): """Inject the authentication elements into the request.""" @@ -228,7 +234,7 @@ def append_authentication_node(self, root_node): for attribute in ('user', 'vendor', 'partner', 'password'): child_node = ET.SubElement(auth_node, attribute) - child_node.text = unicode(getattr(self, attribute)) + child_node.text = six.text_type(getattr(self, attribute)) def append_request_node(self, root_node): """Inject the request-specific elements into the request.""" @@ -339,17 +345,17 @@ def append_request_node(self, root_node): # WARNING: the paypal XML parser is position sensitive. Do NOT change the ordering of the fields in the request. request_node = ET.SubElement(root_node, 'runReportRequest') name_node = ET.SubElement(request_node, 'reportName') - name_node.text = unicode(self.report_name) + name_node.text = six.text_type(self.report_name) - for param_name, param_value in self.report_params.iteritems(): + for param_name, param_value in six.iteritems(self.report_params): param_node = ET.SubElement(request_node, 'reportParam') param_name_node = ET.SubElement(param_node, 'paramName') - param_name_node.text = unicode(param_name) + param_name_node.text = six.text_type(param_name) param_value_node = ET.SubElement(param_node, 'paramValue') - param_value_node.text = unicode(param_value) + param_value_node.text = six.text_type(param_value) page_size_node = ET.SubElement(request_node, 'pageSize') - page_size_node.text = unicode(self.page_size) + page_size_node.text = six.text_type(self.page_size) ColumnMetadata = namedtuple('ColumnMetadata', ('name', 'data_type')) # pylint: disable=invalid-name @@ -411,7 +417,7 @@ def __init__(self, report_id): def append_request_node(self, root_node): request_node = ET.SubElement(root_node, 'getMetaDataRequest') report_id_node = ET.SubElement(request_node, 'reportId') - report_id_node.text = unicode(self.report_id) + report_id_node.text = six.text_type(self.report_id) class PaypalReportDataResponse(PaypalApiResponse): @@ -465,9 +471,9 @@ def __init__(self, report_id, page_num=1): def append_request_node(self, root_node): request_node = ET.SubElement(root_node, 'getDataRequest') report_id_node = ET.SubElement(request_node, 'reportId') - report_id_node.text = unicode(self.report_id) + report_id_node.text = six.text_type(self.report_id) page_num_node = ET.SubElement(request_node, 'pageNum') - page_num_node.text = unicode(self.page_num) + page_num_node.text = six.text_type(self.page_num) class PaypalReportResultsRequest(PaypalApiRequest): @@ -487,7 +493,7 @@ def __init__(self, report_id): def append_request_node(self, root_node): request_node = ET.SubElement(root_node, 'getResultsRequest') report_id_node = ET.SubElement(request_node, 'reportId') - report_id_node.text = unicode(self.report_id) + report_id_node.text = six.text_type(self.report_id) BaseSettlementReportRecord = namedtuple('SettlementReportRecord', [ # pylint: disable=invalid-name @@ -655,7 +661,7 @@ def write_transaction_record(self, row, output_tsv_file): # identifier for the transaction payment_record.paypal_transaction_id, ] - output_tsv_file.write('\t'.join(record) + '\n') + output_tsv_file.write(b'\t'.join(field.encode('utf-8') for field in record) + b'\n') def output(self): # 
NOTE: both the cybersource and paypal tasks write to the payments folder diff --git a/edx/analytics/tasks/warehouse/financial/reconcile.py b/edx/analytics/tasks/warehouse/financial/reconcile.py index a62bf603e9..80a59678d8 100644 --- a/edx/analytics/tasks/warehouse/financial/reconcile.py +++ b/edx/analytics/tasks/warehouse/financial/reconcile.py @@ -1,4 +1,5 @@ """Perform reconciliation of transaction history against order history""" +from __future__ import absolute_import import csv import json @@ -169,7 +170,7 @@ def mapper(self, line): key_index = ORDERITEM_FIELDS.index('payment_ref_id') key = fields[key_index] - # Convert Hive null values ('\\N') in fields like 'product_detail': + # Convert Hive null values ("\N") in fields like 'product_detail': defaults = ( ('product_detail', ''), ('refunded_amount', '0.0'), @@ -184,7 +185,7 @@ def mapper(self, line): ) for field_name, default_value in defaults: index = ORDERITEM_FIELD_INDICES[field_name] - if fields[index] == '\\N': + if fields[index] == r'\N': fields[index] = default_value elif len(fields) == len(TRANSACTION_FIELDS): @@ -192,7 +193,7 @@ def mapper(self, line): record_type = TransactionRecord.__name__ key = fields[3] # payment_ref_id # Convert nulls in 'transaction_fee'. - if fields[6] == '\\N': + if fields[6] == r'\N': fields[6] = None # Edx-only: if the transaction was within a time period when @@ -677,12 +678,20 @@ def format_transaction_table_output(self, audit_code, transaction, orderitem, tr orderitem.partner_short_code if orderitem else self.default_partner_short_code, orderitem.payment_ref_id if orderitem else transaction.payment_ref_id, orderitem.order_id if orderitem else None, - encode_id(orderitem.order_processor, "order_id", orderitem.order_id) if orderitem else None, + encode_id( + orderitem.order_processor.encode('utf-8'), + b"order_id", + orderitem.order_id.encode('utf-8') + ) if orderitem else None, orderitem.date_placed if orderitem else None, # transaction information transaction.date if transaction else None, transaction.transaction_id if transaction else None, - encode_id(transaction.payment_gateway_id, "transaction_id", transaction.transaction_id) if transaction else None, + encode_id( + transaction.payment_gateway_id.encode('utf-8'), + b"transaction_id", + transaction.transaction_id.encode('utf-8') + ) if transaction else None, transaction.payment_gateway_id if transaction else None, transaction.payment_gateway_account_id if transaction else None, transaction.transaction_type if transaction else None, @@ -695,7 +704,11 @@ def format_transaction_table_output(self, audit_code, transaction, orderitem, tr str(transaction_fee_per_item) if transaction_fee_per_item is not None else None, # orderitem information orderitem.line_item_id if orderitem else None, - encode_id(orderitem.order_processor, "line_item_id", orderitem.line_item_id) if orderitem else None, + encode_id( + orderitem.order_processor.encode('utf-8'), + b"line_item_id", + orderitem.line_item_id.encode('utf-8') + ) if orderitem else None, orderitem.line_item_product_id if orderitem else None, orderitem.line_item_price if orderitem else None, orderitem.line_item_unit_price if orderitem else None, @@ -771,13 +784,13 @@ class OrderTransactionRecord(OrderTransactionRecordBase): def to_tsv(self): """Serializes the record to a TSV-formatted string.""" - return '\t'.join([str(v) if v is not None else "\\N" for v in self]) + return '\t'.join([str(v) if v is not None else r'\N' for v in self]) @staticmethod def from_job_output(tsv_str): """Constructor that reads 
format generated by to_tsv().""" record = tsv_str.split('\t') - nulled_record = [v if v != "\\N" else None for v in record] + nulled_record = [v if v != r'\N' else None for v in record] return OrderTransactionRecord(*nulled_record) @@ -1121,13 +1134,13 @@ class FullOrderTransactionRecord(FullOrderTransactionRecordBase): def to_tsv(self): """Serializes the record to a TSV-formatted string.""" - return '\t'.join([str(v) if v is not None else "\\N" for v in self]) + return '\t'.join([str(v) if v is not None else r'\N' for v in self]) @staticmethod def from_job_output(tsv_str): """Constructor that reads format generated by to_tsv().""" record = tsv_str.split('\t') - nulled_record = [v if v != "\\N" else None for v in record] + nulled_record = [v if v != r'\N' else None for v in record] return FullOrderTransactionRecord(*nulled_record) @@ -1159,7 +1172,7 @@ def mapper(self, line): key_index = FULLORDERITEM_FIELDS.index('payment_ref_id') key = fields[key_index] - # Convert Hive null values ('\\N') in fields like 'product_detail': + # Convert Hive null values ("\N") in fields like 'product_detail': defaults = ( ('product_detail', ''), ('refunded_amount', '0.0'), @@ -1172,7 +1185,7 @@ def mapper(self, line): ) for field_name, default_value in defaults: index = FULLORDERITEM_FIELD_INDICES[field_name] - if fields[index] == '\\N': + if fields[index] == r'\N': fields[index] = default_value elif len(fields) == len(TRANSACTION_FIELDS): @@ -1180,7 +1193,7 @@ def mapper(self, line): record_type = TransactionRecord.__name__ key = fields[3] # payment_ref_id # Convert nulls in 'transaction_fee'. - if fields[6] == '\\N': + if fields[6] == r'\N': fields[6] = None # Edx-only: if the transaction was within a time period when @@ -1238,12 +1251,20 @@ def format_transaction_table_output(self, audit_code, transaction, orderitem, tr orderitem.partner_short_code if orderitem else self.default_partner_short_code, orderitem.payment_ref_id if orderitem else transaction.payment_ref_id, orderitem.order_id if orderitem else None, - encode_id(orderitem.order_processor, "order_id", orderitem.order_id) if orderitem else None, + encode_id( + orderitem.order_processor.encode('utf-8'), + b"order_id", + orderitem.order_id.encode('utf-8'), + ) if orderitem else None, orderitem.date_placed if orderitem else None, # transaction information transaction.date if transaction else None, transaction.transaction_id if transaction else None, - encode_id(transaction.payment_gateway_id, "transaction_id", transaction.transaction_id) if transaction else None, + encode_id( + transaction.payment_gateway_id.encode('utf-8'), + b"transaction_id", + transaction.transaction_id.encode('utf-8'), + ) if transaction else None, transaction.payment_gateway_id if transaction else None, transaction.payment_gateway_account_id if transaction else None, transaction.transaction_type if transaction else None, @@ -1256,7 +1277,11 @@ def format_transaction_table_output(self, audit_code, transaction, orderitem, tr str(transaction_fee_per_item) if transaction_fee_per_item is not None else None, # orderitem information orderitem.line_item_id if orderitem else None, - encode_id(orderitem.order_processor, "line_item_id", orderitem.line_item_id) if orderitem else None, + encode_id( + orderitem.order_processor.encode('utf-8'), + b"line_item_id", + orderitem.line_item_id.encode('utf-8'), + ) if orderitem else None, orderitem.line_item_product_id if orderitem else None, orderitem.line_item_price if orderitem else None, orderitem.line_item_unit_price if orderitem else None, 
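
For reference, a minimal sketch of the byte-oriented calling convention that the reconcile tasks now follow when calling id_codec.encode_id (scope, id_type and id_value must all be bytestrings, per the id_codec change above). The values below are illustrative only, not taken from real order data:

    # Sketch only: shows the bytes-in / bytes-out round trip of id_codec
    # under both python 2 and 3. Values are made up for illustration.
    import edx.analytics.tasks.util.id_codec as id_codec

    order_processor = u'shoppingcart'   # hypothetical processor name
    order_id = u'EDX-123456'            # hypothetical payment_ref_id

    encoded = id_codec.encode_id(
        order_processor.encode('utf-8'),   # scope must be bytes
        b'order_id',                       # id_type must be bytes
        order_id.encode('utf-8'),          # id_value must be bytes
    )

    # decode_id returns the same tuple of bytestrings.
    scope, id_type, id_value = id_codec.decode_id(encoded)
    assert (scope, id_type, id_value) == (b'shoppingcart', b'order_id', b'EDX-123456')

Keeping the encoding explicit at the call sites (rather than inside encode_id) is what lets the same code path run unchanged on python 2 and 3.
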
diff --git a/edx/analytics/tasks/warehouse/financial/tests/test_paypal.py b/edx/analytics/tasks/warehouse/financial/tests/test_paypal.py index faed863124..d65244fe2f 100644 --- a/edx/analytics/tasks/warehouse/financial/tests/test_paypal.py +++ b/edx/analytics/tasks/warehouse/financial/tests/test_paypal.py @@ -1,7 +1,8 @@ +from __future__ import absolute_import import xml.etree.cElementTree as ET from collections import OrderedDict -from cStringIO import StringIO +from io import BytesIO from unittest import TestCase import httpretty @@ -16,6 +17,8 @@ PaypalReportMetadataRequest, PaypalReportRequest, PaypalReportResultsRequest, PaypalTimeoutError, PaypalTransactionsByDayTask, SettlementReportRecord ) +import six +from six.moves import zip TEST_URL = 'http://test.api/endpoint' @@ -30,9 +33,9 @@ def setUp(self): def on_post_return_xml(self): element_tree = ET.ElementTree(self.response_xml_root) - string_buffer = StringIO() - element_tree.write(string_buffer, encoding='UTF-8', xml_declaration=True) - response_xml_root_string = string_buffer.getvalue() + bytes_buffer = BytesIO() + element_tree.write(bytes_buffer, encoding='UTF-8', xml_declaration=True) + response_xml_root_string = bytes_buffer.getvalue() httpretty.register_uri(httpretty.POST, TEST_URL, response_xml_root_string) def remove_xml_node(self, path): @@ -42,7 +45,7 @@ def remove_xml_node(self, path): def set_xml_node_text(self, path, value): element = self.response_xml_root.findall(path)[0] - element.text = unicode(value) + element.text = six.text_type(value) def parse_request_xml(self): http_request = httpretty.last_request() @@ -755,9 +758,20 @@ def test_normal_run(self): self.task.run() - expected_record = ['2015-08-28', 'paypal', 'testing', 'EDX-123456', 'USD', '50.00', '1.40', 'sale', - 'instant_transfer', 'paypal', '1FW12345678901234'] - self.assertEquals(self.output_target.value.strip(), '\t'.join(expected_record)) + expected_record = [ + b'2015-08-28', + b'paypal', + b'testing', + b'EDX-123456', + b'USD', + b'50.00', + b'1.40', + b'sale', + b'instant_transfer', + b'paypal', + b'1FW12345678901234', + ] + self.assertEquals(self.output_target.value.strip(), b'\t'.join(expected_record)) @data( (4, 'Report has failed'), @@ -864,9 +878,20 @@ def test_delayed_report(self, mock_time): call(5) ]) - expected_record = ['2015-08-28', 'paypal', 'testing', 'EDX-123456', 'USD', '50.00', '1.40', 'sale', - 'instant_transfer', 'paypal', '1FW12345678901234'] - self.assertEquals(self.output_target.value.strip(), '\t'.join(expected_record)) + expected_record = [ + b'2015-08-28', + b'paypal', + b'testing', + b'EDX-123456', + b'USD', + b'50.00', + b'1.40', + b'sale', + b'instant_transfer', + b'paypal', + b'1FW12345678901234', + ] + self.assertEquals(self.output_target.value.strip(), b'\t'.join(expected_record)) @with_luigi_config('paypal', 'timeout', '1') @patch('edx.analytics.tasks.warehouse.financial.paypal.time') diff --git a/edx/analytics/tasks/warehouse/financial/tests/test_reconcile.py b/edx/analytics/tasks/warehouse/financial/tests/test_reconcile.py index e2f4f07faf..356c473527 100644 --- a/edx/analytics/tasks/warehouse/financial/tests/test_reconcile.py +++ b/edx/analytics/tasks/warehouse/financial/tests/test_reconcile.py @@ -1,4 +1,6 @@ """Tests for Order-transaction reconciliation and reporting.""" +from __future__ import absolute_import + import uuid from unittest import TestCase @@ -10,11 +12,13 @@ LOW_ORDER_ID_SHOPPINGCART_ORDERS, BaseOrderItemRecord, BaseTransactionRecord, OrderItemRecord, OrderTransactionRecord, 
ReconcileOrdersAndTransactionsTask, TransactionRecord ) +import six +from six.moves import zip TEST_DATE = '2015-06-01' TEST_LATER_DATE = '2015-06-10' DEFAULT_REF_ID = "EDX-12345" -HIVE_NULL = '\\N' +HIVE_NULL = r'\N' FIRST_ORDER_ITEM = '2345678' SECOND_ORDER_ITEM = '2345679' FIRST_TRANSACTION = '123423453456' @@ -206,7 +210,7 @@ def record_sort_key(record): # so that column names can be used instead of numbers. output_dict = record._asdict() # pylint: disable=no-member,protected-access expected_columns.update(**extra_values) - for column_num, expected_value in expected_columns.iteritems(): + for column_num, expected_value in six.iteritems(expected_columns): self.assertEquals(output_dict[column_num], expected_value) def test_no_transaction(self): diff --git a/edx/analytics/tasks/warehouse/lms_courseware_link_clicked.py b/edx/analytics/tasks/warehouse/lms_courseware_link_clicked.py index c945f51558..010f79623d 100644 --- a/edx/analytics/tasks/warehouse/lms_courseware_link_clicked.py +++ b/edx/analytics/tasks/warehouse/lms_courseware_link_clicked.py @@ -1,8 +1,11 @@ """ Tasks for collecting link click data per course, per day, and uploading that data to Vertica. """ +from future.standard_library import install_aliases +install_aliases() + import logging -from urlparse import urlparse +from urllib.parse import urlparse import luigi.task diff --git a/edx/analytics/tasks/warehouse/tests/test_course_subjects.py b/edx/analytics/tasks/warehouse/tests/test_course_subjects.py index 591b2ed516..af16b70247 100644 --- a/edx/analytics/tasks/warehouse/tests/test_course_subjects.py +++ b/edx/analytics/tasks/warehouse/tests/test_course_subjects.py @@ -110,8 +110,8 @@ def test_course_no_subjects(self): expected = { 'course_id': 'foo', 'date': '2015-06-25', - 'subject_uri': '\N', # pylint: disable-msg=anomalous-unicode-escape-in-string - 'subject_title': '\N', # pylint: disable-msg=anomalous-unicode-escape-in-string + 'subject_uri': r'\N', # pylint: disable-msg=anomalous-unicode-escape-in-string + 'subject_title': r'\N', # pylint: disable-msg=anomalous-unicode-escape-in-string 'subject_language': 'en', } self.assertTrue(self.check_subject_entry(data, 0, expected)) @@ -250,7 +250,7 @@ def test_catalog_missing_keys(self): 'course_id': 'bar', 'date': '2015-06-25', 'subject_uri': '/course/subject/testing', - 'subject_title': '\N', # pylint: disable-msg=anomalous-unicode-escape-in-string + 'subject_title': r'\N', # pylint: disable-msg=anomalous-unicode-escape-in-string 'subject_language': 'en' } # We expect only one row, a row for the course with a course_id. diff --git a/requirements/base.in b/requirements/base.in index d542595403..2416f4b4d9 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -3,12 +3,13 @@ # Workaround for https://github.com/ansible/ansible/issues/8875 --no-binary ansible -ansible==1.4.5 # GPL v3 License +ansible<2.9.0 # GPL v3 License boto==2.48.0 # MIT ecdsa==0.13 # MIT Jinja2 # BSD pycrypto==2.6.1 # public domain wheel==0.30.0 +future # MIT # Add this here, not because it's needed here, but because # 1.11.0 will collide with a dependency in default.in. 
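
The new `future` requirement added to base.in backs the install_aliases() pattern used above in s3_util.py, url.py and lms_courseware_link_clicked.py, which makes the python 3 urllib.parse import path work on python 2 as well. A minimal sketch of that pattern (the URL below is illustrative only):

    # Sketch only: the future/install_aliases idiom used in this patch series.
    from __future__ import print_function
    from future.standard_library import install_aliases
    install_aliases()  # makes urllib.parse importable under python 2

    from urllib.parse import urlparse, urlunparse

    parsed = urlparse('s3://bucket/path/to/events.log')
    print(parsed.scheme, parsed.netloc, parsed.path)
    # Round-trips back to the original URL string.
    assert urlunparse(parsed) == 's3://bucket/path/to/events.log'

This avoids carrying py2-only urlparse imports alongside py3-only ones in each module.
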
diff --git a/requirements/base.txt b/requirements/base.txt index 10249852c5..ff9153ecf1 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -6,23 +6,19 @@ # --no-binary ansible -ansible==1.4.5 +ansible==2.8.3 asn1crypto==0.24.0 # via cryptography -bcrypt==3.1.6 # via paramiko boto==2.48.0 -cffi==1.12.3 # via bcrypt, cryptography, pynacl -cryptography==2.6.1 # via paramiko +cffi==1.12.3 # via cryptography +cryptography==2.7 # via ansible ecdsa==0.13 enum34==1.1.6 # via cryptography -httplib2==0.12.3 # via ansible +future==0.17.1 ipaddress==1.0.22 # via cryptography jinja2==2.10.1 markupsafe==1.1.1 # via jinja2 -paramiko==2.4.2 # via ansible -pyasn1==0.4.5 # via paramiko pycparser==2.19 # via cffi pycrypto==2.6.1 -pynacl==1.3.0 # via paramiko -pyyaml==5.1 # via ansible +pyyaml==5.1.1 # via ansible six==1.10.0 wheel==0.30.0 diff --git a/requirements/default.in b/requirements/default.in index 9660a50b47..4823aee915 100644 --- a/requirements/default.in +++ b/requirements/default.in @@ -4,7 +4,7 @@ -r base.txt argparse==1.2.1 # Python Software Foundation License -boto3==1.4.8 # Apache 2.0 +boto3 # Apache 2.0 ciso8601==1.0.3 # MIT edx-ccx-keys==0.2.1 # AGPL edx-opaque-keys==0.4 # AGPL @@ -18,24 +18,26 @@ html5lib==1.0b3 # MIT isoweek==1.3.3 # BSD numpy==1.11.3 # BSD paypalrestsdk==1.9.0 # Paypal SDK License -psycopg2==2.6.2 # LGPL +psycopg2 # LGPL pygeoip==0.3.2 # LGPL -python-cjson==1.1.0 # LGPL +python-cjson; python_version <= "2.7" # LGPL +ujson; python_version > "2.7" # BSD python-dateutil==2.6.1 # BSD # There seems to be an issue(see: https://pagure.io/python-daemon/issue/18) with dependency installation on the # latest version of python-daemon(2.2.0), so we pin it to an earlier version. -python-daemon==2.1.2 +python-daemon python-gnupg==0.3.9 # BSD pytz==2017.3 # ZPL requests==2.18.4 # Apache 2.0 -six==1.10.0 # MIT +six # MIT stevedore==1.19.1 # Apache 2.0 ua-parser==0.3.6 # Apache urllib3==1.22 # MIT user-agents==0.3.2 # MIT vertica-python==0.6.11 # MIT yarn-api-client==0.2.3 # BSD -snowflake-connector-python==1.7.9 +snowflake-connector-python +futures; python_version == "2.7" -e git+https://github.com/edx/luigi.git@eb45bcc52243de11b2b16a81229ac584fe1e601b#egg=luigi # Apache License 2.0 diff --git a/requirements/default.txt b/requirements/default.txt index 2d0904fe77..50a5616b0e 100644 --- a/requirements/default.txt +++ b/requirements/default.txt @@ -8,25 +8,24 @@ -e git+https://github.com/edx/luigi.git@eb45bcc52243de11b2b16a81229ac584fe1e601b#egg=luigi -e git+https://github.com/edx/pyinstrument.git@a35ff76df4c3d5ff9a2876d859303e33d895e78f#egg=pyinstrument -ansible==1.4.5 +ansible==2.8.3 argparse==1.2.1 asn1crypto==0.24.0 -azure-common==1.1.21 # via azure-storage-blob, azure-storage-common, snowflake-connector-python +azure-common==1.1.23 # via azure-storage-blob, azure-storage-common, snowflake-connector-python azure-nspkg==3.0.2 # via azure-common, azure-storage-nspkg azure-storage-blob==2.0.1 # via snowflake-connector-python azure-storage-common==2.0.0 # via azure-storage-blob azure-storage-nspkg==3.1.0 # via azure-storage-common backports-abc==0.5 # via tornado -bcrypt==3.1.6 -boto3==1.4.8 +boto3==1.9.196 boto==2.48.0 -botocore==1.8.50 # via boto3, s3transfer, snowflake-connector-python +botocore==1.12.196 # via boto3, s3transfer, snowflake-connector-python cachetools==3.1.1 # via google-auth -certifi==2019.3.9 # via requests, snowflake-connector-python, tornado +certifi==2019.6.16 # via requests, snowflake-connector-python, tornado cffi==1.12.3 chardet==3.0.4 # via 
requests ciso8601==1.0.3 -cryptography==2.6.1 +cryptography==2.7 distlib==0.2.2 docutils==0.14 # via botocore, python-daemon ecdsa==0.13 @@ -35,8 +34,8 @@ edx-opaque-keys==0.4 elasticsearch==1.7.0 enum34==1.1.6 filechunkio==1.8 -future==0.17.1 # via snowflake-connector-python, vertica-python -futures==3.2.0 # via azure-storage-blob, google-cloud-core, s3transfer +future==0.17.1 +futures==3.3.0 ; python_version == "2.7" google-api-python-client==1.7.7 google-auth-httplib2==0.0.3 # via google-api-python-client google-auth==1.6.3 # via google-api-python-client, google-auth-httplib2, google-cloud-bigquery, google-cloud-core @@ -47,9 +46,9 @@ googleapis-common-protos==1.6.0 # via google-cloud-core graphitesend==0.10.0 gspread==3.1.0 html5lib==1.0b3 -httplib2==0.12.3 +httplib2==0.13.0 # via google-api-python-client, google-auth-httplib2 idna==2.6 # via requests, snowflake-connector-python -ijson==2.3 # via snowflake-connector-python +ijson==2.4 # via snowflake-connector-python ipaddress==1.0.22 isoweek==1.3.3 jinja2==2.10.1 @@ -57,33 +56,31 @@ jmespath==0.9.4 # via boto3, botocore lockfile==0.12.2 # via python-daemon markupsafe==1.1.1 numpy==1.11.3 -paramiko==2.4.2 paypalrestsdk==1.9.0 -pbr==5.2.1 # via stevedore -protobuf==3.8.0 # via google-cloud-core, googleapis-common-protos -psycopg2==2.6.2 +pbr==5.4.1 # via stevedore +protobuf==3.9.0 # via google-cloud-core, googleapis-common-protos +psycopg2==2.8.3 pyasn1-modules==0.2.5 # via google-auth, snowflake-connector-python -pyasn1==0.4.5 +pyasn1==0.4.5 # via pyasn1-modules, rsa, snowflake-connector-python pycparser==2.19 pycrypto==2.6.1 pycryptodomex==3.8.2 # via snowflake-connector-python pygeoip==0.3.2 pyjwt==1.7.1 # via snowflake-connector-python pymongo==3.8.0 # via edx-opaque-keys -pynacl==1.3.0 pyopenssl==19.0.0 # via paypalrestsdk, snowflake-connector-python -python-cjson==1.1.0 -python-daemon==2.1.2 +python-cjson==1.2.1 ; python_version <= "2.7" +python-daemon==2.2.3 python-dateutil==2.6.1 python-gnupg==0.3.9 pytz==2017.3 -pyyaml==5.1 +pyyaml==5.1.1 requests==2.18.4 rsa==4.0 # via google-auth -s3transfer==0.1.13 # via boto3 +s3transfer==0.2.1 # via boto3 singledispatch==3.4.0.3 # via tornado six==1.10.0 -snowflake-connector-python==1.7.9 +snowflake-connector-python==1.8.5 stevedore==1.19.1 tornado==4.5.3 ua-parser==0.3.6 @@ -92,4 +89,7 @@ urllib3==1.22 user-agents==0.3.2 vertica-python==0.6.11 wheel==0.30.0 -yarn-api-client==0.2.3 \ No newline at end of file +yarn-api-client==0.2.3 + +# The following packages are considered to be unsafe in a requirements file: +# setuptools==41.0.1 # via google-cloud-core, protobuf, python-daemon diff --git a/requirements/docs.txt b/requirements/docs.txt index baca202202..d6945f5d47 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -9,26 +9,25 @@ -e git+https://github.com/edx/luigi.git@eb45bcc52243de11b2b16a81229ac584fe1e601b#egg=luigi -e git+https://github.com/edx/pyinstrument.git@a35ff76df4c3d5ff9a2876d859303e33d895e78f#egg=pyinstrument alabaster==0.7.12 # via sphinx -ansible==1.4.5 +ansible==2.8.3 argparse==1.2.1 asn1crypto==0.24.0 -azure-common==1.1.18 +azure-common==1.1.23 azure-nspkg==3.0.2 -azure-storage-blob==1.5.0 -azure-storage-common==1.4.0 +azure-storage-blob==2.0.1 +azure-storage-common==2.0.0 azure-storage-nspkg==3.1.0 -babel==2.6.0 # via sphinx +babel==2.7.0 # via sphinx backports-abc==0.5 -bcrypt==3.1.6 -boto3==1.4.8 +boto3==1.9.196 boto==2.48.0 -botocore==1.8.50 -cachetools==3.1.0 -certifi==2019.3.9 -cffi==1.12.2 +botocore==1.12.196 +cachetools==3.1.1 +certifi==2019.6.16 
+cffi==1.12.3 chardet==3.0.4 ciso8601==1.0.3 -cryptography==2.6.1 +cryptography==2.7 distlib==0.2.2 docutils==0.14 ecdsa==0.13 @@ -38,56 +37,55 @@ elasticsearch==1.7.0 enum34==1.1.6 filechunkio==1.8 future==0.17.1 -futures==3.2.0 +futures==3.3.0 ; python_version == "2.7" google-api-python-client==1.7.7 google-auth-httplib2==0.0.3 google-auth==1.6.3 google-cloud-bigquery==0.27.0 google-cloud-core==0.27.1 google-resumable-media==0.3.2 -googleapis-common-protos==1.5.9 +googleapis-common-protos==1.6.0 graphitesend==0.10.0 +gspread==3.1.0 html5lib==1.0b3 -httplib2==0.12.1 +httplib2==0.13.0 idna==2.6 -ijson==2.3 +ijson==2.4 imagesize==1.1.0 # via sphinx ipaddress==1.0.22 isoweek==1.3.3 -jinja2==2.8.1 +jinja2==2.10.1 jmespath==0.9.4 lockfile==0.12.2 markupsafe==1.1.1 numpy==1.11.3 -paramiko==2.4.2 paypalrestsdk==1.9.0 -pbr==5.1.3 -protobuf==3.7.1 -psycopg2==2.6.2 -pyasn1-modules==0.2.4 +pbr==5.4.1 +protobuf==3.9.0 +psycopg2==2.8.3 +pyasn1-modules==0.2.5 pyasn1==0.4.5 pycparser==2.19 pycrypto==2.6.1 -pycryptodomex==3.8.0 +pycryptodomex==3.8.2 pygeoip==0.3.2 -pygments==2.3.1 # via sphinx +pygments==2.4.2 # via sphinx pyjwt==1.7.1 -pymongo==3.7.2 -pynacl==1.3.0 +pymongo==3.8.0 pyopenssl==19.0.0 -python-cjson==1.1.0 -python-daemon==2.1.2 +python-cjson==1.2.1 ; python_version <= "2.7" +python-daemon==2.2.3 python-dateutil==2.6.1 python-gnupg==0.3.9 pytz==2017.3 -pyyaml==5.1 +pyyaml==5.1.1 requests==2.18.4 rsa==4.0 -s3transfer==0.1.13 +s3transfer==0.2.1 singledispatch==3.4.0.3 six==1.10.0 -snowballstemmer==1.2.1 # via sphinx -snowflake-connector-python==1.7.9 +snowballstemmer==1.9.0 # via sphinx +snowflake-connector-python==1.8.5 sphinx==1.5.1 stevedore==1.19.1 tornado==4.5.3 @@ -97,4 +95,7 @@ urllib3==1.22 user-agents==0.3.2 vertica-python==0.6.11 wheel==0.30.0 -yarn-api-client==0.2.3 \ No newline at end of file +yarn-api-client==0.2.3 + +# The following packages are considered to be unsafe in a requirements file: +# setuptools==41.0.1 # via google-cloud-core, protobuf, python-daemon diff --git a/requirements/extra.txt b/requirements/extra.txt index 43f6804127..2ee387709f 100644 --- a/requirements/extra.txt +++ b/requirements/extra.txt @@ -1,2 +1,10 @@ # Generic URLs are not yet supported by pip-compile. http://cdn.mysql.com/Downloads/Connector-Python/mysql-connector-python-1.2.2.zip # GPL v2 with FOSS License Exception + +# When pip-compile is run under python 3, it omits all packages with a python 2 +# condition. Re-add them here, pre-pinned. +python-cjson==1.1.0 ; python_version <= "2.7" + +# When pip-compile is run under python 2, it omits all packages with a python 3 +# condition. Re-add them here, pre-pinned. 
+ujson==1.35 ; python_version > "2.7" diff --git a/requirements/pip-tools.txt b/requirements/pip-tools.txt index 547630ffa4..fb165b7848 100644 --- a/requirements/pip-tools.txt +++ b/requirements/pip-tools.txt @@ -5,5 +5,5 @@ # make upgrade # click==7.0 # via pip-tools -pip-tools==3.7.0 +pip-tools==4.0.0 six==1.10.0 diff --git a/requirements/pip.txt b/requirements/pip.txt index 42d92b4bbb..893fe72454 100644 --- a/requirements/pip.txt +++ b/requirements/pip.txt @@ -1,2 +1,2 @@ -pip==9.0.1 +pip==19.1.1 setuptools==36.4.0 diff --git a/requirements/test.in b/requirements/test.in index ff6b7b828b..b0c3fe8547 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -23,7 +23,7 @@ pandas==0.13.0 # Stuff for quality -isort==4.2.15 +isort pycodestyle==2.3.1 pylint==1.6.4 diff --git a/requirements/test.txt b/requirements/test.txt index 269583e50b..e87169766a 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -8,31 +8,30 @@ -e git+https://github.com/edx/luigi.git@eb45bcc52243de11b2b16a81229ac584fe1e601b#egg=luigi -e git+https://github.com/edx/pyinstrument.git@a35ff76df4c3d5ff9a2876d859303e33d895e78f#egg=pyinstrument -ansible==1.4.5 +ansible==2.8.3 argparse==1.2.1 asn1crypto==0.24.0 astroid==1.4.9 # via pylint -azure-common==1.1.18 +azure-common==1.1.23 azure-nspkg==3.0.2 -azure-storage-blob==1.5.0 -azure-storage-common==1.4.0 +azure-storage-blob==2.0.1 +azure-storage-common==2.0.0 azure-storage-nspkg==3.1.0 backports-abc==0.5 -backports.functools-lru-cache==1.5 # via pylint -bcrypt==3.1.6 -boto3==1.4.8 +backports.functools-lru-cache==1.5 # via isort, pylint +boto3==1.9.196 boto==2.48.0 -botocore==1.8.50 -cachetools==3.1.0 -certifi==2019.3.9 -cffi==1.12.2 +botocore==1.12.196 +cachetools==3.1.1 +certifi==2019.6.16 +cffi==1.12.3 chardet==3.0.4 ciso8601==1.0.3 configparser==3.7.4 # via pylint coverage==4.3.1 -cryptography==2.6.1 +cryptography==2.7 ddt==1.1.1 -diff-cover==1.0.7 +diff-cover==2.3.0 distlib==0.2.2 docutils==0.14 ecdsa==0.13 @@ -44,28 +43,29 @@ filechunkio==1.8 freezegun==0.3.9 funcsigs==1.0.2 # via mock future==0.17.1 -futures==3.2.0 +futures==3.3.0 ; python_version == "2.7" google-api-python-client==1.7.7 google-auth-httplib2==0.0.3 google-auth==1.6.3 google-cloud-bigquery==0.27.0 google-cloud-core==0.27.1 google-resumable-media==0.3.2 -googleapis-common-protos==1.5.9 +googleapis-common-protos==1.6.0 graphitesend==0.10.0 +gspread==3.1.0 html5lib==1.0b3 -httplib2==0.12.1 +httplib2==0.13.0 httpretty==0.8.14 idna==2.6 -ijson==2.3 +ijson==2.4 inflect==2.1.0 # via jinja2-pluralize ipaddress==1.0.22 -isort==4.2.15 +isort==4.3.21 isoweek==1.3.3 jinja2-pluralize==0.3.0 # via diff-cover -jinja2==2.8.1 +jinja2==2.10.1 jmespath==0.9.4 -lazy-object-proxy==1.3.1 # via astroid +lazy-object-proxy==1.4.1 # via astroid lockfile==0.12.2 markupsafe==1.1.1 mccabe==0.6.1 # via pylint @@ -74,36 +74,34 @@ nose-ignore-docstring==0.2 nose==1.3.7 numpy==1.11.3 pandas==0.13.0 -paramiko==2.4.2 paypalrestsdk==1.9.0 -pbr==5.1.3 -protobuf==3.7.1 -psycopg2==2.6.2 -pyasn1-modules==0.2.4 +pbr==5.4.1 +protobuf==3.9.0 +psycopg2==2.8.3 +pyasn1-modules==0.2.5 pyasn1==0.4.5 pycodestyle==2.3.1 pycparser==2.19 pycrypto==2.6.1 -pycryptodomex==3.8.0 +pycryptodomex==3.8.2 pygeoip==0.3.2 -pygments==2.3.1 # via diff-cover +pygments==2.4.2 # via diff-cover pyjwt==1.7.1 pylint==1.6.4 -pymongo==3.7.2 -pynacl==1.3.0 +pymongo==3.8.0 pyopenssl==19.0.0 -python-cjson==1.1.0 -python-daemon==2.1.2 +python-cjson==1.2.1 ; python_version <= "2.7" +python-daemon==2.2.3 python-dateutil==2.6.1 python-gnupg==0.3.9 pytz==2017.3 
-pyyaml==5.1 +pyyaml==5.1.1 requests==2.18.4 rsa==4.0 -s3transfer==0.1.13 +s3transfer==0.2.1 singledispatch==3.4.0.3 six==1.10.0 -snowflake-connector-python==1.7.9 +snowflake-connector-python==1.8.5 stevedore==1.19.1 tornado==4.5.3 ua-parser==0.3.6 @@ -112,5 +110,8 @@ urllib3==1.22 user-agents==0.3.2 vertica-python==0.6.11 wheel==0.30.0 -wrapt==1.11.1 # via astroid -yarn-api-client==0.2.3 \ No newline at end of file +wrapt==1.11.2 # via astroid +yarn-api-client==0.2.3 + +# The following packages are considered to be unsafe in a requirements file: +# setuptools==41.0.1 # via google-cloud-core, protobuf, python-daemon diff --git a/share/task.yml b/share/task.yml index 748ef890f1..74e449016e 100644 --- a/share/task.yml +++ b/share/task.yml @@ -3,7 +3,7 @@ - name: Configure luigi hosts: "{{ name }}" gather_facts: True - sudo: True + become: True vars: write_luigi_config: "yes" common_debian_variants: @@ -60,93 +60,100 @@ register: home_output - name: set the home variable - set_fact: home="{{ home_output.stdout }}" + set_fact: "home={{ home_output.stdout }}" - name: known_hosts file exists command: touch {{ home }}/.ssh/known_hosts creates={{ home }}/.ssh/known_hosts - name: git server in known_hosts file - lineinfile: > - dest={{ home }}/.ssh/known_hosts - regexp=^{{item.hostname}} - line="{{ item.hostname }} {{ item.public_key }}" - with_items: git_servers + lineinfile: + dest: "{{ home }}/.ssh/known_hosts" + regexp: "^{{item.hostname}}" + line: "{{ item.hostname }} {{ item.public_key }}" + loop: "{{ git_servers }}" - name: root directories created file: path={{ item }} state=directory owner=root group=root - sudo: True + become: True with_items: - "{{ root_data_dir }}" - "{{ root_log_dir }}" - name: working directories created file: path={{ item }} state=directory mode=777 owner={{ ansible_ssh_user }} group={{ ansible_ssh_user }} - sudo: True + become: True with_items: - "{{ working_dir }}" - "{{ working_venv_dir }}" - name: log directory created file: path={{ item }} state=directory mode=777 owner={{ ansible_ssh_user }} group={{ ansible_ssh_user }} - sudo: True + become: True with_items: - "{{ log_dir }}" - name: make sure git is available on the Debian server command: apt-get install -q -y git - sudo: True + become: True when: ansible_distribution in common_debian_variants - name: make sure git is available on the RHEL server yum: pkg=git state=present - sudo: True + become: True when: ansible_distribution in common_redhat_variants - name: repositories checked out - git: > - repo={{ item.url }} - dest={{ working_dir }}/{{ item.dir_name }} - version=master - with_items: repos + git: + repo: "{{ item.url }}" + dest: "{{ working_dir }}/{{ item.dir_name }}" + version: master + force: yes + loop: "{{ repos }}" - name: branches fetched - command: git fetch --all chdir={{ working_dir }}/{{ item.dir_name }} - with_items: repos + command: "git fetch --all chdir={{ working_dir }}/{{ item.dir_name }}" + loop: "{{ repos }}" - name: origin/HEAD updated - command: git remote set-head origin --auto chdir={{ working_dir }}/{{ item.dir_name }} - with_items: repos + command: "git remote set-head origin --auto chdir={{ working_dir }}/{{ item.dir_name }}" + loop: "{{ repos }}" - name: branches checked out - command: git checkout {{ item.branch }} chdir={{ working_dir }}/{{ item.dir_name }} - with_items: repos + command: "git checkout {{ item.branch }} chdir={{ working_dir }}/{{ item.dir_name }}" + loop: "{{ repos }}" - name: ensure system packages are installed command: make system-requirements chdir={{ 
working_repo_dir }} - sudo: True + become: True - name: bootstrap pip on Debian command: apt-get install -q -y python-pip - sudo: True + become: True when: ansible_distribution in common_debian_variants - name: bootstrap pip on RHEL command: yum install -q -y python-pip - sudo: True + become: True when: ansible_distribution in common_redhat_variants - name: virtualenv installed - pip: name=virtualenv version=1.10.1 - sudo: True + pip: name=virtualenv version=16.6.1 + become: True - - name: check if virtualenv already created - stat: path={{ working_venv_dir }}/bin/activate - register: virtualenv_created + #- name: check if virtualenv already created + # stat: + # path: "{{ working_venv_dir }}/bin/activate" + # register: virtualenv_created + + # This is required for running unit tests inside docker, at least until the + # default image comes with a py3 compatible virtualenv. + - name: virtualenv deleted + shell: "rm -rf {{ working_venv_dir }}/*" - name: virtualenv created command: > - virtualenv --python={{ virtualenv_python }} {{ virtualenv_extra_args }} {{ working_venv_dir }} - when: not virtualenv_created.stat.exists + virtualenv --clear --python={{ virtualenv_python }} {{ virtualenv_extra_args }} {{ working_venv_dir }} + #when: not virtualenv_created.stat.exists - name: update pip command: > @@ -155,28 +162,32 @@ - name: virtualenv initialized on Debian shell: > . {{ working_venv_dir }}/bin/activate && make install - chdir={{ working_repo_dir }} + args: + chdir: "{{ working_repo_dir }}" when: ansible_distribution in common_debian_variants - name: virtualenv initialized on RHEL shell: > . {{ working_venv_dir }}/bin/activate && make install - chdir={{ working_repo_dir }} + args: + chdir: "{{ working_repo_dir }}" when: ansible_distribution in common_redhat_variants - name: additional packages installed on Debian shell: > . {{ working_venv_dir }}/bin/activate && pip install {{ item }} - chdir={{ working_repo_dir }} + args: + chdir: "{{ working_repo_dir }}" when: ansible_distribution in common_debian_variants - with_items: packages + loop: "{{ packages }}" - name: additional packages installed on RHEL shell: > . 
{{ working_venv_dir }}/bin/activate && pip install {{ item }} - chdir={{ working_repo_dir }} + args: + chdir: "{{ working_repo_dir }}" when: ansible_distribution in common_redhat_variants - with_items: packages + loop: "{{ packages }}" - name: logging configured template: src=logging.cfg.j2 dest={{ working_repo_dir }}/logging.cfg From 8d5f4923be003e5143f45c95bcee903c826e5978 Mon Sep 17 00:00:00 2001 From: Troy Sankey Date: Fri, 26 Jul 2019 10:25:30 -0400 Subject: [PATCH 02/13] percolate python_version down to the python-executable hadoop config --- edx/analytics/tasks/launchers/remote.py | 2 ++ edx/analytics/tasks/tests/acceptance/__init__.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/edx/analytics/tasks/launchers/remote.py b/edx/analytics/tasks/launchers/remote.py index e62b90383a..c389b8e654 100755 --- a/edx/analytics/tasks/launchers/remote.py +++ b/edx/analytics/tasks/launchers/remote.py @@ -121,6 +121,8 @@ def run_task_playbook(inventory, arguments, uid): if arguments.workflow_profiler: env_vars['WORKFLOW_PROFILER'] = arguments.workflow_profiler env_vars['WORKFLOW_PROFILER_PATH'] = log_dir + if arguments.python_version: + env_vars['HADOOP_PYTHON_EXECUTABLE'] = arguments.python_version env_var_string = ' '.join('{0}={1}'.format(k, v) for k, v in env_vars.iteritems()) diff --git a/edx/analytics/tasks/tests/acceptance/__init__.py b/edx/analytics/tasks/tests/acceptance/__init__.py index 50b450f19f..84bb6980cc 100644 --- a/edx/analytics/tasks/tests/acceptance/__init__.py +++ b/edx/analytics/tasks/tests/acceptance/__init__.py @@ -209,6 +209,9 @@ def setUp(self): self.warehouse_path = url_path_join(self.test_root, 'warehouse') self.edx_rest_api_cache_root = url_path_join(self.test_src, 'edx-rest-api-cache') task_config_override = { + 'hadoop': { + 'python-executable': os.environ('HADOOP_PYTHON_EXECUTABLE', '/usr/bin/python') + } 'hive': { 'database': database_name, 'warehouse_path': self.warehouse_path From 803501b6177118c25f53bc6979cabd5199fa835e Mon Sep 17 00:00:00 2001 From: brianhw Date: Fri, 26 Jul 2019 12:31:26 -0400 Subject: [PATCH 03/13] Brian/de 1589 py3 in finance tasks (#750) Change default for now to python 3.6, for testing. Also cast INT to STRING before UNION in Hive code. Pass 'future' package to all remote nodes. Fix creation of Vertica marker tables in Python 3. Convert OrderTransactionRecord from bytes. --- edx/analytics/tasks/launchers/local.py | 22 +++++++++++-------- edx/analytics/tasks/util/vertica_target.py | 3 ++- .../warehouse/financial/orders_import.py | 4 ++-- .../tasks/warehouse/financial/reconcile.py | 2 +- share/task.yml | 2 +- 5 files changed, 19 insertions(+), 14 deletions(-) diff --git a/edx/analytics/tasks/launchers/local.py b/edx/analytics/tasks/launchers/local.py index a61d179c0a..532a59d968 100644 --- a/edx/analytics/tasks/launchers/local.py +++ b/edx/analytics/tasks/launchers/local.py @@ -20,7 +20,8 @@ import certifi import chardet import ciso8601 -import filechunkio +import __future__ +import future import idna import luigi import luigi.configuration @@ -35,9 +36,11 @@ import urllib3 cjson, ujson = None, None try: - import ujson + import ujson + ujson_found = True except ImportError: - import cjson + import cjson + ujson_found = False import edx.analytics.tasks @@ -98,18 +101,19 @@ def main(): # - edx.analytics.tasks is used to load the pipeline code, since we cannot trust all will be loaded automatically. # - boto is used for all direct interactions with s3. # - cjson/ujson is used for all parsing event logs. 
- # - filechunkio is used for multipart uploads of large files to s3. # - opaque_keys is used to interpret serialized course_ids # - opaque_keys extensions: ccx_keys # - dependencies of opaque_keys: bson, stevedore, six # - requests has several dependencies: # - chardet, urllib3, certifi, idna luigi.contrib.hadoop.attach(edx.analytics.tasks) - if cjson: - luigi.contrib.hadoop.attach(cjson) - if ujson: + if ujson_found: luigi.contrib.hadoop.attach(ujson) - luigi.contrib.hadoop.attach(boto, filechunkio, opaque_keys, bson, stevedore, six, ciso8601, chardet, urllib3, certifi, idna, requests, pytz) + else: + luigi.contrib.hadoop.attach(cjson) + luigi.contrib.hadoop.attach(boto, opaque_keys, bson, stevedore, six, ciso8601, chardet, urllib3, certifi, idna, requests, pytz) + # Try to get this to work with Python3 as well: + luigi.contrib.hadoop.attach(__future__, future) if configuration.getboolean('ccx', 'enabled', default=False): import ccx_keys @@ -184,7 +188,7 @@ def output_dependency_tree(cmdline_args): """Print out a tree representation of the dependencies of the given task.""" with luigi.cmdline_parser.CmdlineParser.global_instance(cmdline_args) as command_parser: task = command_parser.get_task_obj() - print print_dependency_tree(task) + print(print_dependency_tree(task)) if __name__ == '__main__': diff --git a/edx/analytics/tasks/util/vertica_target.py b/edx/analytics/tasks/util/vertica_target.py index 6d72978403..743273f9c6 100644 --- a/edx/analytics/tasks/util/vertica_target.py +++ b/edx/analytics/tasks/util/vertica_target.py @@ -146,7 +146,8 @@ def create_marker_table(self): """.format(marker_schema=self.marker_schema, marker_table=self.marker_table) ) except vertica_python.errors.QueryError as err: - if 'Sqlstate: 42710' in err.args[0]: # This Sqlstate will appear if the marker table already exists. + # Sqlstate 42710 will appear if the marker table already exists. + if 'Sqlstate:' in err.args[0] and '42710' in err.args[0]: pass else: raise diff --git a/edx/analytics/tasks/warehouse/financial/orders_import.py b/edx/analytics/tasks/warehouse/financial/orders_import.py index c723525d05..6ad2504419 100644 --- a/edx/analytics/tasks/warehouse/financial/orders_import.py +++ b/edx/analytics/tasks/warehouse/financial/orders_import.py @@ -299,7 +299,7 @@ def insert_query(self): -- the complete line item quantity and amount IF(oi.status = 'refunded', oi.qty * oi.unit_cost, NULL) AS refunded_amount, IF(oi.status = 'refunded', oi.qty, NULL) AS refunded_quantity, - oi.order_id AS payment_ref_id, + CAST(oi.order_id AS STRING) AS payment_ref_id, -- The partner short code is extracted from the course ID during order reconciliation. '' AS partner_short_code, @@ -613,7 +613,7 @@ def insert_query(self): -- the complete line item quantity and amount IF(oi.status = 'refunded', oi.qty * oi.unit_cost, NULL) AS refunded_amount, IF(oi.status = 'refunded', oi.qty, NULL) AS refunded_quantity, - oi.order_id AS payment_ref_id, + CAST(oi.order_id AS STRING) AS payment_ref_id, -- The partner short code is extracted from the course ID during order reconciliation. 
'' AS partner_short_code, diff --git a/edx/analytics/tasks/warehouse/financial/reconcile.py b/edx/analytics/tasks/warehouse/financial/reconcile.py index 80a59678d8..afc33b40af 100644 --- a/edx/analytics/tasks/warehouse/financial/reconcile.py +++ b/edx/analytics/tasks/warehouse/financial/reconcile.py @@ -904,7 +904,7 @@ def run(self): # first load all records in memory so that we can sort them for record_str in self.input().open('r'): - record = OrderTransactionRecord.from_job_output(record_str) + record = OrderTransactionRecord.from_job_output(record_str.decode('utf8')) if record.transaction_date is not None and record.transaction_date != '': # pylint: disable=no-member all_records.append(record) diff --git a/share/task.yml b/share/task.yml index 74e449016e..e49260c440 100644 --- a/share/task.yml +++ b/share/task.yml @@ -27,7 +27,7 @@ - working_dir: "{{ root_data_dir }}/{{ uuid }}" - log_dir: "{{ root_log_dir }}/{{ uuid}}" - working_venv_dir: "{{ working_dir }}/venv" - - virtualenv_python: "/usr/bin/python2.7" + - virtualenv_python: "/usr/bin/python3.6" - virtualenv_extra_args: '' - git_servers: # Analytics repositories are currently hosted on github. From 5ff56f81dd015b9281843223e2be683d38200d5a Mon Sep 17 00:00:00 2001 From: Troy Sankey Date: Fri, 26 Jul 2019 12:59:35 -0400 Subject: [PATCH 04/13] fixup syntax --- edx/analytics/tasks/tests/acceptance/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edx/analytics/tasks/tests/acceptance/__init__.py b/edx/analytics/tasks/tests/acceptance/__init__.py index 84bb6980cc..66777a24e5 100644 --- a/edx/analytics/tasks/tests/acceptance/__init__.py +++ b/edx/analytics/tasks/tests/acceptance/__init__.py @@ -211,7 +211,7 @@ def setUp(self): task_config_override = { 'hadoop': { 'python-executable': os.environ('HADOOP_PYTHON_EXECUTABLE', '/usr/bin/python') - } + }, 'hive': { 'database': database_name, 'warehouse_path': self.warehouse_path From 63a359114cd62020f9dff304058a586b5434411f Mon Sep 17 00:00:00 2001 From: Troy Sankey Date: Fri, 26 Jul 2019 13:43:03 -0400 Subject: [PATCH 05/13] fixup: use os.getenv --- edx/analytics/tasks/tests/acceptance/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edx/analytics/tasks/tests/acceptance/__init__.py b/edx/analytics/tasks/tests/acceptance/__init__.py index 66777a24e5..94b16796da 100644 --- a/edx/analytics/tasks/tests/acceptance/__init__.py +++ b/edx/analytics/tasks/tests/acceptance/__init__.py @@ -210,7 +210,7 @@ def setUp(self): self.edx_rest_api_cache_root = url_path_join(self.test_src, 'edx-rest-api-cache') task_config_override = { 'hadoop': { - 'python-executable': os.environ('HADOOP_PYTHON_EXECUTABLE', '/usr/bin/python') + 'python-executable': os.getenv('HADOOP_PYTHON_EXECUTABLE', '/usr/bin/python') }, 'hive': { 'database': database_name, From e34ab6dab68873d858ad4d1e77aed567b3de7354 Mon Sep 17 00:00:00 2001 From: Troy Sankey Date: Fri, 26 Jul 2019 15:34:52 -0400 Subject: [PATCH 06/13] fixup: override virtualenv_python correctly in remote-task --- edx/analytics/tasks/launchers/remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edx/analytics/tasks/launchers/remote.py b/edx/analytics/tasks/launchers/remote.py index c389b8e654..e8ac235d1a 100755 --- a/edx/analytics/tasks/launchers/remote.py +++ b/edx/analytics/tasks/launchers/remote.py @@ -195,7 +195,7 @@ def convert_args_to_extra_vars(arguments, uid): if arguments.virtualenv_extra_args: extra_vars['virtualenv_extra_args'] = arguments.virtualenv_extra_args if 
arguments.python_version: - extra_vars['python_version'] = arguments.python_version + extra_vars['virtualenv_python'] = arguments.python_version if arguments.package: extra_vars['packages'] = arguments.package From 6fda690e6c6d3567a8c47fc2200e0fe7bb105f67 Mon Sep 17 00:00:00 2001 From: Brian Wilson Date: Fri, 26 Jul 2019 13:51:53 -0400 Subject: [PATCH 07/13] revert ansible default to python. --- share/task.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/task.yml b/share/task.yml index e49260c440..705b44d1ac 100644 --- a/share/task.yml +++ b/share/task.yml @@ -27,7 +27,7 @@ - working_dir: "{{ root_data_dir }}/{{ uuid }}" - log_dir: "{{ root_log_dir }}/{{ uuid}}" - working_venv_dir: "{{ working_dir }}/venv" - - virtualenv_python: "/usr/bin/python3.6" + - virtualenv_python: "/usr/bin/python" - virtualenv_extra_args: '' - git_servers: # Analytics repositories are currently hosted on github. From 594a291e6b84310dcad642db5e65e5161dd477a9 Mon Sep 17 00:00:00 2001 From: Brian Wilson Date: Sat, 27 Jul 2019 01:34:00 -0400 Subject: [PATCH 08/13] Get python_executable into config. --- edx/analytics/tasks/tests/acceptance/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/edx/analytics/tasks/tests/acceptance/__init__.py b/edx/analytics/tasks/tests/acceptance/__init__.py index 94b16796da..ba9ac01528 100644 --- a/edx/analytics/tasks/tests/acceptance/__init__.py +++ b/edx/analytics/tasks/tests/acceptance/__init__.py @@ -208,9 +208,11 @@ def setUp(self): elasticsearch_alias = 'alias_test_' + self.identifier self.warehouse_path = url_path_join(self.test_root, 'warehouse') self.edx_rest_api_cache_root = url_path_join(self.test_src, 'edx-rest-api-cache') + # Use config directly, rather than os.getenv('HADOOP_PYTHON_EXECUTABLE', '/usr/bin/python') + python_executable = self.config.get('python_version', '/usr/bin/python') task_config_override = { 'hadoop': { - 'python-executable': os.getenv('HADOOP_PYTHON_EXECUTABLE', '/usr/bin/python') + 'python-executable': python_executable, }, 'hive': { 'database': database_name, From 98b69aff63e2bf00da1c7ed91772a6d1e826fc55 Mon Sep 17 00:00:00 2001 From: Troy Sankey Date: Mon, 29 Jul 2019 10:42:32 -0400 Subject: [PATCH 09/13] Use six.moves instead of future's install_aliases() --- edx/analytics/tasks/common/bigquery_load.py | 4 +--- edx/analytics/tasks/export/obfuscation.py | 7 ++----- edx/analytics/tasks/insights/tests/test_course_blocks.py | 5 +---- edx/analytics/tasks/insights/tests/test_course_list.py | 9 ++++----- edx/analytics/tasks/insights/video.py | 5 +---- edx/analytics/tasks/launchers/remote.py | 5 +---- .../tasks/tests/acceptance/test_database_export.py | 6 +----- edx/analytics/tasks/util/s3_util.py | 5 +---- edx/analytics/tasks/util/url.py | 4 +--- .../tasks/warehouse/lms_courseware_link_clicked.py | 5 +---- 10 files changed, 14 insertions(+), 41 deletions(-) diff --git a/edx/analytics/tasks/common/bigquery_load.py b/edx/analytics/tasks/common/bigquery_load.py index a05de7cc42..83334b166e 100644 --- a/edx/analytics/tasks/common/bigquery_load.py +++ b/edx/analytics/tasks/common/bigquery_load.py @@ -1,6 +1,4 @@ from __future__ import absolute_import -from future.standard_library import install_aliases -install_aliases() import json import logging @@ -8,7 +6,7 @@ import subprocess import tempfile import time -from urllib.parse import urlparse +from six.moves.urllib.parse import urlparse import luigi diff --git a/edx/analytics/tasks/export/obfuscation.py b/edx/analytics/tasks/export/obfuscation.py index 
2a3a653e94..8062ee1d69 100644 --- a/edx/analytics/tasks/export/obfuscation.py +++ b/edx/analytics/tasks/export/obfuscation.py @@ -1,13 +1,10 @@ """Tasks to obfuscate course data for RDX.""" -from future.standard_library import install_aliases -install_aliases() - import errno import json import logging import os import tarfile -from urllib.parse import urlparse +from six.moves.urllib.parse import urlparse import luigi @@ -128,7 +125,7 @@ def run(self): for target in path_task.output(): with target.open('r') as input_file: # Get path without urlscheme. - course_files_path = urlparse.urlparse(self.course_files_url).path + course_files_path = urlparse(self.course_files_url).path # Calculates target's relative path to course_files_path by getting the substring that # occurs after course_files_path substring in target's path. # Needed as target.path returns path with urlscheme for s3target & without for hdfstarget. diff --git a/edx/analytics/tasks/insights/tests/test_course_blocks.py b/edx/analytics/tasks/insights/tests/test_course_blocks.py index 93f39ad176..a7ab936884 100644 --- a/edx/analytics/tasks/insights/tests/test_course_blocks.py +++ b/edx/analytics/tasks/insights/tests/test_course_blocks.py @@ -1,14 +1,11 @@ """Test course blocks tasks.""" -from future.standard_library import install_aliases -install_aliases() - import json import logging import os import shutil import tempfile from unittest import TestCase -from urllib.parse import urlencode +from six.moves.urllib.parse import urlencode import httpretty from ddt import data, ddt, unpack diff --git a/edx/analytics/tasks/insights/tests/test_course_list.py b/edx/analytics/tasks/insights/tests/test_course_list.py index 0f879c97c4..0f96056b48 100644 --- a/edx/analytics/tasks/insights/tests/test_course_list.py +++ b/edx/analytics/tasks/insights/tests/test_course_list.py @@ -1,7 +1,6 @@ -"""Test course list tasks.""" -from future.standard_library import install_aliases -install_aliases() - +""" +Test course list tasks. 
+""" import json import logging import os @@ -9,7 +8,7 @@ import tempfile from datetime import datetime from unittest import TestCase -from urllib.parse import urlencode +from six.moves.urllib.parse import urlencode import httpretty from ddt import data, ddt, unpack diff --git a/edx/analytics/tasks/insights/video.py b/edx/analytics/tasks/insights/video.py index 26b9705178..01bccede87 100644 --- a/edx/analytics/tasks/insights/video.py +++ b/edx/analytics/tasks/insights/video.py @@ -1,14 +1,11 @@ """Tasks for aggregating statistics about video viewing.""" -from future.standard_library import install_aliases -install_aliases() - import datetime import json import logging import math import re import textwrap -from urllib.request import urlopen +from six.moves.urllib.request import urlopen from collections import namedtuple import ciso8601 diff --git a/edx/analytics/tasks/launchers/remote.py b/edx/analytics/tasks/launchers/remote.py index e8ac235d1a..6c8e8bc525 100755 --- a/edx/analytics/tasks/launchers/remote.py +++ b/edx/analytics/tasks/launchers/remote.py @@ -1,8 +1,5 @@ #!/usr/bin/env python """Execute tasks on a remote EMR cluster.""" -from future.standard_library import install_aliases -install_aliases() - import argparse import json import os @@ -10,7 +7,7 @@ import sys import uuid from subprocess import PIPE, Popen -from urllib.parse import urlparse, parse_qsl +from six.moves.urllib.parse import urlparse, parse_qsl STATIC_FILES_PATH = os.path.join(sys.prefix, 'share', 'edx.analytics.tasks') EC2_INVENTORY_PATH = os.path.join(STATIC_FILES_PATH, 'ec2.py') diff --git a/edx/analytics/tasks/tests/acceptance/test_database_export.py b/edx/analytics/tasks/tests/acceptance/test_database_export.py index f20e389332..aef555456c 100644 --- a/edx/analytics/tasks/tests/acceptance/test_database_export.py +++ b/edx/analytics/tasks/tests/acceptance/test_database_export.py @@ -3,9 +3,6 @@ validate user visible outputs. """ -from future.standard_library import install_aliases -install_aliases() - import datetime import logging import os @@ -13,8 +10,7 @@ import stat import tempfile import textwrap -from urllib.parse import urlparse - +from six.moves.urllib.parse import urlparse import gnupg from edx.analytics.tasks.tests.acceptance import AcceptanceTestCase, when_exporter_available diff --git a/edx/analytics/tasks/util/s3_util.py b/edx/analytics/tasks/util/s3_util.py index 7814b0ca35..40ffd4fce0 100644 --- a/edx/analytics/tasks/util/s3_util.py +++ b/edx/analytics/tasks/util/s3_util.py @@ -1,14 +1,11 @@ """ Utility methods for interacting with S3 via boto. 
""" -from future.standard_library import install_aliases -install_aliases() - import logging import os import time from fnmatch import fnmatch -from urllib.parse import urlparse, urlunparse +from six.moves.urllib.parse import urlparse, urlunparse from luigi.contrib.hdfs.format import Plain from luigi.contrib.hdfs.target import HdfsTarget diff --git a/edx/analytics/tasks/util/url.py b/edx/analytics/tasks/util/url.py index 04ae6e8e3f..94618337c4 100644 --- a/edx/analytics/tasks/util/url.py +++ b/edx/analytics/tasks/util/url.py @@ -9,13 +9,11 @@ hdfs://some/directory/ """ from __future__ import absolute_import -from future.standard_library import install_aliases -install_aliases() import logging import os import time -from urllib.parse import urlparse, urlunparse +from six.moves.urllib.parse import urlparse, urlunparse import six import luigi diff --git a/edx/analytics/tasks/warehouse/lms_courseware_link_clicked.py b/edx/analytics/tasks/warehouse/lms_courseware_link_clicked.py index 010f79623d..3cc52ed9af 100644 --- a/edx/analytics/tasks/warehouse/lms_courseware_link_clicked.py +++ b/edx/analytics/tasks/warehouse/lms_courseware_link_clicked.py @@ -1,11 +1,8 @@ """ Tasks for collecting link click data per course, per day, and uploading that data to Vertica. """ -from future.standard_library import install_aliases -install_aliases() - import logging -from urllib.parse import urlparse +from six.moves.urllib.parse import urlparse import luigi.task From abd2091986cfaf7372463c335e578cd11929177a Mon Sep 17 00:00:00 2001 From: Brian Wilson Date: Mon, 29 Jul 2019 16:57:03 -0400 Subject: [PATCH 10/13] Get cybersource and PayPal tasks running in py3. --- .../tasks/warehouse/financial/cybersource.py | 14 ++++++++------ edx/analytics/tasks/warehouse/financial/paypal.py | 14 +++++++++----- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/edx/analytics/tasks/warehouse/financial/cybersource.py b/edx/analytics/tasks/warehouse/financial/cybersource.py index ae625a2acd..3a9b3f5a35 100644 --- a/edx/analytics/tasks/warehouse/financial/cybersource.py +++ b/edx/analytics/tasks/warehouse/financial/cybersource.py @@ -86,11 +86,11 @@ def run(self): raise Exception(msg) # if there are no transactions in response, there will be no merchant id. - if self.merchant_id not in response.content and not self.is_empty_transaction_allowed: + if self.merchant_id.encode('utf8') not in response.content and not self.is_empty_transaction_allowed: raise Exception('No transactions to process.') with self.output().open('w') as output_file: - output_file.write(response.content) + output_file.write(response.content.decode('utf8')) def output(self): """Output is in the form {output_root}/cybersource/{CCYY-mm}/cybersource_{merchant}_{CCYYmmdd}.csv""" @@ -230,10 +230,12 @@ def __init__(self, *args, **kwargs): path_targets = PathSetTask([path], include=[file_pattern], include_zero_length=True).output() paths = list(set([os.path.dirname(target.path) for target in path_targets])) dates = [path.rsplit('/', 2)[-1] for path in paths] - latest_date = sorted(dates)[-1] - - latest_completion_date = datetime.datetime.strptime(latest_date, "dt=%Y-%m-%d").date() - run_date = latest_completion_date + datetime.timedelta(days=1) + if dates: + latest_date = sorted(dates)[-1] + latest_completion_date = datetime.datetime.strptime(latest_date, "dt=%Y-%m-%d").date() + run_date = latest_completion_date + datetime.timedelta(days=1) + else: + run_date = self.interval_start # Limit intervals to merchant account close date(if any). 
if self.merchant_close_date: diff --git a/edx/analytics/tasks/warehouse/financial/paypal.py b/edx/analytics/tasks/warehouse/financial/paypal.py index 5531f39373..b6fe96be57 100644 --- a/edx/analytics/tasks/warehouse/financial/paypal.py +++ b/edx/analytics/tasks/warehouse/financial/paypal.py @@ -661,7 +661,9 @@ def write_transaction_record(self, row, output_tsv_file): # identifier for the transaction payment_record.paypal_transaction_id, ] - output_tsv_file.write(b'\t'.join(field.encode('utf-8') for field in record) + b'\n') + # output_tsv_file.write(b'\t'.join(field.encode('utf-8') for field in record) + b'\n') + # Apparently the write wants str, not bytes. + output_tsv_file.write('\t'.join(field for field in record) + '\n') def output(self): # NOTE: both the cybersource and paypal tasks write to the payments folder @@ -714,10 +716,12 @@ def __init__(self, *args, **kwargs): path_targets = PathSetTask([path], include=['*paypal.tsv']).output() paths = list(set([os.path.dirname(target.path) for target in path_targets])) dates = [path.rsplit('/', 2)[-1] for path in paths] - latest_date = sorted(dates)[-1] - - latest_completion_date = datetime.datetime.strptime(latest_date, "dt=%Y-%m-%d").date() - run_date = latest_completion_date + datetime.timedelta(days=1) + if dates: + latest_date = sorted(dates)[-1] + latest_completion_date = datetime.datetime.strptime(latest_date, "dt=%Y-%m-%d").date() + run_date = latest_completion_date + datetime.timedelta(days=1) + else: + run_date = self.interval_start self.selection_interval = date_interval.Custom(self.interval_start, run_date) self.run_interval = date_interval.Custom(run_date, self.interval_end) From c8b255b5097e5224b9c2b9f65a1c6bf1d067983a Mon Sep 17 00:00:00 2001 From: Troy Sankey Date: Tue, 30 Jul 2019 15:27:32 -0400 Subject: [PATCH 11/13] fixup, futile attempt to fix finance acceptance tests under py2 --- edx/analytics/tasks/tests/acceptance/__init__.py | 4 +++- .../tasks/tests/acceptance/test_financial_reports.py | 4 +++- requirements/base.in | 5 +---- requirements/base.txt | 2 +- requirements/default.txt | 10 +++++----- requirements/docs.txt | 10 +++++----- requirements/test.txt | 10 +++++----- 7 files changed, 23 insertions(+), 22 deletions(-) diff --git a/edx/analytics/tasks/tests/acceptance/__init__.py b/edx/analytics/tasks/tests/acceptance/__init__.py index ba9ac01528..65862e35be 100644 --- a/edx/analytics/tasks/tests/acceptance/__init__.py +++ b/edx/analytics/tasks/tests/acceptance/__init__.py @@ -1,4 +1,5 @@ from __future__ import print_function +from __future__ import absolute_import import csv import hashlib @@ -16,6 +17,7 @@ from edx.analytics.tasks.tests.acceptance.services import db, elasticsearch_service, fs, hive, task, vertica from edx.analytics.tasks.util.s3_util import ScalableS3Client from edx.analytics.tasks.util.url import get_target_from_url, url_path_join +import six log = logging.getLogger(__name__) @@ -125,7 +127,7 @@ def as_list_param(value, escape_quotes=True): def coerce_columns_to_string(row): # Vertica response includes datatypes in some columns i-e. datetime, Decimal etc. so convert # them into string before comparison with expected output. Also a challenge with 'None' values. 
- return [unicode(x) for x in row] + return [six.text_type(x) for x in row] def read_csv_fixture_as_list(fixture_file_path): diff --git a/edx/analytics/tasks/tests/acceptance/test_financial_reports.py b/edx/analytics/tasks/tests/acceptance/test_financial_reports.py index e0120d4cf5..8bb8dc35c3 100644 --- a/edx/analytics/tasks/tests/acceptance/test_financial_reports.py +++ b/edx/analytics/tasks/tests/acceptance/test_financial_reports.py @@ -1,12 +1,14 @@ """ End to end test of the financial reporting workflow. """ +from __future__ import absolute_import import logging import os import luigi import pandas +from six.moves import map from edx.analytics.tasks.tests.acceptance import ( AcceptanceTestCase, coerce_columns_to_string, read_csv_fixture_as_list, when_vertica_available @@ -71,7 +73,7 @@ def test_end_to_end(self): )) response = cursor.fetchall() - f_orderitem_transactions = pandas.DataFrame(map(coerce_columns_to_string, response), columns=columns) + f_orderitem_transactions = pandas.DataFrame(list(map(coerce_columns_to_string, response)), columns=columns) for frame in (f_orderitem_transactions, expected): frame.sort(['payment_ref_id', 'transaction_type'], inplace=True, ascending=[True, False]) diff --git a/requirements/base.in b/requirements/base.in index 2416f4b4d9..48a45f7625 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -10,7 +10,4 @@ Jinja2 # BSD pycrypto==2.6.1 # public domain wheel==0.30.0 future # MIT - -# Add this here, not because it's needed here, but because -# 1.11.0 will collide with a dependency in default.in. -six==1.10.0 +six # MIT diff --git a/requirements/base.txt b/requirements/base.txt index ff9153ecf1..80a18852af 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -20,5 +20,5 @@ markupsafe==1.1.1 # via jinja2 pycparser==2.19 # via cffi pycrypto==2.6.1 pyyaml==5.1.1 # via ansible -six==1.10.0 +six==1.12.0 wheel==0.30.0 diff --git a/requirements/default.txt b/requirements/default.txt index 50a5616b0e..a56fcebfa2 100644 --- a/requirements/default.txt +++ b/requirements/default.txt @@ -17,9 +17,9 @@ azure-storage-blob==2.0.1 # via snowflake-connector-python azure-storage-common==2.0.0 # via azure-storage-blob azure-storage-nspkg==3.1.0 # via azure-storage-common backports-abc==0.5 # via tornado -boto3==1.9.196 +boto3==1.9.198 boto==2.48.0 -botocore==1.12.196 # via boto3, s3transfer, snowflake-connector-python +botocore==1.12.198 # via boto3, s3transfer, snowflake-connector-python cachetools==3.1.1 # via google-auth certifi==2019.6.16 # via requests, snowflake-connector-python, tornado cffi==1.12.3 @@ -46,7 +46,7 @@ googleapis-common-protos==1.6.0 # via google-cloud-core graphitesend==0.10.0 gspread==3.1.0 html5lib==1.0b3 -httplib2==0.13.0 # via google-api-python-client, google-auth-httplib2 +httplib2==0.13.1 # via google-api-python-client, google-auth-httplib2 idna==2.6 # via requests, snowflake-connector-python ijson==2.4 # via snowflake-connector-python ipaddress==1.0.22 @@ -79,8 +79,8 @@ requests==2.18.4 rsa==4.0 # via google-auth s3transfer==0.2.1 # via boto3 singledispatch==3.4.0.3 # via tornado -six==1.10.0 -snowflake-connector-python==1.8.5 +six==1.12.0 +snowflake-connector-python==1.8.6 stevedore==1.19.1 tornado==4.5.3 ua-parser==0.3.6 diff --git a/requirements/docs.txt b/requirements/docs.txt index d6945f5d47..55c117facb 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -19,9 +19,9 @@ azure-storage-common==2.0.0 azure-storage-nspkg==3.1.0 babel==2.7.0 # via sphinx backports-abc==0.5 -boto3==1.9.196 +boto3==1.9.198 
boto==2.48.0 -botocore==1.12.196 +botocore==1.12.198 cachetools==3.1.1 certifi==2019.6.16 cffi==1.12.3 @@ -48,7 +48,7 @@ googleapis-common-protos==1.6.0 graphitesend==0.10.0 gspread==3.1.0 html5lib==1.0b3 -httplib2==0.13.0 +httplib2==0.13.1 idna==2.6 ijson==2.4 imagesize==1.1.0 # via sphinx @@ -83,9 +83,9 @@ requests==2.18.4 rsa==4.0 s3transfer==0.2.1 singledispatch==3.4.0.3 -six==1.10.0 +six==1.12.0 snowballstemmer==1.9.0 # via sphinx -snowflake-connector-python==1.8.5 +snowflake-connector-python==1.8.6 sphinx==1.5.1 stevedore==1.19.1 tornado==4.5.3 diff --git a/requirements/test.txt b/requirements/test.txt index e87169766a..a454103c90 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -19,9 +19,9 @@ azure-storage-common==2.0.0 azure-storage-nspkg==3.1.0 backports-abc==0.5 backports.functools-lru-cache==1.5 # via isort, pylint -boto3==1.9.196 +boto3==1.9.198 boto==2.48.0 -botocore==1.12.196 +botocore==1.12.198 cachetools==3.1.1 certifi==2019.6.16 cffi==1.12.3 @@ -54,7 +54,7 @@ googleapis-common-protos==1.6.0 graphitesend==0.10.0 gspread==3.1.0 html5lib==1.0b3 -httplib2==0.13.0 +httplib2==0.13.1 httpretty==0.8.14 idna==2.6 ijson==2.4 @@ -100,8 +100,8 @@ requests==2.18.4 rsa==4.0 s3transfer==0.2.1 singledispatch==3.4.0.3 -six==1.10.0 -snowflake-connector-python==1.8.5 +six==1.12.0 +snowflake-connector-python==1.8.6 stevedore==1.19.1 tornado==4.5.3 ua-parser==0.3.6 From e111a6c82619fa999a9e343582be80cc5b3d91fa Mon Sep 17 00:00:00 2001 From: Brian Wilson Date: Tue, 30 Jul 2019 17:30:49 -0400 Subject: [PATCH 12/13] Enable verbose for finance-related Sqoop imports. --- .../tasks/warehouse/financial/reconcile.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/edx/analytics/tasks/warehouse/financial/reconcile.py b/edx/analytics/tasks/warehouse/financial/reconcile.py index afc33b40af..12afbced8c 100644 --- a/edx/analytics/tasks/warehouse/financial/reconcile.py +++ b/edx/analytics/tasks/warehouse/financial/reconcile.py @@ -154,7 +154,8 @@ def __init__(self, *args, **kwargs): def requires(self): yield ( OrderTableTask( - import_date=self.import_date + import_date=self.import_date, + verbose=True, ), PaymentTask( import_date=self.import_date, @@ -1153,14 +1154,16 @@ class ReconcileFullOrdersAndTransactionsTask(ReconcileOrdersAndTransactionsTask) def requires(self): yield ( FullOttoOrderTableTask( - import_date=self.import_date + import_date=self.import_date, + verbose=True, ), FullShoppingcartOrderTableTask( - import_date=self.import_date + import_date=self.import_date, + verbose=True, ), PaymentTask( import_date=self.import_date, - is_empty_transaction_allowed=self.is_empty_transaction_allowed + is_empty_transaction_allowed=self.is_empty_transaction_allowed, ) ) @@ -1442,6 +1445,7 @@ def insert_source_task(self): return ( FullOttoOrderTableTask( import_date=self.import_date, + verbose=True, # DO NOT PASS OVERWRITE FURTHER. We mean for overwrite here # to just apply to the writing to Vertica, not to anything further upstream. # overwrite=self.overwrite, @@ -1464,6 +1468,7 @@ def insert_source_task(self): return ( FullShoppingcartOrderTableTask( import_date=self.import_date, + verbose=True, # DO NOT PASS OVERWRITE FURTHER. We mean for overwrite here # to just apply to the writing to Vertica, not to anything further upstream. 
# overwrite=self.overwrite, From d85796c284f6503f755f0ef2a9357f373b58380d Mon Sep 17 00:00:00 2001 From: Hassan Javeed Date: Thu, 18 Jul 2019 19:11:42 +0500 Subject: [PATCH 13/13] Cybersource: Replaced deprecated servlets method with REST API. --- .../tasks/warehouse/financial/cybersource.py | 93 ++++++++++++------- requirements/base.txt | 2 - requirements/default.in | 1 + requirements/default.txt | 61 ++++++++---- 4 files changed, 105 insertions(+), 52 deletions(-) diff --git a/edx/analytics/tasks/warehouse/financial/cybersource.py b/edx/analytics/tasks/warehouse/financial/cybersource.py index 3a9b3f5a35..ab7cf0637b 100644 --- a/edx/analytics/tasks/warehouse/financial/cybersource.py +++ b/edx/analytics/tasks/warehouse/financial/cybersource.py @@ -1,11 +1,14 @@ """Collect information about payments from third-party sources for financial reporting.""" from __future__ import absolute_import +import codecs import csv import datetime import logging import os +from CyberSource import ReportDownloadsApi +from CyberSource.rest import ApiException import luigi import requests from luigi import date_interval @@ -38,9 +41,8 @@ def __init__(self, *args, **kwargs): config = get_config() section_name = 'cybersource:' + self.merchant_id - self.host = config.get(section_name, 'host') - self.username = config.get(section_name, 'username') - self.password = config.get(section_name, 'password') + self.keyid = config.get(section_name, 'keyid') + self.secretkey = config.get(section_name, 'secretkey') self.interval_start = luigi.DateParameter().parse(config.get(section_name, 'interval_start')) self.merchant_close_date = None @@ -71,26 +73,45 @@ class DailyPullFromCybersourceTask(PullFromCybersourceTaskMixin, luigi.Task): # This is the table that we had been using for gathering and # storing historical Cybersource data. It adds one additional # column over the 'PaymentBatchDetailReport' format. - REPORT_NAME = 'PaymentSubmissionDetailReport' + REPORT_NAME = 'PaymentSubmissionDetailReport_Daily_Classic' REPORT_FORMAT = 'csv' + # Specify the production/live environment for report downloads. + ENVIRONMENT = 'CyberSource.Environment.PRODUCTION' def requires(self): pass def run(self): self.remove_output_on_overwrite() - auth = (self.username, self.password) - response = requests.get(self.query_url, auth=auth) - if response.status_code != requests.codes.ok: # pylint: disable=no-member - msg = "Encountered status {} on request to Cybersource for {}".format(response.status_code, self.run_date) - raise Exception(msg) + merchant_config = { + 'authentication_type': 'http_signature', + 'merchantid': self.merchant_id, + 'run_environment': self.ENVIRONMENT, + 'merchant_keyid': self.keyid, + 'merchant_secretkey': self.secretkey, + 'enable_log': False, + } + report_download_obj = ReportDownloadsApi(merchant_config) + + # With the REST API, to get the same report we got with the servlets method, we need to specify `date + 1`. + batch_date = self.run_date + datetime.timedelta(days=1) + try: + return_data, status, body = report_download_obj.download_report( + batch_date.isoformat(), + self.REPORT_NAME, + organization_id=self.merchant_id + ) + except ApiException as e: + log.error("Exception while downloading report for date(%s): %s", batch_date.isoformat(), e) + raise - # if there are no transactions in response, there will be no merchant id.
- if self.merchant_id.encode('utf8') not in response.content and not self.is_empty_transaction_allowed: - raise Exception('No transactions to process.') + # The Cybersource REST API returns a status of 200 (OK), 400 (Invalid request), or 404 (No reports found). + if status != requests.codes.ok: + msg = "Encountered status {} on request to Cybersource for {}".format(status, batch_date) + raise Exception(msg) with self.output().open('w') as output_file: - output_file.write(response.content.decode('utf8')) + output_file.write(body.encode('utf-8')) def output(self): """Output is in the form {output_root}/cybersource/{CCYY-mm}/cybersource_{merchant}_{CCYYmmdd}.csv""" @@ -104,22 +125,11 @@ def output(self): url_with_filename = url_path_join(self.output_root, "cybersource", month_year_string, filename) return get_target_from_url(url_with_filename) - @property - def query_url(self): - """Generate the url to download a report from a Cybersource account.""" - slashified_date = self.run_date.strftime('%Y/%m/%d') # pylint: disable=no-member - url = 'https://{host}/DownloadReport/{date}/{merchant_id}/{report_name}.{report_format}'.format( - host=self.host, - date=slashified_date, - merchant_id=self.merchant_id, - report_name=self.REPORT_NAME, - report_format=self.REPORT_FORMAT - ) - return url - TRANSACTION_TYPE_MAP = { 'ics_bill': 'sale', + 'ics_auth,ics_bill': 'sale', + 'ics_bill, ics_auth': 'sale', 'ics_credit': 'refund' } @@ -157,16 +167,25 @@ def run(self): # Skip the first line, which provides information about the source # of the file. The second line should define the column headings. _download_header = input_file.readline() - reader = csv.DictReader(input_file, delimiter=',') + reader = csv.DictReader(codecs.iterdecode(input_file, 'utf-8'), delimiter=',') with self.output().open('w') as output_file: for row in reader: # Output most of the fields from the original source. # The values not included are: # batch_id: CyberSource batch in which the transaction was sent. # payment_processor: code for organization that processes the payment. + + # The Cybersource servlets reports used to return a negative amount in the case of a refund. + # However, the REST API does not, so we manually prepend a minus sign (-). + transaction_type = self.get_transaction_type(row['ics_applications']) + if transaction_type == 'refund' and float(row['amount']) > 0: + row_amount = '-' + row['amount'] + else: + row_amount = row['amount'] + result = [ - # Date - row['batch_date'], + # Date (the REST API returns a full timestamp, so keep only the date portion). + row['batch_date'].split('T')[0], # Name of system. 'cybersource', # CyberSource merchant ID used for the transaction. @@ -177,19 +196,19 @@ def run(self): row['merchant_ref_number'], # ISO currency code used for the transaction. row['currency'], - row['amount'], + row_amount, # Transaction fee r'\N', - TRANSACTION_TYPE_MAP[row['transaction_type']], + transaction_type, # We currently only process credit card transactions with Cybersource 'credit_card', # Type of credit card used - row['payment_method'].lower().replace(' ', '_'), + row['payment_type'].lower().replace(' ', '_'), # Identifier for the transaction.
row['request_id'], ] - output_file.write('\t'.join(result)) - output_file.write('\n') + output_file.write(b'\t'.join(field.encode('utf-8') for field in result)) + output_file.write(b'\n') def output(self): """ @@ -203,6 +222,12 @@ def output(self): url_with_filename = url_path_join(self.output_root, "payments", partition_path_spec, filename) return get_target_from_url(url_with_filename) + def get_transaction_type(self, ics_applications): + if 'ics_bill' in ics_applications: + return 'sale' + elif 'ics_credit' in ics_applications: + return 'refund' + class IntervalPullFromCybersourceTask(PullFromCybersourceTaskMixin, WarehouseMixin, luigi.WrapperTask): """Determines a set of dates to pull, and requires them.""" diff --git a/requirements/base.txt b/requirements/base.txt index 80a18852af..237fb7291e 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -12,9 +12,7 @@ boto==2.48.0 cffi==1.12.3 # via cryptography cryptography==2.7 # via ansible ecdsa==0.13 -enum34==1.1.6 # via cryptography future==0.17.1 -ipaddress==1.0.22 # via cryptography jinja2==2.10.1 markupsafe==1.1.1 # via jinja2 pycparser==2.19 # via cffi diff --git a/requirements/default.in b/requirements/default.in index 4823aee915..a07eaa39d1 100644 --- a/requirements/default.in +++ b/requirements/default.in @@ -38,6 +38,7 @@ vertica-python==0.6.11 # MIT yarn-api-client==0.2.3 # BSD snowflake-connector-python futures; python_version == "2.7" +cybersource-rest-client-python==0.0.8 -e git+https://github.com/edx/luigi.git@eb45bcc52243de11b2b16a81229ac584fe1e601b#egg=luigi # Apache License 2.0 diff --git a/requirements/default.txt b/requirements/default.txt index a56fcebfa2..2b7bfb5b2d 100644 --- a/requirements/default.txt +++ b/requirements/default.txt @@ -11,31 +11,39 @@ ansible==2.8.3 argparse==1.2.1 asn1crypto==0.24.0 +attrs==19.1.0 # via jsonschema azure-common==1.1.23 # via azure-storage-blob, azure-storage-common, snowflake-connector-python -azure-nspkg==3.0.2 # via azure-common, azure-storage-nspkg azure-storage-blob==2.0.1 # via snowflake-connector-python azure-storage-common==2.0.0 # via azure-storage-blob azure-storage-nspkg==3.1.0 # via azure-storage-common backports-abc==0.5 # via tornado +bcrypt==3.1.7 # via cybersource-rest-client-python, paramiko boto3==1.9.198 boto==2.48.0 botocore==1.12.198 # via boto3, s3transfer, snowflake-connector-python cachetools==3.1.1 # via google-auth -certifi==2019.6.16 # via requests, snowflake-connector-python, tornado +certifi==2019.6.16 # via cybersource-rest-client-python, requests, snowflake-connector-python cffi==1.12.3 -chardet==3.0.4 # via requests +chardet==3.0.4 # via cybersource-rest-client-python, requests ciso8601==1.0.3 +configparser==3.7.4 # via cybersource-rest-client-python +coverage==4.5.3 # via cybersource-rest-client-python +crypto==1.4.1 # via cybersource-rest-client-python cryptography==2.7 +cybersource-rest-client-python==0.0.8 +datetime==4.3 # via cybersource-rest-client-python distlib==0.2.2 docutils==0.14 # via botocore, python-daemon ecdsa==0.13 edx-ccx-keys==0.2.1 edx-opaque-keys==0.4 elasticsearch==1.7.0 -enum34==1.1.6 +enum34==1.1.6 # via cybersource-rest-client-python +extras==1.0.0 # via cybersource-rest-client-python, python-subunit, testtools filechunkio==1.8 +fixtures==3.0.0 # via cybersource-rest-client-python, testtools +funcsigs==1.0.2 # via cybersource-rest-client-python future==0.17.1 -futures==3.3.0 ; python_version == "2.7" google-api-python-client==1.7.7 google-auth-httplib2==0.0.3 # via google-api-python-client google-auth==1.6.3 # via 
google-api-python-client, google-auth-httplib2, google-cloud-bigquery, google-cloud-core @@ -47,49 +55,70 @@ graphitesend==0.10.0 gspread==3.1.0 html5lib==1.0b3 httplib2==0.13.1 # via google-api-python-client, google-auth-httplib2 -idna==2.6 # via requests, snowflake-connector-python +idna==2.6 # via cybersource-rest-client-python, requests, snowflake-connector-python ijson==2.4 # via snowflake-connector-python -ipaddress==1.0.22 +ipaddress==1.0.22 # via cybersource-rest-client-python isoweek==1.3.3 jinja2==2.10.1 jmespath==0.9.4 # via boto3, botocore +jsonschema==3.0.1 # via cybersource-rest-client-python +linecache2==1.0.0 # via cybersource-rest-client-python, traceback2 lockfile==0.12.2 # via python-daemon +logger==1.4 # via cybersource-rest-client-python markupsafe==1.1.1 +naked==0.1.31 # via crypto, cybersource-rest-client-python +nose==1.3.7 # via cybersource-rest-client-python numpy==1.11.3 +paramiko==2.6.0 # via cybersource-rest-client-python paypalrestsdk==1.9.0 -pbr==5.4.1 # via stevedore +pbr==5.4.1 # via cybersource-rest-client-python, fixtures, stevedore, testtools protobuf==3.9.0 # via google-cloud-core, googleapis-common-protos psycopg2==2.8.3 -pyasn1-modules==0.2.5 # via google-auth, snowflake-connector-python -pyasn1==0.4.5 # via pyasn1-modules, rsa, snowflake-connector-python +pyasn1-modules==0.2.5 # via google-auth +pyasn1==0.4.5 # via cybersource-rest-client-python, pyasn1-modules, rsa, x509 pycparser==2.19 pycrypto==2.6.1 -pycryptodomex==3.8.2 # via snowflake-connector-python +pycryptodome==3.8.2 # via cybersource-rest-client-python +pycryptodomex==3.8.2 # via cybersource-rest-client-python, snowflake-connector-python pygeoip==0.3.2 -pyjwt==1.7.1 # via snowflake-connector-python +pyjwt==1.7.1 # via cybersource-rest-client-python, snowflake-connector-python pymongo==3.8.0 # via edx-opaque-keys -pyopenssl==19.0.0 # via paypalrestsdk, snowflake-connector-python -python-cjson==1.2.1 ; python_version <= "2.7" +pynacl==1.3.0 # via cybersource-rest-client-python, paramiko +pyopenssl==19.0.0 # via cybersource-rest-client-python, paypalrestsdk, snowflake-connector-python +pypi==2.1 # via cybersource-rest-client-python +pyrsistent==0.15.4 # via jsonschema python-daemon==2.2.3 python-dateutil==2.6.1 python-gnupg==0.3.9 +python-mimeparse==1.6.0 # via cybersource-rest-client-python, testtools +python-subunit==1.3.0 # via cybersource-rest-client-python +python-toolbox==1.0.7 # via cybersource-rest-client-python pytz==2017.3 pyyaml==5.1.1 requests==2.18.4 -rsa==4.0 # via google-auth +rsa==4.0 # via cybersource-rest-client-python, google-auth s3transfer==0.2.1 # via boto3 +shellescape==3.4.1 # via crypto, cybersource-rest-client-python singledispatch==3.4.0.3 # via tornado six==1.12.0 snowflake-connector-python==1.8.6 stevedore==1.19.1 +testtools==2.3.0 # via cybersource-rest-client-python, fixtures, python-subunit tornado==4.5.3 +traceback2==1.4.0 # via cybersource-rest-client-python, testtools, unittest2 +typing==3.7.4 # via cybersource-rest-client-python ua-parser==0.3.6 +ujson==1.35 ; python_version > "2.7" +unittest2==1.1.0 # via testtools uritemplate==3.0.0 # via google-api-python-client urllib3==1.22 user-agents==0.3.2 vertica-python==0.6.11 wheel==0.30.0 +x509==0.1 # via cybersource-rest-client-python yarn-api-client==0.2.3 +zope.interface==4.6.0 # via cybersource-rest-client-python, datetime # The following packages are considered to be unsafe in a requirements file: -# setuptools==41.0.1 # via google-cloud-core, protobuf, python-daemon +# pip==19.2.1 # via 
cybersource-rest-client-python +# setuptools==41.0.1 # via cybersource-rest-client-python, google-cloud-core, jsonschema, protobuf, python-daemon, python-toolbox, zope.interface