diff --git a/ckanext-hdx_crisis/ckanext/hdx_crisis/controllers/custom_location_controller.py b/ckanext-hdx_crisis/ckanext/hdx_crisis/controllers/custom_location_controller.py index 53f07cd233..6f55706fb4 100644 --- a/ckanext-hdx_crisis/ckanext/hdx_crisis/controllers/custom_location_controller.py +++ b/ckanext-hdx_crisis/ckanext/hdx_crisis/controllers/custom_location_controller.py @@ -96,6 +96,7 @@ def generate_template_data(self, id, group_info, custom_dict): template_data = { 'data': { + 'country_id': group_info['id'], 'country_name': group_info['name'], 'country_title': group_info.get('title', group_info['name']), 'topline_chart_sections': self._create_sections(top_line_items, charts_config_data), diff --git a/ckanext-hdx_org_group/ckanext/hdx_org_group/controllers/organization_controller.py b/ckanext-hdx_org_group/ckanext/hdx_org_group/controllers/organization_controller.py index 6adc2b5477..dbf463d484 100644 --- a/ckanext-hdx_org_group/ckanext/hdx_org_group/controllers/organization_controller.py +++ b/ckanext-hdx_org_group/ckanext/hdx_org_group/controllers/organization_controller.py @@ -273,7 +273,9 @@ def edit(self, id, data=None, errors=None, error_summary=None): self._setup_template_variables(context, data, group_type=group_type) c.form = render(self._group_form(group_type), extra_vars=vars) - return render(self._edit_template(c.group.type)) + + # The extra_vars are needed here to send analytics information like org name and id + return render(self._edit_template(c.group.type), extra_vars={'data': data}) def check_access(self, action_name, data_dict=None): if data_dict is None: diff --git a/ckanext-hdx_package/ckanext/hdx_package/controllers/contribute_flow_controller.py b/ckanext-hdx_package/ckanext/hdx_package/controllers/contribute_flow_controller.py index e6d27a5e37..da1cd7a623 100644 --- a/ckanext-hdx_package/ckanext/hdx_package/controllers/contribute_flow_controller.py +++ b/ckanext-hdx_package/ckanext/hdx_package/controllers/contribute_flow_controller.py @@ -7,6 +7,8 @@ import ckan.lib.navl.dictization_functions as dict_fns import ckan.lib.helpers as h +import ckanext.hdx_package.helpers.analytics as analytics + from ckan.common import _, request, response, c from ckan.lib.search import SearchIndexError from ckan.controllers.api import CONTENT_TYPES @@ -110,8 +112,12 @@ def _abort(self, save_type, status_code, message): def _prepare_and_render(self, save_type='', data=None, errors=None, error_summary=None): save_type = save_type if save_type else '' + + analytics_dict = self._generate_analytics_data(data) + template_data = { 'data': data, + 'analytics': analytics_dict, 'errors': errors, 'error_summary': error_summary, 'aborted': False @@ -123,6 +129,20 @@ def _prepare_and_render(self, save_type='', data=None, errors=None, error_summar else: return base.render('contribute_flow/create_edit.html', extra_vars=template_data) + def _generate_analytics_data(self, data): + # in case of an edit event we populate the analytics info + analytics_dict = {} + if data and data.get('id'): + analytics_dict['is_cod'] = analytics.is_cod(data) + analytics_dict['is_indicator'] = analytics.is_indicator(data) + analytics_dict['group_names'], analytics_dict['group_ids'] = analytics.extract_locations_in_json(data) + else: + analytics_dict['is_cod'] = 'false' + analytics_dict['is_indicator'] = 'false' + analytics_dict['group_names'] = '[]' + analytics_dict['group_ids'] = '[]' + return analytics_dict + def _save_or_update(self, context, package_type=None): data_dict = {} try: diff --git a/ckanext-hdx_package/ckanext/hdx_package/controllers/dataset_controller.py b/ckanext-hdx_package/ckanext/hdx_package/controllers/dataset_controller.py index 6440df67bd..a493ef96f8 100644 --- a/ckanext-hdx_package/ckanext/hdx_package/controllers/dataset_controller.py +++ b/ckanext-hdx_package/ckanext/hdx_package/controllers/dataset_controller.py @@ -31,6 +31,8 @@ import ckan.lib.dictization.model_dictize as model_dictize import ckan.lib.search as search +import ckanext.hdx_package.helpers.analytics as analytics + from ckan.common import _, json, request, c, g, response from ckan.controllers.home import CACHE_PARAMETERS @@ -682,6 +684,12 @@ def read(self, id, format='html'): context, {'id': resource['id']}) resource['has_views'] = len(resource_views) > 0 + if helpers.is_ckan_domain(resource['url']): + resource['url'] = helpers.make_url_relative(resource['url']) + + if resource.get('perma_link') and helpers.is_ckan_domain(resource['perma_link']): + resource['perma_link'] = helpers.make_url_relative(resource['perma_link']) + # Is this an indicator? Load up graph data #c.pkg_dict['indicator'] = 1 try: @@ -704,8 +712,9 @@ def read(self, id, format='html'): template = template[:template.index('.') + 1] + format # set dataset type for google analytics - modified by HDX - c.ga_dataset_type = self._google_analytics_dataset_type(c.pkg_dict) - c.ga_location = self._google_analytics_location(c.pkg_dict) + c.analytics_is_cod = analytics.is_cod(c.pkg_dict) + c.analytics_is_indicator = analytics.is_indicator(c.pkg_dict) + c.analytics_group_names, c.analytics_group_ids = analytics.extract_locations_in_json(c.pkg_dict) # changes done for indicator act_data_dict = {'id': c.pkg_dict['id'], 'limit': 7} @@ -770,32 +779,6 @@ def read(self, id, format='html'): assert False, "We should never get here" - def _google_analytics_dataset_type(self, pkg_dict): - type = 'standard' - tags = [tag.get('name', '') for tag in pkg_dict.get('tags', [])] - - if int(pkg_dict.get('indicator', 0)) == 1: - type = 'indicator' - if 'cod' in tags: - type = 'cod~indicator' if type == 'indicator' else 'cod' - - return type - - def _google_analytics_location(self, pkg_dict): - limit = 15 - locations = pkg_dict.get('groups', []) - if len(locations) >= limit: - result = 'many' - else: - locations = [item.get('name', '') for item in locations] - locations.sort() - result = "~".join(locations) - - if not result: - result = 'none' - - return result - def _get_org_extras(self, org_id): """ Get the extras for our orgs @@ -1101,8 +1084,10 @@ def resource_read(self, id, resource_id): c.resource['has_views'] = len(resource_views) > 0 # set dataset type for google analytics - modified by HDX - c.ga_dataset_type = self._google_analytics_dataset_type(c.package) - c.ga_location = self._google_analytics_location(c.package) + # c.ga_dataset_type = self._google_analytics_dataset_type(c.package) + c.analytics_is_cod = analytics.is_cod(c.package) + c.analytics_is_indicator = analytics.is_indicator(c.package) + c.analytics_group_names, c.analytics_group_ids = analytics.extract_locations_in_json(c.package) current_resource_view = None view_id = request.GET.get('view_id') @@ -1123,6 +1108,13 @@ def resource_read(self, id, resource_id): 'current_resource_view': current_resource_view, 'dataset_type': dataset_type} + download_url = c.resource.get('perma_link') if c.resource.get('perma_link') else c.resource['url'] + c.resource['original_url'] = download_url + c.resource['download_url'] = download_url + if helpers.is_ckan_domain(download_url): + c.resource['download_url'] = helpers.make_url_relative(download_url) + + template = self._resource_template(dataset_type) return render(template, extra_vars=vars) diff --git a/ckanext-hdx_package/ckanext/hdx_package/helpers/analytics.py b/ckanext-hdx_package/ckanext/hdx_package/helpers/analytics.py new file mode 100644 index 0000000000..15e0f4ce75 --- /dev/null +++ b/ckanext-hdx_package/ckanext/hdx_package/helpers/analytics.py @@ -0,0 +1,182 @@ +import logging +import json +import urlparse +import requests + +import pylons.config as config + +import ckan.model as model +import ckan.lib.base as base +import ckan.logic as logic +import ckan.controllers.package as package_controller + +from ckan.common import _, c, request + +log = logging.getLogger(__name__) + + +def is_indicator(pkg_dict): + if int(pkg_dict.get('indicator', 0)) == 1: + return 'true' + return 'false' + + +def is_cod(pkg_dict): + tags = [tag.get('name', '') for tag in pkg_dict.get('tags', [])] + if 'cod' in tags: + return 'true' + return 'false' + + +def extract_locations(pkg_dict): + locations = pkg_dict.get('groups', []) + location_names = [] + location_ids = [] + for l in sorted(locations, key=lambda item: item.get('name', '')): + location_names.append(l.get('name', '')) + location_ids.append(l.get('id', '')) + + return location_names, location_ids + + +def extract_locations_in_json(pkg_dict): + locations = pkg_dict.get('groups', []) + location_names = [] + location_ids = [] + for l in sorted(locations, key=lambda item: item.get('name', '')): + location_names.append(l.get('name', '')) + location_ids.append(l.get('id', '')) + + return json.dumps(location_names), json.dumps(location_ids) + + +def _ga_dataset_type(is_indicator, is_cod): + ''' + :param is_indicator: + :type is_indicator: bool + :param is_cod: + :type is_cod: bool + :return: standard / indicator / cod / cod~indicator + :rtype: str + ''' + + type = 'standard' + if is_indicator: + type = 'indicator' + if is_cod: + type = 'cod~indicator' if type == 'indicator' else 'cod' + + return type + + +def _ga_location(location_names): + ''' + :param location_names: + :type location_names: list[str] + :return: + :rtype: str + ''' + limit = 15 + if len(location_names) >= limit: + result = 'many' + else: + result = "~".join(location_names) + + if not result: + result = 'none' + + return result + + +def wrap_resource_download_function(): + original_resource_download = package_controller.PackageController.resource_download + + def new_resource_download(self, id, resource_id, filename=None): + send_event = True + + referer_url = request.referer + remote_addr = request.remote_addr + request_url = request.url + + if referer_url: + ckan_url = config.get('ckan.site_url', '//localhost:5000') + ckan_parsed_url = urlparse.urlparse(ckan_url) + referer_parsed_url = urlparse.urlparse(referer_url) + + if ckan_parsed_url.hostname == referer_parsed_url.hostname: + send_event = False + try: + if send_event: + context = {'model': model, 'session': model.Session, + 'user': c.user or c.author, 'auth_user_obj': c.userobj} + resource_dict = logic.get_action('resource_show')(context, {'id': resource_id}) + dataset_dict = logic.get_action('package_show')(context, {'id': id}) + location_names, location_ids = extract_locations(dataset_dict) + + dataset_title = dataset_dict.get('title', dataset_dict.get('name')) + dataset_is_cod = is_cod(dataset_dict) == 'true' + dataset_is_indicator = is_indicator(dataset_dict) == 'true' + + analytics_enqueue_url = config.get('hdx.analytics.enqueue_url') + analytics_dict = { + 'event_name': 'resource download', + 'mixpanel_tracking_id': 'anonymous', + 'mixpanel_token': config.get('hdx.analytics.mixpanel.token'), + 'send_mixpanel': True, + 'send_ga': True, + 'mixpanel_meta': { + "resource name": resource_dict.get('name'), + "resource id": resource_dict.get('id'), + "dataset name": dataset_dict.get('title'), + "dataset id": dataset_dict.get('id'), + "org name": dataset_dict.get('organization', {}).get('name'), + "org id": dataset_dict.get('organization', {}).get('id'), + "group names": location_names, + "group ids": location_ids, + "is cod": dataset_is_cod, + "is indicator": dataset_is_indicator, + "event source": "direct", + "referer url": referer_url + }, + 'ga_meta': { + 'v': '1', + 't': 'event', + 'cid': 'anonymous', + 'tid': config.get('hdx.analytics.ga.token'), + 'ds': 'direct', + 'uip': remote_addr, + 'ec': 'resource', # event category + 'ea': 'download', # event action + 'dl': request_url, + 'el': '{} ({})'.format(resource_dict.get('name'), dataset_title), # event label + 'cd1': dataset_dict.get('organization', {}).get('name'), + 'cd2': _ga_dataset_type(dataset_is_indicator, dataset_is_cod), # type + 'cd3': _ga_location(location_names), # locations + + + + + } + } + + response = requests.post(analytics_enqueue_url, allow_redirects=True, timeout=2, + data=json.dumps(analytics_dict), headers={'Content-type': 'application/json'}) + response.raise_for_status() + enq_result = response.json() + log.info('Enqueuing result was: {}'.format(enq_result.get('success'))) + except logic.NotFound: + base.abort(404, _('Resource not found')) + except logic.NotAuthorized: + base.abort(401, _('Unauthorized to read resource %s') % id) + except requests.ConnectionError, e: + log.error("There was a connection error to the analytics enqueuing service: {}".format(str(e))) + except requests.HTTPError, e: + log.error("Bad HTTP response from analytics enqueuing service: {}".format(str(e))) + except requests.Timeout, e: + log.error("Request timed out: {}".format(str(e))) + except Exception, e: + log.error('Unexpected error {}'.format(e)) + + return original_resource_download(self, id, resource_id, filename) + + package_controller.PackageController.resource_download = new_resource_download diff --git a/ckanext-hdx_package/ckanext/hdx_package/helpers/helpers.py b/ckanext-hdx_package/ckanext/hdx_package/helpers/helpers.py index 5202d0550e..ecc49b1507 100644 --- a/ckanext-hdx_package/ckanext/hdx_package/helpers/helpers.py +++ b/ckanext-hdx_package/ckanext/hdx_package/helpers/helpers.py @@ -565,17 +565,13 @@ def hdx_get_proxified_resource_url(data_dict, proxy_schemes=['http','https']): 2) Return a domain relative url (without schema, domain or port) for local resources. :param data_dict: contains a resource and package dict - :type data_dict: dictionary + :type data_dict: dict :param proxy_schemes: list of url schemes to proxy for. :type data_dict: list ''' - ckan_url = config.get('ckan.site_url', '//localhost:5000') - url = data_dict['resource']['url'] - - parsed_url = urlparse.urlparse(url) - ckan_parsed_url = urlparse.urlparse(ckan_url) - same_domain = True if not parsed_url.hostname or parsed_url.hostname == ckan_parsed_url.hostname else False + same_domain = is_ckan_domain(data_dict['resource']['url']) + parsed_url = urlparse.urlparse(data_dict['resource']['url']) scheme = parsed_url.scheme if not same_domain and scheme in proxy_schemes: @@ -590,6 +586,30 @@ def hdx_get_proxified_resource_url(data_dict, proxy_schemes=['http','https']): return url +def is_ckan_domain(url): + ''' + :param url: url to check whether it's on the same domain as ckan + :type url: str + :return: True if it's the same domain. False otherwise + :rtype: bool + ''' + ckan_url = config.get('ckan.site_url', '//localhost:5000') + parsed_url = urlparse.urlparse(url) + ckan_parsed_url = urlparse.urlparse(ckan_url) + same_domain = True if not parsed_url.hostname or parsed_url.hostname == ckan_parsed_url.hostname else False + return same_domain + +def make_url_relative(url): + ''' + Transforms something like http://testdomain.com/test to /test + :param url: url to check whether it's on the same domain as ckan + :type url: str + :return: the new url as a string + :rtype: str + ''' + parsed_url = urlparse.urlparse(url) + return urlparse.urlunparse((None, None) + parsed_url[2:]) + def generate_mandatory_fields(): ''' diff --git a/ckanext-hdx_package/ckanext/hdx_package/plugin.py b/ckanext-hdx_package/ckanext/hdx_package/plugin.py index 927441d136..4befc847fc 100644 --- a/ckanext-hdx_package/ckanext/hdx_package/plugin.py +++ b/ckanext-hdx_package/ckanext/hdx_package/plugin.py @@ -31,6 +31,7 @@ import ckanext.hdx_package.actions.delete as hdx_delete import ckanext.hdx_package.helpers.helpers as hdx_helpers import ckanext.hdx_package.helpers.tracking_changes as tracking_changes +import ckanext.hdx_package.helpers.analytics as analytics import ckanext.hdx_package.actions.get as hdx_get import ckanext.hdx_org_group.helpers.organization_helper as org_helper @@ -56,6 +57,9 @@ def run_on_startup(): # replace original get_proxified_resource_url, check hdx_get_proxified_resource_url for more info resourceproxy_plugin.get_proxified_resource_url = hdx_helpers.hdx_get_proxified_resource_url + # wrap resource download function so that we can track download events + analytics.wrap_resource_download_function() + def _generate_license_list(): package.Package._license_register = license.LicenseRegister() diff --git a/ckanext-hdx_theme/ckanext/hdx_theme/fanstatic/google-analytics.js b/ckanext-hdx_theme/ckanext/hdx_theme/fanstatic/google-analytics.js index c5bc0875d6..510b720370 100644 --- a/ckanext-hdx_theme/ckanext/hdx_theme/fanstatic/google-analytics.js +++ b/ckanext-hdx_theme/ckanext/hdx_theme/fanstatic/google-analytics.js @@ -1,31 +1,148 @@ +$(function setUpSearchTracking() { + var formEl = $("#dataset-filter-form"); + if (formEl.length > 0) { + var mixpanelMapping = { + 'q': { + 'name': 'search term', + 'isList': false, + 'mandatory': true + }, + 'tags': { + 'name': 'tag filters', + 'isList': true, + 'mandatory': true + }, + 'res_format': { + 'name': 'format filters', + 'isList': true, + 'mandatory': true + }, + 'organization': { + 'name': 'org filters', + 'isList': true, + 'mandatory': true + }, + 'groups': { + 'name': 'location filters', + 'isList': true, + 'mandatory': true + }, + /*'ext_page_size': { + 'name': 'items per page', + 'isList': false, + 'mandatory': false + }, + 'sort': { + 'name': 'sorting', + 'isList': false, + 'mandatory': false + },*/ + 'ext_cod': { + 'name': 'cod filter', + 'isList': false, + 'mandatory': true + } + }; + var numberOfResults = parseInt($('#analytics-number-of-results').text().trim()) || 0; -function setUpResourcesTracking(){ - $('.ga-download').on('click', function(){ - //var rTitle = $(this).parents(".resource-item").find(".heading").attr("title"); - //var dTitle = $(".itemTitle").text().trim(); - var rTitle = $(this).find(".ga-download-resource-title").text().trim(); - var dTitle = $(this).find(".ga-download-dataset-title").text().trim(); - ga('send', 'event', 'resource', 'download', rTitle + " (" + dTitle +")"); - ga('send', 'event', 'dataset', 'resource-download', dTitle); - }); + var paramList = formEl.serializeArray(); + var mixpanelEventMeta = { + "page title": analyticsInfo.pageTitle, + "number of results": numberOfResults + /*"org name": analyticsInfo.organizationName, + "org id": analyticsInfo.organizationId, + "group names": analyticsInfo.groupNames, + "group ids": analyticsInfo.groupIds*/ + }; + var sendTrackingEvent = false; + for (var i = 0; i < paramList.length; i++) { + var param = paramList[i]; + var mappingInfo = mixpanelMapping[param.name]; + var paramValue = param.value.trim(); + if (mappingInfo && paramValue) { + populateMetadata(mixpanelEventMeta, mappingInfo, paramValue); + sendTrackingEvent = sendTrackingEvent || mappingInfo.mandatory; + } + } + if (sendTrackingEvent){ + var reResult = /ext_search_source=([^&]+)(&|$)/.exec(location.href); + if (reResult && reResult.length > 1) { + mixpanelEventMeta["search box location"] = reResult[1]; + } + else { + mixpanelEventMeta["search box location"] = "in-page"; + } + console.log(JSON.stringify(mixpanelEventMeta)); + mixpanel.track("search", mixpanelEventMeta); + } + else { + console.log("No mandatory properties found. Not sending search event to mixpanel."); + } + } - $('.ga-share').on('click', function(){ - var rTitle = $(this).parents(".resource-item").find(".heading").attr("title"); - var dTitle = $(".itemTitle").text().trim(); - ga('send', 'event', 'resource', 'share', rTitle + " (" + dTitle +")"); - ga('send', 'event', 'dataset', 'resource-share', dTitle); - }); + /** + * Populates the object that is sent to mixpanel for one