Skip to content
This repository has been archived by the owner on Jun 19, 2023. It is now read-only.

Commit

Permalink
Initial iteration
Browse files Browse the repository at this point in the history
  • Loading branch information
David Read committed Oct 4, 2012
1 parent aabb73b commit 29a4a6f
Show file tree
Hide file tree
Showing 12 changed files with 409 additions and 4 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
*.py[co]
*.py~
.gitignore

# Packages
*.egg
Expand Down
4 changes: 0 additions & 4 deletions README.md

This file was deleted.

62 changes: 62 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
ckanext-ga-report
=================

**Status:** Development

**CKAN Version:** 1.7.1+


Overview
--------

For creating detailed reports of CKAN analytics, including totals per group.

Whereas ckanext-googleanalytics focuses on providing page view stats for a recent period and for all time (aimed at end users), ckanext-ga-report is more interested in building regular periodic reports (more for site managers to monitor).

Contents of this extension:

* Use the CLI tool to download Google Analytics data for each time period into this extension's database tables

* Users can view the data as web page reports


Installation
------------

1. Activate your CKAN Python environment and install this extension's software::

$ . pyenv/bin/activate
$ pip install -e git+https://github.com/okfn/ckanext-ga-report.git#egg=ckanext-ga-report

2. Ensure your development.ini (or similar) contains the info about your Google Analytics account and configuration::

googleanalytics.id = UA-1010101-1
googleanalytics.username = googleaccount@gmail.com
googleanalytics.password = googlepassword
ga-report.period = monthly

Note that your password will be readable by system administrators on your server. Rather than use sensitive account details, it is suggested you give access to the GA account to a new Google account that you create just for this purpose.

3. Set up this extension's database tables using a paster command. (Ensure your CKAN pyenv is still activated, run the command from ``src/ckanext-ga-report``, alter the ``--config`` option to point to your site config file)::

$ paster initdb --config=../ckan/development.ini

4. Enable the extension in your CKAN config file by adding it to ``ckan.plugins``::

ckan.plugins = ga-report


Tutorial
--------

Download some GA data and store it in CKAN's db. (Ensure your CKAN pyenv is still activated, run the command from ``src/ckanext-ga-report``, alter the ``--config`` option to point to your site config file)::

$ paster loadanalytics latest --config=../ckan/development.ini


Software Licence
================

This software is developed by Cabinet Office. It is Crown Copyright and opened up under the Open Government Licence (OGL) (which is compatible with Creative Commons Attribution License).

OGL terms: http://www.nationalarchives.gov.uk/doc/open-government-licence/
7 changes: 7 additions & 0 deletions ckanext/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# this is a namespace package
# Declare the namespace through setuptools' pkg_resources when it can be
# imported; otherwise fall back to the standard library's pkgutil, which
# extends __path__ so that other distributions can contribute sub-packages.
try:
    import pkg_resources
    pkg_resources.declare_namespace(__name__)
except ImportError:
    import pkgutil
    __path__ = pkgutil.extend_path(__path__, __name__)
7 changes: 7 additions & 0 deletions ckanext/ga_report/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# this is a namespace package
# NOTE(review): this package holds the extension's own modules directly, so
# it looks like a regular package rather than a namespace shared between
# distributions - confirm the namespace declaration is intended here.
# Declare the namespace through setuptools' pkg_resources when it can be
# imported; otherwise fall back to the standard library's pkgutil.
try:
    import pkg_resources
    pkg_resources.declare_namespace(__name__)
except ImportError:
    import pkgutil
    __path__ = pkgutil.extend_path(__path__, __name__)
57 changes: 57 additions & 0 deletions ckanext/ga_report/command.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import logging

from ckan.lib.cli import CkanCommand
# No other CKAN imports allowed until _load_config is run, or logging is disabled

class InitDB(CkanCommand):
    """Initialise the extension's database tables
    """
    # The class docstring doubles as the paster help text.
    usage = __doc__
    summary = __doc__.split('\n')[0]
    min_args = 0
    max_args = 0

    def command(self):
        # The config has to be loaded before anything else from CKAN is
        # imported (see the note next to the module imports).
        self._load_config()

        from ckan import model

        # Rebind a fresh session to the configured engine.
        session = model.Session
        session.remove()
        session.configure(bind=model.meta.engine)
        logger = logging.getLogger('ckanext.ga-report')

        import ga_model
        ga_model.init_tables()
        logger.info("DB tables are setup")

class LoadAnalytics(CkanCommand):
    """Get data from Google Analytics API and save it
    in the ga_model
    Usage: paster loadanalytics <time-period>
    Where <time-period> is:
        all         - data for all time
        latest      - (default) just the 'latest' data
        YYYY-MM-DD  - just data for all time periods going
                      back to (and including) this date
    """
    # The class docstring doubles as the paster help text.
    summary = __doc__.split('\n')[0]
    usage = __doc__
    max_args = 1
    min_args = 0

    def command(self):
        """Run the download for the time period given on the command line.

        With no argument the period defaults to 'latest'. Any value other
        than 'all' or 'latest' is parsed as a YYYY-MM-DD date; a malformed
        date raises ValueError from ``strptime``.
        """
        # Imported locally: this module does not import datetime at the top,
        # so the date branch below used to fail with NameError.
        import datetime

        # The config has to be loaded before the downloader (which pulls in
        # further CKAN/pylons modules) is imported.
        self._load_config()

        from download_analytics import DownloadAnalytics
        downloader = DownloadAnalytics()

        time_period = self.args[0] if self.args else 'latest'
        if time_period == 'all':
            downloader.all_()
        elif time_period == 'latest':
            downloader.latest()
        else:
            since_date = datetime.datetime.strptime(time_period, '%Y-%m-%d')
            downloader.since_date(since_date)

9 changes: 9 additions & 0 deletions ckanext/ga_report/controller.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import logging
from ckan.lib.base import BaseController, c, render
import report_model

log = logging.getLogger('ckanext.ga-report')

class GaReport(BaseController):
    """Controller for the extension's report web pages."""

    def index(self):
        """Return the rendered ``index.html`` template."""
        page = render('index.html')
        return page
115 changes: 115 additions & 0 deletions ckanext/ga_report/download_analytics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import logging
import datetime

from pylons import config

import ga_model
from ga_client import GA

log = logging.getLogger('ckanext.ga-report')

FORMAT_MONTH = '%Y-%m'

class DownloadAnalytics(object):
    '''Downloads and stores analytics info'''

    def __init__(self):
        # Reporting period read from the site config. Only 'monthly' is
        # handled by the methods below; a missing option raises KeyError.
        self.period = config['ga-report.period']

    def all_(self):
        '''Download data for all time. Not implemented yet (does nothing).'''
        pass

    def latest(self):
        '''Download and store data for the current (incomplete) period.

        Raises NotImplementedError for any period other than 'monthly'.
        '''
        if self.period == 'monthly':
            # from first of this month to today
            now = datetime.datetime.now()
            first_of_this_month = datetime.datetime(now.year, now.month, 1)
            periods = ((now.strftime(FORMAT_MONTH),
                        now.day,
                        first_of_this_month, now),)
        else:
            raise NotImplementedError
        self.download_and_store(periods)

    def since_date(self, since_date):
        '''Download and store data for every period from the one containing
        `since_date` up to and including the current one.

        :param since_date: a datetime.datetime; only its year and month are
                           used when the period is 'monthly'.

        Raises NotImplementedError for any period other than 'monthly'.
        '''
        assert isinstance(since_date, datetime.datetime)
        periods = []  # (period_name, period_complete_day, start_date, end_date)
        if self.period == 'monthly':
            year = since_date.year
            month = since_date.month
            now = datetime.datetime.now()
            first_of_this_month = datetime.datetime(now.year, now.month, 1)
            while True:
                first_of_the_month = datetime.datetime(year, month, 1)
                if first_of_the_month == first_of_this_month:
                    # The current month is only complete up to today.
                    periods.append((now.strftime(FORMAT_MONTH),
                                    now.day,
                                    first_of_this_month, now))
                    break
                elif first_of_the_month < first_of_this_month:
                    # A past, complete month (period_complete_day of 0),
                    # named after the month itself rather than after "now".
                    last_of_the_month = \
                        self._last_day_of_month(first_of_the_month)
                    periods.append((first_of_the_month.strftime(FORMAT_MONTH),
                                    0,
                                    first_of_the_month, last_of_the_month))
                else:
                    # first_of_the_month has got to the future somehow
                    break
                month += 1
                if month > 12:
                    year += 1
                    month = 1
        else:
            raise NotImplementedError
        self.download_and_store(periods)

    @staticmethod
    def _last_day_of_month(first_of_the_month):
        '''Return the datetime (at midnight) of the last day of the month
        that starts at `first_of_the_month`.'''
        # 40 days after the 1st always falls inside the following month, so
        # the day before that month's 1st is the last day of this month.
        in_the_next_month = first_of_the_month + datetime.timedelta(40)
        first_of_the_next_month = datetime.datetime(in_the_next_month.year,
                                                    in_the_next_month.month, 1)
        return first_of_the_next_month - datetime.timedelta(1)

    @staticmethod
    def get_full_period_name(period_name, period_complete_day):
        '''Return the period name for display, with an "(up to Nth)" suffix
        when `period_complete_day` is non-zero (i.e. the period is only
        partly complete).'''
        if period_complete_day:
            return period_name + ' (up to %ith)' % period_complete_day
        else:
            return period_name

    def download_and_store(self, periods):
        '''Download and then store the analytics for each period.

        :param periods: iterable of tuples
            (period_name, period_complete_day, start_date, end_date)
        '''
        for period_name, period_complete_day, start_date, end_date in periods:
            log.info('Downloading Analytics for period "%s" (%s - %s)',
                     self.get_full_period_name(period_name, period_complete_day),
                     start_date.strftime('%Y %m %d'),
                     end_date.strftime('%Y %m %d'))
            data = self.download(start_date, end_date)
            log.info('Storing Analytics for period "%s"',
                     self.get_full_period_name(period_name, period_complete_day))
            self.store(period_name, period_complete_day, data)

    @classmethod
    def download(cls, start_date, end_date):
        '''Get data from GA for a given time period

        Returns a dict mapping each "ga:pagePath" value to its
        "ga:uniquePageviews" count as an int.
        '''
        start_date = start_date.strftime('%Y-%m-%d')
        end_date = end_date.strftime('%Y-%m-%d')
        query = 'ga:pagePath=~^/dataset/'
        metrics = 'ga:uniquePageviews'
        sort = '-ga:uniquePageviews'
        packages = {}
        for entry in GA.ga_query(query_filter=query,
                                 from_date=start_date,
                                 metrics=metrics,
                                 sort=sort,
                                 to_date=end_date):
            for dim in entry.dimension:
                if dim.name == "ga:pagePath":
                    package = dim.value
                    count = entry.get_metric(
                        'ga:uniquePageviews').value or 0
                    packages[package] = int(count)
        return packages

    def store(self, period_name, period_complete_day, data):
        '''Save downloaded data; only a 'url' entry in `data` is stored.'''
        # NOTE(review): download() returns a page path -> count mapping with
        # no 'url' key, so as written this normally stores nothing. The data
        # format between download() and store() still needs reconciling.
        if 'url' in data:
            ga_model.update_url_stats(period_name, period_complete_day, data['url'])
90 changes: 90 additions & 0 deletions ckanext/ga_report/ga_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import re
import uuid

from sqlalchemy import Table, Column, MetaData
from sqlalchemy import types
from sqlalchemy.sql import select, text
from sqlalchemy import func

import ckan.model as model
from ckan.model.types import JsonType
from ckan.lib.base import *

def make_uuid():
    '''Return a newly generated random (version 4) UUID as a unicode string.'''
    # u'%s' formatting gives ``unicode`` on Python 2 (the same result as the
    # ``unicode()`` builtin) and ``str`` on Python 3, where ``unicode()``
    # does not exist.
    return u'%s' % uuid.uuid4()

def init_tables():
    '''Define the ``ga_url`` table and create it on CKAN's database engine.

    The column names match what update_url_stats() reads and writes
    (``url``, ``views``, ``department_id``). The earlier definition had no
    ``url`` column and used ``visits`` / ``group_id``, so those statements
    could not work against it.
    '''
    metadata = MetaData()
    Table('ga_url', metadata,
          Column('id', types.UnicodeText, primary_key=True, default=make_uuid),
          Column('period_name', types.UnicodeText),
          Column('period_complete_day', types.Integer),
          Column('url', types.UnicodeText),
          Column('views', types.Integer),
          Column('department_id', types.String(60)),
          Column('next_page', JsonType),
          )
    # NOTE(review): create_all() only creates missing tables. A ga_url table
    # created with the earlier column names is not altered and would need
    # dropping or migrating by hand.
    metadata.create_all(model.meta.engine)


# Reflected Table objects, keyed by table name.
cached_tables = {}


def get_table(name):
    '''Return the reflected table called `name`, reflecting the database
    schema on the first request for it and caching the result.'''
    try:
        return cached_tables[name]
    except KeyError:
        pass
    reflected = MetaData()
    reflected.reflect(bind=model.meta.engine)
    cached_tables[name] = reflected.tables[name]
    return cached_tables[name]


def _normalize_url(url):
    '''Strip off the hostname etc. Do this before storing it.

    Only a leading ``http(s)://[www.]data.gov.uk`` is removed; any other
    value is returned unchanged.

    >>> _normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices')
    '/dataset/weekly_fuel_prices'
    '''
    # Raw string with escaped dots so that "." only matches a literal dot,
    # anchored so a hostname appearing later in the value (e.g. inside a
    # query string) is left alone.
    url = re.sub(r'^https?://(www\.)?data\.gov\.uk', '', url)
    return url

def _get_department_id_of_url(url):
    '''Return the id of the first 'publisher' group of the dataset that
    `url` refers to, or None when the url is not a dataset url, the
    dataset is not found, or it has no publisher group.'''
    # e.g. /dataset/fuel_prices
    # e.g. /dataset/fuel_prices/resource/e63380d4
    matched = re.match('/dataset/([^/]+)(/.*)?', url)
    if not matched:
        return None
    dataset = model.Package.get(matched.group(1))
    if not dataset:
        return None
    publishers = dataset.get_groups('publisher')
    if not publishers:
        return None
    return publishers[0].id

def update_url_stats(period_name, period_complete_day, url_data):
    '''Insert or update one ``ga_url`` row per url for the given period.

    :param period_name: name of the period the figures belong to
    :param period_complete_day: value stored in the period_complete_day
                                column alongside the figures
    :param url_data: iterable of (url, views, next_page) tuples

    A row is identified by (period_name, url): when one exists it is
    updated, otherwise a new row is inserted. The session transaction is
    committed once all rows have been written.

    NOTE(review): relies on the ga_url table having ``url``, ``views`` and
    ``department_id`` columns - verify against the table definition.
    '''
    table = get_table('ga_url')
    connection = model.Session.connection()
    for url, views, next_page in url_data:
        url = _normalize_url(url)
        department_id = _get_department_id_of_url(url)
        # Both criteria must hold, so AND them into a single clause.
        this_row = ((table.c.period_name == period_name) &
                    (table.c.url == url))
        # see if the row for this url & month is in the table already
        s = select([func.count(table.c.id)]).where(this_row)
        count = connection.execute(s).fetchone()
        if count and count[0]:
            # update the row
            connection.execute(
                table.update()
                .where(this_row)
                .values(period_complete_day=period_complete_day,
                        views=views,
                        department_id=department_id,
                        next_page=next_page))
        else:
            # create the row
            values = {'period_name': period_name,
                      'period_complete_day': period_complete_day,
                      'url': url,
                      'views': views,
                      'department_id': department_id,
                      'next_page': next_page}
            connection.execute(table.insert().values(**values))
    # The connection belongs to the session's transaction; without a commit
    # the writes are rolled back when the session is discarded.
    model.Session.commit()
25 changes: 25 additions & 0 deletions ckanext/ga_report/plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import logging

import ckan.lib.helpers as h
import ckan.plugins as p
from ckan.plugins import implements, toolkit

import gasnippet
import commands
import dbutil

log = logging.getLogger('ckanext.ga-report')

class GoogleAnalyticsPlugin(p.SingletonPlugin):
    '''CKAN plugin that registers this extension's templates, public files
    and the route to its report page.'''
    implements(p.IConfigurer, inherit=True)
    implements(p.IRoutes, inherit=True)

    def update_config(self, config):
        '''IConfigurer hook: add the 'templates' and 'public' directories
        to the CKAN config.'''
        toolkit.add_template_directory(config, 'templates')
        toolkit.add_public_directory(config, 'public')

    def after_map(self, map):
        '''IRoutes hook: connect the report index url to the GaReport
        controller and return the route map.'''
        # The controller is addressed by its importable module path, so the
        # package name uses an underscore (ckanext.ga_report), not the
        # hyphenated project name.
        map.connect(
            '/data/analytics/index',
            controller='ckanext.ga_report.controller:GaReport',
            action='index'
        )
        return map

Empty file.
Loading

0 comments on commit 29a4a6f

Please sign in to comment.