This repository has been archived by the owner on Jun 19, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
David Read
committed
Oct 4, 2012
1 parent
aabb73b
commit 29a4a6f
Showing
12 changed files
with
409 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,6 @@ | ||
*.py[co] | ||
*.py~ | ||
.gitignore | ||
|
||
# Packages | ||
*.egg | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
ckanext-ga-report | ||
================= | ||
|
||
**Status:** Development | ||
|
||
**CKAN Version:** 1.7.1+ | ||
|
||
|
||
Overview | ||
-------- | ||
|
||
For creating detailed reports of CKAN analytics, including totals per group. | ||
|
||
Whereas ckanext-googleanalytics focusses on providing page view stats for a recent period and for all time (aimed at end users), ckanext-ga-report is more interested in building regular periodic reports (more for site managers to monitor). | ||
|
||
Contents of this extension: | ||
|
||
* Use the CLI tool to download Google Analytics data for each time period into this extension's database tables | ||
|
||
* Users can view the data as web page reports | ||
|
||
|
||
Installation | ||
------------ | ||
|
||
1. Activate your CKAN python environment and install this extension's software:: | ||
|
||
$ . pyenv/bin/activate | ||
$ pip install -e git+https://github.com/okfn/ckanext-ga-report.git#egg=ckanext-ga-report | ||
|
||
2. Ensure your development.ini (or similar) contains the info about your Google Analytics account and configuration:: | ||
|
||
googleanalytics.id = UA-1010101-1 | ||
googleanalytics.username = googleaccount@gmail.com | ||
googleanalytics.password = googlepassword | ||
ga-report.period = monthly | ||
|
||
Note that your password will be readable by system administrators on your server. Rather than use sensitive account details, it is suggested you give access to the GA account to a new Google account that you create just for this purpose. | ||
|
||
3. Set up this extension's database tables using a paster command. (Ensure your CKAN pyenv is still activated, run the command from ``src/ckanext-ga-report``, alter the ``--config`` option to point to your site config file):: | ||
|
||
$ paster initdb --config=../ckan/development.ini | ||
|
||
4. Enable the extension in your CKAN config file by adding it to ``ckan.plugins``:: | ||
|
||
ckan.plugins = ga-report | ||
|
||
|
||
Tutorial | ||
-------- | ||
|
||
Download some GA data and store it in CKAN's db. (Ensure your CKAN pyenv is still activated, run the command from ``src/ckanext-ga-report``, alter the ``--config`` option to point to your site config file):: | ||
|
||
$ paster loadanalytics latest --config=../ckan/development.ini | ||
|
||
|
||
Software Licence | ||
================ | ||
|
||
This software is developed by Cabinet Office. It is Crown Copyright and opened up under the Open Government Licence (OGL) (which is compatible with Creative Commons Attribution License). | ||
|
||
OGL terms: http://www.nationalarchives.gov.uk/doc/open-government-licence/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# this is a namespace package
# Declare the namespace through pkg_resources when it can be imported;
# otherwise fall back to extending __path__ with pkgutil.
try:
    import pkg_resources
    pkg_resources.declare_namespace(__name__)
except ImportError:
    # pkg_resources could not be imported: use the stdlib mechanism instead.
    import pkgutil
    __path__ = pkgutil.extend_path(__path__, __name__)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# this is a namespace package
# Declare the namespace through pkg_resources when it can be imported;
# otherwise fall back to extending __path__ with pkgutil.
try:
    import pkg_resources
    pkg_resources.declare_namespace(__name__)
except ImportError:
    # pkg_resources could not be imported: use the stdlib mechanism instead.
    import pkgutil
    __path__ = pkgutil.extend_path(__path__, __name__)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import logging | ||
|
||
from ckan.lib.cli import CkanCommand | ||
# No other CKAN imports allowed until _load_config is run, or logging is disabled | ||
|
||
class InitDB(CkanCommand):
    """Initialise the extension's database tables
    """
    # The first docstring line is the summary; the whole docstring is the
    # usage text.
    summary = __doc__.split('\n')[0]
    usage = __doc__
    min_args = 0
    max_args = 0

    def command(self):
        # The config has to be loaded before any further CKAN import
        # (see the note next to the CkanCommand import).
        self._load_config()

        import ckan.model as model
        session = model.Session
        session.remove()
        session.configure(bind=model.meta.engine)

        import ga_model
        ga_model.init_tables()

        logging.getLogger('ckanext.ga-report').info("DB tables are setup")
|
||
class LoadAnalytics(CkanCommand):
    """Get data from Google Analytics API and save it
    in the ga_model
    Usage: paster loadanalytics <time-period>
    Where <time-period> is:
        all - data for all time
        latest - (default) just the 'latest' data
        YYYY-MM-DD - just data for all time periods going
                     back to (and including) this date
    """
    # The first docstring line is the summary; the whole docstring is the
    # usage text.
    summary = __doc__.split('\n')[0]
    usage = __doc__
    max_args = 1
    min_args = 0

    def command(self):
        """Run the download selected by the optional <time-period> argument.

        Raises ValueError (from ``strptime``) when the argument is neither
        ``all``, ``latest`` nor a ``YYYY-MM-DD`` date.
        """
        # The config has to be loaded before any further CKAN import
        # (see the note next to the CkanCommand import).
        self._load_config()

        # Imported here because this module never imports datetime at the
        # top; without it the date branch below raised NameError.
        import datetime

        from download_analytics import DownloadAnalytics
        downloader = DownloadAnalytics()

        time_period = self.args[0] if self.args else 'latest'
        if time_period == 'all':
            downloader.all_()
        elif time_period == 'latest':
            downloader.latest()
        else:
            since_date = datetime.datetime.strptime(time_period, '%Y-%m-%d')
            downloader.since_date(since_date)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import logging | ||
from ckan.lib.base import BaseController, c, render | ||
import report_model | ||
|
||
log = logging.getLogger('ckanext.ga-report') | ||
|
||
class GaReport(BaseController):
    """Controller for the analytics report pages."""

    def index(self):
        """Render the ``index.html`` template and return the result."""
        page = render('index.html')
        return page
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
import logging | ||
import datetime | ||
|
||
from pylons import config | ||
|
||
import ga_model | ||
from ga_client import GA | ||
|
||
log = logging.getLogger('ckanext.ga-report') | ||
|
||
FORMAT_MONTH = '%Y-%m'


class DownloadAnalytics(object):
    '''Downloads and stores analytics info'''

    def __init__(self):
        # Reporting granularity; only 'monthly' is handled by the methods
        # below.
        self.period = config['ga-report.period']

    def all_(self):
        '''Download data for all time. Not implemented yet: does nothing.'''
        pass

    def latest(self):
        '''Download and store the current, still incomplete, period.

        Raises NotImplementedError for any period other than 'monthly'.
        '''
        if self.period != 'monthly':
            raise NotImplementedError
        # from first of this month to today
        now = datetime.datetime.now()
        first_of_this_month = datetime.datetime(now.year, now.month, 1)
        periods = ((now.strftime(FORMAT_MONTH),
                    now.day,
                    first_of_this_month, now),)
        self.download_and_store(periods)

    def since_date(self, since_date):
        '''Download and store every period from that of since_date to now.

        Complete past months get a period_complete_day of 0; the current
        month gets today's day number. A since_date in the future yields no
        periods.

        Raises NotImplementedError for any period other than 'monthly'.
        '''
        assert isinstance(since_date, datetime.datetime)
        if self.period != 'monthly':
            raise NotImplementedError
        periods = []  # (period_name, period_complete_day, start_date, end_date)
        now = datetime.datetime.now()
        first_of_this_month = datetime.datetime(now.year, now.month, 1)
        year = since_date.year
        month = since_date.month
        while True:
            first_of_the_month = datetime.datetime(year, month, 1)
            if first_of_the_month > first_of_this_month:
                # first_of_the_month has got to the future somehow
                break
            if first_of_the_month == first_of_this_month:
                periods.append((now.strftime(FORMAT_MONTH),
                                now.day,
                                first_of_this_month, now))
                break
            # A complete month in the past. 40 days after the 1st always
            # falls inside the following month, so stepping back one day
            # from that month's 1st gives the last day of this month.
            in_the_next_month = first_of_the_month + datetime.timedelta(40)
            last_of_the_month = datetime.datetime(in_the_next_month.year,
                                                  in_the_next_month.month, 1) \
                - datetime.timedelta(1)
            # Name the period after the month it covers, not after the
            # current month, otherwise every past month shares one name.
            periods.append((first_of_the_month.strftime(FORMAT_MONTH), 0,
                            first_of_the_month, last_of_the_month))
            month += 1
            if month > 12:
                year += 1
                month = 1
        self.download_and_store(periods)

    @staticmethod
    def _ordinal(day):
        '''Return day with its English ordinal suffix, e.g. 1st, 22nd, 11th.'''
        if 10 <= day % 100 <= 20:
            suffix = 'th'
        else:
            suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(day % 10, 'th')
        return '%i%s' % (day, suffix)

    @staticmethod
    def get_full_period_name(period_name, period_complete_day):
        '''Return the period name for display, noting how far an incomplete
        period has got, e.g. "2012-10 (up to 4th)". A falsy
        period_complete_day means the period is complete.'''
        if period_complete_day:
            return period_name + ' (up to %s)' % \
                DownloadAnalytics._ordinal(period_complete_day)
        else:
            return period_name

    def download_and_store(self, periods):
        '''Download then store the data for each of the given periods.

        periods is an iterable of
        (period_name, period_complete_day, start_date, end_date) tuples.
        '''
        for period_name, period_complete_day, start_date, end_date in periods:
            full_name = self.get_full_period_name(period_name,
                                                  period_complete_day)
            log.info('Downloading Analytics for period "%s" (%s - %s)',
                     full_name,
                     start_date.strftime('%Y %m %d'),
                     end_date.strftime('%Y %m %d'))
            data = self.download(start_date, end_date)
            log.info('Storing Analytics for period "%s"', full_name)
            self.store(period_name, period_complete_day, data)

    @classmethod
    def download(cls, start_date, end_date):
        '''Get data from GA for a given time period

        Returns a dict whose 'url' key holds a list of
        (page_path, unique_page_views, next_page) tuples, which is the shape
        that store() passes on to ga_model.update_url_stats.
        '''
        start_date = start_date.strftime('%Y-%m-%d')
        end_date = end_date.strftime('%Y-%m-%d')
        query = 'ga:pagePath=~^/dataset/'
        metrics = 'ga:uniquePageviews'
        sort = '-ga:uniquePageviews'
        url_stats = []
        for entry in GA.ga_query(query_filter=query,
                                 from_date=start_date,
                                 metrics=metrics,
                                 sort=sort,
                                 to_date=end_date):
            page_path = None
            for dim in entry.dimension:
                if dim.name == "ga:pagePath":
                    page_path = dim.value
            if page_path is None:
                # No page path on this entry, so there is nothing to key
                # the count on.
                continue
            count = entry.get_metric('ga:uniquePageviews').value or 0
            # NOTE(review): no next-page data is requested from GA here, so
            # None is stored for it - confirm what next_page should hold.
            url_stats.append((page_path, int(count), None))
        return {'url': url_stats}

    def store(self, period_name, period_complete_day, data):
        '''Save the downloaded data for one period in the ga_model tables.'''
        if 'url' in data:
            ga_model.update_url_stats(period_name, period_complete_day,
                                      data['url'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
import re | ||
import uuid | ||
|
||
from sqlalchemy import Table, Column, MetaData | ||
from sqlalchemy import types | ||
from sqlalchemy.sql import select, text | ||
from sqlalchemy import func | ||
|
||
import ckan.model as model | ||
from ckan.model.types import JsonType | ||
from ckan.lib.base import * | ||
|
||
def make_uuid():
    """Return a freshly generated random UUID as a unicode string."""
    return u'%s' % uuid.uuid4()
|
||
def init_tables():
    """Create the ``ga_url`` table in CKAN's database.

    The columns are the ones update_url_stats() filters on and writes:
    ``url``, ``views`` and ``department_id`` (previously the table had
    ``visits`` and ``group_id`` and no ``url`` column at all, so no row
    could be looked up or saved).
    """
    metadata = MetaData()
    # Constructing the Table registers it on the metadata; no reference to
    # it needs to be kept.
    Table('ga_url', metadata,
          Column('id', types.UnicodeText, primary_key=True,
                 default=make_uuid),
          Column('period_name', types.UnicodeText),
          Column('period_complete_day', types.Integer),
          Column('url', types.UnicodeText),
          Column('views', types.Integer),
          Column('department_id', types.String(60)),
          Column('next_page', JsonType),
          )
    metadata.create_all(model.meta.engine)
|
||
|
||
# Reflected Table objects keyed by table name, filled in by get_table().
cached_tables = {}


def get_table(name):
    """Return the Table called ``name``, reflecting the database schema the
    first time a given name is requested and reusing the result afterwards.
    """
    try:
        return cached_tables[name]
    except KeyError:
        pass
    reflected = MetaData()
    reflected.reflect(bind=model.meta.engine)
    cached_tables[name] = reflected.tables[name]
    return cached_tables[name]
|
||
|
||
def _normalize_url(url): | ||
'''Strip off the hostname etc. Do this before storing it. | ||
>>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices') | ||
'/dataset/weekly_fuel_prices' | ||
''' | ||
url = re.sub('https?://(www\.)?data.gov.uk', '', url) | ||
return url | ||
|
||
def _get_department_id_of_url(url): | ||
# e.g. /dataset/fuel_prices | ||
# e.g. /dataset/fuel_prices/resource/e63380d4 | ||
dataset_match = re.match('/dataset/([^/]+)(/.*)?', url) | ||
if dataset_match: | ||
dataset_ref = dataset_match.groups()[0] | ||
dataset = model.Package.get(dataset_ref) | ||
if dataset: | ||
publisher_groups = dataset.get_groups('publisher') | ||
if publisher_groups: | ||
return publisher_groups[0].id | ||
|
||
def update_url_stats(period_name, period_complete_day, url_data):
    """Save the page-view statistics of one period in the ``ga_url`` table.

    url_data is an iterable of (url, views, next_page) tuples. For each
    one, the row for that URL and period is updated if it already exists,
    otherwise it is created.
    """
    # NOTE(review): this relies on ga_url having url, views and
    # department_id columns - confirm against the schema init_tables creates.
    table = get_table('ga_url')
    # NOTE(review): nothing here commits the session's transaction - confirm
    # that the caller does.
    connection = model.Session.connection()
    for url, views, next_page in url_data:
        url = _normalize_url(url)
        department_id = _get_department_id_of_url(url)
        # Both conditions are joined into one clause: select() takes only
        # one where-clause (its next positional argument is from_obj), and
        # the same clause serves the update below.
        this_row = ((table.c.period_name == period_name) &
                    (table.c.url == url))
        # see if the row for this url & month is in the table already
        s = select([func.count(table.c.id)], this_row)
        count = connection.execute(s).fetchone()
        if count and count[0]:
            # update the row
            connection.execute(
                table.update()
                .where(this_row)
                .values(period_complete_day=period_complete_day,
                        views=views,
                        department_id=department_id,
                        next_page=next_page))
        else:
            # create the row
            values = {'period_name': period_name,
                      'period_complete_day': period_complete_day,
                      'url': url,
                      'views': views,
                      'department_id': department_id,
                      'next_page': next_page}
            connection.execute(table.insert().values(**values))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import logging

import ckan.lib.helpers as h
import ckan.plugins as p
from ckan.plugins import implements, toolkit

import gasnippet
import commands
import dbutil
|
||
log = logging.getLogger('ckanext.ga-report') | ||
|
||
class GoogleAnalyticsPlugin(p.SingletonPlugin):
    """CKAN plugin that registers this extension's templates, public files
    and the route to the analytics report page.

    ``p`` is ``ckan.plugins``; it must be imported under that name at the
    top of this module for the class statement to run.
    """
    implements(p.IConfigurer, inherit=True)
    implements(p.IRoutes, inherit=True)

    def update_config(self, config):
        """Add this extension's ``templates`` and ``public`` directories to
        the CKAN config."""
        toolkit.add_template_directory(config, 'templates')
        toolkit.add_public_directory(config, 'public')

    def after_map(self, map):
        """Connect ``/data/analytics/index`` to the GaReport controller's
        ``index`` action and return the route map."""
        # NOTE(review): the controller path contains a hyphen
        # ("ckanext.ga-report") - confirm it matches the package's actual
        # directory name.
        map.connect(
            '/data/analytics/index',
            controller='ckanext.ga-report.controller:GaReport',
            action='index'
        )
        return map
|
Empty file.
Oops, something went wrong.