diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..0ed5352 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,22 @@ +root = true + +[*] +indent_style = space +indent_size = 4 +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true + +[*.py] +max_line_length = 120 + +[*.md] +indent_size = 2 +trim_trailing_whitespace = false + +[**/*.scss] +indent_size = 2 + +[**/*.{json,yaml,yml}] +indent_size = 2 diff --git a/.gitignore b/.gitignore index 2ca8682..b903df7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,91 @@ -_site/ -.sass-cache/ -.jekyll-cache/ -.jekyll-metadata +# ---> Python +# Byte-compiled / optimized / DLL files +__pycache__/ +.pytest_cache +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover + +# Translations +# *.mo +# *.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# ---> Node +# Logs +logs +*.log +npm-debug.log* + +# Runtime data +pids +*.pid +*.seed + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage + +# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (http://nodejs.org/api/addons.html) +build/Release + +# Dependency directory +# https://docs.npmjs.com/misc/faq#should-i-check-my-node-modules-folder-into-git +node_modules +output/ diff --git a/README.md b/README.md index 0fda1a2..6298d33 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,3 @@ # getudata + udata website diff --git a/articles/2018-08-28-hello-world.md b/articles/2018-08-28-hello-world.md new file mode 100644 index 0000000..b944d90 --- /dev/null +++ b/articles/2018-08-28-hello-world.md @@ -0,0 +1,13 @@ +--- +title: Hello World +date: 2018-08-28 17:59 +modified: 2018-08-28 17:59 +image: https://placehold.it/1920x1080 +tags: + - test +slug: hello-world +lang: en +authors: Open Data Team +summary: Just a sample article +# status: draft +--- diff --git a/data/config.yml b/data/config.yml new file mode 100644 index 0000000..75cb63e --- /dev/null +++ b/data/config.yml @@ -0,0 +1,2 @@ +--- +tagline: Open, customizable and skinnable platform dedicated to open data diff --git a/data/plugins/udata-ckan.md b/data/plugins/udata-ckan.md new file mode 100644 index 0000000..0a6952f --- /dev/null +++ b/data/plugins/udata-ckan.md @@ -0,0 +1,4 @@ +--- +repository: https://github.com/opendatateam/udata-ckan +--- +CKAN support diff --git a/data/showcase/data.gouv.fr.md b/data/showcase/data.gouv.fr.md new file mode 100644 index 0000000..35587e1 --- /dev/null +++ b/data/showcase/data.gouv.fr.md @@ -0,0 +1,4 @@ +--- +link: https://www.data.gouv.fr +--- +French national open data portal diff --git a/develop.pip b/develop.pip new file mode 100644 index 0000000..91bca45 --- /dev/null +++ b/develop.pip @@ -0,0 +1,3 @@ +-r requirements.pip +invoke +livereload diff --git a/pages/demo.md 
b/pages/demo.md new file mode 100644 index 0000000..0dc25e9 --- /dev/null +++ b/pages/demo.md @@ -0,0 +1,12 @@ +--- +title: Demo +date: 2018-08-28 18:00 +modified: 2018-08-28 18:00 +image: https://placehold.it/1920x1080 +tags: + - demo +slug: demo +lang: en +authors: Open Data Team +summary: Just a demo page +--- diff --git a/pelicanconf.py b/pelicanconf.py new file mode 100644 index 0000000..79e5687 --- /dev/null +++ b/pelicanconf.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python +import os + +AUTHOR = 'Open Data Team' +SITENAME = 'udata' + +SITEDESCRIPTION = 'Bla bla bla' + +TAGS = ('opendata', 'data') + +# PATH = 'articles' + +TIMEZONE = 'Europe/Paris' + +DEFAULT_LANG = 'en' + +# THEME = 'theme' + +PATH = os.path.dirname(__file__) +OUTPUT_PATH = os.path.join(PATH, 'output') +ARTICLE_PATHS = [ + 'articles', +] + +CONTACT_EMAIL = 'contact@opendata.team' + +# PAGE_PATHS = [ +# 'pages', +# ] + + +# Feed generation is usually not desired when developing +FEED_ALL_ATOM = None +CATEGORY_FEED_ATOM = None +TRANSLATION_FEED_ATOM = None +AUTHOR_FEED_ATOM = None +AUTHOR_FEED_RSS = None + +# Blogroll +LINKS = (('Pelican', 'http://getpelican.com/'), + ('Python.org', 'http://python.org/'), + ('Jinja2', 'http://jinja.pocoo.org/'), + ('You can modify those links in your config file', '#'),) + +DEFAULT_PAGINATION = 10 + +# Uncomment following line if you want document-relative URLs when developing +#RELATIVE_URLS = True + +STATIC_PATHS = [ + 'images', +] + +PLUGIN_PATHS = [ + 'plugins', +] + +PLUGINS = [ + 'sitemap', + 'frontmark', + 'data', + 'related_posts', + 'jinja_tools', +] + +DATA_COMMON = 'data' +DATA_PATHS = [ + DATA_COMMON, + os.path.join(DATA_COMMON, DEFAULT_LANG), +] + +DATA = [ + 'config.yml', + 'showcase', + 'plugins', +] + +TEMPLATE_PAGES = { + # 'templates/index.html': 'index.html' +} + +# Serve the blog on /blog/ +# INDEX_URL = 'blog.html' +# INDEX_SAVE_AS = 'blog.html' + +ARTICLE_URL = 'blog/{date:%Y}/{date:%m}/{date:%d}/{slug}.html' +ARTICLE_SAVE_AS = 'blog/{date:%Y}/{date:%m}/{date:%d}/{slug}.html' + +CATEGORY_URL = 'blog/category/{slug}.html' +CATEGORY_SAVE_AS = 'blog/category/{slug}.html' + +TAG_URL = 'blog/tag/{slug}.html' +TAG_SAVE_AS = 'blog/tag/{slug}.html' + +PAGE_URL = '{slug}.html' +PAGE_SAVE_AS = '{slug}.html' + + +SITEMAP = { + 'format': 'xml', + 'priorities': { + 'articles': 0.5, + 'indexes': 0.5, + 'pages': 0.5 + }, + 'changefreqs': { + 'articles': 'daily', + 'indexes': 'daily', + 'pages': 'monthly' + } +} + +RELATED_POSTS_MAX = 3 + +SOCIAL = ( + ('github', 'https://github.com/opendatateam'), + ('twitter', 'https://twitter.com/udata_project'), + ('Gitter', 'https://gitter.im/opendatateam/udata'), +) diff --git a/plugins/__init__.py b/plugins/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/plugins/data.py b/plugins/data.py new file mode 100644 index 0000000..1f100f6 --- /dev/null +++ b/plugins/data.py @@ -0,0 +1,339 @@ +# -*- coding: utf-8 -*- +from __future__ import print_function, unicode_literals + +import collections +import json +import logging +import os +import yaml + +from blinker import signal + +from pelican import signals +from pelican.contents import Content, Page +from pelican.generators import CachingGenerator +from pelican.settings import DEFAULT_CONFIG +from pelican.utils import (slugify, DateFormatter, copy, mkdir_p, posixize_path, + process_translations, python_2_unicode_compatible) + + +logger = logging.getLogger(__name__) + + +data_generator_init = signal('data_generator_init') +data_generator_finalized = 
signal('data_generator_finalized') +data_writer_finalized = signal('data_writer_finalized') + +data_generator_preread = signal('data_generator_preread') +data_generator_context = signal('data_generator_context') + +SUPPORTED_FORMATS = 'json yaml yml'.split() + + +def dict_representer(dumper, data): + return dumper.represent_dict(data.items()) + + +def dict_constructor(loader, node): + return collections.OrderedDict(loader.construct_pairs(node)) + + +yaml.add_representer(collections.OrderedDict, dict_representer) +yaml.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, dict_constructor) + + +@python_2_unicode_compatible +class Data(object): + ''' + Represents a single data item from a collection. + + :param content: the string to parse, containing the original content. + :param metadata: the metadata associated with this page (optional). + :param settings: the settings dictionary (optional). + :param source_path: The location of the source of this content (if any). + :param context: The shared context between generators. + ''' + + def __init__(self, content, metadata=None, settings=None, + source_path=None, context=None): + + if metadata is None: + metadata = {} + if settings is None: + settings = copy.deepcopy(DEFAULT_CONFIG) + + self.settings = settings + self.content = content + if context is None: + context = {} + self._context = context + self.translations = [] + + local_metadata = dict() + local_metadata.update(metadata) + + # set metadata as attributes + for key, value in local_metadata.items(): + # if key in ('save_as', 'url'): + # key = 'override_' + key + setattr(self, key.lower(), value) + + # also keep track of the metadata attributes available + self.metadata = local_metadata + + # default template if it's not defined in page + # self.template = self._get_template() + + # First, read the authors from "authors", if not, fall back to "author" + # and if not use the settings defined one, if any. + # if not hasattr(self, 'author'): + # if hasattr(self, 'authors'): + # self.author = self.authors[0] + # elif 'AUTHOR' in settings: + # self.author = Author(settings['AUTHOR'], settings) + # + # if not hasattr(self, 'authors') and hasattr(self, 'author'): + # self.authors = [self.author] + + # XXX Split all the following code into pieces, there is too much here.
+ + # manage languages + # self.in_default_lang = True + # if 'DEFAULT_LANG' in settings: + # default_lang = settings['DEFAULT_LANG'].lower() + # if not hasattr(self, 'lang'): + # self.lang = default_lang + # + # self.in_default_lang = (self.lang == default_lang) + + # create the slug if not existing, + # generate slug according to the filename + if not hasattr(self, 'slug'): + basename = os.path.basename(os.path.splitext(source_path)[0]) + self.slug = slugify(basename, settings.get('SLUG_SUBSTITUTIONS', ())) + + self.source_path = source_path + + # manage the date format + # if not hasattr(self, 'date_format'): + # if hasattr(self, 'lang') and self.lang in settings['DATE_FORMATS']: + # self.date_format = settings['DATE_FORMATS'][self.lang] + # else: + # self.date_format = settings['DEFAULT_DATE_FORMAT'] + # + # if isinstance(self.date_format, tuple): + # locale_string = self.date_format[0] + # if sys.version_info < (3, ) and isinstance(locale_string, + # six.text_type): + # locale_string = locale_string.encode('ascii') + # locale.setlocale(locale.LC_ALL, locale_string) + # self.date_format = self.date_format[1] + # + # # manage timezone + # default_timezone = settings.get('TIMEZONE', 'UTC') + # timezone = getattr(self, 'timezone', default_timezone) + # + # if hasattr(self, 'date'): + # self.date = set_date_tzinfo(self.date, timezone) + # self.locale_date = strftime(self.date, self.date_format) + # + # if hasattr(self, 'modified'): + # self.modified = set_date_tzinfo(self.modified, timezone) + # self.locale_modified = strftime(self.modified, self.date_format) + # + # # manage status + # if not hasattr(self, 'status'): + # self.status = settings['DEFAULT_STATUS'] + # if not settings['WITH_FUTURE_DATES'] and hasattr(self, 'date'): + # if self.date.tzinfo is None: + # now = SafeDatetime.now() + # else: + # now = SafeDatetime.utcnow().replace(tzinfo=pytz.utc) + # if self.date > now: + # self.status = 'draft' + # + # # store the summary metadata if it is set + # if 'summary' in metadata: + # self._summary = metadata['summary'] + + signals.content_object_init.send(self) + + def __str__(self): + return self.source_path or repr(self) + + # def __getattr__(self, name): + + + def get_relative_source_path(self, source_path=None): + """Return the relative path (from the content path) to the given + source_path. + + If no source path is specified, use the source path of this + content object. 
+ """ + if not source_path: + source_path = self.source_path + if source_path is None: + return None + + return posixize_path( + os.path.relpath( + os.path.abspath(os.path.join(self.settings['PATH'], source_path)), + os.path.abspath(self.settings['PATH']) + )) + + +class Collection(list): + '''An augmented list to act as Data storage''' + def __init__(self, name, path, *args): + self.name = name + self.path = path + super(Collection, self).__init__(*args) + + +class DataGenerator(CachingGenerator): + ''' + Load data into context and optionnaly render pages for them + ''' + + def __init__(self, *args, **kwargs): + self.data = {} + super(DataGenerator, self).__init__(*args, **kwargs) + data_generator_init.send(self) + + def is_supported(self, name): + paths = self.settings.setdefault('DATA_PATHS', []) + paths = [os.path.join(p, name) for p in paths] + extensions = SUPPORTED_FORMATS + list(self.readers.extensions) + return any(map(os.path.isdir, paths)) or any(name.endswith(ext) for ext in extensions) + + def generate_context(self): + for name in self.settings['DATA']: + if not self.is_supported(name): + logger.warning('Unsupported file format: %s', name) + continue + data = None + for root in self.settings.setdefault('DATA_PATHS', []): + path = os.path.join(root, name) + if os.path.isdir(path): + data = self.context_for_dir(name, path) + elif os.path.exists(path): + name, ext = os.path.splitext(name) + if ext in ('.yaml', '.yml'): + data = self.context_for_yaml(name, path) + elif ext == '.json': + data = self.context_for_json(name, path) + else: + data = self.context_for_reader(name, path) + break + else: + continue + + if not data: + logger.warning('Missing data: %s', name) + continue + + self.data[name] = data + + self.context['data'] = self.data + + self.save_cache() + self.readers.save_cache() + data_generator_finalized.send(self) + + def context_for_dir(self, name, path): + collection = Collection(name, path) + + for f in self.get_files(collection.path): + item = self.get_cached_data(f, None) + if item is None: + try: + item = self.readers.read_file( + base_path=self.path, path=f, content_class=Data, + context=self.context, + preread_signal=data_generator_preread, + preread_sender=self, + context_signal=data_generator_context, + context_sender=self) + except Exception as e: + logger.error('Could not process %s\n%s', f, e, + exc_info=self.settings.get('DEBUG', False)) + self._add_failed_source_path(f) + continue + + self.cache_data(f, item) + + self.add_source_path(item) + collection.append(item) + return collection + + def context_for_yaml(self, name, path): + data = self.get_cached_data(path, None) + if data is None: + try: + with open(path) as f: + data = yaml.load(f) + except Exception as e: + logger.error('Could not process %s\n%s', path, e, + exc_info=self.settings.get('DEBUG', False)) + self._add_failed_source_path(path) + return + + self.cache_data(path, data) + return data + + def context_for_json(self, name, path): + data = self.get_cached_data(path, None) + if data is None: + try: + with open(path) as f: + data = json.load(f) + except Exception as e: + logger.error('Could not process %s\n%s', path, e, + exc_info=self.settings.get('DEBUG', False)) + self._add_failed_source_path(path) + return + + self.cache_data(path, data) + return data + + def context_for_reader(self, name, path): + data = self.get_cached_data(path, None) + if data is None: + try: + data = self.readers.read_file( + base_path=self.path, path=path, content_class=Data, + context=self.context, + 
preread_signal=data_generator_preread, + preread_sender=self, + context_signal=data_generator_context, + context_sender=self) + except Exception as e: + logger.error('Could not process %s\n%s', path, e, + exc_info=self.settings.get('DEBUG', False)) + self._add_failed_source_path(path) + return + + self.cache_data(path, data) + + self.add_source_path(data) + return data + + # + # def generate_output(self, writer): + # for page in chain(self.translations, self.pages, + # self.hidden_translations, self.hidden_pages): + # writer.write_file( + # page.save_as, self.get_template(page.template), + # self.context, page=page, + # relative_urls=self.settings['RELATIVE_URLS'], + # override_output=hasattr(page, 'override_save_as')) + # data_writer_finalized.send(self, writer=writer) + + +def get_generators(sender, **kwargs): + return DataGenerator + + +def register(): + signals.get_generators.connect(get_generators) diff --git a/plugins/gzip_cache.py b/plugins/gzip_cache.py new file mode 100644 index 0000000..5ecc8ba --- /dev/null +++ b/plugins/gzip_cache.py @@ -0,0 +1,124 @@ +''' +Copyright (c) 2012 Matt Layman + +Gzip cache +---------- + +A plugin to create .gz cache files for optimization. +''' + +import logging +import os +import zlib + +from pelican import signals + +logger = logging.getLogger(__name__) + +# A list of file types to exclude from possible compression +EXCLUDE_TYPES = [ + # Compressed types + '.bz2', + '.gz', + + # Audio types + '.aac', + '.flac', + '.mp3', + '.wma', + + # Image types + '.gif', + '.jpg', + '.jpeg', + '.png', + + # Video types + '.avi', + '.mov', + '.mp4', + '.webm', + + # Internally-compressed fonts. gzip can often shave ~50 more bytes off, + # but it's not worth it. + '.woff', +] + +COMPRESSION_LEVEL = 9 # Best Compression + +""" According to zlib manual: 'Add 16 to +windowBits to write a simple gzip header and trailer around the +compressed data instead of a zlib wrapper. The gzip header will +have no file name, no extra data, no comment, no modification +time (set to zero), no header crc, and the operating system +will be set to 255 (unknown)' +""" +WBITS = zlib.MAX_WBITS | 16 + + +def create_gzip_cache(pelican): + '''Create a gzip cache file for every file that a webserver would + reasonably want to cache (e.g., text type files). + + :param pelican: The Pelican instance + ''' + for dirpath, _, filenames in os.walk(pelican.settings['OUTPUT_PATH']): + for name in filenames: + if should_compress(name): + filepath = os.path.join(dirpath, name) + create_gzip_file(filepath, should_overwrite(pelican.settings)) + + +def should_compress(filename): + '''Check if the filename is a type of file that should be compressed. + + :param filename: A file name to check against + ''' + for extension in EXCLUDE_TYPES: + if filename.endswith(extension): + return False + + return True + +def should_overwrite(settings): + '''Check if the gzipped files should overwrite the originals. + + :param settings: The pelican instance settings + ''' + return settings.get('GZIP_CACHE_OVERWRITE', False) + +def create_gzip_file(filepath, overwrite): + '''Create a gzipped file in the same directory with a filepath.gz name. 
+ + :param filepath: A file to compress + :param overwrite: Whether the original file should be overwritten + ''' + compressed_path = filepath + '.gz' + + with open(filepath, 'rb') as uncompressed: + gzip_compress_obj = zlib.compressobj(COMPRESSION_LEVEL, + zlib.DEFLATED, WBITS) + + uncompressed_data = uncompressed.read() + gzipped_data = gzip_compress_obj.compress(uncompressed_data) + gzipped_data += gzip_compress_obj.flush() + + if len(gzipped_data) >= len(uncompressed_data): + logger.debug('No improvement: %s' % filepath) + return + + with open(compressed_path, 'wb') as compressed: + logger.debug('Compressing: %s' % filepath) + try: + compressed.write(gzipped_data) + except Exception as ex: + logger.critical('Gzip compression failed: %s' % ex) + + if overwrite: + logger.debug('Overwriting: %s with %s' % (filepath, compressed_path)) + os.remove(filepath) + os.rename(compressed_path, filepath) + +def register(): + signals.finalized.connect(create_gzip_cache) + diff --git a/plugins/image_optimizer.py b/plugins/image_optimizer.py new file mode 100644 index 0000000..763429b --- /dev/null +++ b/plugins/image_optimizer.py @@ -0,0 +1,102 @@ +# -*- coding: utf-8 -*- + +""" +Optimized images (gif, jpeg & png) +Assumes that Gifsicle, ImageMagick's convert and pngquant are installed on path +http://www.lcdf.org/gifsicle/ +http://www.imagemagick.org/ +http://pngquant.org/ +Copyright (c) 2014 Marc Alexandre (http://www.malexandre.fr) +""" + +import os +import re +import shutil +from subprocess import call + +from pelican import signals + +# The commands list per file type +JPEG = 'cjpeg -quality 80 {filename} > {filename}.' + +COMMANDS = { + '.jpg': JPEG, + '.jpeg': JPEG, + '.png': 'pngquant -o "{filename}." "{filename}"', + '.gif': 'gifsicle --no-warnings -O "{filename}" -o "{filename}."' +} + +OPTIMIZED = '_optimized' +FLAG = 'IMAGE_OPTIMIZATION_ONCE_AND_FOR_ALL' + + +def image_optimizer_initialized(pelican): + """ + Optimize gif, jpg and png images. + + @param pelican: The Pelican instance + """ + if not pelican.settings.get(FLAG): + return + + for dirpath, _, filenames in os.walk(pelican.settings['PATH']): + for name in filenames: + if os.path.splitext(name)[1] in COMMANDS.keys(): + if not re.search(OPTIMIZED + r'\.(png|jpg|jpeg|gif)', name): + optimize(pelican, dirpath, name) + + +def image_optimizer_finalized(pelican): + """ + Optimize gif, jpg and png images. If the + FLAG setting is set to True, just rename + the files already optimized. + + @param pelican: The Pelican instance + """ + for dirpath, _, filenames in os.walk(pelican.settings['OUTPUT_PATH']): + for name in filenames: + if os.path.splitext(name)[1] in COMMANDS.keys(): + if pelican.settings.get(FLAG): + if '_optimized' in name: + filepath = os.path.join(dirpath, name) + newname = re.sub(OPTIMIZED + r'\.(png|jpg|jpeg|gif)', + r'.\1', name) + newfilepath = os.path.join(dirpath, newname) + shutil.move(filepath, newfilepath) + else: + optimize(pelican, dirpath, name) + + +def optimize(pelican, dirpath, filename): + """ + Optimize the image.
+ + @param pelican: The Pelican instance + @param dirpath: Path of folder containing the file to optimize + @param filename: File name to optimize + """ + filepath = os.path.join(dirpath, filename) + + ext = os.path.splitext(filename)[1] + command = COMMANDS[ext].format(filename=filepath) + call(command, shell=True) + originsize = os.path.getsize(filepath) + newsize = os.path.getsize(filepath + '.') + + if newsize < originsize: + shutil.move(filepath + '.', filepath) + else: + os.remove(filepath + '.') + + if pelican.settings.get(FLAG): + new_name = re.sub(r'\.(png|jpg|jpeg|gif)', + OPTIMIZED + r'.\1', filename) + shutil.move(filepath, os.path.join(dirpath, new_name)) + + +def register(): + """ + Register the plugin in Pelican. + """ + signals.initialized.connect(image_optimizer_initialized) + signals.finalized.connect(image_optimizer_finalized) diff --git a/plugins/jinja_tools.py b/plugins/jinja_tools.py new file mode 100644 index 0000000..ef9d768 --- /dev/null +++ b/plugins/jinja_tools.py @@ -0,0 +1,134 @@ +import json +import logging + +from inspect import isfunction +from pprint import pprint +from jinja2 import contextfilter, contextfunction +from pelican import signals + +JINJA_FILTERS = {} +JINJA_GLOBALS = {} + + +def register_filter(func, name=None, ctx=False): + name = name or func.__name__ + if ctx: + func = contextfilter(func) + JINJA_FILTERS[name] = func + + +def register_global(func, name=None, ctx=False): + name = name or func.__name__ + if ctx: + func = contextfunction(func) + JINJA_GLOBALS[name] = func + + +def jinjafilter(name_or_func=None, ctx=False): + if isfunction(name_or_func): + register_filter(name_or_func) + return name_or_func + else: + def wrapper(func): + register_filter(func, name_or_func, ctx) + return func + return wrapper + + +def jinjaglobal(name_or_func=None, ctx=False): + if isfunction(name_or_func): + register_global(name_or_func) + return name_or_func + else: + def wrapper(func): + register_global(func, name_or_func, ctx) + return func + return wrapper + + +@jinjafilter +def prev_page(current, pages): + prev_page = None + for page in sorted(pages, key=lambda p: p.date): + if page == current: + break + prev_page = page + return prev_page + + +@jinjafilter +def next_page(current, pages): + found = False + for page in sorted(pages, key=lambda p: p.date): + if found: + return page + if page == current: + found = True + + +@jinjafilter('json') +def json_filter(value): + return json.dumps(value) + + +@jinjafilter +def linebreaks(value): + if isinstance(value, str): + return value.replace('\n', '<br/>
') + return value + + +def get_page_from_slug(ctx, slug): + lang = ctx['DEFAULT_LANG'] + for page in ctx['pages'] + ctx['hidden_pages']: + if page.slug == slug and page.lang == lang: + return page + + +@jinjaglobal(ctx=True) +def page_for(ctx, slug): + return get_page_from_slug(ctx, slug) + + +@jinjafilter(ctx=True) +def page_url(ctx, slug): + page = get_page_from_slug(ctx, slug) + site_url = ctx['SITEURL'] + if page: + return '/'.join((site_url, page.url)) + + +@jinjafilter(ctx=True) +def page_title(ctx, slug): + page = get_page_from_slug(ctx, slug) + if page: + return page.title + + +@jinjaglobal() +def translation_for(obj, lang): + if hasattr(obj, 'translations'): + for translation in obj.translations: + if translation.lang == lang and translation.status not in ('draft', 'hidden'): + return translation + + +@jinjafilter +def debug(value, *args): + print('---- debug value ----') + pprint(value) + print('---------------------') + if args: + print('---- debug args ----') + pprint(args) + print('---------------------') + return value + + +def register_jinja_tools(generator): + generator.env.filters.update(JINJA_FILTERS) + generator.env.globals.update(JINJA_GLOBALS) + + +def register(): + signals.generator_init.connect(register_jinja_tools) diff --git a/plugins/related_posts.py b/plugins/related_posts.py new file mode 100644 index 0000000..fbb2426 --- /dev/null +++ b/plugins/related_posts.py @@ -0,0 +1,56 @@ +""" +Related posts plugin for Pelican +================================ + +Adds related_posts variable to article's context +""" + +from pelican import signals +from collections import Counter +from itertools import chain + + +def add_related_posts(generator): + # get the max number of entries from settings + # or fall back to default (5) + numentries = generator.settings.get('RELATED_POSTS_MAX', 5) + # Skip all posts in the same category as the article + skipcategory = generator.settings.get('RELATED_POSTS_SKIP_SAME_CATEGORY', False) + for article in chain(generator.articles, generator.drafts): + # set priority in case of forced related posts + if hasattr(article, 'related_posts'): + # split slugs + related_posts = article.related_posts.split(',') + posts = [] + # get related articles + for slug in related_posts: + i = 0 + slug = slug.strip() + for a in generator.articles: + if i >= numentries: # break in case there are max related posts + break + if a.slug == slug: + posts.append(a) + i += 1 + + article.related_posts = posts + else: + # no tag, no relation + if not hasattr(article, 'tags'): + continue + + # score = number of common tags + related = chain(*(generator.tags[tag] for tag in article.tags)) + if skipcategory: + related = (other for other in related + if other.category != article.category) + scores = Counter(related) + + # remove itself + scores.pop(article, None) + + article.related_posts = [other for other, count + in scores.most_common(numentries)] + +def register(): + signals.article_generator_finalized.connect(add_related_posts) diff --git a/plugins/sitemap.py b/plugins/sitemap.py new file mode 100644 index 0000000..8ce492a --- /dev/null +++ b/plugins/sitemap.py @@ -0,0 +1,268 @@ +# -*- coding: utf-8 -*- +''' +Sitemap +------- + +The sitemap plugin generates plain-text or XML sitemaps.
+''' + +from __future__ import unicode_literals + +import re +import collections +import os.path + +from datetime import datetime +from logging import warning, info +from codecs import open +from pytz import timezone + +from pelican import signals, contents +from pelican.utils import get_date + +TXT_HEADER = """{0}/index.html +{0}/archives.html +{0}/tags.html +{0}/categories.html +""" + +XML_HEADER = """<?xml version="1.0" encoding="utf-8"?> +<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" +xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" +xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"> +""" + +XML_URL = """ +<url> +<loc>{0}/{1}</loc> +<lastmod>{2}</lastmod> +<changefreq>{3}</changefreq> +<priority>{4}</priority> +</url> +""" + +XML_FOOTER = """ +</urlset> +""" + + +def format_date(date): + if date.tzinfo: + tz = date.strftime('%z') + tz = tz[:-2] + ':' + tz[-2:] + else: + tz = "-00:00" + return date.strftime("%Y-%m-%dT%H:%M:%S") + tz + +class SitemapGenerator(object): + + def __init__(self, context, settings, path, theme, output_path, *null): + + self.output_path = output_path + self.context = context + self.now = datetime.now() + self.siteurl = settings.get('SITEURL') + + + self.default_timezone = settings.get('TIMEZONE', 'UTC') + self.timezone = getattr(self, 'timezone', self.default_timezone) + self.timezone = timezone(self.timezone) + + self.format = 'xml' + + self.changefreqs = { + 'articles': 'monthly', + 'indexes': 'daily', + 'pages': 'monthly' + } + + self.priorities = { + 'articles': 0.5, + 'indexes': 0.5, + 'pages': 0.5 + } + + self.sitemapExclude = [] + + config = settings.get('SITEMAP', {}) + + if not isinstance(config, dict): + warning("sitemap plugin: the SITEMAP setting must be a dict") + else: + fmt = config.get('format') + pris = config.get('priorities') + chfreqs = config.get('changefreqs') + self.sitemapExclude = config.get('exclude', []) + + if fmt not in ('xml', 'txt'): + warning("sitemap plugin: SITEMAP['format'] must be `txt' or `xml'") + warning("sitemap plugin: Setting SITEMAP['format'] on `xml'") + elif fmt == 'txt': + self.format = fmt + return + + valid_keys = ('articles', 'indexes', 'pages') + valid_chfreqs = ('always', 'hourly', 'daily', 'weekly', 'monthly', + 'yearly', 'never') + + if isinstance(pris, dict): + # We use items for Py3k compat. .iteritems() otherwise + for k, v in pris.items(): + if k in valid_keys and not isinstance(v, (int, float)): + default = self.priorities[k] + warning("sitemap plugin: priorities must be numbers") + warning("sitemap plugin: setting SITEMAP['priorities']" + "['{0}'] on {1}".format(k, default)) + pris[k] = default + self.priorities.update(pris) + elif pris is not None: + warning("sitemap plugin: SITEMAP['priorities'] must be a dict") + warning("sitemap plugin: using the default values") + + if isinstance(chfreqs, dict): + # .items() for py3k compat.
+ for k, v in chfreqs.items(): + if k in valid_keys and v not in valid_chfreqs: + default = self.changefreqs[k] + warning("sitemap plugin: invalid changefreq `{0}'".format(v)) + warning("sitemap plugin: setting SITEMAP['changefreqs']" + "['{0}'] on '{1}'".format(k, default)) + chfreqs[k] = default + self.changefreqs.update(chfreqs) + elif chfreqs is not None: + warning("sitemap plugin: SITEMAP['changefreqs'] must be a dict") + warning("sitemap plugin: using the default values") + + def write_url(self, page, fd): + + if getattr(page, 'status', 'published') != 'published': + return + + # We can disable categories/authors/etc by using False instead of '' + if not page.save_as: + return + + page_path = os.path.join(self.output_path, page.save_as) + if not os.path.exists(page_path): + return + + lastdate = getattr(page, 'date', self.now) + try: + lastdate = self.get_date_modified(page, lastdate) + except ValueError: + warning("sitemap plugin: " + page.save_as + " has invalid modification date,") + warning("sitemap plugin: using date value as lastmod.") + lastmod = format_date(lastdate) + + if isinstance(page, contents.Article): + pri = self.priorities['articles'] + chfreq = self.changefreqs['articles'] + elif isinstance(page, contents.Page): + pri = self.priorities['pages'] + chfreq = self.changefreqs['pages'] + else: + pri = self.priorities['indexes'] + chfreq = self.changefreqs['indexes'] + + pageurl = '' if page.url == 'index.html' else page.url + + # Exclude URLs from the sitemap: + if self.format == 'xml': + flag = False + for regstr in self.sitemapExclude: + if re.match(regstr, pageurl): + flag = True + break + if not flag: + fd.write(XML_URL.format(self.siteurl, pageurl, lastmod, chfreq, pri)) + else: + fd.write(self.siteurl + '/' + pageurl + '\n') + + def get_date_modified(self, page, default): + if hasattr(page, 'modified'): + if isinstance(page.modified, datetime): + return page.modified + return get_date(page.modified) + else: + return default + + def set_url_wrappers_modification_date(self, wrappers): + for (wrapper, articles) in wrappers: + lastmod = datetime.min.replace(tzinfo=self.timezone) + for article in articles: + lastmod = max(lastmod, article.date.replace(tzinfo=self.timezone)) + try: + modified = self.get_date_modified(article, datetime.min).replace(tzinfo=self.timezone) + lastmod = max(lastmod, modified) + except ValueError: + # Suppressed: user will be notified.
+ pass + setattr(wrapper, 'modified', str(lastmod)) + + def generate_output(self, writer): + path = os.path.join(self.output_path, 'sitemap.{0}'.format(self.format)) + + pages = self.context['pages'] + self.context['articles'] \ + + [c for (c, a) in self.context['categories']] \ + + [t for (t, a) in self.context['tags']] \ + + [a for (a, b) in self.context['authors']] + + self.set_url_wrappers_modification_date(self.context['categories']) + self.set_url_wrappers_modification_date(self.context['tags']) + self.set_url_wrappers_modification_date(self.context['authors']) + + for article in self.context['articles']: + pages += article.translations + + info('writing {0}'.format(path)) + + with open(path, 'w', encoding='utf-8') as fd: + + if self.format == 'xml': + fd.write(XML_HEADER) + else: + fd.write(TXT_HEADER.format(self.siteurl)) + + FakePage = collections.namedtuple('FakePage', + ['status', + 'date', + 'url', + 'save_as']) + + for standard_page_url in ['index.html', + 'archives.html', + 'tags.html', + 'categories.html']: + fake = FakePage(status='published', + date=self.now, + url=standard_page_url, + save_as=standard_page_url) + self.write_url(fake, fd) + + # add template pages + # We use items for Py3k compat. .iteritems() otherwise + for path, template_page_url in self.context['TEMPLATE_PAGES'].items(): + + # don't add duplicate entry for index page + if template_page_url == 'index.html': + continue + + fake = FakePage(status='published', + date=self.now, + url=template_page_url, + save_as=template_page_url) + self.write_url(fake, fd) + + for page in pages: + self.write_url(page, fd) + + if self.format == 'xml': + fd.write(XML_FOOTER) + + +def get_generators(generators): + return SitemapGenerator + + +def register(): + signals.get_generators.connect(get_generators) diff --git a/publishconf.py b/publishconf.py new file mode 100644 index 0000000..a5bbf6a --- /dev/null +++ b/publishconf.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python + +import os +import sys +sys.path.append(os.curdir) +from pelicanconf import * + +SITEURL = 'https://getudata.org' +RELATIVE_URLS = False + +FEED_DOMAIN = SITEURL +FEED_ALL_ATOM = 'feeds/all.atom' +CATEGORY_FEED_ATOM = 'feeds/%s.atom' + +ARTICLE_URL = 'blog/{date:%Y}/{date:%m}/{date:%d}/{slug}/' +CATEGORY_URL = 'blog/category/{slug}/' +TAG_URL = 'blog/tag/{slug}/' +PAGE_URL = '{slug}/' + + +DELETE_OUTPUT_DIRECTORY = True + +# Following items are often useful when publishing + +#DISQUS_SITENAME = "" +#GOOGLE_ANALYTICS = "" + +PLUGINS += ( + 'image_optimizer', + 'gzip_cache', +) diff --git a/requirements.pip b/requirements.pip new file mode 100644 index 0000000..34da531 --- /dev/null +++ b/requirements.pip @@ -0,0 +1,9 @@ +pelican==3.7.1 +Pygments +feedgenerator +feedparser +typogrify +PyYAML +pelican-social>=0.1.1 +pelican-frontmark +awesome-slugify diff --git a/tasks/__init__.py b/tasks/__init__.py new file mode 100644 index 0000000..4c986b7 --- /dev/null +++ b/tasks/__init__.py @@ -0,0 +1,170 @@ +from __future__ import unicode_literals + +import logging +import os +import shutil +import slugify +import sys + +from datetime import datetime + +from invoke import run as raw_run, task + +from pelican import Pelican, log + +from pelican.settings import read_settings +from jinja2 import Environment, FileSystemLoader + +if sys.version_info[0] == 3: + try: + from importlib import reload + except ImportError: + from imp import reload + + +#: Project absolute root path +TASKS_ROOT = os.path.dirname(__file__) +ROOT = os.path.abspath(os.path.join(TASKS_ROOT, '..')) + +CONF_FILE =
os.path.join(ROOT, 'pelicanconf.py') + +THEME = 'arisn' + +# Port for `serve` +PORT = 5000 + + +class objdict(dict): + def __getattr__(self, name): + return self[name] + + +def get_settings(): + return objdict(read_settings(CONF_FILE)) + + +jinja_env = Environment(loader=FileSystemLoader(TASKS_ROOT)) + + +def jinja(template, filename, **ctx): + template = jinja_env.get_template(template) + with open(filename, 'wb') as out: + data = template.render(**ctx) + out.write(data.encode('utf-8')) + + +def run(cmd, *args, **kwargs): + '''Run a command ensuring cwd is project root''' + return raw_run('cd {0} && {1}'.format(ROOT, cmd), *args, **kwargs) + + +@task +def clean(ctx): + '''Remove generated files''' + settings = get_settings() + if os.path.isdir(settings.OUTPUT_PATH): + shutil.rmtree(settings.OUTPUT_PATH) + os.makedirs(settings.OUTPUT_PATH) + + +@task() +def build(ctx, verbose=False, debug=False): + '''Build local version of site''' + cmd = 'pelican -s publishconf.py' + if verbose: + cmd += ' -v' + if debug: + cmd += ' -D' + ctx.run(cmd) + + +def draft(article=False): + '''Create a draft page or article''' + title = input('Title: ') + slug = slugify.slugify(title, to_lower=True) + slug = input('Slug ({0}): '.format(slug)) or slug + summary = input('Summary: ') + tags = [t for t in input('Tags: ').split(',') if t] + category = input('Category: ') if article else None + now = datetime.now() + if article: + filename = '{0}-{1}.md'.format(now.date().isoformat(), slug) + filename = os.path.join('articles', filename) + else: + filename = os.path.join('pages', '{0}.md'.format(slug)) + os.makedirs(os.path.dirname(filename), exist_ok=True) + jinja('draft.j2.md', filename, + title=title, + slug=slug, + category=category, + summary=summary, + tags=tags, + is_article=article, + date=now) + + +@task +def page(ctx): + '''Create a draft page''' + draft(article=False) + + +@task +def article(ctx): + '''Create a draft article''' + draft(article=True) + + +def reload_and_compile(): + _sys_path = sys.path[:] + settings = get_settings() + for pluginpath in settings.PLUGIN_PATHS: + sys.path.insert(0, pluginpath) + for name, module in sys.modules.items(): + root_module = name.split('.', 1)[0] + if root_module in settings.PLUGINS: + reload(module) + + sys.path = _sys_path + compile() + + +def compile(): + settings = get_settings() + p = Pelican(settings) + try: + p.run() + except SystemExit: + pass + + +@task +def watch(ctx, verbose=False): + '''Serve the blog and watch changes''' + from livereload import Server + + settings = get_settings() + + log.init(logging.DEBUG if verbose else logging.INFO) + logging.getLogger('livereload').propagate = False + logging.getLogger('tornado').propagate = False + + compile() + server = Server() + server.watch(CONF_FILE, compile) + + server.watch('theme', compile) + server.watch('local_plugins', reload_and_compile) + + DATA_PATHS = getattr(settings, 'DATA_PATHS', []) + for root in set(DATA_PATHS): + for data in getattr(settings, 'DATA', []): + path = os.path.join(root, data) + if os.path.exists(path): + server.watch(path, compile) + + paths = settings.ARTICLE_PATHS + settings.PAGE_PATHS + for path in paths: + server.watch(path, compile) + + server.serve(port=PORT, root=settings.OUTPUT_PATH) diff --git a/tasks/draft.j2.md b/tasks/draft.j2.md new file mode 100644 index 0000000..c379cff --- /dev/null +++ b/tasks/draft.j2.md @@ -0,0 +1,19 @@ +--- +title: {{ title }} +date: {{ date.strftime('%Y-%m-%d %H:%M') }} +modified: {{ date.strftime('%Y-%m-%d %H:%M') }} +image:
https://placehold.it/1920x1080 +tags:{% for tag in tags %} + - {{ tag|trim }} + {%- endfor %} +slug: {{ slug }} +lang: en +{% if category -%} +category: {{ category }} +{% endif -%} +authors: Open Data Team +summary: {{ summary }} +{% if is_article -%} +status: draft +{% endif -%} +--- diff --git a/theme/README.md b/theme/README.md new file mode 100644 index 0000000..4bbe580 --- /dev/null +++ b/theme/README.md @@ -0,0 +1 @@ +# Theme goes here
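
A few usage notes on the plugins this diff introduces. The data plugin (plugins/data.py, wired up through the DATA and DATA_PATHS settings in pelicanconf.py) exposes each entry of DATA under context['data'], keyed by name without extension, so data/config.yml should be reachable from a theme template as {{ data.config.tagline }}. Below is a minimal sketch of what its context_for_yaml step does with that file; it assumes only that PyYAML is installed, and the path and the tagline key are the ones from the diff:

import collections

import yaml


def dict_constructor(loader, node):
    # Keep mapping order, as plugins/data.py registers for its own loads
    return collections.OrderedDict(loader.construct_pairs(node))


yaml.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, dict_constructor)

with open('data/config.yml') as f:
    config = yaml.load(f)  # PyYAML of this era; newer releases want an explicit Loader

print(config['tagline'])
# Open, customizable and skinnable platform dedicated to open data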
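
In plugins/gzip_cache.py the subtle bit is WBITS = zlib.MAX_WBITS | 16: the extra 16 makes zlib emit a gzip header and trailer instead of its native wrapper, so the .gz files written next to the output are directly servable. A quick standard-library-only check of that claim:

import gzip
import io
import zlib

# Same parameters as create_gzip_file(): best compression, deflate, gzip framing
compressor = zlib.compressobj(9, zlib.DEFLATED, zlib.MAX_WBITS | 16)
payload = compressor.compress(b'hello world') + compressor.flush()

# The gzip module reads it back, so a webserver serving precompressed files will too
assert gzip.GzipFile(fileobj=io.BytesIO(payload)).read() == b'hello world'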
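
plugins/jinja_tools.py accumulates decorated callables in JINJA_FILTERS and JINJA_GLOBALS and pushes both into the Jinja environment when generator_init fires, so registration happens at decoration time. A hedged sketch of how a site-local module could hook into it; shout and page_count are hypothetical names, not part of the diff, and the module must be imported before the generators are built (for instance from pelicanconf.py):

from plugins.jinja_tools import jinjafilter, jinjaglobal


@jinjafilter
def shout(value):
    # Hypothetical filter; template usage: {{ page.title|shout }}
    return str(value).upper() + '!'


@jinjaglobal(ctx=True)
def page_count(ctx):
    # Hypothetical global; receives the template context, usage: {{ page_count() }}
    return len(ctx['pages'])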
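
Finally, the scoring in plugins/related_posts.py is a plain tag-overlap count: chain together every article sharing a tag with the current one, count occurrences, drop the article itself, and keep the top RELATED_POSTS_MAX. The same logic run on toy data; the article names and tags here are invented for illustration:

from collections import Counter
from itertools import chain

# Stand-in for generator.tags: tag -> articles carrying that tag
tags = {
    'opendata': ['a1', 'a3'],
    'python': ['a1', 'a2', 'a3'],
}
article, article_tags = 'a1', ['opendata', 'python']

scores = Counter(chain(*(tags[tag] for tag in article_tags)))
scores.pop(article, None)  # an article is not related to itself

print([other for other, count in scores.most_common(3)])
# ['a3', 'a2'] -- a3 shares two tags with a1, a2 only one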