diff --git a/ocdsindex/__main__.py b/ocdsindex/__main__.py index 272183b..f97d141 100644 --- a/ocdsindex/__main__.py +++ b/ocdsindex/__main__.py @@ -33,8 +33,8 @@ def main(): @click.argument("base-url") def sphinx(directory, base_url): """ - Crawls the DIRECTORY of the Sphinx build of the OCDS documentation, generates documents to index, assigns documents - unique URLs from the BASE_URL, and prints the base URL, timestamp, and documents as JSON. + Crawl the DIRECTORY of the Sphinx build of the OCDS documentation, generate documents to index, assign documents + unique URLs from the BASE_URL, and print the base URL, timestamp, and documents as JSON. """ documents = Crawler(directory, base_url, extract_sphinx, allow=allow_sphinx).get_documents_by_language() json.dump({"base_url": base_url, "created_at": int(time.time()), "documents": documents}, sys.stdout) @@ -44,8 +44,8 @@ def sphinx(directory, base_url): @click.argument("file", type=click.File()) def extension_explorer(file): """ - Crawls the Extension Explorer's `extensions.json` file, generates documents to index, assigns documents unique - URLs, and prints the base URL, timestamp, and documents as JSON. + Crawl the Extension Explorer's `extensions.json` file, generate documents to index, assign documents unique + URLs, and print the base URL, timestamp, and documents as JSON. """ "https://extensions.open-contracting.org" @@ -55,15 +55,15 @@ def extension_explorer(file): @click.argument("file", type=click.File()) def index(file, host): """ - Adds documents to Elasticsearch indices. + Add documents to Elasticsearch indices. - Reads a JSON file in which the "base_url" key is the remote URL at which the documents will be accessible, and the + Read a JSON file in which the "base_url" key is the remote URL at which the documents will be accessible, and the "documents" key is an object in which the key is a language code and the value is the documents to index. The `sphinx` and `extension-explorer` commands create such files. - Connects to Elasticsearch at HOST and, for each language, creates an `ocdsindex_XX` index, deletes existing - documents matching the base URL, and indexes the new documents in that language. + Connect to Elasticsearch at HOST and, for each language, create an `ocdsindex_XX` index, delete existing + documents matching the base URL, and index the new documents in that language. """ language_map = { "en": "english", @@ -117,9 +117,7 @@ def index(file, host): @click.argument("source") @click.argument("destination") def copy(host, source, destination): - """ - Adds a document with a DESTINATION base URL for each document with a SOURCE base URL. - """ + """Add a document with a DESTINATION base URL for each document with a SOURCE base URL.""" with connect(host) as es: body = [] @@ -143,9 +141,7 @@ def copy(host, source, destination): "--exclude-file", type=click.File(), help="exclude any document whose base URL is equal to a line in this file" ) def expire(host, exclude_file): - """ - Deletes documents from Elasticsearch indices that were crawled more than 180 days ago. - """ + """Delete documents from Elasticsearch indices that were crawled more than 180 days ago.""" threshold = int(time.time()) - 15552000 # 180 days base_urls = [line.strip() for line in exclude_file] if exclude_file else [] diff --git a/ocdsindex/allow.py b/ocdsindex/allow.py index b63d56b..25611f6 100644 --- a/ocdsindex/allow.py +++ b/ocdsindex/allow.py @@ -1,13 +1,11 @@ -""" -``allow_`` methods that return whether to crawl a file. -""" +"""``allow_`` methods that return whether to crawl a file.""" import os -def allow_sphinx(root, file): +def allow_sphinx(root, _file): """ - Allows all files, except the 404 page. + Allow all files, except the 404 page. :param str root: a directory path :param str file: a file basename diff --git a/ocdsindex/crawler.py b/ocdsindex/crawler.py index ec34ff2..6eccd54 100644 --- a/ocdsindex/crawler.py +++ b/ocdsindex/crawler.py @@ -5,14 +5,12 @@ import lxml.html -def true(root, file): +def true(_root, _file): return True class Crawler: - """ - Crawls a directory for documents to index. - """ + """Crawl a directory for documents to index.""" def __init__(self, directory, base_url, extract, *, allow=true): """ @@ -30,7 +28,7 @@ def __init__(self, directory, base_url, extract, *, allow=true): def get_documents_by_language(self): """ - Returns the documents to index for each language. + Return the documents to index for each language. :returns: a dict in which the key is a language code and the value is the documents to index :rtype: dict @@ -51,7 +49,7 @@ def get_documents_by_language(self): def get_documents_from_file(self, path): """ - Parses the file's HTML contents, calculates its remote URL, and returns the documents to index from the file. + Parse the file's HTML contents, calculate its remote URL, and return the documents to index from the file. :param str path: a file path :returns: the documents to index diff --git a/ocdsindex/exceptions.py b/ocdsindex/exceptions.py index c502338..45a4d84 100644 --- a/ocdsindex/exceptions.py +++ b/ocdsindex/exceptions.py @@ -1,6 +1,6 @@ class OCDSIndexError(Exception): - """Base class for exceptions from within this package""" + """Base class for exceptions from within this package.""" class MissingHeadingError(OCDSIndexError, IndexError): - """Raised when a section is missing a heading""" + """Raised when a section is missing a heading.""" diff --git a/ocdsindex/extract.py b/ocdsindex/extract.py index bc91a18..96b54d7 100644 --- a/ocdsindex/extract.py +++ b/ocdsindex/extract.py @@ -1,5 +1,7 @@ """ -``extract_`` methods that return the documents to index as a list of dicts. Each dict sets these keys: +``extract_`` methods that return the documents to index as a list of dicts. + +Each dict sets these keys: url The remote URL of the document, which might include a fragment identifier @@ -44,7 +46,7 @@ def _select_div_by_class(tree, class_name): def extract_sphinx(url, tree): """ - Extracts one document per section of the page. + Extract one document per section of the page. :param str url: the file's remote URL :param tree: the file's root HTML element diff --git a/pyproject.toml b/pyproject.toml index d90cfea..5d3da1b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,4 +45,18 @@ line-length = 119 target-version = "py39" [tool.ruff.lint] -select = ["E", "C4", "F", "I", "W"] +select = ["ALL"] +ignore = [ + "ANN", "C901", "COM812", "D203", "D212", "D415", "EM", "PERF203", "PLR091", "Q000", + "PLR2004", "PLW2901", "D100", "D103", "D104", "D205", + "PTH", +] + +[tool.ruff.lint.flake8-builtins] +builtins-ignorelist = ["copyright"] + +[tool.ruff.lint.per-file-ignores] +"docs/conf.py" = ["D100", "INP001"] +"tests/*" = [ + "ARG001", "D", "FBT003", "INP001", "PLR2004", "S", "TRY003", +]