From eaf39de40566f727b972c085f2d5989a7a853a77 Mon Sep 17 00:00:00 2001 From: Dillon Walls Date: Fri, 20 Sep 2024 12:32:01 -0400 Subject: [PATCH] encryption documentation --- docs/api/ming.encryption.rst | 7 ++ docs/baselevel.rst | 2 +- docs/encryption.rst | 39 ++++++++++++ docs/index.rst | 1 + ming/datastore.py | 10 +-- ming/encryption.py | 120 +++++++++++++++++++++++++++++------ 6 files changed, 152 insertions(+), 27 deletions(-) create mode 100644 docs/api/ming.encryption.rst create mode 100644 docs/encryption.rst diff --git a/docs/api/ming.encryption.rst b/docs/api/ming.encryption.rst new file mode 100644 index 0000000..519f24c --- /dev/null +++ b/docs/api/ming.encryption.rst @@ -0,0 +1,7 @@ +:mod:`ming.encryption` module +================================ + + +.. automodule:: ming.encryption + :members: + :private-members: diff --git a/docs/baselevel.rst b/docs/baselevel.rst index 65de085..8cf7c28 100644 --- a/docs/baselevel.rst +++ b/docs/baselevel.rst @@ -17,7 +17,7 @@ you want. While this dynamic behavior is handy in a rapid development environment where you might delete and re-create the database many times a day, it starts to be a problem when you *need* to make guarantees of the type of data in a collection -(because you code depends on it). The goal of Ming is to allow you to specify +(because your code depends on it). The goal of Ming is to allow you to specify the schema for your data in Python code and then develop in confidence, knowing the format of data you get from a query. diff --git a/docs/encryption.rst b/docs/encryption.rst new file mode 100644 index 0000000..d78fef1 --- /dev/null +++ b/docs/encryption.rst @@ -0,0 +1,39 @@ +:tocdepth: 3 + +.. _odm-encryption: + +============================ +Encrypting Sensitive Data +============================ + +This section describes how Ming can be used to automatically encrypt and decrypt your document's fields. 
This is accomplished by leveraging MongoDB's `Client-Side Field Level Encryption (CSFLE)`_ feature. + + + +.. _Client-Side Field Level Encryption (CSFLE): https://pymongo.readthedocs.io/en/stable/examples/encryption.html#client-side-field-level-encryption + + +Declarative Field-Level Encryption +================================== + +When declaratively working with models by subclassing the :class:`ming.Document` in the :ref:`ming_baselevel`, this is accomplished by pairing a :class:`~ming.encryption.DecryptedField` with a :class:`~ming.metadata.Field`. + +A simple example might look like the following:: + + class UserEmail(Document): + class __mongometa__: + session = session + name = 'user_emails' + _id = Field(schema.ObjectId) + + email_encrypted = Field(schema.Binary, if_missing=None) + email = DecryptedField(str, 'email_encrypted') + + +Breaking it Down +======================== + +This approach requires that you follow a few conventions in order to function correctly. + +1. Fields holding encrypted data must be named with the suffix ``_encrypted``. + diff --git a/docs/index.rst b/docs/index.rst index da0321d..4c64105 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -101,6 +101,7 @@ Documentation Content polymorphism custom_properties baselevel + encryption reference news diff --git a/ming/datastore.py b/ming/datastore.py index 5036aa0..2fac2c9 100644 --- a/ming/datastore.py +++ b/ming/datastore.py @@ -19,7 +19,7 @@ Conn = Union[mim.Connection, MongoClient] -def create_engine(*args, **kwargs): +def create_engine(*args, **kwargs) -> Engine: """Creates a new :class:`.Engine` instance. According to the provided url schema ``mongodb://`` or ``mim://`` @@ -39,7 +39,7 @@ def create_engine(*args, **kwargs): return Engine(use_class, args, kwargs, connect_retry, auto_ensure_indexes) -def create_datastore(uri, **kwargs): +def create_datastore(uri, **kwargs) -> DataStore: """Creates a new :class:`.DataStore` for the database identified by ``uri``.
``uri`` is a mongodb url in the form ``mongodb://username:password@address:port/dbname``, @@ -98,8 +98,8 @@ def create_datastore(uri, **kwargs): class Engine: """Engine represents the connection to a MongoDB (or in-memory database). - The ``Engine`` class lazily creates the connection the firs time it's - actually accessed. + The ``Engine`` class lazily creates the connection the first time it's + accessed. """ def __init__(self, Connection, @@ -167,7 +167,7 @@ class DataStore: :func:`.create_datastore` function. """ - def __init__(self, bind, name, encryption_config: encryption.EncryptionConfig = None): + def __init__(self, bind: Engine, name: str, encryption_config: encryption.EncryptionConfig = None): self.bind = bind self.name = name self._encryption_config = encryption_config diff --git a/ming/encryption.py b/ming/encryption.py index 4f39b80..8e433e4 100644 --- a/ming/encryption.py +++ b/ming/encryption.py @@ -1,8 +1,6 @@ from __future__ import annotations -from copy import deepcopy from functools import lru_cache -import json from typing import TYPE_CHECKING, TypeVar, Generic from pymongo.encryption import ClientEncryption, Algorithm @@ -20,39 +18,72 @@ class MingEncryptionError(Exception): class EncryptionConfig: + """ + A class to hold the encryption configuration for a ming datastore. + :param config: a dictionary that closely mirrors the MongoDB + encryption options that Ming supports.
+ """ def __init__(self, config: dict): - self._encryption_config = self.clean_config(config) - - @classmethod - def clean_config(cls, config: dict) -> dict: - config = deepcopy(config) - - # ensure key_alt_names is a list - if config.get('provider_options', None): - for provider, values in list((config['provider_options'] or dict()).items()): - if 'key_alt_names' in values and not isinstance(values['key_alt_names'], list): - try: - config['provider_options'][provider]['key_alt_names'] = json.loads(values['key_alt_names']) - except json.JSONDecodeError: - key_alt_names = [s.strip() for s in values['key_alt_names'].split(',') if s] - config['provider_options'][provider]['key_alt_names'] = key_alt_names - - return config + self._encryption_config = config @property def kms_providers(self) -> dict: + """ + Returns the kms providers used in this configuration. These values are passed directly to pymongo. + + See the documentation for the :class:`pymongo.encryption.ClientEncryption` constructor + for more information on valid values for ``kms_providers``. + + A typical example of the ``kms_providers`` field using the ``local`` provider would look like this: + + .. code-block:: json + + { + "local": { + "key": "<base64-encoded key>" + } + } + + """ return self._encryption_config.get('kms_providers') + # FIXME: should be able to call `create_data_key('local', ...)` multiple times + # FIXME: rename provider_options -> ...? data_key_options, create_data_key_options @property def provider_options(self) -> dict: + """ + Returns all of the provider options used by this configuration when calling the underlying + :meth:`pymongo.encryption.ClientEncryption.create_data_key` method. + + See the documentation for pymongo's :meth:`pymongo.encryption.ClientEncryption.create_data_key` + method for more information on valid values for ``provider_options``. + + A typical example of the ``provider_options`` field using the ``local`` provider would look like this: + + ..
code-block:: json + + { + "local": { + "key_alt_names": ["datakey_test1", "datakey_test2"] + }, + "gcp": { ... }, + ... + } + + """ return self._encryption_config.get('provider_options') - def key_alt_name(self, provider='local') -> str: + def get_key_alt_names(self, provider='local') -> str: return self.provider_options.get(provider)['key_alt_names'][0] @property def key_vault_namespace(self) -> str: + """Describes the MongoDB database/collection combo in which your auto-generated + encryption data keys will be stored. + + This is a string in the format ``<database>.<collection>``. + """ return self._encryption_config.get('key_vault_namespace') @@ -62,6 +93,23 @@ def key_vault_namespace(self) -> str: class DecryptedField(Generic[T]): def __init__(self, field_type: type[T], encrypted_field: str): + """ + Creates a field that acts as an automatic getter/setter for the target + field name specified by ``encrypted_field``. + + .. note:: + + Internally :class:`.DecryptedField` uses getattr and setattr on ``self`` using the ``encrypted_field`` name. + + .. code-block:: python + + class MyDocument(Document): + email_encrypted = Field(ming.schema.Binary) + email = DecryptedField(str, 'email_encrypted') + + :param field_type: The Type of the decrypted field + :param encrypted_field: The name of the encrypted attribute to operate on + """ self.field_type = field_type self.encrypted_field = encrypted_field @@ -75,10 +123,18 @@ def __set__(self, instance: EncryptedDocumentMixin, value: T): class EncryptedDocumentMixin: + """A mixin intended to be used with :class:`ming.Document` classes to provide encryption. + All configuration is handled by an instance of a :class:`ming.encryption.EncryptionConfig` + that is passed to the :class:`ming.datastore.DataStore` instance that the Document is bound to.
+ """ @classmethod @lru_cache(maxsize=99) def encryptor(cls, ming_ds: ming.datastore.DataStore) -> ClientEncryption: + """Creates and returns a :class:`pymongo.encryption.ClientEncryption` instance for the given ming datastore. It is used to handle encryption/decryption using pymongo's native routines. + + :param ming_ds: the :class:`ming.datastore.DataStore` for which this encryptor should be configured. + """ if not ming_ds.encryption: raise MingEncryptionError(f'No encryption settings found for {ming_ds}') conn: MongoClient = ming_ds.conn @@ -88,6 +144,8 @@ def encryptor(cls, ming_ds: ming.datastore.DataStore) -> ClientEncryption: @classmethod def make_data_key(cls): + """MongoDB's Client Side Field Level Encryption (CSFLE) requires a data key to be present in the key vault collection. This ensures that the key vault collection is properly indexed and that a data key is present for each provider. + """ ming_ds: ming.datastore.DataStore = cls.m.session.bind encryptor = cls.encryptor(ming_ds) # index recommended by mongodb docs: @@ -101,6 +159,9 @@ def make_data_key(cls): @classmethod def encr(cls, s: str | None, _first_attempt=True, provider='local') -> bytes | None: + """Encrypts a string using the encryption configuration of the ming datastore that this class is bound to. + Most of the time, you won't need to call this directly, as it is used by the :meth:`ming.encryption.EncryptedDocumentMixin.encrypt_some_fields` method.
+ """ if s is None: return None try: @@ -108,7 +169,7 @@ def encr(cls, s: str | None, _first_attempt=True, provider='local') -> bytes | N encryptor = cls.encryptor(ming_ds) return encryptor.encrypt(s, Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic, - key_alt_name=ming_ds.encryption.key_alt_name()) + key_alt_name=ming_ds.encryption.get_key_alt_names(provider)) except (EncryptionError, MongoCryptError) as e: if _first_attempt and 'not all keys requested were satisfied' in str(e): cls.make_data_key() @@ -118,22 +179,39 @@ def encr(cls, s: str | None, _first_attempt=True, provider='local') -> bytes | N @classmethod def decr(cls, b: bytes | None) -> str | None: + """Decrypts a value using the encryption configuration of the ming datastore that this class is bound to. + """ if b is None: return None return cls.encryptor(cls.m.session.bind).decrypt(b) @classmethod def decrypted_field_names(cls) -> list[str]: + """ + Returns a list of field names that have ``_encrypted`` counterparts. + + For example, if a class has fields ``email`` and ``email_encrypted``, this method would return ``['email']``. + """ return [fld.replace('_encrypted', '') for fld in cls.encrypted_field_names()] @classmethod def encrypted_field_names(cls) -> list[str]: + """ + Returns the field names of all encrypted fields. Fields are assumed to be encrypted if they end with ``_encrypted``. + + For example, if a class has fields ``email`` and ``email_encrypted``, this method would return ``['email_encrypted']``. + """ return [fld for fld in dir(cls) if fld.endswith('_encrypted')] @classmethod def encrypt_some_fields(cls, data: dict) -> dict: + """Encrypts some fields in a dictionary using the encryption configuration of the ming datastore that this class is bound to. + + :param data: a dictionary of data to be encrypted + :return: a modified copy of the ``data`` param with the currently-unencrypted-but-encryptable fields replaced with ``_encrypted`` counterparts.
+ """ encrypted_data = data.copy() for fld in cls.decrypted_field_names(): if fld in encrypted_data: