Skip to content

Commit

Permalink
encryption documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
dill0wn committed Sep 20, 2024
1 parent 8e1315b commit eaf39de
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 27 deletions.
7 changes: 7 additions & 0 deletions docs/api/ming.encryption.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
:mod:`ming.encryption` module
================================


.. automodule:: ming.encryption
:members:
:private-members:
2 changes: 1 addition & 1 deletion docs/baselevel.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ you want.
While this dynamic behavior is handy in a rapid development environment where you
might delete and re-create the database many times a day, it starts to be a
problem when you *need* to make guarantees of the type of data in a collection
(because you code depends on it). The goal of Ming is to allow you to specify
(because your code depends on it). The goal of Ming is to allow you to specify
the schema for your data in Python code and then develop in confidence, knowing
the format of data you get from a query.

Expand Down
39 changes: 39 additions & 0 deletions docs/encryption.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
:tocdepth: 3

.. _odm-encryption:

============================
Encrypting Sensitive Data
============================

This section describes how Ming can be used to automatically encrypt and decrypt your document's fields. This is accomplished by leveraging MongoDB's `Client-Side Field Level Encryption (CSFLE)`_ feature.



.. _Client-Side Field Level Encryption (CSFLE): https://pymongo.readthedocs.io/en/stable/examples/encryption.html#client-side-field-level-encryption


Declarative Field-Level Encryption
==================================

When working declaratively with models by subclassing :class:`ming.Document`, as described in :ref:`ming_baselevel`, this is accomplished by pairing a :class:`~ming.encryption.DecryptedField` with a :class:`~ming.metadata.Field`.

A simple example might look like the following::

class UserEmail(Document):
class __mongometa__:
session = session
name = 'user_emails'
_id = Field(schema.ObjectId)

email_encrypted = Field(schema.Binary, if_missing=None)
email = DecryptedField(str, 'email_encrypted')


Breaking it Down
========================

This approach requires that you follow a few conventions in order to function correctly.

.. 1. Fields holding encrypted data must be named with the suffix ``_encrypted``.
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ Documentation Content
polymorphism
custom_properties
baselevel
encryption
reference
news

Expand Down
10 changes: 5 additions & 5 deletions ming/datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
Conn = Union[mim.Connection, MongoClient]


def create_engine(*args, **kwargs):
def create_engine(*args, **kwargs) -> Engine:
"""Creates a new :class:`.Engine` instance.
According to the provided url schema ``mongodb://`` or ``mim://``
Expand All @@ -39,7 +39,7 @@ def create_engine(*args, **kwargs):
return Engine(use_class, args, kwargs, connect_retry, auto_ensure_indexes)


def create_datastore(uri, **kwargs):
def create_datastore(uri, **kwargs) -> DataStore:
"""Creates a new :class:`.DataStore` for the database identified by ``uri``.
``uri`` is a mongodb url in the form ``mongodb://username:password@address:port/dbname``,
Expand Down Expand Up @@ -98,8 +98,8 @@ def create_datastore(uri, **kwargs):
class Engine:
"""Engine represents the connection to a MongoDB (or in-memory database).
The ``Engine`` class lazily creates the connection the firs time it's
actually accessed.
The ``Engine`` class lazily creates the connection the first time it's
accessed.
"""

def __init__(self, Connection,
Expand Down Expand Up @@ -167,7 +167,7 @@ class DataStore:
:func:`.create_datastore` function.
"""

def __init__(self, bind, name, encryption_config: encryption.EncryptionConfig = None):
def __init__(self, bind: Engine, name: str, encryption_config: encryption.EncryptionConfig = None):
self.bind = bind
self.name = name
self._encryption_config = encryption_config
Expand Down
120 changes: 99 additions & 21 deletions ming/encryption.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from __future__ import annotations

from copy import deepcopy
from functools import lru_cache
import json
from typing import TYPE_CHECKING, TypeVar, Generic

from pymongo.encryption import ClientEncryption, Algorithm
Expand All @@ -20,39 +18,72 @@ class MingEncryptionError(Exception):


class EncryptionConfig:
"""
A class to hold the encryption configuration for a ming datastore.
:param config: a dictionary that closely resembles various features of the MongoDB
encryption that we support.
"""

def __init__(self, config: dict):
self._encryption_config = self.clean_config(config)

@classmethod
def clean_config(cls, config: dict) -> dict:
config = deepcopy(config)

# ensure key_alt_names is a list
if config.get('provider_options', None):
for provider, values in list((config['provider_options'] or dict()).items()):
if 'key_alt_names' in values and not isinstance(values['key_alt_names'], list):
try:
config['provider_options'][provider]['key_alt_names'] = json.loads(values['key_alt_names'])
except json.JSONDecodeError:
key_alt_names = [s.strip() for s in values['key_alt_names'].split(',') if s]
config['provider_options'][provider]['key_alt_names'] = key_alt_names

return config
self._encryption_config = config

@property
def kms_providers(self) -> dict:
    """
    The KMS provider settings held by this configuration.

    These values are passed directly to pymongo. See the documentation for the
    :class:`pymongo.encryption.ClientEncryption` constructor for more
    information on valid values for ``kms_providers``.

    A typical ``kms_providers`` value using the ``local`` provider looks like this:

    .. code-block:: json

        {
            "local": {
                "key": "<base64-encoded-key>"
            }
        }

    :return: the value stored under ``'kms_providers'``, or ``None`` when
        the configuration has no such key
    """
    settings = self._encryption_config
    return settings.get('kms_providers')

# FIXME: should be able to call `create_data_key('local', ...)` multiple times
# FIXME: rename provider_options -> ...? data_key_options, create_data_key_options
@property
def provider_options(self) -> dict:
    """
    The per-provider options used by this configuration when calling the underlying
    :meth:`pymongo.encryption.ClientEncryption.create_data_key` method.

    See the documentation for pymongo's
    :meth:`pymongo.encryption.ClientEncryption.create_data_key` method for more
    information on valid values for ``provider_options``.

    A typical ``provider_options`` value using the ``local`` provider looks like this:

    .. code-block:: json

        {
            "local": {
                "key_alt_names": ["datakey_test1", "datakey_test2"]
            },
            "gcp": { ... },
            ...
        }

    :return: the value stored under ``'provider_options'``, or ``None`` when
        the configuration has no such key
    """
    settings = self._encryption_config
    return settings.get('provider_options')

def key_alt_name(self, provider='local') -> str:
def get_key_alt_names(self, provider='local') -> list[str]:
    """Return all key alt names configured for ``provider``.

    The complete list is returned, matching the plural method name, so a
    caller that needs a single name indexes the result itself (for example
    ``get_key_alt_names(provider)[0]``). Returning only the first name here
    would make that index pick out a single character.

    :param provider: name of the provider entry to look up in ``provider_options``
    :return: the ``key_alt_names`` value stored for that provider
    :raises TypeError: if ``provider`` has no entry in ``provider_options``
    :raises KeyError: if the provider entry has no ``key_alt_names``
    """
    return self.provider_options.get(provider)['key_alt_names']

@property
def key_vault_namespace(self) -> str:
    """Names the MongoDB database/collection pair in which your
    auto-generated encryption data keys will be stored.

    This is a string in the format ``<database>.<collection>``.

    :return: the value stored under ``'key_vault_namespace'``, or ``None``
        when the configuration has no such key
    """
    settings = self._encryption_config
    return settings.get('key_vault_namespace')


Expand All @@ -62,6 +93,23 @@ def key_vault_namespace(self) -> str:
class DecryptedField(Generic[T]):

def __init__(self, field_type: type[T], encrypted_field: str):
    """
    Create a field that acts as an automatic getter/setter for the
    attribute named by ``encrypted_field``.

    .. note::

        Internally :class:`.DecryptedField` uses getattr and setattr on ``self`` using the ``encrypted_field`` name.

    .. code-block:: python

        class MyDocument(Document):
            email_encrypted = Field(ming.schema.Binary)
            email = DecryptedField(str, 'email_encrypted')

    :param field_type: The type of the decrypted field
    :param encrypted_field: The name of the encrypted attribute to operate on
    """
    self.field_type, self.encrypted_field = field_type, encrypted_field

Expand All @@ -75,10 +123,18 @@ def __set__(self, instance: EncryptedDocumentMixin, value: T):


class EncryptedDocumentMixin:
"""A mixin intended to be used with ming.schema.Document classes to provide encryption.
All configuration is handled by an instance of a :class:`ming.encryption.EncryptionConfig`
that is passed to the :class:`ming.datastore.DataStore` instance that the Document is bound to.
"""

@classmethod
@lru_cache(maxsize=99)
def encryptor(cls, ming_ds: ming.datastore.DataStore) -> ClientEncryption:
"""Creates and returns a :class:`pymongo.encryption.ClientEncryption` instance for the given ming datastore. It uses this to handle encryption/decryption using pymongo's native routines.
:param ming_ds: the :class:`ming.datastore.DataStore` for which this encryptor should be configured.
"""
if not ming_ds.encryption:
raise MingEncryptionError(f'No encryption settings found for {ming_ds}')
conn: MongoClient = ming_ds.conn
Expand All @@ -88,6 +144,8 @@ def encryptor(cls, ming_ds: ming.datastore.DataStore) -> ClientEncryption:

@classmethod
def make_data_key(cls):
"""MongoDB's Client-Side Field Level Encryption (CSFLE) requires a data key to be present in the key vault collection. This method ensures that the key vault collection is properly indexed and that a data key is present for each provider.
"""
ming_ds: ming.datastore.DataStore = cls.m.session.bind
encryptor = cls.encryptor(ming_ds)
# index recommended by mongodb docs:
Expand All @@ -101,14 +159,17 @@ def make_data_key(cls):

@classmethod
def encr(cls, s: str | None, _first_attempt=True, provider='local') -> bytes | None:
"""Encrypts a string using the encryption configuration of the ming datastore that this class is bound to.
Most of the time, you won't need to call this directly, as it is used by the :meth:`ming.encryption.EncryptedDocumentMixin.encrypt_some_fields` method.
"""
if s is None:
return None
try:
ming_ds: ming.datastore.DataStore = cls.m.session.bind
encryptor = cls.encryptor(ming_ds)
return encryptor.encrypt(s,
Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic,
key_alt_name=ming_ds.encryption.key_alt_name())
key_alt_name=ming_ds.encryption.get_key_alt_names(provider)[0])
except (EncryptionError, MongoCryptError) as e:
if _first_attempt and 'not all keys requested were satisfied' in str(e):
cls.make_data_key()
Expand All @@ -118,22 +179,39 @@ def encr(cls, s: str | None, _first_attempt=True, provider='local') -> bytes | N

@classmethod
def decr(cls, b: bytes | None) -> str | None:
    """Decrypt ``b`` using the encryptor of the ming datastore that this class is bound to.

    :param b: the encrypted value, or ``None``
    :return: ``None`` when ``b`` is ``None``; otherwise the result of the
        encryptor's ``decrypt`` call
    """
    if b is not None:
        ming_ds = cls.m.session.bind
        return cls.encryptor(ming_ds).decrypt(b)
    return None

@classmethod
def decrypted_field_names(cls) -> list[str]:
    """
    Return the field names that have ``_encrypted`` counterparts.

    For example, if a class has fields ``email`` and ``email_encrypted``, this method would return ``['email']``.

    Only the trailing ``_encrypted`` suffix is removed, so a name that also
    contains ``_encrypted`` elsewhere keeps that inner part intact.

    :return: one name per entry of :meth:`encrypted_field_names`, with the
        ``_encrypted`` suffix stripped
    """
    suffix = '_encrypted'
    # Slice the suffix off rather than str.replace(), which would also
    # delete any earlier occurrence of '_encrypted' inside the name.
    return [fld[:-len(suffix)] if fld.endswith(suffix) else fld
            for fld in cls.encrypted_field_names()]

@classmethod
def encrypted_field_names(cls) -> list[str]:
    """
    Return the names of all encrypted fields on this class.

    A field is assumed to be encrypted if its name ends with ``_encrypted``.
    For example, if a class has fields ``email`` and ``email_encrypted``, this method would return ``['email_encrypted']``.

    :return: every name reported by ``dir(cls)`` that ends with ``_encrypted``
    """
    names = []
    for attr_name in dir(cls):
        if attr_name.endswith('_encrypted'):
            names.append(attr_name)
    return names

@classmethod
def encrypt_some_fields(cls, data: dict) -> dict:
"""Encrypts some fields in a dictionary using the encryption configuration of the ming datastore that this class is bound to.
:param data: a dictionary of data to be encrypted
:return: a modified copy of the ``data`` param with the currently-unencrypted-but-encryptable fields replaced with ``_encrypted`` counterparts.
"""
encrypted_data = data.copy()
for fld in cls.decrypted_field_names():
if fld in encrypted_data:
Expand Down

0 comments on commit eaf39de

Please sign in to comment.