diff --git a/README.md b/README.md index ff35f13c..7697ac33 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,10 @@ The `storage_options` can be instantiated with a variety of keyword arguments de - `account_key` - `sas_token` - `tenant_id`, `client_id`, and `client_secret` are combined for an Azure ServicePrincipal e.g. `storage_options={'account_name': ACCOUNT_NAME, 'tenant_id': TENANT_ID, 'client_id': CLIENT_ID, 'client_secret': CLIENT_SECRET}` -- `anon`: `True` or `False`. The default value for anon (i.e. anonymous) is True +- `anon`: bool, optional. + The value to use for whether to attempt anonymous access if no other credential is passed. By default (`None`), the + `AZURE_STORAGE_ANON` environment variable is checked. False values (`false`, `0`, `f`) will resolve to `False` and + anonymous access will not be attempted. Otherwise the value for `anon` resolves to True. - `location_mode`: valid values are "primary" or "secondary" and apply to RA-GRS accounts For more argument details see all arguments for [`AzureBlobFileSystem` here](https://github.com/fsspec/adlfs/blob/f15c37a43afd87a04f01b61cd90294dd57181e1d/adlfs/spec.py#L328) and [`AzureDatalakeFileSystem` here](https://github.com/fsspec/adlfs/blob/f15c37a43afd87a04f01b61cd90294dd57181e1d/adlfs/spec.py#L69). @@ -92,7 +95,8 @@ The following environmental variables can also be set and picked up for authenti The filesystem can be instantiated for different use cases based on a variety of `storage_options` combinations. The following list describes some common use cases utilizing `AzureBlobFileSystem`, i.e. protocols `abfs`or `az`. Note that all cases require the `account_name` argument to be provided: 1. Anonymous connection to public container: `storage_options={'account_name': ACCOUNT_NAME, 'anon': True}` will assume the `ACCOUNT_NAME` points to a public container, and attempt to use an anonymous login. Note, the default value for `anon` is True. 2. 
Auto credential solving using Azure's DefaultAzureCredential() library: `storage_options={'account_name': ACCOUNT_NAME, 'anon': False}` will use [`DefaultAzureCredential`](https://learn.microsoft.com/en-us/python/api/azure-identity/azure.identity.defaultazurecredential?view=azure-python) to get valid credentials to the container `ACCOUNT_NAME`. `DefaultAzureCredential` attempts to authenticate via the [mechanisms and order visualized here](https://learn.microsoft.com/en-us/python/api/overview/azure/identity-readme?view=azure-python#defaultazurecredential). -3. Azure ServicePrincipal: `tenant_id`, `client_id`, and `client_secret` are all used as credentials for an Azure ServicePrincipal: e.g. `storage_options={'account_name': ACCOUNT_NAME, 'tenant_id': TENANT_ID, 'client_id': CLIENT_ID, 'client_secret': CLIENT_SECRET}`. +3. Auto credential solving without requiring `storage_options`: Set `AZURE_STORAGE_ANON` to `false`, resulting in automatic credential resolution. Useful for compatibility with fsspec. +4. Azure ServicePrincipal: `tenant_id`, `client_id`, and `client_secret` are all used as credentials for an Azure ServicePrincipal: e.g. `storage_options={'account_name': ACCOUNT_NAME, 'tenant_id': TENANT_ID, 'client_id': CLIENT_ID, 'client_secret': CLIENT_SECRET}`. ### Append Blob The `AzureBlobFileSystem` accepts [all of the Async BlobServiceClient arguments](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python). diff --git a/adlfs/spec.py b/adlfs/spec.py index b437092f..0fb643ec 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -159,8 +159,14 @@ class AzureBlobFileSystem(AsyncFileSystem): Client secret to use when authenticating using an AD Service Principal client/secret. tenant_id: str Tenant ID to use when authenticating using an AD Service Principal client/secret. + anon: boolean, optional + The value to use for whether to attempt anonymous access if no other credential is + passed. 
By default (``None``), the ``AZURE_STORAGE_ANON`` environment variable is + checked. False values (``false``, ``0``, ``f``) will resolve to ``False`` and + anonymous access will not be attempted. Otherwise the value for ``anon`` resolves + to ``True``. default_fill_cache: bool = True - Whether to use cache filling with opoen by default + Whether to use cache filling with open by default default_cache_type: string ('bytes') If given, the default cache_type value used for "open()". Set to none if no caching is desired. Docs in fsspec @@ -168,8 +174,8 @@ class AzureBlobFileSystem(AsyncFileSystem): Whether to support blob versioning. If enable this will require the user to have the necessary permissions for dealing with versioned blobs. assume_container_exists: Optional[bool] (None) - Set this to true to not check for existance of containers at all, assuming they exist. - None (default) means to warn in case of a failure when checking for existance of a container + Set this to true to not check for existence of containers at all, assuming they exist. + None (default) means to warn in case of a failure when checking for existence of a container False throws if retrieving container properties fails, which might happen if your authentication is only valid at the storage container level, and not the storage account level. 
@@ -239,7 +245,7 @@ def __init__(
         client_id: str = None,
         client_secret: str = None,
         tenant_id: str = None,
-        anon: bool = True,
+        anon: bool = None,
         location_mode: str = "primary",
         loop=None,
         asynchronous: bool = False,
@@ -271,7 +277,15 @@ def __init__(
         self.client_id = client_id or os.getenv("AZURE_STORAGE_CLIENT_ID")
         self.client_secret = client_secret or os.getenv("AZURE_STORAGE_CLIENT_SECRET")
         self.tenant_id = tenant_id or os.getenv("AZURE_STORAGE_TENANT_ID")
-        self.anon = anon
+        # An explicit ``anon`` (True or False) always wins; the environment
+        # variable is consulted only when the caller left it as None.
+        if anon is None:
+            anon = os.getenv("AZURE_STORAGE_ANON", "true").lower() not in (
+                "false",
+                "0",
+                "f",
+            )
+        self.anon = anon
         self.location_mode = location_mode
         self.credential = credential
         self.request_session = request_session
diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py
index 95d2ddd8..a40df93f 100644
--- a/adlfs/tests/test_spec.py
+++ b/adlfs/tests/test_spec.py
@@ -1,4 +1,5 @@
 import datetime
+import os
 import tempfile
 from unittest import mock
 
@@ -29,6 +30,35 @@ def test_connect(storage):
     AzureBlobFileSystem(account_name=storage.account_name, connection_string=CONN_STR)
 
 
+def test_anon_env(storage):
+    # Disable instance caching for the duration of the test so a cached instance
+    # from a previous test is not reused; patch.object restores it even on failure.
+    with mock.patch.object(AzureBlobFileSystem, "cachable", False):
+        with mock.patch.dict(os.environ, {"AZURE_STORAGE_ANON": "false"}):
+            x = AzureBlobFileSystem(
+                account_name=storage.account_name, connection_string=CONN_STR
+            )
+            assert not x.anon
+
+            # An explicit argument takes precedence over the environment variable.
+            x = AzureBlobFileSystem(
+                account_name=storage.account_name,
+                connection_string=CONN_STR,
+                anon=True,
+            )
+            assert x.anon
+
+        # With the variable unset, an explicit ``anon=False`` must be honoured.
+        with mock.patch.dict(os.environ):
+            os.environ.pop("AZURE_STORAGE_ANON", None)
+            x = AzureBlobFileSystem(
+                account_name=storage.account_name,
+                connection_string=CONN_STR,
+                anon=False,
+            )
+            assert x.anon is False
+
+
 def assert_blob_equals(blob, expected_blob):
     irregular_props = [
         "etag",