From 7eb49950f0ec62fa77a899064a023813f3b3da51 Mon Sep 17 00:00:00 2001 From: Reid Sunderland Date: Tue, 11 Feb 2025 22:01:54 +0000 Subject: [PATCH] support anonymous access for s3, and create buckets that don't exist, and docs --- docs/source/Reference/sr3_credentials.7.rst | 9 ++++ .../source/fr/Reference/sr3_credentials.7.rst | 9 ++++ sarracenia/config/credentials.py | 3 ++ sarracenia/transfer/s3.py | 44 +++++++++++++++---- 4 files changed, 57 insertions(+), 8 deletions(-) diff --git a/docs/source/Reference/sr3_credentials.7.rst b/docs/source/Reference/sr3_credentials.7.rst index 8f3335797..fe011f591 100644 --- a/docs/source/Reference/sr3_credentials.7.rst +++ b/docs/source/Reference/sr3_credentials.7.rst @@ -50,6 +50,11 @@ passwords and settings needed by components. The format is one entry per line. - **ftp://user8:%2fdot8@host:990 implicit_ftps** - **https://ladsweb.modaps.eosdis.nasa.gov/ bearer_token=89APCBF0-FEBE-11EA-A705-B0QR41911BF4** +- **s3://bucket-name s3_anonymous** +- **s3://access_key_id:secret_access_key@bucket-name** +- **s3://access_key_id:secret_access_key@bucket-name s3_session_token=a_big_string** +- **s3://access_key_id:secret_access_key@bucket-name s3_endpoint=https://my-endpoint.com/** + In other configuration files or on the command line, the url simply lacks the password or key specification. The url given in the other files is looked @@ -73,6 +78,10 @@ Supported details: - ``bearer_token=`` (or ``bt=``) - (HTTP) Bearer token for authentication - ``login_method=`` - (AMQP) By default, the login method will be automatically determined. This can be overriden by explicity specifying a login method, which may be required if a broker supports multiple methods and an incorrect one is automatically selected. - ``implicit_ftps`` - (FTPS) Use implicit FTPS (otherwise, explicit FTPS is used). Setting this will also set ``tls`` to True. +- Details for the S3 protocol: + - ``s3_endpoint=`` - use a specific endpoint, such as a non-Amazon S3 service. 
+ - ``s3_session_token=`` - when specifying credentials for S3, the username field is used as the "Access Key ID", the password as the "Secret Access Key". Sometimes a Session Token is also required, and can be provided with this option. + - ``s3_anonymous`` - do not sign requests (anonymous access). Equivalent to ``--no-sign-request`` when using the AWS CLI. Note:: SFTP credentials are optional, in that sarracenia will look in the .ssh directory diff --git a/docs/source/fr/Reference/sr3_credentials.7.rst b/docs/source/fr/Reference/sr3_credentials.7.rst index b3f45425a..15cc1934b 100644 --- a/docs/source/fr/Reference/sr3_credentials.7.rst +++ b/docs/source/fr/Reference/sr3_credentials.7.rst @@ -49,6 +49,11 @@ ainsi que les paramètres nécessaires aux composants. Le format est d'une entr - **ftp://user8:%2fdot8@host:990 implicit_ftps** - **https://ladsweb.modaps.eosdis.nasa.gov/ bearer_token=89APCBF0-FEBE-11EA-A705-B0QR41911BF4** +- **s3://nom-du-compartiment s3_anonymous** +- **s3://ID_de_clé_d'accès:clé_d'accès_secrète@nom-du-compartiment** +- **s3://ID_de_clé_d'accès:clé_d'accès_secrète@nom-du-compartiment s3_session_token=une_grande_chaîne** +- **s3://ID_de_clé_d'accès:clé_d'accès_secrète@nom-du-compartiment s3_endpoint=https://my-endpoint.com/** + Dans d’autres fichiers de configuration ou sur la ligne de commande, l’url n’a tout simplement pas le spécification du mot de passe ou de la clé. L’url donné dans les autres fichiers est recherchée dans credentials.conf. @@ -73,6 +78,10 @@ Détails pris en charge : - ``bearer_token=`` (ou ``bt=``) - (HTTP) Jeton Bearer pour l’authentification - ``login_method=`` - (AMQP) Par défaut, la méthode de connexion sera automatiquement - ``implicit_ftps`` - (FTPS) Utilisez FTPS implicite (sinon, FTPS explicite est utilisé). Définir ceci définira également ``tls`` sur True. +- Détails du protocole S3: + - ``s3_endpoint=`` - utiliser un point de terminaison spécifique, comme un service non Amazon S3. 
+ - ``s3_session_token=`` - lors de la spécification des informations d'identification pour S3, le champ du nom d'utilisateur est utilisé comme « ID de clé d'accès », le mot de passe comme « clé d'accès secrète ». Parfois, un jeton de session est également requis et peut être fourni avec cette option. + - ``s3_anonymous`` - ne pas signer les demandes (accès anonyme). Équivalent à ``--no-sign-request`` lors de l'utilisation de la CLI AWS. déterminée. Cela peut être remplacé en spécifiant une méthode Particulière de connexion, ce qui peut être nécessaire si un broker prend en charge plusieurs méthodes et qu’une méthode incorrecte est automatiquement diff --git a/sarracenia/config/credentials.py b/sarracenia/config/credentials.py index 98cb2151b..9f7af8770 100755 --- a/sarracenia/config/credentials.py +++ b/sarracenia/config/credentials.py @@ -101,6 +101,7 @@ def __init__(self, urlstr=None): self.login_method = None self.s3_endpoint = None self.s3_session_token = None + self.s3_anonymous = False self.azure_credentials = None self.implicit_ftps = False @@ -391,6 +392,8 @@ def _parse(self, line): details.s3_session_token = urllib.parse.unquote(parts[1].strip()) elif keyword == 's3_endpoint': details.s3_endpoint = parts[1].strip() + elif keyword == 's3_anonymous': + details.s3_anonymous = True elif keyword == 'azure_storage_credentials': details.azure_credentials = urllib.parse.unquote(parts[1].strip()) elif keyword == 'implicit_ftps': diff --git a/sarracenia/transfer/s3.py b/sarracenia/transfer/s3.py index 49604d2ad..d242593df 100644 --- a/sarracenia/transfer/s3.py +++ b/sarracenia/transfer/s3.py @@ -106,6 +106,10 @@ def __credentials(self) -> bool: self.client_args['aws_session_token'] = details.s3_session_token if hasattr(details, 's3_endpoint'): self.client_args['endpoint_url'] = details.s3_endpoint + # equivalent to --no-sign-request with the s3 CLI + if hasattr(details, 's3_anonymous') and details.s3_anonymous: + self.s3_client_config = self.s3_client_config.merge( 
+ botocore.config.Config(signature_version=botocore.UNSIGNED)) return True @@ -160,13 +164,37 @@ def connect(self) -> bool: try: self.client = boto3.client('s3', config=self.s3_client_config, **self.client_args) - buckets = self.client.list_buckets() - if self.bucket in [b['Name'] for b in buckets['Buckets']]: - self.connected = True - logger.debug(f"Connected to bucket {self.bucket} in {self.client.get_bucket_location(Bucket=self.bucket)['LocationConstraint']}") - return True - else: - logger.error(f"Can't find bucket called {self.bucket}") + + # does the bucket exist? + exists = False + response = None + try: + response = self.client.head_bucket(Bucket=self.bucket) + exists = True + logger.debug(f"bucket exists: {response}") + except botocore.exceptions.ClientError: + exists = False + + # try to create the bucket if it doesn't exist + if not exists: + try: + # TODO: should support other parameters like ACL, etc. + self.client.create_bucket(Bucket=self.bucket) + response = self.client.head_bucket(Bucket=self.bucket) + logger.info(f"bucket {self.bucket} was created successfully") + exists = True + except Exception as e: + logger.error(f"cannot access bucket {self.bucket}. {e} ({response})") + logger.debug("Exception details:", exc_info=True) + + if exists and response is not None: + try: + loc = response['ResponseMetadata']['HTTPHeaders']['x-amz-bucket-region'] + except: + loc = 'Unknown Location' + logger.info(f"Connected to bucket {self.bucket} in {loc}") + + return exists except botocore.exceptions.ClientError as e: logger.error(f"unable to establish boto3 connection: {e}") @@ -175,7 +203,7 @@ def connect(self) -> bool: except Exception as e: logger.error(f"Something else happened: {e}", exc_info=True) - return False + return False def delete(self, path): logger.debug("deleting %s" % path)