From b31852b96b7e7b3b02e7f952691b0c753b75deb1 Mon Sep 17 00:00:00 2001 From: Noah Pederson Date: Sun, 29 Oct 2023 17:01:37 -0500 Subject: [PATCH] Adds basic AWS S3 client (#1019) Includes support for: - Bucket get/set/list/delete/exists? - Server-side object copying - Object get/set/list/delete - Bucket exists Partially resolves #840 --------- Co-authored-by: vyzo --- doc/.vuepress/config.js | 3 +- doc/reference/std/net/README.md | 1 + doc/reference/std/net/s3.md | 148 +++++++++++++++++++ src/std/build-spec.ss | 4 + src/std/interface.ss | 4 +- src/std/net/s3.ss | 5 + src/std/net/s3/api.ss | 244 ++++++++++++++++++++++++++++++++ src/std/net/s3/interface.ss | 22 +++ src/std/net/s3/sigv4.ss | 106 ++++++++++++++ 9 files changed, 534 insertions(+), 3 deletions(-) create mode 100644 doc/reference/std/net/s3.md create mode 100644 src/std/net/s3.ss create mode 100644 src/std/net/s3/api.ss create mode 100644 src/std/net/s3/interface.ss create mode 100644 src/std/net/s3/sigv4.ss diff --git a/doc/.vuepress/config.js b/doc/.vuepress/config.js index da17864b2..f9ff0340b 100644 --- a/doc/.vuepress/config.js +++ b/doc/.vuepress/config.js @@ -94,7 +94,8 @@ module.exports = { "net/uri", "net/address", "net/sasl", - "net/repl" + "net/repl", + "net/s3" ] }, diff --git a/doc/reference/std/net/README.md b/doc/reference/std/net/README.md index feaa0a3c3..4969a7d8e 100644 --- a/doc/reference/std/net/README.md +++ b/doc/reference/std/net/README.md @@ -11,3 +11,4 @@ These are libraries related to network programming: - [:std/net/address](net.md) - [:std/net/sasl](sasl.md) - [:std/net/repl](repl.md) +- [:std/net/s3](s3.md) diff --git a/doc/reference/std/net/s3.md b/doc/reference/std/net/s3.md new file mode 100644 index 000000000..0aaa47942 --- /dev/null +++ b/doc/reference/std/net/s3.md @@ -0,0 +1,148 @@ +# Amazon S3 Client + +The `:std/net/s3` library provides basic support for interfacing with Amazon S3 and +compatible services. + +::: warning +Only HTTPS is currently supported +::: + +## Creating a Client + +The primary way to interact with S3 via this library is with the `S3` interface. + +To create an instance of an `S3` client, use the `S3Client` constructor: + +```scheme +(S3Client + endpoint: (endpoint "s3.amazonaws.com") + access-key: (access-key (getenv "AWS_ACCESS_KEY_ID" #f)) + secret-key: (secret-key (getenv "AWS_SECRET_KEY" #f)) + region: (region (getenv "AWS_DEFAULT_REGION" "us-east-1"))) +``` + +If `access-key` or `secret-key` aren't passed in and cannot be retrieved from the +environment (via `AWS_ACCESS_KEY_ID` and `AWS_SECRET_KEY` respectively) a `S3Error` is +raised. + +## Client + +The `S3` interface has the following signature: +```scheme +(interface S3 + (get-bucket (name :~ string?)) + (create-bucket! (name :~ string?)) + (delete-bucket! (name :~ string?)) + (bucket-exists? (name :~ string?)) + (list-buckets)) +``` + +## S3-get-bucket + +```scheme +(S3-get-bucket client bucket-name) -> S3Bucket +``` + +`S3-get-bucket` retrieves a `S3Bucket` instance by name. If the bucket does not exist, a +`S3Error` is raised. Buckets are searched for in the service and region provided to +`S3Client`'s `endpoint` and `region` arguments. + +## S3-create-bucket! + +```scheme +(S3-create-bucket! client bucket-name) -> S3Bucket +``` + +`S3-create-bucket!` creates a new bucket in the service and region the client was +instantiated with. If a bucket with the name already exists, a `S3Error` is raised +indicating there was a conflict. If the bucket is successfully created, an instance of +`S3Bucket` corresponding to the newly created bucket is returned. + +## S3-delete-bucket! + +```scheme +(S3-delete-bucket! client bucket-name) -> void +``` + +`S3-delete-bucket!` attempts to delete a bucket with the given name. `delete-bucket!` is +idempotent and will **not** error if the bucket does not exist. + +## S3-bucket-exists? + +```scheme +(S3-bucket-exists? client bucket-name) -> bool +``` + +`S3-bucket-exists?` checks if a bucket with the provided name exists in the client's +configured region and endpoint. It returns `#t` if a bucket exists and `#f` otherwise. + +## S3-list-buckets + +```scheme +(S3-list-buckets client) -> list : S3Bucket +``` + +`S3-list-buckets` returns a list of all buckets available to the client as configured. If +none are available, an empty list is returned. All buckets are instances of `S3Bucket`. + +## S3Bucket + +The `S3Bucket` interface provides a consistent way to interact with buckets. + +```scheme +(interface S3Bucket + (get (name :~ string?)) + (put! (name :~ string?) + (data :~ u8vector?)) + (delete! (name :~ string?)) + (copy-to! (src :~ string?) (dest :~ string?)) + (list-objects)) +``` +## S3Bucket-get + +```scheme +(S3Bucket-get bucket object-name) -> u8vector +``` + +`S3Bucket-get` retrieves a object by name. If the object does not exist or the client +does not have permission to retrieve the object, an `S3Error` is raised. + +## S3Bucket-put! + +```scheme +(S3Bucket-put! bucket object-name) -> void +``` + +`S3Bucket-put!` stores `data` in a object with the provided `name`. Any failures result +in a `S3Error` being raised. All data is stored with `content-type: "octet-stream"` MIME +type. + +## S3Bucket-delete! + +```scheme +(S3Bucket-delete! bucket object-name) -> void +``` + +`S3Bucket-delete!` delete's an object with the provided `name`. If the object does not +exist, an `S3Error` is raised. + +## S3Bucket-copy-to! + +```scheme +(S3Bucket-copy-to! bucket src dest) -> void +``` + +`S3Bucket-copy-to!` performs server-side copy of object described by `src`. The `src` +format is `/`. The `dest` is the same as if performing a `put!` and +will store the object in the current `S3Bucket`. Copies must be within a region and +otherwise possible to perform using the `S3` client this bucket was created with. +Any failures result in a `S3Error` being raised. + +## S3Bucket-list-objects + +```scheme +(S3Bucket-list-objects bucket) -> list : string +``` + +`S3Bucket-list-objects` enumerates all objects in the bucket, returning their names as +strings. diff --git a/src/std/build-spec.ss b/src/std/build-spec.ss index 004805b57..4d64a47db 100644 --- a/src/std/build-spec.ss +++ b/src/std/build-spec.ss @@ -217,6 +217,10 @@ "net/uri" "net/request" "net/json-rpc" + "net/s3" + "net/s3/interface" + "net/s3/api" + "net/s3/sigv4" "net/websocket/interface" "net/websocket/socket" "net/websocket/client" diff --git a/src/std/interface.ss b/src/std/interface.ss index b04998df4..1a4237e6f 100644 --- a/src/std/interface.ss +++ b/src/std/interface.ss @@ -405,7 +405,7 @@ ((kw (id default) . rest) (stx-keyword? #'kw) (lp #'rest (cons* #'(id absent-obj) #'kw args))) - ((kw (id . contract) .rest) + ((kw (id . contract) . rest) (stx-keyword? #'kw) (cond ((get-contract-default #'contract) @@ -439,7 +439,7 @@ ((kw (id default) . rest) (stx-keyword? #'kw) (lp #'rest (cons* #'(id default) #'kw args))) - ((kw (id . contract) .rest) + ((kw (id . contract) . rest) (stx-keyword? #'kw) (cond ((get-contract-default #'contract) diff --git a/src/std/net/s3.ss b/src/std/net/s3.ss new file mode 100644 index 000000000..976179298 --- /dev/null +++ b/src/std/net/s3.ss @@ -0,0 +1,5 @@ +;;; -*- Gerbil -*- +;;; © vyzo, ngp +;;; AWS S3 Client +(import ./s3/api ./s3/interface) +(export (import: ./s3/api ./s3/interface)) diff --git a/src/std/net/s3/api.ss b/src/std/net/s3/api.ss new file mode 100644 index 000000000..832efa05a --- /dev/null +++ b/src/std/net/s3/api.ss @@ -0,0 +1,244 @@ +;;; -*- Gerbil -*- +;;; (C) vyzo +;;; AWS S3 client +(import "sigv4" + "interface" + :std/net/request + :std/misc/func + :std/contract + :std/net/uri + :std/crypto/digest + :std/text/hex + :std/xml + :std/error + :std/sugar + :std/srfi/19) + +(export S3Client S3ClientError) + +; precomputed empty sha256 +(def emptySHA256 (sha256 #u8())) + +(def (S3Client + endpoint: (endpoint "s3.amazonaws.com") + access-key: (access-key (getenv "AWS_ACCESS_KEY_ID" #f)) + secret-key: (secret-key (getenv "AWS_SECRET_KEY" #f)) + region: (region (getenv "AWS_DEFAULT_REGION" "us-east-1"))) + (cond + ((not access-key) + (raise-s3-error make-s3-client "Must provide access key" "access-key")) + ((not secret-key) + (raise-s3-error make-s3-client "Must provide secret key" "secret-key"))) + (S3 (make-s3-client endpoint access-key secret-key region))) + +(defstruct s3-client (endpoint access-key secret-key region) + final: #t) + +(defstruct bucket (client name region) + final: #t) + +(deferror-class (S3ClientError Error) () s3-client-error?) + +(defraise/context (raise-s3-error where message irritants ...) + (S3ClientError message irritants: [irritants ...])) + +; Retrieves buckets accessible to this client. +(defmethod {list-buckets s3-client} ; => (list : bucket) + (lambda (self) + (using (self :- s3-client) + (let* ((req (s3-request/error self verb: 'GET)) + (xml (s3-parse-xml req)) + (buckets (sxml-find xml (sxml-e? 's3:Buckets) sxml-children)) + (names (map (chain <> + (sxml-select <> (sxml-e? 's3:Name)) + (cadar <>) + (make-bucket self <> (s3-client-region self)) + (S3Bucket <>)) + buckets))) + ; buckets is #f if none are returned + (request-close req) + names)))) + +;; NOTE: all bucket operations need the correct region for the bucket or they will 400 +(defmethod {create-bucket! s3-client} + (lambda (self bucket-name) + (using (self :- s3-client) + (let (req (s3-request/error self verb: 'PUT bucket: bucket-name)) + (request-close req) + (S3Bucket (make-bucket self bucket-name self.region)))))) + +; Gets a bucket struct that can be used to fetch objects. +(defmethod {get-bucket s3-client} ; => bucket + (lambda (self bucket-name) + (using (self :- s3-client) + (if (s3-client::bucket-exists? self bucket-name) + (S3Bucket (make-bucket self bucket-name self.region)) + (raise-s3-error s3-client::get-bucket "Bucket does not exist" bucket-name))))) + +; Delete a bucket by name +(defmethod {delete-bucket! s3-client} + (lambda (self bucket) + (using (self :- s3-client) + (when (s3-client::bucket-exists? self bucket) + (let (req (s3-request/error self verb: 'DELETE bucket: bucket)) + (request-close req) + (void)))))) + +(defmethod {bucket-exists? s3-client} + (lambda (self bucket) + (using (self :- s3-client) + (let* ((bucket (if (bucket? bucket) (bucket-name bucket) bucket)) + (req {self.request verb: 'HEAD bucket: bucket}) + (code (request-status req))) + ; 200 and 404 are expected codes + ; we explicitly handle 404 so we get proper predicate + ; semantics and don't raise on what would otherwise be + ; #f condition. + (if (memq code [200 404]) + (begin + (request-close req) + (= code 200)) + (with-request-error req)))))) + +(defmethod {bucket s3-client} + (lambda (self name) + (using (self :- s3-client) + (if (s3-client::bucket-exists? self name) + (S3Bucket (make-bucket self name (s3-client-region self))) + (raise-s3-error s3-client::bucket "bucket does not exist" name))))) + +; Lists the objects stored within the bucket +(defmethod {list-objects bucket} + (lambda (self) + (using ((self :- bucket) + (client self.client :- s3-client)) + (let* ((name (bucket-name self)) + (req (s3-request/error client verb: 'GET bucket: name)) + (xml (s3-parse-xml req)) + (keys (sxml-select xml (sxml-e? 's3:Key) cadr))) + (request-close req) + keys)))) + +(defmethod {get bucket} + (lambda (self key) + (using ((self :- bucket) + (client self.client :- s3-client)) + (let* ((req (s3-request/error client + verb: 'GET + bucket: (bucket-name self) + path: (string-append "/" key))) + (data (request-content req))) + (request-close req) + data)))) + +(defmethod {put! bucket} + (lambda (self key data content-type: (content-type "binary/octet-stream")) + (using ((self :- bucket) + (client self.client :- s3-client)) + (let (req (s3-request/error client verb: 'PUT bucket: (bucket-name self) + path: (string-append "/" key) + body: data + content-type: content-type)) + (request-close req) + (void))))) + +(defmethod {delete! bucket} + (lambda (self key) + (using ((self :- bucket) + (client self.client :- s3-client)) + (let (req (s3-request/error client verb: 'DELETE bucket: (bucket-name self) + path: (string-append "/" key))) + (request-close req) + (void))))) + +(defmethod {copy-to! bucket} + (lambda (self src dest) + (using ((self :- bucket) + (client self.client :- s3-client)) + (let* ((headers [["x-amz-copy-source" :: src]]) + (req (s3-client::request client + verb: 'PUT + bucket: (bucket-name self) + path: (string-append "/" dest) + extra-headers: headers)) + (error (s3-response-error? (s3-parse-xml req)))) + (request-close req) + (when error + (raise-s3-error + bucket::copy-to! + "Unable to perform server-side copy" + ; when error isn't empty, it should be a parsed XML tree + (sxml-find error (sxml-e? 'Code) cadr) + (request-status-text req))) + (void))))) + + +; The core request method. Handles AWS Sig. v4, auth, and calls correct http- function based on +; `verb`. +(defmethod {request s3-client} + (lambda (self + verb: (verb 'GET) + bucket: (bucket #f) + path: (path "/") + query: (query #f) + body: (body #f) + ; optional extra headers + extra-headers: (extra-headers #f) + content-type: (content-type #f)) ; must be specified if body is specified + (using (self :- s3-client) + (let* ((now (current-date)) + (ts (date->string now "~Y~m~dT~H~M~SZ")) + (scopets (date->string now "~Y~m~d")) + (scope (string-append scopets "/" (s3-client-region self) "/s3")) + (hash (if body (sha256 body) emptySHA256)) + (host (if bucket + (string-append bucket "." (s3-client-endpoint self)) + (s3-client-endpoint self))) + (headers [["Host" :: (string-append host ":443")] + ["x-amz-date" :: ts] + ["x-amz-content-sha256" :: (hex-encode hash)] + (if body [["Content-Type" :: content-type]] []) ... + (if extra-headers extra-headers []) ...]) + (creq (aws4-canonical-request + verb: verb + uri: path + query: query + headers: headers + hash: hash)) + (headers [["Authorization" :: (aws4-auth scope creq ts headers + (s3-client-secret-key self) (s3-client-access-key self))] + :: headers]) + (url (string-append "https://" host path))) + (case verb + ((GET) + (http-get url headers: headers params: query)) + ((PUT) + (http-put url headers: headers params: query data: body)) + ((DELETE) + (http-delete url headers: headers params: query)) + ((HEAD) + (http-head url headers: headers params: query)) + (else + (error "Bad request verb" verb))))))) + +(defrule (s3-request/error self ...) + (with-request-error + (s3-client::request self ...))) + +(def (s3-parse-xml req) + (read-xml (request-content req) + namespaces: '(("http://s3.amazonaws.com/doc/2006-03-01/" . "s3")))) + +(defrule (s3-response-error? xml) + (sxml-find xml (sxml-e? 'Error))) + +(def (with-request-error req) + (using (req :~ request?) + (if (and (fx>= (request-status req) 200) + (fx< (request-status req) 300)) + req + (begin + (request-close req) + (raise-s3-error + (request-status req) + (request-status-text req)))))) diff --git a/src/std/net/s3/interface.ss b/src/std/net/s3/interface.ss new file mode 100644 index 000000000..41eed201d --- /dev/null +++ b/src/std/net/s3/interface.ss @@ -0,0 +1,22 @@ +(import :std/interface + :std/contract + :std/misc/alist) + +(export #t) + +(interface S3 + (get-bucket (name :~ string?)) + (create-bucket! (name :~ string?)) + (delete-bucket! (name :~ string?)) + (bucket-exists? (name :~ string?)) + (list-buckets)) + +(interface S3Bucket + (get (name :~ string?)) + (put! (name :~ string?) + (data :~ u8vector?)) + (delete! (name :~ string?)) + ; src should follow `bucket/file/path` format. Destination should just be `file/path`. + ; Copies *from* src to *dest* in this ObjectMap. + (copy-to! (src :~ string?) (dest :~ string?)) + (list-objects)) diff --git a/src/std/net/s3/sigv4.ss b/src/std/net/s3/sigv4.ss new file mode 100644 index 000000000..d029ca50e --- /dev/null +++ b/src/std/net/s3/sigv4.ss @@ -0,0 +1,106 @@ +;;; -*- Gerbil -*- +;;; (C) vyzo +;;; AWS sigv4 request signatures +(import :std/misc/bytes + :std/srfi/13 + :std/crypto/digest + :std/crypto/hmac + :std/text/hex + :std/net/uri + :std/contract + :std/sort) +(export aws4-canonical-request aws4-sign aws4-auth) + +;; Reference: http://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html + +;; create a canonical request string for signing +(def (aws4-canonical-request + verb: verb ; symbol -- http verb (GET PUT DELETE ...) + uri: uri ; string -- canonical request uri + query: query ; [[string . value] ...] -- query parameters + headers: headers ; [[string . value] ...] -- signed request headers + hash: hash ; bytes -- SHA256 content hash + ) + (string-append + (symbol->string verb) "\n" + uri "\n" + (if query (canonical-query-string query) "") "\n" + (canonical-headers headers) "\n" + (signed-headers headers) "\n" + (hex-encode hash))) + +;; calculate a signature for a canonical request +;; scope is the request scope: string in the form yyyymmdd/region/service +;; ts is the request timestamp string +;; request is a the canonical request string +(def (aws4-sign scope request-str ts secret-key) + (let ((key (signing-key scope secret-key)) + (str (string-to-sign scope request-str ts))) + (hmac-sha256 key (string->bytes str)))) + +;; Calcuate the authorization header +(def (aws4-auth scope request-str ts headers secret-key access-key) + (let (sig (aws4-sign scope request-str ts secret-key)) + (string-append "AWS4-HMAC-SHA256 " + "Credential=" access-key "/" scope "/aws4_request," + "SignedHeaders=" (signed-headers headers) "," + "Signature=" (hex-encode sig)))) + +;;; internal +(def (car-stringbytes + (string-append "AWS4" secret-key)) + (string->bytes date))) + (date-region-key + (hmac-sha256 date-key + (string->bytes region))) + (date-region-svc-key + (hmac-sha256 date-region-key + (string->bytes service)))) + (hmac-sha256 date-region-svc-key + (@bytes "aws4_request")))) + (else + (error "Bad request scope; expected date/region/service string" scope)))) + +(def (string-to-sign scope req ts) + (string-append "AWS4-HMAC-SHA256\n" + ts "\n" + scope "/aws4_request" "\n" + (hex-encode (sha256 req))))