diff --git a/README.md b/README.md
index b6d5580..33ae417 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ The cli emulates the [aws cli](https://aws.amazon.com/cli/) as close as possible
 - [X] List - ls
 - [X] Remove - rm
-- [ ] Copy - cp
+- [X] Copy - cp
 - [ ] Syncronize - sync
 
 Use `--help` on the command line to help you along the way.
@@ -58,6 +58,8 @@ Commands:
   ls []
     list
 
+  cp [<flags>] <source> <destination>
+    copy
 ```
 
 --profile will always look in the usual place for your aws `credentials` or `config` file
@@ -69,7 +71,7 @@ All commands can take the `--auto-region` flag to automatically detect the right
 ## List - ls
 Nothing special here. Just remember S3 has prefixes, not directory paths.
 
-## Remove - rmgit
+## Remove - rm
 ```
 --recursive      Recurisvley delete
 --all-versions   Delete all versions
@@ -82,9 +84,33 @@ Remember when using `--all-versions` to delete all versions of an object at once
 
 When deleting a large number of objects, the final outcome may not be reflected by `ls` immediately due to eventual consistency.
 
 ## Copy - cp
-This is WIP
+This is WIP, with some further features to come. Please raise an issue if there is a specific feature you would like considered or prioritised.
+
+```
+  -r, --recursive              Recursively copy
+  -c, --concurrent=10          Maximum number of concurrent uploads to S3.
+      --sse=AES256             Specifies server-side encryption of the object in S3. Valid values are AES256 and aws:kms.
+      --sse-kms-key-id=SSE-KMS-KEY-ID
+                               The AWS KMS key ID that should be used to server-side encrypt the object in S3.
+      --acl=private            Object ACL
+      --storage-class=STANDARD Storage Class
+
+Args:
+  <source>       file or s3 location
+  <destination>  file or s3 location
+```
+
+The maximum number of concurrent uploads (`--concurrent` or `-c`) depends not only on your upload bandwidth but also on the maximum open file limit per process on your system and the performance of the source drive.
+
+You can check your open file limit on Linux, macOS and other flavours of OS with `ulimit -n`. Changing this limit in the OS is possible and not necessarily dangerous; instructions vary between operating systems so they are not described here. `s3kor` consumes open file handles both when walking the file system and when uploading files, so there is not a 1 to 1 correlation between the maximum limit and the value you pass to `--concurrent`. Try to pass `s3kor` a value that is about 20% less than the system's maximum limit.
+
+Currently, if you hit the file limit the error is not reported.
+
+For optimal throughput, consider using an S3 VPC Gateway endpoint if you are executing s3kor from within an AWS VPC.
 
-## Sync - cp
+And remember that the performance of the source storage device is important: you don't want to choke it by reading lots of data at once. Use an optimized IOPS device or SAN.
+
+## Sync - sync
 This is WIP
diff --git a/copy.go b/copy.go
index e85a20a..09e40c3 100644
--- a/copy.go
+++ b/copy.go
@@ -152,7 +152,7 @@ func ACL(acl string) func(copier *BucketCopier) {
 	}
 }
 
-func NewBucketCopier(source string, dest string, sess *session.Session, template s3manager.UploadInput) (*BucketCopier, error) {
+func NewBucketCopier(source string, dest string, threads int, sess *session.Session, template s3manager.UploadInput) (*BucketCopier, error) {
 
 	var svc *s3.S3 = nil
 	sourceURL, err := url.Parse(source)
@@ -192,9 +192,9 @@ func NewBucketCopier(source string, dest string, sess *session.Session, template
 		source:        *sourceURL,
 		target:        *destURL,
 		uploadManager: *s3manager.NewUploaderWithClient(svc),
-		threads:       make(semaphore, 1000),
-		files:         make(chan fileJob, 1000),
-		fileCounter:   make(chan int64, 1000),
+		threads:       make(semaphore, threads),
+		files:         make(chan fileJob, 10000),
+		fileCounter:   make(chan int64, 10000),
 		wg:            &sync.WaitGroup{},
 		template:      template,
 	}
diff --git a/filewalker.go b/filewalker.go
index 8643150..96d64e8 100644
--- a/filewalker.go
+++ b/filewalker.go
@@ -26,11 +26,12 @@ func upLoadFile(files chan<- fileJob, fileSize chan<- int64) filepath.WalkFunc {
 
 		//We are only interested in regular files
 		if info.Mode().IsRegular() {
+			fileSize <- info.Size()
 			files <- fileJob{
 				path: path,
 				info: info,
 			}
-			fileSize <- info.Size()
+
 		}
 		return nil
 	}
diff --git a/s3kor.go b/s3kor.go
index 3fa26b5..7ef11c4 100755
--- a/s3kor.go
+++ b/s3kor.go
@@ -26,7 +26,7 @@ var (
 	pVerbose = app.Flag("verbose", "Verbose Logging").Default("false").Bool()
 
 	rm            = app.Command("rm", "remove")
-	rmRecursive   = rm.Flag("recursive", "Recurisvley delete").Default("false").Bool()
+	rmRecursive   = rm.Flag("recursive", "Recurisvley delete").Short('r').Default("false").Bool()
 	rmAllVersions = rm.Flag("all-versions", "Delete all versions").Default("false").Bool()
 	rmPath        = rm.Arg("S3Uri", "S3 URL").Required().String()
 
@@ -37,7 +37,8 @@ var (
 	cp            = app.Command("cp", "copy")
 	cpSource      = cp.Arg("source", "file or s3 location").Required().String()
 	cpDestination = cp.Arg("destination", "file or s3 location").Required().String()
-	cpRecursive   = cp.Flag("recursive", "Recurisvley copy").Short('r').Default("False").Bool()
+	cpRecursive   = cp.Flag("recursive", "Recursively copy").Short('r').Default("False").Bool()
+	cpConcurrent  = cp.Flag("concurrent", "Maximum number of concurrent uploads to S3.").Short('c').Default("10").Int()
 	cpSSE         = cp.Flag("sse", "Specifies server-side encryption of the object in S3. Valid values are AES256 and aws:kms.").Default("AES256").Enum("AES256", "aws:kms")
 	cpSSEKMSKeyId = cp.Flag("sse-kms-key-id", "The AWS KMS key ID that should be used to server-side encrypt the object in S3.").String()
 	cpACL         = cp.Flag("acl", "Object ACL").Default(s3.ObjectCannedACLPrivate).Enum(s3.ObjectCannedACLAuthenticatedRead,
@@ -127,7 +128,7 @@ func main() {
 			inputTemplate.ServerSideEncryption = cpSSEKMSKeyId
 		}
 
-		myCopier, err := NewBucketCopier(*cpSource, *cpDestination, sess, inputTemplate)
+		myCopier, err := NewBucketCopier(*cpSource, *cpDestination, *cpConcurrent, sess, inputTemplate)
 
 		if err != nil {
 			fmt.Println(err.Error())
 			logger.Fatal(err.Error())
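
---

For reviewers unfamiliar with the pattern, the block below is not part of the diff: it is a minimal, self-contained sketch of the bounded-concurrency idea that the `threads: make(semaphore, threads)` change in copy.go appears to rely on, assuming `semaphore` is a buffered channel of empty structs (the actual type definition is not shown in this diff). The `fileJob`, `upload`, and the hard-coded `concurrent` value here are illustrative stand-ins, not s3kor code.

```go
// Sketch only: a buffered channel used as a counting semaphore, sized by the
// --concurrent flag instead of a hard-coded 1000. Each upload acquires a slot
// before starting and releases it when done, so at most `concurrent` uploads
// (and their open file handles) are in flight at once.
package main

import (
	"fmt"
	"sync"
	"time"
)

type semaphore chan struct{} // assumed shape; s3kor's real type may differ

type fileJob struct {
	path string
}

// upload stands in for the real S3 upload; it just sleeps briefly.
func upload(job fileJob) {
	time.Sleep(10 * time.Millisecond)
	fmt.Println("uploaded", job.path)
}

func main() {
	const concurrent = 10 // would come from --concurrent / -c

	threads := make(semaphore, concurrent) // limits simultaneous uploads
	files := make(chan fileJob, 100)       // queue filled by the file walker
	var wg sync.WaitGroup

	// Producer: stands in for the filepath.Walk based walker.
	go func() {
		for i := 0; i < 25; i++ {
			files <- fileJob{path: fmt.Sprintf("file-%02d", i)}
		}
		close(files)
	}()

	// Consumer: acquire a semaphore slot per file, release when the upload ends.
	for job := range files {
		threads <- struct{}{} // acquire a slot
		wg.Add(1)
		go func(j fileJob) {
			defer wg.Done()
			defer func() { <-threads }() // release the slot
			upload(j)
		}(job)
	}
	wg.Wait()
}
```

Sizing the semaphore from the flag rather than a fixed 1000 keeps the number of simultaneously open files roughly proportional to `--concurrent`, which is why the README text above suggests choosing a value about 20% below the `ulimit -n` limit.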