From 4041ec57628b2ccdfc8f2fff6d4b703f8ac51bba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Tue, 18 Jul 2023 16:18:29 +0300 Subject: [PATCH 01/31] command/sync: --include flag support --- command/include.go | 44 +++++++++++++++++ command/sync.go | 120 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 139 insertions(+), 25 deletions(-) create mode 100644 command/include.go diff --git a/command/include.go b/command/include.go new file mode 100644 index 000000000..85dc34a56 --- /dev/null +++ b/command/include.go @@ -0,0 +1,44 @@ +package command + +import ( + "path/filepath" + "regexp" + "strings" + + "github.com/peak/s5cmd/v2/strutil" +) + +// createIncludesFromWildcard creates regex strings from wildcard. +func createIncludesFromWildcard(inputIncludes []string) ([]*regexp.Regexp, error) { + var result []*regexp.Regexp + for _, input := range inputIncludes { + if input != "" { + regex := strutil.WildCardToRegexp(input) + regex = strutil.MatchFromStartToEnd(regex) + regex = strutil.AddNewLineFlag(regex) + regexpCompiled, err := regexp.Compile(regex) + if err != nil { + return nil, err + } + result = append(result, regexpCompiled) + } + } + return result, nil +} + +// isURLIncluded checks whether given urlPath matches any of the include patterns. +func isURLIncluded(includePatterns []*regexp.Regexp, urlPath, sourcePrefix string) bool { + if len(includePatterns) == 0 { + return false + } + if !strings.HasSuffix(sourcePrefix, "/") { + sourcePrefix += "/" + } + sourcePrefix = filepath.ToSlash(sourcePrefix) + for _, includePattern := range includePatterns { + if includePattern.MatchString(strings.TrimPrefix(urlPath, sourcePrefix)) { + return true + } + } + return false +} diff --git a/command/sync.go b/command/sync.go index 3b586f5d2..cb383bc8c 100644 --- a/command/sync.go +++ b/command/sync.go @@ -6,6 +6,7 @@ import ( "io" "os" "path/filepath" + "regexp" "strings" "sync" @@ -64,6 +65,9 @@ Examples: 10. 
Sync all files to S3 bucket but exclude the ones with txt and gz extension > s5cmd {{.HelpName}} --exclude "*.txt" --exclude "*.gz" dir/ s3://bucket + + 10. Sync all files to S3 bucket but include the only ones with txt and gz extension + > s5cmd {{.HelpName}} --include "*.txt" --include "*.gz" dir/ s3://bucket ` func NewSyncCommandFlags() []cli.Flag { @@ -76,6 +80,10 @@ func NewSyncCommandFlags() []cli.Flag { Name: "size-only", Usage: "make size of object only criteria to decide whether an object should be synced", }, + &cli.StringSliceFlag{ + Name: "include", + Usage: "include objects with given pattern", + }, } sharedFlags := NewSharedFlags() return append(syncFlags, sharedFlags...) @@ -99,7 +107,11 @@ func NewSyncCommand() *cli.Command { Action: func(c *cli.Context) (err error) { defer stat.Collect(c.Command.FullName(), &err)() - return NewSync(c).Run(c) + sync, err := NewSync(c) + if err != nil { + return err + } + return sync.Run(c) }, } @@ -113,14 +125,20 @@ type ObjectPair struct { // Sync holds sync operation flags and states. 
type Sync struct { - src string - dst string + src *url.URL + dst *url.URL op string fullCommand string // flags delete bool sizeOnly bool + exclude []string + include []string + + // patterns + excludePatterns []*regexp.Regexp + includePatterns []*regexp.Regexp // s3 options storageOpts storage.Options @@ -134,16 +152,37 @@ type Sync struct { } // NewSync creates Sync from cli.Context -func NewSync(c *cli.Context) Sync { - return Sync{ - src: c.Args().Get(0), - dst: c.Args().Get(1), +func NewSync(c *cli.Context) (*Sync, error) { + fullCommand := commandFromContext(c) + + src, err := url.New(c.Args().Get(0), url.WithVersion(c.String("version-id")), + url.WithRaw(c.Bool("raw"))) + if err != nil { + printError(fullCommand, c.Command.Name, err) + return nil, err + } + + dst, err := url.New(c.Args().Get(1), url.WithRaw(c.Bool("raw"))) + if err != nil { + printError(fullCommand, c.Command.Name, err) + return nil, err + } + + return &Sync{ + src: src, + dst: dst, op: c.Command.Name, fullCommand: commandFromContext(c), // flags delete: c.Bool("delete"), sizeOnly: c.Bool("size-only"), + exclude: c.StringSlice("exclude"), + include: c.StringSlice("include"), + + // patterns + excludePatterns: nil, + includePatterns: nil, // flags followSymlinks: !c.Bool("no-follow-symlinks"), @@ -153,36 +192,40 @@ func NewSync(c *cli.Context) Sync { srcRegion: c.String("source-region"), dstRegion: c.String("destination-region"), storageOpts: NewStorageOpts(c), - } + }, nil } // Run compares files, plans necessary s5cmd commands to execute // and executes them in order to sync source to destination. 
func (s Sync) Run(c *cli.Context) error { - srcurl, err := url.New(s.src, url.WithRaw(s.raw)) + var err error + + s.excludePatterns, err = createExcludesFromWildcard(s.exclude) if err != nil { + printError(s.fullCommand, s.op, err) return err } - dsturl, err := url.New(s.dst, url.WithRaw(s.raw)) + s.includePatterns, err = createIncludesFromWildcard(s.include) if err != nil { + printError(s.fullCommand, s.op, err) return err } - sourceObjects, destObjects, err := s.getSourceAndDestinationObjects(c.Context, srcurl, dsturl) + sourceObjects, destObjects, err := s.getSourceAndDestinationObjects(c.Context, s.src, s.dst) if err != nil { printError(s.fullCommand, s.op, err) return err } - isBatch := srcurl.IsWildcard() - if !isBatch && !srcurl.IsRemote() { - sourceClient, err := storage.NewClient(c.Context, srcurl, s.storageOpts) + isBatch := s.src.IsWildcard() + if !isBatch && !s.src.IsRemote() { + sourceClient, err := storage.NewClient(c.Context, s.src, s.storageOpts) if err != nil { return err } - obj, _ := sourceClient.Stat(c.Context, srcurl) + obj, _ := sourceClient.Stat(c.Context, s.src) isBatch = obj != nil && obj.Type.IsDir() } @@ -215,7 +258,7 @@ func (s Sync) Run(c *cli.Context) error { pipeReader, pipeWriter := io.Pipe() // create a reader, writer pipe to pass commands to run // Create commands in background. - go s.planRun(c, onlySource, onlyDest, commonObjects, dsturl, strategy, pipeWriter, isBatch) + go s.planRun(c, onlySource, onlyDest, commonObjects, s.dst, strategy, pipeWriter, isBatch) err = NewRun(c, pipeReader).Run(c.Context) return multierror.Append(err, merrorWaiter).ErrorOrNil() @@ -291,15 +334,7 @@ func (s Sync) getSourceAndDestinationObjects(ctx context.Context, srcurl, dsturl return nil, nil, err } - // add * to end of destination string, to get all objects recursively. 
- var destinationURLPath string - if strings.HasSuffix(s.dst, "/") { - destinationURLPath = s.dst + "*" - } else { - destinationURLPath = s.dst + "/*" - } - - destObjectsURL, err := url.New(destinationURLPath) + destObjectsURL, err := url.New(s.dst.Path) if err != nil { return nil, nil, err } @@ -331,6 +366,9 @@ func (s Sync) getSourceAndDestinationObjects(ctx context.Context, srcurl, dsturl if s.shouldSkipObject(st, true) { continue } + if !s.shouldSyncObject(st, true) { + continue + } filteredSrcObjectChannel <- *st } }() @@ -370,6 +408,9 @@ func (s Sync) getSourceAndDestinationObjects(ctx context.Context, srcurl, dsturl if s.shouldSkipObject(dt, false) { continue } + if !s.shouldSyncObject(dt, true) { + continue + } filteredDstObjectChannel <- *dt } }() @@ -534,3 +575,32 @@ func (s Sync) shouldSkipObject(object *storage.Object, verbose bool) bool { } return false } + +// shouldSkipObject checks is object should be skipped. +func (s Sync) shouldSyncObject(object *storage.Object, verbose bool) bool { + if err := object.Err; err != nil { + if verbose { + printError(s.fullCommand, s.op, err) + } + return false + } + + switch { + case len(s.excludePatterns) == 0 && len(s.includePatterns) == 0: + fmt.Println("case 1") + return true + case len(s.excludePatterns) == 0 && len(s.includePatterns) > 0: + fmt.Println("case 3") + return isURLIncluded(s.includePatterns, object.URL.Path, s.src.Prefix) + case len(s.excludePatterns) > 0 && len(s.includePatterns) == 0: + fmt.Println("case 2") + return !isURLExcluded(s.excludePatterns, object.URL.Path, s.src.Prefix) + case len(s.excludePatterns) > 0 && len(s.includePatterns) > 0: + if isURLExcluded(s.excludePatterns, object.URL.Path, s.src.Prefix) { + return false + } + return isURLIncluded(s.includePatterns, object.URL.Path, s.src.Prefix) + } + fmt.Println("case 6") + return true +} From 8543e2b025c69db6e35d63b6eb730b077bec529c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Thu, 20 Jul 2023 14:09:30 
+0300 Subject: [PATCH 02/31] command/cp: add --include flag --- command/cp.go | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/command/cp.go b/command/cp.go index e294d7b69..e5db45009 100644 --- a/command/cp.go +++ b/command/cp.go @@ -9,6 +9,7 @@ import ( "net/http" "os" "path/filepath" + "regexp" "strings" "github.com/hashicorp/go-multierror" @@ -167,6 +168,10 @@ func NewSharedFlags() []cli.Flag { Name: "exclude", Usage: "exclude objects with given pattern", }, + &cli.StringSliceFlag{ + Name: "include", + Usage: "include objects with given pattern", + }, &cli.BoolFlag{ Name: "raw", Usage: "disable the wildcard operations, useful with filenames that contains glob characters", @@ -271,11 +276,16 @@ type Copy struct { forceGlacierTransfer bool ignoreGlacierWarnings bool exclude []string + include []string cacheControl string expires string contentType string contentEncoding string + // patterns + excludePatterns []*regexp.Regexp + includePatterns []*regexp.Regexp + // region settings srcRegion string dstRegion string @@ -324,6 +334,7 @@ func NewCopy(c *cli.Context, deleteSource bool) (*Copy, error) { forceGlacierTransfer: c.Bool("force-glacier-transfer"), ignoreGlacierWarnings: c.Bool("ignore-glacier-warnings"), exclude: c.StringSlice("exclude"), + include: c.StringSlice("include"), cacheControl: c.String("cache-control"), expires: c.String("expires"), contentType: c.String("content-type"), @@ -390,7 +401,13 @@ func (c Copy) Run(ctx context.Context) error { isBatch = obj != nil && obj.Type.IsDir() } - excludePatterns, err := createExcludesFromWildcard(c.exclude) + c.excludePatterns, err = createExcludesFromWildcard(c.exclude) + if err != nil { + printError(c.fullCommand, c.op, err) + return err + } + + c.includePatterns, err = createIncludesFromWildcard(c.include) if err != nil { printError(c.fullCommand, c.op, err) return err @@ -416,7 +433,7 @@ func (c Copy) Run(ctx context.Context) error { continue } 
- if isURLExcluded(excludePatterns, object.URL.Path, c.src.Prefix) { + if !c.shouldCopyObject(object, true) { continue } @@ -767,6 +784,31 @@ func (c Copy) shouldOverride(ctx context.Context, srcurl *url.URL, dsturl *url.U return stickyErr } +// shouldCopyObject checks is object should be skipped. +func (c Copy) shouldCopyObject(object *storage.Object, verbose bool) bool { + if err := object.Err; err != nil { + if verbose { + printError(c.fullCommand, c.op, err) + } + return false + } + + switch { + case len(c.excludePatterns) == 0 && len(c.includePatterns) == 0: + return true + case len(c.excludePatterns) == 0 && len(c.includePatterns) > 0: + return isURLIncluded(c.includePatterns, object.URL.Path, c.src.Prefix) + case len(c.excludePatterns) > 0 && len(c.includePatterns) == 0: + return !isURLExcluded(c.excludePatterns, object.URL.Path, c.src.Prefix) + case len(c.excludePatterns) > 0 && len(c.includePatterns) > 0: + if isURLExcluded(c.excludePatterns, object.URL.Path, c.src.Prefix) { + return false + } + return isURLIncluded(c.includePatterns, object.URL.Path, c.src.Prefix) + } + return true +} + // prepareRemoteDestination will return a new destination URL for // remote->remote and local->remote copy operations. 
func prepareRemoteDestination( From 8c96aaca789b17cccf19f3801eef6c528c6dbe82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Thu, 20 Jul 2023 14:09:38 +0300 Subject: [PATCH 03/31] command/rm: add --include flag --- command/rm.go | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/command/rm.go b/command/rm.go index 94698abf0..db862d388 100644 --- a/command/rm.go +++ b/command/rm.go @@ -3,6 +3,7 @@ package command import ( "context" "fmt" + "regexp" "github.com/hashicorp/go-multierror" "github.com/urfave/cli/v2" @@ -66,6 +67,10 @@ func NewDeleteCommand() *cli.Command { Name: "exclude", Usage: "exclude objects with given pattern", }, + &cli.StringSliceFlag{ + Name: "include", + Usage: "include objects with given pattern", + }, &cli.BoolFlag{ Name: "all-versions", Usage: "list all versions of object(s)", @@ -101,6 +106,7 @@ func NewDeleteCommand() *cli.Command { // flags exclude: c.StringSlice("exclude"), + include: c.StringSlice("include"), storageOpts: NewStorageOpts(c), }.Run(c.Context) @@ -119,6 +125,11 @@ type Delete struct { // flag options exclude []string + include []string + + // patterns + excludePatterns []*regexp.Regexp + includePatterns []*regexp.Regexp // storage options storageOpts storage.Options @@ -135,7 +146,7 @@ func (d Delete) Run(ctx context.Context) error { return err } - excludePatterns, err := createExcludesFromWildcard(d.exclude) + d.excludePatterns, err = createExcludesFromWildcard(d.exclude) if err != nil { printError(d.fullCommand, d.op, err) return err @@ -164,7 +175,7 @@ func (d Delete) Run(ctx context.Context) error { continue } - if isURLExcluded(excludePatterns, object.URL.Path, srcurl.Prefix) { + if !d.shouldDeleteObject(object, true, srcurl.Prefix) { continue } @@ -195,6 +206,31 @@ func (d Delete) Run(ctx context.Context) error { return multierror.Append(merrorResult, merrorObjects).ErrorOrNil() } +// shouldDeleteObject checks is object should be 
deleted. +func (d Delete) shouldDeleteObject(object *storage.Object, verbose bool, prefix string) bool { + if err := object.Err; err != nil { + if verbose { + printError(d.fullCommand, d.op, err) + } + return false + } + + switch { + case len(d.excludePatterns) == 0 && len(d.includePatterns) == 0: + return true + case len(d.excludePatterns) == 0 && len(d.includePatterns) > 0: + return isURLIncluded(d.includePatterns, object.URL.Path, prefix) + case len(d.excludePatterns) > 0 && len(d.includePatterns) == 0: + return !isURLExcluded(d.excludePatterns, object.URL.Path, prefix) + case len(d.excludePatterns) > 0 && len(d.includePatterns) > 0: + if isURLExcluded(d.excludePatterns, object.URL.Path, prefix) { + return false + } + return isURLIncluded(d.includePatterns, object.URL.Path, prefix) + } + return true +} + // newSources creates object URL list from given sources. func newURLs(isRaw bool, versionID string, isAllVersions bool, sources ...string) ([]*url.URL, error) { var urls []*url.URL From 12d1dc523319b5ff4cab4390ed4c49df08855d2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Thu, 20 Jul 2023 14:09:54 +0300 Subject: [PATCH 04/31] command/sync: refactor --include flag --- command/sync.go | 43 ++++--------------------------------------- 1 file changed, 4 insertions(+), 39 deletions(-) diff --git a/command/sync.go b/command/sync.go index cb383bc8c..319f799a4 100644 --- a/command/sync.go +++ b/command/sync.go @@ -80,10 +80,6 @@ func NewSyncCommandFlags() []cli.Flag { Name: "size-only", Usage: "make size of object only criteria to decide whether an object should be synced", }, - &cli.StringSliceFlag{ - Name: "include", - Usage: "include objects with given pattern", - }, } sharedFlags := NewSharedFlags() return append(syncFlags, sharedFlags...) 
@@ -366,9 +362,6 @@ func (s Sync) getSourceAndDestinationObjects(ctx context.Context, srcurl, dsturl if s.shouldSkipObject(st, true) { continue } - if !s.shouldSyncObject(st, true) { - continue - } filteredSrcObjectChannel <- *st } }() @@ -408,9 +401,10 @@ func (s Sync) getSourceAndDestinationObjects(ctx context.Context, srcurl, dsturl if s.shouldSkipObject(dt, false) { continue } - if !s.shouldSyncObject(dt, true) { - continue - } + /* + if !s.shouldSyncObject(dt, true) { + continue + }*/ filteredDstObjectChannel <- *dt } }() @@ -575,32 +569,3 @@ func (s Sync) shouldSkipObject(object *storage.Object, verbose bool) bool { } return false } - -// shouldSkipObject checks is object should be skipped. -func (s Sync) shouldSyncObject(object *storage.Object, verbose bool) bool { - if err := object.Err; err != nil { - if verbose { - printError(s.fullCommand, s.op, err) - } - return false - } - - switch { - case len(s.excludePatterns) == 0 && len(s.includePatterns) == 0: - fmt.Println("case 1") - return true - case len(s.excludePatterns) == 0 && len(s.includePatterns) > 0: - fmt.Println("case 3") - return isURLIncluded(s.includePatterns, object.URL.Path, s.src.Prefix) - case len(s.excludePatterns) > 0 && len(s.includePatterns) == 0: - fmt.Println("case 2") - return !isURLExcluded(s.excludePatterns, object.URL.Path, s.src.Prefix) - case len(s.excludePatterns) > 0 && len(s.includePatterns) > 0: - if isURLExcluded(s.excludePatterns, object.URL.Path, s.src.Prefix) { - return false - } - return isURLIncluded(s.includePatterns, object.URL.Path, s.src.Prefix) - } - fmt.Println("case 6") - return true -} From 7be5ea8d71e013f0308cface09cec28b9741dc96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Thu, 20 Jul 2023 14:47:24 +0300 Subject: [PATCH 05/31] command/cp: add tests for --include --- e2e/cp_test.go | 162 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) diff --git a/e2e/cp_test.go b/e2e/cp_test.go index 
ff2bf49b2..b3e4e7347 100644 --- a/e2e/cp_test.go +++ b/e2e/cp_test.go @@ -4169,3 +4169,165 @@ func TestLocalFileOverridenWhenDownloadFailed(t *testing.T) { expected := fs.Expected(t, fs.WithFile(filename, content)) assert.Assert(t, fs.Equal(workdir.Path(), expected)) } + +// cp --include "*.py" s3://bucket/* . +func TestCopyS3ObjectsWithIncludeFilter(t *testing.T) { + t.Parallel() + + s3client, s5cmd := setup(t) + + bucket := s3BucketFromTestName(t) + createBucket(t, s3client, bucket) + + const ( + includePattern = "*.py" + fileContent = "content" + ) + + files := [...]string{ + "file1.py", + "file2.py", + "file.txt", + "a.txt", + "src/file.txt", + } + + for _, filename := range files { + putFile(t, s3client, bucket, filename, fileContent) + } + + srcpath := fmt.Sprintf("s3://%s", bucket) + + cmd := s5cmd("cp", "--include", includePattern, srcpath+"/*", ".") + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + + assertLines(t, result.Stdout(), map[int]compareFunc{ + 0: equals("cp %v/file1.py %s", srcpath, files[0]), + 1: equals("cp %v/file2.py %s", srcpath, files[1]), + }, sortInput(true)) + + // assert s3 + for _, f := range files { + assert.Assert(t, ensureS3Object(s3client, bucket, f, fileContent)) + } + + expectedFileSystem := []fs.PathOp{ + fs.WithFile("file1.py", fileContent), + fs.WithFile("file2.py", fileContent), + } + // assert local filesystem + expected := fs.Expected(t, expectedFileSystem...) + assert.Assert(t, fs.Equal(cmd.Dir, expected)) +} + +// cp --include "file*" --exclude "*.py" s3://bucket/* . 
+func TestCopyS3ObjectsWithIncludeExcludeFilter(t *testing.T) { + t.Parallel() + + s3client, s5cmd := setup(t) + + bucket := s3BucketFromTestName(t) + createBucket(t, s3client, bucket) + + const ( + includePattern = "file*" + excludePattern = "*.py" + fileContent = "content" + ) + + files := [...]string{ + "file1.py", + "file2.py", + "test.py", + "app.py", + "docs/readme.md", + } + + for _, filename := range files { + putFile(t, s3client, bucket, filename, fileContent) + } + + srcpath := fmt.Sprintf("s3://%s", bucket) + + cmd := s5cmd("cp", "--include", includePattern, "--exclude", excludePattern, srcpath+"/*", ".") + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + + assertLines(t, result.Stdout(), map[int]compareFunc{ + 0: equals("cp %v/file1.py %s", srcpath, files[0]), + 1: equals("cp %v/file2.py %s", srcpath, files[1]), + }, sortInput(true)) + + // assert s3 + for _, f := range files { + assert.Assert(t, ensureS3Object(s3client, bucket, f, fileContent)) + } + + expectedFileSystem := []fs.PathOp{ + fs.WithFile("file1.py", fileContent), + fs.WithFile("file2.py", fileContent), + } + // assert local filesystem + expected := fs.Expected(t, expectedFileSystem...) + assert.Assert(t, fs.Equal(cmd.Dir, expected)) +} + +// cp --exclude "file*" --include "*.py" s3://bucket/* . 
+func TestCopyS3ObjectsWithIncludeExcludeFilter2(t *testing.T) { + t.Parallel() + + s3client, s5cmd := setup(t) + + bucket := s3BucketFromTestName(t) + createBucket(t, s3client, bucket) + + const ( + includePattern = "*.py" + excludePattern = "file*" + fileContent = "content" + ) + + files := [...]string{ + "file1.py", + "file2.py", + "test.py", + "app.py", + "docs/readme.md", + } + + for _, filename := range files { + putFile(t, s3client, bucket, filename, fileContent) + } + + srcpath := fmt.Sprintf("s3://%s", bucket) + + cmd := s5cmd("cp", "--exclude", excludePattern, "--include", includePattern, srcpath+"/*", ".") + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + + assertLines(t, result.Stdout(), map[int]compareFunc{ + 0: equals("cp %v/app.py %s", srcpath, files[3]), + 1: equals("cp %v/file1.py %s", srcpath, files[0]), + 2: equals("cp %v/file2.py %s", srcpath, files[1]), + 3: equals("cp %v/test.py %s", srcpath, files[2]), + }, sortInput(true)) + + // assert s3 + for _, f := range files { + assert.Assert(t, ensureS3Object(s3client, bucket, f, fileContent)) + } + + expectedFileSystem := []fs.PathOp{ + fs.WithFile("file1.py", fileContent), + fs.WithFile("file2.py", fileContent), + fs.WithFile("test.py", fileContent), + fs.WithFile("app.py", fileContent), + } + // assert local filesystem + expected := fs.Expected(t, expectedFileSystem...) 
+ assert.Assert(t, fs.Equal(cmd.Dir, expected)) +} From 58a08d9149ea46b45238743d81b04c7e5017ecaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Thu, 20 Jul 2023 14:51:49 +0300 Subject: [PATCH 06/31] command/cp: update shouldCopyObject --- command/cp.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/command/cp.go b/command/cp.go index e5db45009..161b07126 100644 --- a/command/cp.go +++ b/command/cp.go @@ -794,17 +794,11 @@ func (c Copy) shouldCopyObject(object *storage.Object, verbose bool) bool { } switch { - case len(c.excludePatterns) == 0 && len(c.includePatterns) == 0: - return true case len(c.excludePatterns) == 0 && len(c.includePatterns) > 0: + case len(c.excludePatterns) > 0 && len(c.includePatterns) > 0: return isURLIncluded(c.includePatterns, object.URL.Path, c.src.Prefix) case len(c.excludePatterns) > 0 && len(c.includePatterns) == 0: return !isURLExcluded(c.excludePatterns, object.URL.Path, c.src.Prefix) - case len(c.excludePatterns) > 0 && len(c.includePatterns) > 0: - if isURLExcluded(c.excludePatterns, object.URL.Path, c.src.Prefix) { - return false - } - return isURLIncluded(c.includePatterns, object.URL.Path, c.src.Prefix) } return true } From 8a8f4fb4d28b9d367b39f1f2375b47df91cfa819 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Thu, 20 Jul 2023 14:52:08 +0300 Subject: [PATCH 07/31] command/rm: update shouldDeleteObject --- command/rm.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/command/rm.go b/command/rm.go index db862d388..5510ee8ae 100644 --- a/command/rm.go +++ b/command/rm.go @@ -216,17 +216,11 @@ func (d Delete) shouldDeleteObject(object *storage.Object, verbose bool, prefix } switch { - case len(d.excludePatterns) == 0 && len(d.includePatterns) == 0: - return true case len(d.excludePatterns) == 0 && len(d.includePatterns) > 0: + case len(d.excludePatterns) > 0 && len(d.includePatterns) > 0: return 
isURLIncluded(d.includePatterns, object.URL.Path, prefix) case len(d.excludePatterns) > 0 && len(d.includePatterns) == 0: return !isURLExcluded(d.excludePatterns, object.URL.Path, prefix) - case len(d.excludePatterns) > 0 && len(d.includePatterns) > 0: - if isURLExcluded(d.excludePatterns, object.URL.Path, prefix) { - return false - } - return isURLIncluded(d.includePatterns, object.URL.Path, prefix) } return true } From 54e504e7d3f491fd859714079566a9f426289d8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Thu, 20 Jul 2023 16:01:15 +0300 Subject: [PATCH 08/31] command/cp: refactor shouldCopyObject --- command/cp.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/command/cp.go b/command/cp.go index 161b07126..de009255f 100644 --- a/command/cp.go +++ b/command/cp.go @@ -792,12 +792,10 @@ func (c Copy) shouldCopyObject(object *storage.Object, verbose bool) bool { } return false } - - switch { - case len(c.excludePatterns) == 0 && len(c.includePatterns) > 0: - case len(c.excludePatterns) > 0 && len(c.includePatterns) > 0: + if len(c.includePatterns) > 0 { return isURLIncluded(c.includePatterns, object.URL.Path, c.src.Prefix) - case len(c.excludePatterns) > 0 && len(c.includePatterns) == 0: + } + if len(c.excludePatterns) > 0 { return !isURLExcluded(c.excludePatterns, object.URL.Path, c.src.Prefix) } return true From 0942e631f340512068a6b0b0df2c05987796c664 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Thu, 20 Jul 2023 16:01:42 +0300 Subject: [PATCH 09/31] command/rm: add --include support --- command/rm.go | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/command/rm.go b/command/rm.go index 5510ee8ae..9e9884632 100644 --- a/command/rm.go +++ b/command/rm.go @@ -99,6 +99,18 @@ func NewDeleteCommand() *cli.Command { return err } + excludePatterns, err := createExcludesFromWildcard(c.StringSlice("exclude")) + if err != nil { + 
printError(fullCommand, c.Command.Name, err) + return err + } + + includePatterns, err := createIncludesFromWildcard(c.StringSlice("include")) + if err != nil { + printError(fullCommand, c.Command.Name, err) + return err + } + return Delete{ src: srcUrls, op: c.Command.Name, @@ -108,6 +120,10 @@ func NewDeleteCommand() *cli.Command { exclude: c.StringSlice("exclude"), include: c.StringSlice("include"), + // patterns + excludePatterns: excludePatterns, + includePatterns: includePatterns, + storageOpts: NewStorageOpts(c), }.Run(c.Context) }, @@ -146,12 +162,6 @@ func (d Delete) Run(ctx context.Context) error { return err } - d.excludePatterns, err = createExcludesFromWildcard(d.exclude) - if err != nil { - printError(d.fullCommand, d.op, err) - return err - } - objch := expandSources(ctx, client, false, d.src...) var ( @@ -214,12 +224,10 @@ func (d Delete) shouldDeleteObject(object *storage.Object, verbose bool, prefix } return false } - - switch { - case len(d.excludePatterns) == 0 && len(d.includePatterns) > 0: - case len(d.excludePatterns) > 0 && len(d.includePatterns) > 0: + if len(d.includePatterns) > 0 { return isURLIncluded(d.includePatterns, object.URL.Path, prefix) - case len(d.excludePatterns) > 0 && len(d.includePatterns) == 0: + } + if len(d.excludePatterns) > 0 { return !isURLExcluded(d.excludePatterns, object.URL.Path, prefix) } return true From 538bf47850ec8556d9852083f731af7ff1ff126c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Thu, 20 Jul 2023 16:01:59 +0300 Subject: [PATCH 10/31] command/rm: add --include tests --- e2e/rm_test.go | 151 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) diff --git a/e2e/rm_test.go b/e2e/rm_test.go index 4a01b6a40..edcee9d1a 100644 --- a/e2e/rm_test.go +++ b/e2e/rm_test.go @@ -1301,3 +1301,154 @@ func TestRemoveByVersionID(t *testing.T) { result = icmd.RunCmd(cmd) assert.Assert(t, result.Stdout() == "") } + +// rm --include "*.py" s3://bucket/ +func 
TestRemoveS3ObjectsWithIncludeFilter(t *testing.T) { + t.Parallel() + + s3client, s5cmd := setup(t) + + bucket := s3BucketFromTestName(t) + createBucket(t, s3client, bucket) + + const ( + includePattern = "*.py" + fileContent = "content" + ) + + files := [...]string{ + "file1.py", + "file2.py", + "file.txt", + "data.txt", + "src/app.py", + } + filesKept := [...]string{ + "file.txt", + "data.txt", + } + + for _, filename := range files { + putFile(t, s3client, bucket, filename, fileContent) + } + + srcpath := fmt.Sprintf("s3://%s", bucket) + + cmd := s5cmd("rm", "--include", includePattern, srcpath+"/*") + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + + fmt.Println(result.Stdout()) + + assertLines(t, result.Stdout(), map[int]compareFunc{ + 0: equals("rm %v/%s", srcpath, files[0]), + 1: equals("rm %v/%s", srcpath, files[1]), + 2: equals("rm %v/%s", srcpath, files[4]), + }, sortInput(true)) + + // assert s3 + for _, f := range filesKept { + assert.Assert(t, ensureS3Object(s3client, bucket, f, fileContent)) + } +} + +// rm --include "file*" --exclude "*.py" s3://bucket/ +func TestRemoveS3ObjectsWithIncludeExcludeFilter(t *testing.T) { + t.Parallel() + + s3client, s5cmd := setup(t) + + bucket := s3BucketFromTestName(t) + createBucket(t, s3client, bucket) + + const ( + includePattern = "file*" + excludePattern = "*.py" + fileContent = "content" + ) + + files := [...]string{ + "file1.py", + "file2.py", + "test.py", + "app.py", + "docs/readme.md", + } + filesKept := [...]string{ + "test.py", + "app.py", + "docs/readme.md", + } + + for _, filename := range files { + putFile(t, s3client, bucket, filename, fileContent) + } + + srcpath := fmt.Sprintf("s3://%s", bucket) + + cmd := s5cmd("rm", "--include", includePattern, "--exclude", excludePattern, srcpath+"/*") + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + + assertLines(t, result.Stdout(), map[int]compareFunc{ + 0: equals("rm %v/%s", srcpath, files[0]), + 1: equals("rm %v/%s", srcpath, 
files[1]), + }, sortInput(true)) + + // assert s3 + for _, f := range filesKept { + assert.Assert(t, ensureS3Object(s3client, bucket, f, fileContent)) + } +} + +// rm --exclude "file*" --include "*.py" s3://bucket/ +func TestRemoveS3ObjectsWithIncludeExcludeFilter2(t *testing.T) { + t.Parallel() + + s3client, s5cmd := setup(t) + + bucket := s3BucketFromTestName(t) + createBucket(t, s3client, bucket) + + const ( + includePattern = "*.py" + excludePattern = "file*" + fileContent = "content" + ) + + files := [...]string{ + "file1.py", + "file2.py", + "test.py", + "app.py", + "docs/readme.md", + } + filesKept := [...]string{ + "docs/readme.md", + } + + for _, filename := range files { + putFile(t, s3client, bucket, filename, fileContent) + } + + srcpath := fmt.Sprintf("s3://%s", bucket) + + cmd := s5cmd("rm", "--exclude", excludePattern, "--include", includePattern, srcpath+"/*") + result := icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + + assertLines(t, result.Stdout(), map[int]compareFunc{ + 0: equals("rm %v/%s", srcpath, files[3]), + 1: equals("rm %v/%s", srcpath, files[0]), + 2: equals("rm %v/%s", srcpath, files[1]), + 3: equals("rm %v/%s", srcpath, files[2]), + }, sortInput(true)) + + // assert s3 + for _, f := range filesKept { + assert.Assert(t, ensureS3Object(s3client, bucket, f, fileContent)) + } +} From 09ab702ec81770a6eb42e5950755b6a84bb28009 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Thu, 20 Jul 2023 16:17:57 +0300 Subject: [PATCH 11/31] command/sync: revert to the original --- command/sync.go | 86 ++++++++++++++++--------------------------------- 1 file changed, 27 insertions(+), 59 deletions(-) diff --git a/command/sync.go b/command/sync.go index 319f799a4..f5a63deee 100644 --- a/command/sync.go +++ b/command/sync.go @@ -6,7 +6,6 @@ import ( "io" "os" "path/filepath" - "regexp" "strings" "sync" @@ -65,8 +64,8 @@ Examples: 10. 
Sync all files to S3 bucket but exclude the ones with txt and gz extension > s5cmd {{.HelpName}} --exclude "*.txt" --exclude "*.gz" dir/ s3://bucket - - 10. Sync all files to S3 bucket but include the only ones with txt and gz extension + + 11. Sync all files to S3 bucket but include the only ones with txt and gz extension > s5cmd {{.HelpName}} --include "*.txt" --include "*.gz" dir/ s3://bucket ` @@ -103,11 +102,7 @@ func NewSyncCommand() *cli.Command { Action: func(c *cli.Context) (err error) { defer stat.Collect(c.Command.FullName(), &err)() - sync, err := NewSync(c) - if err != nil { - return err - } - return sync.Run(c) + return NewSync(c).Run(c) }, } @@ -121,20 +116,14 @@ type ObjectPair struct { // Sync holds sync operation flags and states. type Sync struct { - src *url.URL - dst *url.URL + src string + dst string op string fullCommand string // flags delete bool sizeOnly bool - exclude []string - include []string - - // patterns - excludePatterns []*regexp.Regexp - includePatterns []*regexp.Regexp // s3 options storageOpts storage.Options @@ -148,37 +137,16 @@ type Sync struct { } // NewSync creates Sync from cli.Context -func NewSync(c *cli.Context) (*Sync, error) { - fullCommand := commandFromContext(c) - - src, err := url.New(c.Args().Get(0), url.WithVersion(c.String("version-id")), - url.WithRaw(c.Bool("raw"))) - if err != nil { - printError(fullCommand, c.Command.Name, err) - return nil, err - } - - dst, err := url.New(c.Args().Get(1), url.WithRaw(c.Bool("raw"))) - if err != nil { - printError(fullCommand, c.Command.Name, err) - return nil, err - } - - return &Sync{ - src: src, - dst: dst, +func NewSync(c *cli.Context) Sync { + return Sync{ + src: c.Args().Get(0), + dst: c.Args().Get(1), op: c.Command.Name, fullCommand: commandFromContext(c), // flags delete: c.Bool("delete"), sizeOnly: c.Bool("size-only"), - exclude: c.StringSlice("exclude"), - include: c.StringSlice("include"), - - // patterns - excludePatterns: nil, - includePatterns: nil, // flags 
followSymlinks: !c.Bool("no-follow-symlinks"), @@ -188,40 +156,36 @@ func NewSync(c *cli.Context) (*Sync, error) { srcRegion: c.String("source-region"), dstRegion: c.String("destination-region"), storageOpts: NewStorageOpts(c), - }, nil + } } // Run compares files, plans necessary s5cmd commands to execute // and executes them in order to sync source to destination. func (s Sync) Run(c *cli.Context) error { - var err error - - s.excludePatterns, err = createExcludesFromWildcard(s.exclude) + srcurl, err := url.New(s.src, url.WithRaw(s.raw)) if err != nil { - printError(s.fullCommand, s.op, err) return err } - s.includePatterns, err = createIncludesFromWildcard(s.include) + dsturl, err := url.New(s.dst, url.WithRaw(s.raw)) if err != nil { - printError(s.fullCommand, s.op, err) return err } - sourceObjects, destObjects, err := s.getSourceAndDestinationObjects(c.Context, s.src, s.dst) + sourceObjects, destObjects, err := s.getSourceAndDestinationObjects(c.Context, srcurl, dsturl) if err != nil { printError(s.fullCommand, s.op, err) return err } - isBatch := s.src.IsWildcard() - if !isBatch && !s.src.IsRemote() { - sourceClient, err := storage.NewClient(c.Context, s.src, s.storageOpts) + isBatch := srcurl.IsWildcard() + if !isBatch && !srcurl.IsRemote() { + sourceClient, err := storage.NewClient(c.Context, srcurl, s.storageOpts) if err != nil { return err } - obj, _ := sourceClient.Stat(c.Context, s.src) + obj, _ := sourceClient.Stat(c.Context, srcurl) isBatch = obj != nil && obj.Type.IsDir() } @@ -254,7 +218,7 @@ func (s Sync) Run(c *cli.Context) error { pipeReader, pipeWriter := io.Pipe() // create a reader, writer pipe to pass commands to run // Create commands in background. 
- go s.planRun(c, onlySource, onlyDest, commonObjects, s.dst, strategy, pipeWriter, isBatch) + go s.planRun(c, onlySource, onlyDest, commonObjects, dsturl, strategy, pipeWriter, isBatch) err = NewRun(c, pipeReader).Run(c.Context) return multierror.Append(err, merrorWaiter).ErrorOrNil() @@ -330,7 +294,15 @@ func (s Sync) getSourceAndDestinationObjects(ctx context.Context, srcurl, dsturl return nil, nil, err } - destObjectsURL, err := url.New(s.dst.Path) + // add * to end of destination string, to get all objects recursively. + var destinationURLPath string + if strings.HasSuffix(s.dst, "/") { + destinationURLPath = s.dst + "*" + } else { + destinationURLPath = s.dst + "/*" + } + + destObjectsURL, err := url.New(destinationURLPath) if err != nil { return nil, nil, err } @@ -401,10 +373,6 @@ func (s Sync) getSourceAndDestinationObjects(ctx context.Context, srcurl, dsturl if s.shouldSkipObject(dt, false) { continue } - /* - if !s.shouldSyncObject(dt, true) { - continue - }*/ filteredDstObjectChannel <- *dt } }() From 58c089c6a92e4f1b98364e487c0516ef22c58706 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Thu, 20 Jul 2023 16:19:25 +0300 Subject: [PATCH 12/31] command/cp: update help text --- command/cp.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/command/cp.go b/command/cp.go index de009255f..fa22e7ee8 100644 --- a/command/cp.go +++ b/command/cp.go @@ -95,14 +95,17 @@ Examples: 19. Copy all files from S3 bucket to another S3 bucket but exclude the ones starts with log > s5cmd {{.HelpName}} --exclude "log*" "s3://bucket/*" s3://destbucket + + 20. Copy all files from S3 bucket to another S3 bucket but only the ones starts with log + > s5cmd {{.HelpName}} --include "log*" "s3://bucket/*" s3://destbucket - 20. Download an S3 object from a requester pays bucket + 21. Download an S3 object from a requester pays bucket > s5cmd --request-payer=requester {{.HelpName}} s3://bucket/prefix/object.gz . - 21. 
Upload a file to S3 with a content-type and content-encoding header + 22. Upload a file to S3 with a content-type and content-encoding header > s5cmd --content-type "text/css" --content-encoding "br" myfile.css.br s3://bucket/ - 22. Download the specific version of a remote object to working directory + 23. Download the specific version of a remote object to working directory > s5cmd {{.HelpName}} --version-id VERSION_ID s3://bucket/prefix/object . ` From 7e19c9b5cb4d61da1f35b3a7daeab4ec5b9e3cde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Thu, 20 Jul 2023 16:20:12 +0300 Subject: [PATCH 13/31] command/rm: update help text --- command/rm.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/command/rm.go b/command/rm.go index 9e9884632..71467ba64 100644 --- a/command/rm.go +++ b/command/rm.go @@ -39,17 +39,20 @@ Examples: 5. Delete all matching objects but exclude the ones with .txt extension or starts with "main" > s5cmd {{.HelpName}} --exclude "*.txt" --exclude "main*" "s3://bucketname/prefix/*" + + 6. Delete all matching objects but only the ones with .txt extension or starts with "main" + > s5cmd {{.HelpName}} --include "*.txt" --include "main*" "s3://bucketname/prefix/*" - 6. Delete the specific version of a remote object's content to stdout + 7. Delete the specific version of a remote object's content to stdout > s5cmd {{.HelpName}} --version-id VERSION_ID s3://bucket/prefix/object - 7. Delete all versions of an object in the bucket + 8. Delete all versions of an object in the bucket > s5cmd {{.HelpName}} --all-versions s3://bucket/object - 8. Delete all versions of all objects that starts with a prefix in the bucket + 9. Delete all versions of all objects that starts with a prefix in the bucket > s5cmd {{.HelpName}} --all-versions "s3://bucket/prefix*" - 9. Delete all versions of all objects in the bucket + 10. 
Delete all versions of all objects in the bucket > s5cmd {{.HelpName}} --all-versions "s3://bucket/*" ` From 9e1784772d1ae0ac3cc85c7230842d4ab7f3cbe2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Thu, 20 Jul 2023 16:42:51 +0300 Subject: [PATCH 14/31] command/sync: add --include test --- e2e/sync_test.go | 85 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/e2e/sync_test.go b/e2e/sync_test.go index 3cc2894af..d97bda19a 100644 --- a/e2e/sync_test.go +++ b/e2e/sync_test.go @@ -1793,3 +1793,88 @@ func TestIssue435(t *testing.T) { assertError(t, err, errS3NoSuchKey) } } + +// sync --include pattern s3://bucket/* s3://anotherbucket/prefix/ +func TestSyncS3ObjectsIntoAnotherBucketWithIncludeFilters(t *testing.T) { + t.Parallel() + + srcbucket := s3BucketFromTestNameWithPrefix(t, "src") + dstbucket := s3BucketFromTestNameWithPrefix(t, "dst") + + s3client, s5cmd := setup(t) + + createBucket(t, s3client, srcbucket) + createBucket(t, s3client, dstbucket) + + srcFiles := []string{ + "file_already_exists_in_destination.txt", + "file_not_exists_in_destination.txt", + "main.py", + "main.js", + "readme.md", + "main.pdf", + "main/file.txt", + } + + dstFiles := []string{ + "prefix/file_already_exists_in_destination.txt", + } + + excludedFiles := []string{ + "prefix/file_not_exists_in_destination.txt", + } + + includedFiles := []string{ + "main.js", + "main.pdf", + "main.py", + "main/file.txt", + "readme.md", + } + + const ( + content = "this is a file content" + includePattern1 = "main*" + includePattern2 = "*.md" + ) + + for _, filename := range srcFiles { + putFile(t, s3client, srcbucket, filename, content) + } + + for _, filename := range dstFiles { + putFile(t, s3client, dstbucket, filename, content) + } + + src := fmt.Sprintf("s3://%v/*", srcbucket) + dst := fmt.Sprintf("s3://%v/prefix/", dstbucket) + + cmd := s5cmd("sync", "--include", includePattern1, "--include", includePattern2, src, dst) + result 
:= icmd.RunCmd(cmd) + + result.Assert(t, icmd.Success) + + assertLines(t, result.Stdout(), map[int]compareFunc{ + 0: equals(`cp s3://%s/%s s3://%s/prefix/%s`, srcbucket, includedFiles[0], dstbucket, includedFiles[0]), + 1: equals(`cp s3://%s/%s s3://%s/prefix/%s`, srcbucket, includedFiles[1], dstbucket, includedFiles[1]), + 2: equals(`cp s3://%s/%s s3://%s/prefix/%s`, srcbucket, includedFiles[2], dstbucket, includedFiles[2]), + 3: equals(`cp s3://%s/%s s3://%s/prefix/%s`, srcbucket, includedFiles[3], dstbucket, includedFiles[3]), + 4: equals(`cp s3://%s/%s s3://%s/prefix/%s`, srcbucket, includedFiles[4], dstbucket, includedFiles[4]), + }, sortInput(true)) + + // assert s3 source objects + for _, filename := range srcFiles { + assert.Assert(t, ensureS3Object(s3client, srcbucket, filename, content)) + } + + // assert s3 destination objects + for _, filename := range includedFiles { + assert.Assert(t, ensureS3Object(s3client, dstbucket, "prefix/"+filename, content)) + } + + // assert s3 destination objects which should not be in bucket. + for _, filename := range excludedFiles { + err := ensureS3Object(s3client, dstbucket, filename, content) + assertError(t, err, errS3NoSuchKey) + } +} From 36855d7350ad1bbfc0bd6de2c9dc995ef4fd6d59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Thu, 20 Jul 2023 18:55:09 +0300 Subject: [PATCH 15/31] command/cp: update comment --- command/cp.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command/cp.go b/command/cp.go index fa22e7ee8..a4d47a708 100644 --- a/command/cp.go +++ b/command/cp.go @@ -787,7 +787,7 @@ func (c Copy) shouldOverride(ctx context.Context, srcurl *url.URL, dsturl *url.U return stickyErr } -// shouldCopyObject checks is object should be skipped. +// shouldCopyObject checks is object should be copied. 
func (c Copy) shouldCopyObject(object *storage.Object, verbose bool) bool { if err := object.Err; err != nil { if verbose { From 518f4ab46e540df21c17221756d3fc1ac54bf996 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Mon, 24 Jul 2023 15:10:29 +0300 Subject: [PATCH 16/31] command/cp: update precedences of `--exclude` and `--include` --- command/cp.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/command/cp.go b/command/cp.go index a4d47a708..ab17192e8 100644 --- a/command/cp.go +++ b/command/cp.go @@ -795,12 +795,12 @@ func (c Copy) shouldCopyObject(object *storage.Object, verbose bool) bool { } return false } + if len(c.excludePatterns) > 0 && isURLExcluded(c.excludePatterns, object.URL.Path, c.src.Prefix) { + return false + } if len(c.includePatterns) > 0 { return isURLIncluded(c.includePatterns, object.URL.Path, c.src.Prefix) } - if len(c.excludePatterns) > 0 { - return !isURLExcluded(c.excludePatterns, object.URL.Path, c.src.Prefix) - } return true } From 95935511848966f4426d7a3c40d76e45bfc7d2f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Mon, 24 Jul 2023 15:10:39 +0300 Subject: [PATCH 17/31] command/rm: update precedences of `--exclude` and `--include` --- command/rm.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/command/rm.go b/command/rm.go index 71467ba64..752000dc4 100644 --- a/command/rm.go +++ b/command/rm.go @@ -227,12 +227,12 @@ func (d Delete) shouldDeleteObject(object *storage.Object, verbose bool, prefix } return false } + if len(d.excludePatterns) > 0 && isURLExcluded(d.excludePatterns, object.URL.Path, prefix) { + return false + } if len(d.includePatterns) > 0 { return isURLIncluded(d.includePatterns, object.URL.Path, prefix) } - if len(d.excludePatterns) > 0 { - return !isURLExcluded(d.excludePatterns, object.URL.Path, prefix) - } return true } From c94bc0b30de7bb1d8d400b55aeba364ca5cf58a4 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Mon, 24 Jul 2023 15:12:06 +0300 Subject: [PATCH 18/31] changelog: add `--include` entry --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd94c13ca..b44fb3081 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ #### Breaking changes #### Features +- Added `--include` flag to `cp`, `rm` and `sync` commands. ([#516](https://github.com/peak/s5cmd/issues/516)) #### Improvements #### Bugfixes - Fixed a bug introduced with `external sort` support in `sync` command which prevents `sync` to an empty destination with `--delete` option. ([#576](https://github.com/peak/s5cmd/issues/576)) From bf9a562d43f6fdbffe3eb5e1993ade2278169a37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Mon, 24 Jul 2023 16:01:17 +0300 Subject: [PATCH 19/31] command/cp: update `--include` tests --- e2e/cp_test.go | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/e2e/cp_test.go b/e2e/cp_test.go index b3e4e7347..e585aa6c6 100644 --- a/e2e/cp_test.go +++ b/e2e/cp_test.go @@ -4256,20 +4256,14 @@ func TestCopyS3ObjectsWithIncludeExcludeFilter(t *testing.T) { result.Assert(t, icmd.Success) - assertLines(t, result.Stdout(), map[int]compareFunc{ - 0: equals("cp %v/file1.py %s", srcpath, files[0]), - 1: equals("cp %v/file2.py %s", srcpath, files[1]), - }, sortInput(true)) + assertLines(t, result.Stdout(), map[int]compareFunc{}, sortInput(true)) // assert s3 for _, f := range files { assert.Assert(t, ensureS3Object(s3client, bucket, f, fileContent)) } - expectedFileSystem := []fs.PathOp{ - fs.WithFile("file1.py", fileContent), - fs.WithFile("file2.py", fileContent), - } + expectedFileSystem := []fs.PathOp{} // assert local filesystem expected := fs.Expected(t, expectedFileSystem...) 
assert.Assert(t, fs.Equal(cmd.Dir, expected)) @@ -4311,9 +4305,7 @@ func TestCopyS3ObjectsWithIncludeExcludeFilter2(t *testing.T) { assertLines(t, result.Stdout(), map[int]compareFunc{ 0: equals("cp %v/app.py %s", srcpath, files[3]), - 1: equals("cp %v/file1.py %s", srcpath, files[0]), - 2: equals("cp %v/file2.py %s", srcpath, files[1]), - 3: equals("cp %v/test.py %s", srcpath, files[2]), + 1: equals("cp %v/test.py %s", srcpath, files[2]), }, sortInput(true)) // assert s3 @@ -4322,8 +4314,6 @@ func TestCopyS3ObjectsWithIncludeExcludeFilter2(t *testing.T) { } expectedFileSystem := []fs.PathOp{ - fs.WithFile("file1.py", fileContent), - fs.WithFile("file2.py", fileContent), fs.WithFile("test.py", fileContent), fs.WithFile("app.py", fileContent), } From 0819eab7448a9b0ea37bcdd3dc3e9dd85138a5b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Mon, 24 Jul 2023 16:26:15 +0300 Subject: [PATCH 20/31] command/rm: update `--include` tests --- e2e/rm_test.go | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/e2e/rm_test.go b/e2e/rm_test.go index edcee9d1a..43c6cb628 100644 --- a/e2e/rm_test.go +++ b/e2e/rm_test.go @@ -1363,7 +1363,7 @@ func TestRemoveS3ObjectsWithIncludeExcludeFilter(t *testing.T) { createBucket(t, s3client, bucket) const ( - includePattern = "file*" + includePattern = "*.md" excludePattern = "*.py" fileContent = "content" ) @@ -1373,12 +1373,13 @@ func TestRemoveS3ObjectsWithIncludeExcludeFilter(t *testing.T) { "file2.py", "test.py", "app.py", - "docs/readme.md", + "docs/file.md", } filesKept := [...]string{ + "file1.py", + "file2.py", "test.py", "app.py", - "docs/readme.md", } for _, filename := range files { @@ -1393,8 +1394,7 @@ func TestRemoveS3ObjectsWithIncludeExcludeFilter(t *testing.T) { result.Assert(t, icmd.Success) assertLines(t, result.Stdout(), map[int]compareFunc{ - 0: equals("rm %v/%s", srcpath, files[0]), - 1: equals("rm %v/%s", srcpath, files[1]), + 0: equals("rm %v/%s", 
srcpath, files[4]), }, sortInput(true)) // assert s3 @@ -1403,7 +1403,7 @@ func TestRemoveS3ObjectsWithIncludeExcludeFilter(t *testing.T) { } } -// rm --exclude "file*" --include "*.py" s3://bucket/ +// rm --exclude "docs*" --include "*.md" --include "*.py" s3://bucket/ func TestRemoveS3ObjectsWithIncludeExcludeFilter2(t *testing.T) { t.Parallel() @@ -1413,9 +1413,10 @@ func TestRemoveS3ObjectsWithIncludeExcludeFilter2(t *testing.T) { createBucket(t, s3client, bucket) const ( - includePattern = "*.py" - excludePattern = "file*" - fileContent = "content" + includePattern = "*.md" + includePattern2 = "*.py" + excludePattern = "docs*" + fileContent = "content" ) files := [...]string{ @@ -1435,7 +1436,7 @@ func TestRemoveS3ObjectsWithIncludeExcludeFilter2(t *testing.T) { srcpath := fmt.Sprintf("s3://%s", bucket) - cmd := s5cmd("rm", "--exclude", excludePattern, "--include", includePattern, srcpath+"/*") + cmd := s5cmd("rm", "--exclude", excludePattern, "--include", includePattern, "--include", includePattern2, srcpath+"/*") result := icmd.RunCmd(cmd) result.Assert(t, icmd.Success) From ab97b4072f9c5cc43a42fe1fb839880e0bbfcc21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Mon, 24 Jul 2023 16:31:19 +0300 Subject: [PATCH 21/31] command/cp: update `--include` test --- e2e/cp_test.go | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/e2e/cp_test.go b/e2e/cp_test.go index e585aa6c6..73258f111 100644 --- a/e2e/cp_test.go +++ b/e2e/cp_test.go @@ -4269,7 +4269,7 @@ func TestCopyS3ObjectsWithIncludeExcludeFilter(t *testing.T) { assert.Assert(t, fs.Equal(cmd.Dir, expected)) } -// cp --exclude "file*" --include "*.py" s3://bucket/* . +// cp --exclude "file*" --exclude "vendor/*" --include "*.py" --include "*.go" s3://bucket/* . 
func TestCopyS3ObjectsWithIncludeExcludeFilter2(t *testing.T) { t.Parallel() @@ -4279,16 +4279,22 @@ func TestCopyS3ObjectsWithIncludeExcludeFilter2(t *testing.T) { createBucket(t, s3client, bucket) const ( - includePattern = "*.py" - excludePattern = "file*" - fileContent = "content" + includePattern = "*.py" + includePattern2 = "*.go" + excludePattern = "file*" + excludePattern2 = "vendor/*" + fileContent = "content" ) files := [...]string{ "file1.py", "file2.py", + "file1.go", + "file2.go", "test.py", "app.py", + "app.go", + "vendor/package.go", "docs/readme.md", } @@ -4298,14 +4304,15 @@ func TestCopyS3ObjectsWithIncludeExcludeFilter2(t *testing.T) { srcpath := fmt.Sprintf("s3://%s", bucket) - cmd := s5cmd("cp", "--exclude", excludePattern, "--include", includePattern, srcpath+"/*", ".") + cmd := s5cmd("cp", "--exclude", excludePattern, "--exclude", excludePattern2, "--include", includePattern, "--include", includePattern2, srcpath+"/*", ".") result := icmd.RunCmd(cmd) result.Assert(t, icmd.Success) assertLines(t, result.Stdout(), map[int]compareFunc{ - 0: equals("cp %v/app.py %s", srcpath, files[3]), - 1: equals("cp %v/test.py %s", srcpath, files[2]), + 0: equals("cp %v/app.go %s", srcpath, files[6]), + 1: equals("cp %v/app.py %s", srcpath, files[5]), + 2: equals("cp %v/test.py %s", srcpath, files[4]), }, sortInput(true)) // assert s3 @@ -4316,6 +4323,7 @@ func TestCopyS3ObjectsWithIncludeExcludeFilter2(t *testing.T) { expectedFileSystem := []fs.PathOp{ fs.WithFile("test.py", fileContent), fs.WithFile("app.py", fileContent), + fs.WithFile("app.go", fileContent), } // assert local filesystem expected := fs.Expected(t, expectedFileSystem...) 
From 90f0555ca193c4a8d5db3e994a80237c3c9d8c58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Mon, 24 Jul 2023 16:59:26 +0300 Subject: [PATCH 22/31] command/cp: reformat the code --- command/cp.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/command/cp.go b/command/cp.go index 48f75a89c..33d0030de 100644 --- a/command/cp.go +++ b/command/cp.go @@ -289,8 +289,8 @@ type Copy struct { contentType string contentEncoding string contentDisposition string - - // patterns + + // patterns excludePatterns []*regexp.Regexp includePatterns []*regexp.Regexp From 0ab93c91ea035b31797f02dc5b3e61d198f881e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Tue, 25 Jul 2023 18:15:23 +0300 Subject: [PATCH 23/31] command: refactor wildcard handling --- command/cp.go | 8 +++--- command/du.go | 4 +-- command/include.go | 44 ----------------------------- command/ls.go | 4 +-- command/rm.go | 8 +++--- command/select.go | 4 +-- command/{exclude.go => wildcard.go} | 15 +++++----- 7 files changed, 21 insertions(+), 66 deletions(-) delete mode 100644 command/include.go rename command/{exclude.go => wildcard.go} (54%) diff --git a/command/cp.go b/command/cp.go index 33d0030de..fd301c62e 100644 --- a/command/cp.go +++ b/command/cp.go @@ -410,13 +410,13 @@ func (c Copy) Run(ctx context.Context) error { isBatch = obj != nil && obj.Type.IsDir() } - c.excludePatterns, err = createExcludesFromWildcard(c.exclude) + c.excludePatterns, err = createRegexFromWildcard(c.exclude) if err != nil { printError(c.fullCommand, c.op, err) return err } - c.includePatterns, err = createIncludesFromWildcard(c.include) + c.includePatterns, err = createRegexFromWildcard(c.include) if err != nil { printError(c.fullCommand, c.op, err) return err @@ -806,11 +806,11 @@ func (c Copy) shouldCopyObject(object *storage.Object, verbose bool) bool { } return false } - if len(c.excludePatterns) > 0 && isURLExcluded(c.excludePatterns, object.URL.Path, 
c.src.Prefix) { + if len(c.excludePatterns) > 0 && isURLMatched(c.excludePatterns, object.URL.Path, c.src.Prefix) { return false } if len(c.includePatterns) > 0 { - return isURLIncluded(c.includePatterns, object.URL.Path, c.src.Prefix) + return isURLMatched(c.includePatterns, object.URL.Path, c.src.Prefix) } return true } diff --git a/command/du.go b/command/du.go index 3a46aab03..8812c9582 100644 --- a/command/du.go +++ b/command/du.go @@ -144,7 +144,7 @@ func (sz Size) Run(ctx context.Context) error { var merror error - excludePatterns, err := createExcludesFromWildcard(sz.exclude) + excludePatterns, err := createRegexFromWildcard(sz.exclude) if err != nil { printError(sz.fullCommand, sz.op, err) return err @@ -161,7 +161,7 @@ func (sz Size) Run(ctx context.Context) error { continue } - if isURLExcluded(excludePatterns, object.URL.Path, sz.src.Prefix) { + if isURLMatched(excludePatterns, object.URL.Path, sz.src.Prefix) { continue } diff --git a/command/include.go b/command/include.go deleted file mode 100644 index 85dc34a56..000000000 --- a/command/include.go +++ /dev/null @@ -1,44 +0,0 @@ -package command - -import ( - "path/filepath" - "regexp" - "strings" - - "github.com/peak/s5cmd/v2/strutil" -) - -// createIncludesFromWildcard creates regex strings from wildcard. -func createIncludesFromWildcard(inputIncludes []string) ([]*regexp.Regexp, error) { - var result []*regexp.Regexp - for _, input := range inputIncludes { - if input != "" { - regex := strutil.WildCardToRegexp(input) - regex = strutil.MatchFromStartToEnd(regex) - regex = strutil.AddNewLineFlag(regex) - regexpCompiled, err := regexp.Compile(regex) - if err != nil { - return nil, err - } - result = append(result, regexpCompiled) - } - } - return result, nil -} - -// isURLIncluded checks whether given urlPath matches any of the include patterns. 
-func isURLIncluded(includePatterns []*regexp.Regexp, urlPath, sourcePrefix string) bool { - if len(includePatterns) == 0 { - return false - } - if !strings.HasSuffix(sourcePrefix, "/") { - sourcePrefix += "/" - } - sourcePrefix = filepath.ToSlash(sourcePrefix) - for _, includePattern := range includePatterns { - if includePattern.MatchString(strings.TrimPrefix(urlPath, sourcePrefix)) { - return true - } - } - return false -} diff --git a/command/ls.go b/command/ls.go index 2fe591d7a..87ff1e06b 100644 --- a/command/ls.go +++ b/command/ls.go @@ -188,7 +188,7 @@ func (l List) Run(ctx context.Context) error { var merror error - excludePatterns, err := createExcludesFromWildcard(l.exclude) + excludePatterns, err := createRegexFromWildcard(l.exclude) if err != nil { printError(l.fullCommand, l.op, err) return err @@ -205,7 +205,7 @@ func (l List) Run(ctx context.Context) error { continue } - if isURLExcluded(excludePatterns, object.URL.Path, l.src.Prefix) { + if isURLMatched(excludePatterns, object.URL.Path, l.src.Prefix) { continue } diff --git a/command/rm.go b/command/rm.go index 752000dc4..9b13ca97d 100644 --- a/command/rm.go +++ b/command/rm.go @@ -102,13 +102,13 @@ func NewDeleteCommand() *cli.Command { return err } - excludePatterns, err := createExcludesFromWildcard(c.StringSlice("exclude")) + excludePatterns, err := createRegexFromWildcard(c.StringSlice("exclude")) if err != nil { printError(fullCommand, c.Command.Name, err) return err } - includePatterns, err := createIncludesFromWildcard(c.StringSlice("include")) + includePatterns, err := createRegexFromWildcard(c.StringSlice("include")) if err != nil { printError(fullCommand, c.Command.Name, err) return err @@ -227,11 +227,11 @@ func (d Delete) shouldDeleteObject(object *storage.Object, verbose bool, prefix } return false } - if len(d.excludePatterns) > 0 && isURLExcluded(d.excludePatterns, object.URL.Path, prefix) { + if len(d.excludePatterns) > 0 && isURLMatched(d.excludePatterns, object.URL.Path, prefix) 
{ return false } if len(d.includePatterns) > 0 { - return isURLIncluded(d.includePatterns, object.URL.Path, prefix) + return isURLMatched(d.includePatterns, object.URL.Path, prefix) } return true } diff --git a/command/select.go b/command/select.go index eb2c2eddf..cbd42990d 100644 --- a/command/select.go +++ b/command/select.go @@ -191,7 +191,7 @@ func (s Select) Run(ctx context.Context) error { } }() - excludePatterns, err := createExcludesFromWildcard(s.exclude) + excludePatterns, err := createRegexFromWildcard(s.exclude) if err != nil { printError(s.fullCommand, s.op, err) return err @@ -217,7 +217,7 @@ func (s Select) Run(ctx context.Context) error { continue } - if isURLExcluded(excludePatterns, object.URL.Path, s.src.Prefix) { + if isURLMatched(excludePatterns, object.URL.Path, s.src.Prefix) { continue } diff --git a/command/exclude.go b/command/wildcard.go similarity index 54% rename from command/exclude.go rename to command/wildcard.go index 36d9a9aa9..33625294d 100644 --- a/command/exclude.go +++ b/command/wildcard.go @@ -8,10 +8,10 @@ import ( "github.com/peak/s5cmd/v2/strutil" ) -// createExcludesFromWildcard creates regex strings from wildcard. -func createExcludesFromWildcard(inputExcludes []string) ([]*regexp.Regexp, error) { +// createRegexFromWildcard creates regex strings from wildcard. +func createRegexFromWildcard(wildcards []string) ([]*regexp.Regexp, error) { var result []*regexp.Regexp - for _, input := range inputExcludes { + for _, input := range wildcards { if input != "" { regex := strutil.WildCardToRegexp(input) regex = strutil.MatchFromStartToEnd(regex) @@ -26,17 +26,16 @@ func createExcludesFromWildcard(inputExcludes []string) ([]*regexp.Regexp, error return result, nil } -// isURLExcluded checks whether given urlPath matches any of the exclude patterns. 
-func isURLExcluded(excludePatterns []*regexp.Regexp, urlPath, sourcePrefix string) bool { - if len(excludePatterns) == 0 { +func isURLMatched(regexPatterns []*regexp.Regexp, urlPath, sourcePrefix string) bool { + if len(regexPatterns) == 0 { return false } if !strings.HasSuffix(sourcePrefix, "/") { sourcePrefix += "/" } sourcePrefix = filepath.ToSlash(sourcePrefix) - for _, excludePattern := range excludePatterns { - if excludePattern.MatchString(strings.TrimPrefix(urlPath, sourcePrefix)) { + for _, regexPattern := range regexPatterns { + if regexPattern.MatchString(strings.TrimPrefix(urlPath, sourcePrefix)) { return true } } From 873c42b540b90576db907bb53afc00bf8db9789c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Wed, 26 Jul 2023 10:55:29 +0300 Subject: [PATCH 24/31] command/cp: add an unit test for `shouldCopyObject` --- command/cp_test.go | 72 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/command/cp_test.go b/command/cp_test.go index ccac38ff3..1df2b5677 100644 --- a/command/cp_test.go +++ b/command/cp_test.go @@ -3,8 +3,11 @@ package command import ( "io" "os" + "reflect" "testing" + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" "gotest.tools/v3/assert" ) @@ -77,3 +80,72 @@ func TestGuessContentType(t *testing.T) { os.Remove(f.Name()) } } + +func TestShouldCopyObject(t *testing.T) { + t.Parallel() + + testcases := []struct { + excludePatterns []string + includePatterns []string + objects []string + filteredObjects []string + }{ + { + excludePatterns: []string{"*.txt", "*.log"}, + includePatterns: []string{"file-*.doc"}, + objects: []string{"document.txt", "file-2.log", "file-1.doc", "image.png"}, + filteredObjects: []string{"file-1.doc"}, + }, + { + excludePatterns: []string{"secret-*"}, + includePatterns: []string{"*.txt", "*.log"}, + objects: []string{"secret-passwords.txt", "file-1.txt", "file-2.txt", "image.png"}, + filteredObjects: 
[]string{"file-1.txt", "file-2.txt"}, + }, + { + excludePatterns: []string{}, + includePatterns: []string{"*.png"}, + objects: []string{"secret-passwords.txt", "file-1.txt", "file-2.txt", "image.png"}, + filteredObjects: []string{"image.png"}, + }, + { + excludePatterns: []string{"file*"}, + includePatterns: []string{}, + objects: []string{"readme.md", "file-1.txt", "file-2.txt", "image.png"}, + filteredObjects: []string{"readme.md", "image.png"}, + }, + { + excludePatterns: []string{"file*"}, + includePatterns: []string{"*txt"}, + objects: []string{"readme.txt", "file-1.txt", "file-2.txt", "license.txt"}, + filteredObjects: []string{"readme.txt", "license.txt"}, + }, + { + excludePatterns: []string{"*tmp", "*.txt"}, + includePatterns: []string{"*png", "*.doc*"}, + objects: []string{"readme.txt", "license.txt", "cache.tmp", "image.png", "eula.doc", "eula.docx", "personaldoc"}, + filteredObjects: []string{"image.png", "eula.doc", "eula.docx"}, + }, + } + + for _, tc := range testcases { + tc := tc + excludeRegex, err := createRegexFromWildcard(tc.excludePatterns) + if err != nil { + t.Error(err) + } + includeRegex, err := createRegexFromWildcard(tc.includePatterns) + if err != nil { + t.Error(err) + } + var filteredObjects []string + cp := Copy{excludePatterns: excludeRegex, includePatterns: includeRegex} + cp.src = &url.URL{Prefix: ""} + for _, object := range tc.objects { + if cp.shouldCopyObject(&storage.Object{URL: &url.URL{Path: object}}, false) { + filteredObjects = append(filteredObjects, object) + } + } + assert.Equal(t, reflect.DeepEqual(tc.filteredObjects, filteredObjects), true) + } +} From 6a14745ff9ff1e934d703bb64df634e3f07883ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Wed, 26 Jul 2023 17:15:37 +0300 Subject: [PATCH 25/31] command/cp: remove unwanted character --- command/cp.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command/cp.go b/command/cp.go index fd301c62e..6936f1731 100644 --- 
a/command/cp.go +++ b/command/cp.go @@ -95,7 +95,7 @@ Examples: 19. Copy all files from S3 bucket to another S3 bucket but exclude the ones starts with log > s5cmd {{.HelpName}} --exclude "log*" "s3://bucket/*" s3://destbucket - + 20. Copy all files from S3 bucket to another S3 bucket but only the ones starts with log > s5cmd {{.HelpName}} --include "log*" "s3://bucket/*" s3://destbucket From 002bf3dbffe991305b78779bd18fe1e4ef5bec22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Wed, 26 Jul 2023 17:16:45 +0300 Subject: [PATCH 26/31] command/cp: use `DeepEqual` in a test --- command/cp_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/command/cp_test.go b/command/cp_test.go index 1df2b5677..2a8cd5466 100644 --- a/command/cp_test.go +++ b/command/cp_test.go @@ -3,7 +3,6 @@ package command import ( "io" "os" - "reflect" "testing" "github.com/peak/s5cmd/v2/storage" @@ -146,6 +145,6 @@ func TestShouldCopyObject(t *testing.T) { filteredObjects = append(filteredObjects, object) } } - assert.Equal(t, reflect.DeepEqual(tc.filteredObjects, filteredObjects), true) + assert.DeepEqual(t, tc.filteredObjects, filteredObjects) } } From f4b3f893e747f24f5adba4655a1b159d3b28b7fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Wed, 26 Jul 2023 18:57:33 +0300 Subject: [PATCH 27/31] readme: add information about `--exclude` and `--include` --- README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.md b/README.md index 0d5d75800..254f3c43c 100644 --- a/README.md +++ b/README.md @@ -287,6 +287,24 @@ folder hierarchy. ⚠️ Copying objects (from S3 to S3) larger than 5GB is not supported yet. We have an [open ticket](https://github.com/peak/s5cmd/issues/29) to track the issue. +#### Using Exclude and Include Filters +`s5cmd` supports both `--exclude` and `--include` flags, which can take wildcard values. These flags can be used with `cp`, `rm`, and `sync` commands. 
If `--exclude` flag is used, all objects matching the pattern will be excluded from the transfer. If `--include` flag is used, only objects matching the pattern will be included in the transfer. If both flags are used at the same time, `--exclude` has precedence over `--include`. This means if an object URL is matched with any of `--exclude` patterns, the object will be skipped. If there are exclude patterns but a URL does not match any of them, it will check for include patterns. If the URL matches any of include patterns, it will be transferred; otherwise, it will be skipped. The order of the flags does not affect the results, unlike `aws-cli`. + +The command below will delete only objects that end with `.log`. + + s5cmd rm --include "*.log" 's3://bucket/logs/2020/*' + +The command below will delete all objects except those that end with `.log` or `.txt`. + + s5cmd rm --exclude "*.log" --exclude "*.txt" 's3://bucket/logs/2020/*' + +If you wish, you can use multiple flags, like below. An object is handled if it matches any one of the patterns, so the command will download objects that start with `request` or end with `.log`. + + s5cmd cp --include "*.log" --include "request*" 's3://bucket/logs/2020/*' . + +Using a combination of `--include` and `--exclude` is also possible. The command below will only sync objects that end with `.log` or `.txt` but exclude those that start with `access_`. For example, `request.log` and `license.txt` will be included, while `access_log.txt` and `readme.md` are excluded. + + s5cmd sync --include "*log" --exclude "access_*" --include "*txt" 's3://bucket/logs/*' . 
#### Select JSON object content using SQL `s5cmd` supports the `SelectObjectContent` S3 operation, and will run your From 8ce7ca1c157ddde9e15960c6455def0eb53157ae Mon Sep 17 00:00:00 2001 From: Deniz Surmeli <52484739+denizsurmeli@users.noreply.github.com> Date: Thu, 27 Jul 2023 16:21:35 +0300 Subject: [PATCH 28/31] cat: make it concurrent (#593) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR adds a new io.WriterAt adapter for non-seekable writers. It uses an internal linked list to order the incoming chunks. The implementation is independent from the download manager of aws-sdk-go, and because of that it currently cannot bound the memory usage. In order to limit the memory usage, we would have had to write a custom manager other than the aws-sdk-go's implementation, which seemed unfeasible. The new implementation is about 25% faster than the older implementation for a 9.4 GB file with partSize=50MB and concurrency=20 parameters, but with significantly higher memory usage: on average it uses 0.9 GB of memory, and a peak of 2.1 GB was observed. Obviously, the memory usage and performance depend on the partSize-concurrency configuration and the link. Resolves #245 Co-authored-by: İbrahim Güngör --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f6b4497d..27ec6b56b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ #### Features - Added `--content-disposition` flag to `cp` command. ([#569](https://github.com/peak/s5cmd/issues/569)) -- Added `--show-fullpath` flag to `ls`. (#[596](https://github.com/peak/s5cmd/issues/596)) +- Added `--show-fullpath` flag to `ls`. ([#596](https://github.com/peak/s5cmd/issues/596)) - Added `pipe` command. ([#182](https://github.com/peak/s5cmd/issues/182)) - Added `--include` flag to `cp`, `rm` and `sync` commands. 
([#516](https://github.com/peak/s5cmd/issues/516)) From f008722001fa3c18876e45f62ce7a330c56250d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Fri, 28 Jul 2023 10:43:12 +0300 Subject: [PATCH 29/31] command: refactor `shouldCopyObject` and `shouldDeleteObject` --- command/cp.go | 8 ++++++-- command/rm.go | 23 +++++------------------ command/wildcard.go | 14 ++++++++++++++ 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/command/cp.go b/command/cp.go index 69103a419..a0a168e2a 100644 --- a/command/cp.go +++ b/command/cp.go @@ -296,7 +296,7 @@ type Copy struct { contentType string contentEncoding string contentDisposition string - showProgress bool + showProgress bool progressbar progressbar.ProgressBar // patterns @@ -463,7 +463,11 @@ func (c Copy) Run(ctx context.Context) error { continue } - if !c.shouldCopyObject(object, true) { + isExcluded, err := isObjectExcluded(object, c.excludePatterns, c.includePatterns, c.src.Prefix) + if err != nil { + printError(c.fullCommand, c.op, err) + } + if isExcluded { continue } diff --git a/command/rm.go b/command/rm.go index 9b13ca97d..3ba092b23 100644 --- a/command/rm.go +++ b/command/rm.go @@ -188,7 +188,11 @@ func (d Delete) Run(ctx context.Context) error { continue } - if !d.shouldDeleteObject(object, true, srcurl.Prefix) { + isExcluded, err := isObjectExcluded(object, d.excludePatterns, d.includePatterns, srcurl.Prefix) + if err != nil { + printError(d.fullCommand, d.op, err) + } + if isExcluded { continue } @@ -219,23 +223,6 @@ func (d Delete) Run(ctx context.Context) error { return multierror.Append(merrorResult, merrorObjects).ErrorOrNil() } -// shouldDeleteObject checks is object should be deleted. 
-func (d Delete) shouldDeleteObject(object *storage.Object, verbose bool, prefix string) bool { - if err := object.Err; err != nil { - if verbose { - printError(d.fullCommand, d.op, err) - } - return false - } - if len(d.excludePatterns) > 0 && isURLMatched(d.excludePatterns, object.URL.Path, prefix) { - return false - } - if len(d.includePatterns) > 0 { - return isURLMatched(d.includePatterns, object.URL.Path, prefix) - } - return true -} - // newSources creates object URL list from given sources. func newURLs(isRaw bool, versionID string, isAllVersions bool, sources ...string) ([]*url.URL, error) { var urls []*url.URL diff --git a/command/wildcard.go b/command/wildcard.go index 33625294d..73ac239e7 100644 --- a/command/wildcard.go +++ b/command/wildcard.go @@ -5,6 +5,7 @@ import ( "regexp" "strings" + "github.com/peak/s5cmd/v2/storage" "github.com/peak/s5cmd/v2/strutil" ) @@ -41,3 +42,16 @@ func isURLMatched(regexPatterns []*regexp.Regexp, urlPath, sourcePrefix string) } return false } + +func isObjectExcluded(object *storage.Object, excludePatterns []*regexp.Regexp, includePatterns []*regexp.Regexp, prefix string) (bool, error) { + if err := object.Err; err != nil { + return true, err + } + if len(excludePatterns) > 0 && isURLMatched(excludePatterns, object.URL.Path, prefix) { + return true, nil + } + if len(includePatterns) > 0 { + return !isURLMatched(includePatterns, object.URL.Path, prefix), nil + } + return false, nil +} From c7c399b5a9d6afa8da482609a42e85f8c431818a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Fri, 4 Aug 2023 11:25:45 +0300 Subject: [PATCH 30/31] command: refactor wildcard testing --- command/cp.go | 17 -------- command/cp_test.go | 71 --------------------------------- command/wildcard_test.go | 86 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 88 deletions(-) create mode 100644 command/wildcard_test.go diff --git a/command/cp.go b/command/cp.go index 8f8010af8..f304f3c26 100644 
--- a/command/cp.go +++ b/command/cp.go @@ -847,23 +847,6 @@ func (c Copy) shouldOverride(ctx context.Context, srcurl *url.URL, dsturl *url.U return stickyErr } -// shouldCopyObject checks is object should be copied. -func (c Copy) shouldCopyObject(object *storage.Object, verbose bool) bool { - if err := object.Err; err != nil { - if verbose { - printError(c.fullCommand, c.op, err) - } - return false - } - if len(c.excludePatterns) > 0 && isURLMatched(c.excludePatterns, object.URL.Path, c.src.Prefix) { - return false - } - if len(c.includePatterns) > 0 { - return isURLMatched(c.includePatterns, object.URL.Path, c.src.Prefix) - } - return true -} - // prepareRemoteDestination will return a new destination URL for // remote->remote and local->remote copy operations. func prepareRemoteDestination( diff --git a/command/cp_test.go b/command/cp_test.go index 2a8cd5466..ccac38ff3 100644 --- a/command/cp_test.go +++ b/command/cp_test.go @@ -5,8 +5,6 @@ import ( "os" "testing" - "github.com/peak/s5cmd/v2/storage" - "github.com/peak/s5cmd/v2/storage/url" "gotest.tools/v3/assert" ) @@ -79,72 +77,3 @@ func TestGuessContentType(t *testing.T) { os.Remove(f.Name()) } } - -func TestShouldCopyObject(t *testing.T) { - t.Parallel() - - testcases := []struct { - excludePatterns []string - includePatterns []string - objects []string - filteredObjects []string - }{ - { - excludePatterns: []string{"*.txt", "*.log"}, - includePatterns: []string{"file-*.doc"}, - objects: []string{"document.txt", "file-2.log", "file-1.doc", "image.png"}, - filteredObjects: []string{"file-1.doc"}, - }, - { - excludePatterns: []string{"secret-*"}, - includePatterns: []string{"*.txt", "*.log"}, - objects: []string{"secret-passwords.txt", "file-1.txt", "file-2.txt", "image.png"}, - filteredObjects: []string{"file-1.txt", "file-2.txt"}, - }, - { - excludePatterns: []string{}, - includePatterns: []string{"*.png"}, - objects: []string{"secret-passwords.txt", "file-1.txt", "file-2.txt", "image.png"}, - 
filteredObjects: []string{"image.png"}, - }, - { - excludePatterns: []string{"file*"}, - includePatterns: []string{}, - objects: []string{"readme.md", "file-1.txt", "file-2.txt", "image.png"}, - filteredObjects: []string{"readme.md", "image.png"}, - }, - { - excludePatterns: []string{"file*"}, - includePatterns: []string{"*txt"}, - objects: []string{"readme.txt", "file-1.txt", "file-2.txt", "license.txt"}, - filteredObjects: []string{"readme.txt", "license.txt"}, - }, - { - excludePatterns: []string{"*tmp", "*.txt"}, - includePatterns: []string{"*png", "*.doc*"}, - objects: []string{"readme.txt", "license.txt", "cache.tmp", "image.png", "eula.doc", "eula.docx", "personaldoc"}, - filteredObjects: []string{"image.png", "eula.doc", "eula.docx"}, - }, - } - - for _, tc := range testcases { - tc := tc - excludeRegex, err := createRegexFromWildcard(tc.excludePatterns) - if err != nil { - t.Error(err) - } - includeRegex, err := createRegexFromWildcard(tc.includePatterns) - if err != nil { - t.Error(err) - } - var filteredObjects []string - cp := Copy{excludePatterns: excludeRegex, includePatterns: includeRegex} - cp.src = &url.URL{Prefix: ""} - for _, object := range tc.objects { - if cp.shouldCopyObject(&storage.Object{URL: &url.URL{Path: object}}, false) { - filteredObjects = append(filteredObjects, object) - } - } - assert.DeepEqual(t, tc.filteredObjects, filteredObjects) - } -} diff --git a/command/wildcard_test.go b/command/wildcard_test.go new file mode 100644 index 000000000..4c3a8fae2 --- /dev/null +++ b/command/wildcard_test.go @@ -0,0 +1,86 @@ +package command + +import ( + "testing" + + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" + "gotest.tools/v3/assert" +) + +func TestIsObjectExcluded(t *testing.T) { + t.Parallel() + + testcases := []struct { + excludePatterns []string + includePatterns []string + objects []string + filteredObjects []string + }{ + { + excludePatterns: []string{"*.txt", "*.log"}, + includePatterns: 
[]string{"file-*.doc"}, + objects: []string{"document.txt", "file-2.log", "file-1.doc", "image.png"}, + filteredObjects: []string{"file-1.doc"}, + }, + { + excludePatterns: []string{"secret-*"}, + includePatterns: []string{"*.txt", "*.log"}, + objects: []string{"secret-passwords.txt", "file-1.txt", "file-2.txt", "image.png"}, + filteredObjects: []string{"file-1.txt", "file-2.txt"}, + }, + { + excludePatterns: []string{}, + includePatterns: []string{"*.png"}, + objects: []string{"secret-passwords.txt", "file-1.txt", "file-2.txt", "image.png"}, + filteredObjects: []string{"image.png"}, + }, + { + excludePatterns: []string{"file*"}, + includePatterns: []string{}, + objects: []string{"readme.md", "file-1.txt", "file-2.txt", "image.png"}, + filteredObjects: []string{"readme.md", "image.png"}, + }, + { + excludePatterns: []string{"file*"}, + includePatterns: []string{"*txt"}, + objects: []string{"readme.txt", "file-1.txt", "file-2.txt", "license.txt"}, + filteredObjects: []string{"readme.txt", "license.txt"}, + }, + { + excludePatterns: []string{"*tmp", "*.txt"}, + includePatterns: []string{"*png", "*.doc*"}, + objects: []string{"readme.txt", "license.txt", "cache.tmp", "image.png", "eula.doc", "eula.docx", "personaldoc"}, + filteredObjects: []string{"image.png", "eula.doc", "eula.docx"}, + }, + } + + for _, tc := range testcases { + tc := tc + + excludeRegex, err := createRegexFromWildcard(tc.excludePatterns) + if err != nil { + t.Error(err) + } + + includeRegex, err := createRegexFromWildcard(tc.includePatterns) + if err != nil { + t.Error(err) + } + + var filteredObjects []string + + for _, object := range tc.objects { + skip, err := isObjectExcluded(&storage.Object{URL: &url.URL{Path: object}}, excludeRegex, includeRegex, "") + if err != nil { + t.Fatal(err) + } + if skip { + continue + } + filteredObjects = append(filteredObjects, object) + } + + assert.DeepEqual(t, tc.filteredObjects, filteredObjects) + } +} From c76d0ea3d6c53a3446ec6435bd4cc3c6d7022c18 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Ahmet=20Hakan=20Be=C5=9Fel?= Date: Fri, 4 Aug 2023 12:02:37 +0300 Subject: [PATCH 31/31] readme: update filtering section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: İlkin Balkanay --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 85aa983dc..aceeb551c 100644 --- a/README.md +++ b/README.md @@ -302,7 +302,12 @@ folder hierarchy. an [open ticket](https://github.com/peak/s5cmd/issues/29) to track the issue. #### Using Exclude and Include Filters -`s5cmd` supports both `--exclude` and `--include` flags, which can take wildcard values. These flags can be used with `cp`, `rm`, and `sync` commands. If `--exclude` flag is used, all objects matching the pattern will be excluded from the transfer. If `--include` flag is used, only objects matching the pattern will be included in the transfer. If both flags are used at the same time, `--exclude` has precedence over `--include`. This means if an object URL is matched with any of `--exclude` patterns, the object will be skipped. If there are exclude patterns but a URL does not match any of them, it will check for include patterns. If the URL matches any of include patterns, it will be transferred; otherwise, it will be skipped. The order of the flags does not affect the results, unlike `aws-cli`. +`s5cmd` supports the `--exclude` and `--include` flags, which can be used to specify patterns for objects to be excluded or included in commands. + +- The `--exclude` flag specifies objects that should be excluded from the operation. Any object that matches the pattern will be skipped. +- The `--include` flag specifies objects that should be included in the operation. Only objects that match the pattern will be handled. +- If both flags are used, `--exclude` has precedence over `--include`. 
This means that if an object URL matches any of the `--exclude` patterns, the object will be skipped, even if it also matches one of the `--include` patterns. +- The order of the flags does not affect the results (unlike `aws-cli`). The command below will delete only objects that end with `.log`.