diff --git a/pkg/storage/common/common.go b/pkg/storage/common/common.go index 84dd30b1a7..ab3ca10af3 100644 --- a/pkg/storage/common/common.go +++ b/pkg/storage/common/common.go @@ -971,14 +971,36 @@ type DedupeTaskGenerator struct { and generating a task for each unprocessed one*/ lastDigests []godigest.Digest done bool + repos []string // list of repos on which we run dedupe Log zlog.Logger } func (gen *DedupeTaskGenerator) Next() (scheduler.Task, error) { var err error + /* at first run get from storage currently found repositories so that we skip the ones that gets synced/uploaded + while this generator runs, there are deduped/restored inline, no need to run dedupe/restore again */ + if len(gen.repos) == 0 { + gen.repos, err = gen.ImgStore.GetRepositories() + if err != nil { + //nolint: dupword + gen.Log.Error().Err(err).Msg("dedupe rebuild: unable to to get list of repositories") + + return nil, err + } + } + + if len(gen.repos) == 0 { + gen.Log.Info().Msg("dedupe rebuild: no repositories found in storage, finished.") + + // no repositories in storage, no need to continue + gen.done = true + + return nil, nil + } + // get all blobs from storage.imageStore and group them by digest - gen.digest, gen.duplicateBlobs, err = gen.ImgStore.GetNextDigestWithBlobPaths(gen.lastDigests) + gen.digest, gen.duplicateBlobs, err = gen.ImgStore.GetNextDigestWithBlobPaths(gen.repos, gen.lastDigests) if err != nil { gen.Log.Error().Err(err).Msg("dedupe rebuild: failed to get next digest") diff --git a/pkg/storage/imagestore/imagestore.go b/pkg/storage/imagestore/imagestore.go index e3dbc71275..d62798dc0d 100644 --- a/pkg/storage/imagestore/imagestore.go +++ b/pkg/storage/imagestore/imagestore.go @@ -1965,7 +1965,8 @@ func (is *ImageStore) RunGCPeriodically(interval time.Duration, sch *scheduler.S sch.SubmitGenerator(generator, interval, scheduler.MediumPriority) } -func (is *ImageStore) GetNextDigestWithBlobPaths(lastDigests []godigest.Digest) (godigest.Digest, []string, error) { +func (is *ImageStore) GetNextDigestWithBlobPaths(repos []string, lastDigests []godigest.Digest, +) (godigest.Digest, []string, error) { var lockLatency time.Time dir := is.rootDir @@ -1983,8 +1984,18 @@ func (is *ImageStore) GetNextDigestWithBlobPaths(lastDigests []godigest.Digest) return driver.ErrSkipDir } + // skip blobs under .uploads + if strings.HasSuffix(fileInfo.Path(), storageConstants.BlobUploadDir) { + return driver.ErrSkipDir + } + if fileInfo.IsDir() { - return nil + // skip repositories not found in repos + repo := path.Base(fileInfo.Path()) + + if !zcommon.Contains(repos, repo) && repo != "blobs" && repo != "sha256" { + return driver.ErrSkipDir + } } blobDigest := godigest.NewDigestFromEncoded("sha256", path.Base(fileInfo.Path())) diff --git a/pkg/storage/local/local_test.go b/pkg/storage/local/local_test.go index d3ff5d1e4f..130a39f83f 100644 --- a/pkg/storage/local/local_test.go +++ b/pkg/storage/local/local_test.go @@ -1200,6 +1200,17 @@ func TestDedupeLinks(t *testing.T) { storageConstants.DefaultUntaggedImgeRetentionDelay, testCase.dedupe, true, log, metrics, nil, nil) } + // run on empty image store + // switch dedupe to true from false + taskScheduler, cancel := runAndGetScheduler() + + // rebuild with dedupe true + imgStore.RunDedupeBlobs(time.Duration(0), taskScheduler) + // wait until rebuild finishes + time.Sleep(1 * time.Second) + + cancel() + // manifest1 upload, err := imgStore.NewBlobUpload("dedupe1") So(err, ShouldBeNil) diff --git a/pkg/storage/types/types.go b/pkg/storage/types/types.go index e2a5a18bd4..7b6c4de19f 100644 --- a/pkg/storage/types/types.go +++ b/pkg/storage/types/types.go @@ -52,7 +52,7 @@ type ImageStore interface { //nolint:interfacebloat RunGCPeriodically(interval time.Duration, sch *scheduler.Scheduler) RunDedupeBlobs(interval time.Duration, sch *scheduler.Scheduler) RunDedupeForDigest(digest godigest.Digest, dedupe bool, duplicateBlobs []string) error - GetNextDigestWithBlobPaths(lastDigests []godigest.Digest) (godigest.Digest, []string, error) + GetNextDigestWithBlobPaths(repos []string, lastDigests []godigest.Digest) (godigest.Digest, []string, error) GetAllBlobs(repo string) ([]string, error) } diff --git a/pkg/test/mocks/image_store_mock.go b/pkg/test/mocks/image_store_mock.go index 7736d9464a..b5063fed4b 100644 --- a/pkg/test/mocks/image_store_mock.go +++ b/pkg/test/mocks/image_store_mock.go @@ -50,7 +50,7 @@ type MockedImageStore struct { RunGCPeriodicallyFn func(interval time.Duration, sch *scheduler.Scheduler) RunDedupeBlobsFn func(interval time.Duration, sch *scheduler.Scheduler) RunDedupeForDigestFn func(digest godigest.Digest, dedupe bool, duplicateBlobs []string) error - GetNextDigestWithBlobPathsFn func(lastDigests []godigest.Digest) (godigest.Digest, []string, error) + GetNextDigestWithBlobPathsFn func(repos []string, lastDigests []godigest.Digest) (godigest.Digest, []string, error) GetAllBlobsFn func(repo string) ([]string, error) } @@ -370,10 +370,10 @@ func (is MockedImageStore) RunDedupeForDigest(digest godigest.Digest, dedupe boo return nil } -func (is MockedImageStore) GetNextDigestWithBlobPaths(lastDigests []godigest.Digest, +func (is MockedImageStore) GetNextDigestWithBlobPaths(repos []string, lastDigests []godigest.Digest, ) (godigest.Digest, []string, error) { if is.GetNextDigestWithBlobPathsFn != nil { - return is.GetNextDigestWithBlobPathsFn(lastDigests) + return is.GetNextDigestWithBlobPathsFn(repos, lastDigests) } return "", []string{}, nil