Skip to content

Commit

Permalink
fix(dedupe): run dedupe only for repositories found at startup
Browse files Browse the repository at this point in the history
no need to run dedupe/restore blobs for images being pushed or synced while
running dedupe task, they are already deduped/restored inline.

Signed-off-by: Petu Eusebiu <[email protected]>
  • Loading branch information
eusebiu-constantin-petu-dbk committed Sep 22, 2023
1 parent 8c55944 commit 4366b6d
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 7 deletions.
24 changes: 23 additions & 1 deletion pkg/storage/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -971,14 +971,36 @@ type DedupeTaskGenerator struct {
and generating a task for each unprocessed one*/
lastDigests []godigest.Digest
done bool
repos []string // list of repos on which we run dedupe
Log zlog.Logger
}

func (gen *DedupeTaskGenerator) Next() (scheduler.Task, error) {
var err error

/* at first run get from storage currently found repositories so that we skip the ones that gets synced/uploaded
while this generator runs, there are deduped/restored inline, no need to run dedupe/restore again */
if len(gen.repos) == 0 {
gen.repos, err = gen.ImgStore.GetRepositories()
if err != nil {
//nolint: dupword
gen.Log.Error().Err(err).Msg("dedupe rebuild: unable to to get list of repositories")

return nil, err
}
}

if len(gen.repos) == 0 {
gen.Log.Info().Msg("dedupe rebuild: no repositories found in storage, finished.")

// no repositories in storage, no need to continue
gen.done = true

return nil, nil
}

// get all blobs from storage.imageStore and group them by digest
gen.digest, gen.duplicateBlobs, err = gen.ImgStore.GetNextDigestWithBlobPaths(gen.lastDigests)
gen.digest, gen.duplicateBlobs, err = gen.ImgStore.GetNextDigestWithBlobPaths(gen.repos, gen.lastDigests)
if err != nil {
gen.Log.Error().Err(err).Msg("dedupe rebuild: failed to get next digest")

Expand Down
15 changes: 13 additions & 2 deletions pkg/storage/imagestore/imagestore.go
Original file line number Diff line number Diff line change
Expand Up @@ -1965,7 +1965,8 @@ func (is *ImageStore) RunGCPeriodically(interval time.Duration, sch *scheduler.S
sch.SubmitGenerator(generator, interval, scheduler.MediumPriority)
}

func (is *ImageStore) GetNextDigestWithBlobPaths(lastDigests []godigest.Digest) (godigest.Digest, []string, error) {
func (is *ImageStore) GetNextDigestWithBlobPaths(repos []string, lastDigests []godigest.Digest,
) (godigest.Digest, []string, error) {
var lockLatency time.Time

dir := is.rootDir
Expand All @@ -1983,8 +1984,18 @@ func (is *ImageStore) GetNextDigestWithBlobPaths(lastDigests []godigest.Digest)
return driver.ErrSkipDir
}

// skip blobs under .uploads
if strings.HasSuffix(fileInfo.Path(), storageConstants.BlobUploadDir) {
return driver.ErrSkipDir
}

if fileInfo.IsDir() {
return nil
// skip repositories not found in repos
repo := path.Base(fileInfo.Path())

if !zcommon.Contains(repos, repo) && repo != "blobs" && repo != "sha256" {
return driver.ErrSkipDir
}
}

blobDigest := godigest.NewDigestFromEncoded("sha256", path.Base(fileInfo.Path()))
Expand Down
11 changes: 11 additions & 0 deletions pkg/storage/local/local_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1200,6 +1200,17 @@ func TestDedupeLinks(t *testing.T) {
storageConstants.DefaultUntaggedImgeRetentionDelay, testCase.dedupe, true, log, metrics, nil, nil)
}

// run on empty image store
// switch dedupe to true from false
taskScheduler, cancel := runAndGetScheduler()

// rebuild with dedupe true
imgStore.RunDedupeBlobs(time.Duration(0), taskScheduler)
// wait until rebuild finishes
time.Sleep(1 * time.Second)

cancel()

// manifest1
upload, err := imgStore.NewBlobUpload("dedupe1")
So(err, ShouldBeNil)
Expand Down
2 changes: 1 addition & 1 deletion pkg/storage/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ type ImageStore interface { //nolint:interfacebloat
RunGCPeriodically(interval time.Duration, sch *scheduler.Scheduler)
RunDedupeBlobs(interval time.Duration, sch *scheduler.Scheduler)
RunDedupeForDigest(digest godigest.Digest, dedupe bool, duplicateBlobs []string) error
GetNextDigestWithBlobPaths(lastDigests []godigest.Digest) (godigest.Digest, []string, error)
GetNextDigestWithBlobPaths(repos []string, lastDigests []godigest.Digest) (godigest.Digest, []string, error)
GetAllBlobs(repo string) ([]string, error)
}

Expand Down
6 changes: 3 additions & 3 deletions pkg/test/mocks/image_store_mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ type MockedImageStore struct {
RunGCPeriodicallyFn func(interval time.Duration, sch *scheduler.Scheduler)
RunDedupeBlobsFn func(interval time.Duration, sch *scheduler.Scheduler)
RunDedupeForDigestFn func(digest godigest.Digest, dedupe bool, duplicateBlobs []string) error
GetNextDigestWithBlobPathsFn func(lastDigests []godigest.Digest) (godigest.Digest, []string, error)
GetNextDigestWithBlobPathsFn func(repos []string, lastDigests []godigest.Digest) (godigest.Digest, []string, error)
GetAllBlobsFn func(repo string) ([]string, error)
}

Expand Down Expand Up @@ -370,10 +370,10 @@ func (is MockedImageStore) RunDedupeForDigest(digest godigest.Digest, dedupe boo
return nil
}

func (is MockedImageStore) GetNextDigestWithBlobPaths(lastDigests []godigest.Digest,
func (is MockedImageStore) GetNextDigestWithBlobPaths(repos []string, lastDigests []godigest.Digest,
) (godigest.Digest, []string, error) {
if is.GetNextDigestWithBlobPathsFn != nil {
return is.GetNextDigestWithBlobPathsFn(lastDigests)
return is.GetNextDigestWithBlobPathsFn(repos, lastDigests)
}

return "", []string{}, nil
Expand Down

0 comments on commit 4366b6d

Please sign in to comment.