From 0d798671bf2c8cbe02b2ab95fbc46fc84b896299 Mon Sep 17 00:00:00 2001 From: Ryan Gonzalez Date: Fri, 10 Nov 2023 17:01:16 -0600 Subject: [PATCH] Split reflists to share their contents across snapshots In current aptly, each repository and snapshot has its own reflist in the database. This brings a few problems with it: - Given a sufficiently large repositories and snapshots, these lists can get enormous, reaching >1MB. This is a problem for LevelDB's overall performance, as it tends to prefer values around the confiruged block size (defaults to just 4KiB). - When you take these large repositories and snapshot them, you have a full, new copy of the reflist, even if only a few packages changed. This means that having a lot of snapshots with a few changes causes the database to basically be full of largely duplicate reflists. - All the duplication also means that many of the same refs are being loaded repeatedly, which can cause some slowdown but, more notably, eats up huge amounts of memory. - Adding on more and more new repositories and snapshots will cause the time and memory spent on things like cleanup and publishing to grow roughly linearly. At the core, there are two problems here: - Reflists get very big because there are just a lot of packages. - Different reflists can tend to duplicate much of the same contents. *Split reflists* aim at solving this by separating reflists into 64 *buckets*. Package refs are sorted into individual buckets according to the following system: - Take the first 3 letters of the package name, after dropping a `lib` prefix. (Using only the first 3 letters will cause packages with similar prefixes to end up in the same bucket, under the assumption that packages with similar names tend to be updated together.) - Take the 64-bit xxhash of these letters. (xxhash was chosen because it relatively good distribution across the individual bits, which is important for the next step.) - Use the first 6 bits of the hash (range [0:63]) as an index into the buckets. Once refs are placed in buckets, a sha256 digest of all the refs in the bucket is taken. These buckets are then stored in the database, split into roughly block-sized segments, and all the repositories and snapshots simply store an array of bucket digests. This approach means that *repositories and snapshots can share their reflist buckets*. If a snapshot is taken of a repository, it will have the same contents, so its split reflist will point to the same buckets as the base repository, and only one copy of each bucket is stored in the database. When some packages in the repository change, only the buckets containing those packages will be modified; all the other buckets will remain unchanged, and thus their contents will still be shared. Later on, when these reflists are loaded, each bucket is only loaded once, short-cutting loaded many megabytes of data. In effect, split reflists are essentially copy-on-write, with only the changed buckets stored individually. Changing the disk format means that a migration needs to take place, so that task is moved into the database cleanup step, which will migrate reflists over to split reflists, as well as delete any unused reflist buckets. All the reflist tests are also changed to additionally test out split reflists; although the internal logic is all shared (since buckets are, themselves, just normal reflists), some special additions are needed to have native versions of the various reflist helper methods. In our tests, we've observed the following improvements: - Memory usage during publish and database cleanup, with `GOMEMLIMIT=2GiB`, goes down from ~3.2GiB (larger than the memory limit!) to ~0.7GiB, a decrease of ~4.5x. - Database size decreases from 1.3GB to 367MB. *In my local tests*, publish times had also decreased down to mere seconds but the same effect wasn't observed on the server, with the times staying around the same. My suspicions are that this is due to I/O performance: my local system is an M1 MBP, which almost certainly has much faster disk speeds than our DigitalOcean block volumes. Split reflists include a side effect of requiring more random accesses from reading all the buckets by their keys, so if your random I/O performance is slower, it might cancel out the benefits. That being said, even in that case, the memory usage and database size advantages still persist. Signed-off-by: Ryan Gonzalez --- api/api.go | 2 +- api/db.go | 81 ++- api/metrics.go | 2 +- api/mirror.go | 12 +- api/publish.go | 10 +- api/repos.go | 20 +- api/snapshot.go | 30 +- cmd/cmd.go | 2 +- cmd/db_cleanup.go | 128 +++-- cmd/mirror_create.go | 2 +- cmd/mirror_edit.go | 2 +- cmd/mirror_rename.go | 2 +- cmd/mirror_show.go | 9 +- cmd/mirror_update.go | 8 +- cmd/package_show.go | 6 +- cmd/publish_snapshot.go | 6 +- cmd/publish_switch.go | 4 +- cmd/publish_update.go | 2 +- cmd/repo_add.go | 6 +- cmd/repo_create.go | 4 +- cmd/repo_edit.go | 4 +- cmd/repo_include.go | 2 +- cmd/repo_list.go | 5 +- cmd/repo_move.go | 16 +- cmd/repo_remove.go | 6 +- cmd/repo_rename.go | 2 +- cmd/repo_show.go | 9 +- cmd/snapshot_create.go | 6 +- cmd/snapshot_diff.go | 6 +- cmd/snapshot_filter.go | 4 +- cmd/snapshot_merge.go | 4 +- cmd/snapshot_pull.go | 6 +- cmd/snapshot_rename.go | 2 +- cmd/snapshot_search.go | 8 +- cmd/snapshot_show.go | 13 +- cmd/snapshot_verify.go | 2 +- database/database.go | 2 + database/goleveldb/database.go | 3 + database/goleveldb/storage.go | 11 + deb/changes.go | 9 +- deb/changes_test.go | 12 +- deb/collections.go | 12 + deb/graph.go | 6 +- deb/list.go | 2 +- deb/local.go | 28 +- deb/local_test.go | 34 +- deb/publish.go | 89 ++-- deb/publish_bench_test.go | 7 +- deb/publish_test.go | 84 ++-- deb/reflist.go | 773 +++++++++++++++++++++++++++- deb/reflist_bench_test.go | 38 ++ deb/reflist_test.go | 890 ++++++++++++++++++++++----------- deb/remote.go | 26 +- deb/remote_test.go | 44 +- deb/snapshot.go | 34 +- deb/snapshot_bench_test.go | 15 +- deb/snapshot_test.go | 50 +- go.mod | 2 +- 58 files changed, 1944 insertions(+), 660 deletions(-) diff --git a/api/api.go b/api/api.go index c15be6e2d2..f931be7dc0 100644 --- a/api/api.go +++ b/api/api.go @@ -181,7 +181,7 @@ func maybeRunTaskInBackground(c *gin.Context, name string, resources []string, p // Common piece of code to show list of packages, // with searching & details if requested -func showPackages(c *gin.Context, reflist *deb.PackageRefList, collectionFactory *deb.CollectionFactory) { +func showPackages(c *gin.Context, reflist deb.AnyRefList, collectionFactory *deb.CollectionFactory) { result := []*deb.Package{} list, err := deb.NewPackageListFromRefList(reflist, collectionFactory.PackageCollection(), nil) diff --git a/api/db.go b/api/db.go index 3f8b826ddc..8869fbd991 100644 --- a/api/db.go +++ b/api/db.go @@ -5,6 +5,7 @@ import ( "sort" "github.com/aptly-dev/aptly/aptly" + "github.com/aptly-dev/aptly/database" "github.com/aptly-dev/aptly/deb" "github.com/aptly-dev/aptly/task" "github.com/aptly-dev/aptly/utils" @@ -20,18 +21,22 @@ func apiDbCleanup(c *gin.Context) { collectionFactory := context.NewCollectionFactory() - // collect information about referenced packages... - existingPackageRefs := deb.NewPackageRefList() + // collect information about referenced packages and their reflist buckets... + existingPackageRefs := deb.NewSplitRefList() + existingBuckets := deb.NewRefListDigestSet() + + reflistMigration := collectionFactory.RefListCollection().NewMigration() out.Printf("Loading mirrors, local repos, snapshots and published repos...") err = collectionFactory.RemoteRepoCollection().ForEach(func(repo *deb.RemoteRepo) error { - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) - } + + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) return nil }) @@ -40,14 +45,14 @@ func apiDbCleanup(c *gin.Context) { } err = collectionFactory.LocalRepoCollection().ForEach(func(repo *deb.LocalRepo) error { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) - } + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) return nil }) @@ -56,12 +61,14 @@ func apiDbCleanup(c *gin.Context) { } err = collectionFactory.SnapshotCollection().ForEach(func(snapshot *deb.Snapshot) error { - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, snapshot.RefKey(), reflistMigration) if e != nil { return e } - existingPackageRefs = existingPackageRefs.Merge(snapshot.RefList(), false, true) + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) return nil }) @@ -73,13 +80,16 @@ func apiDbCleanup(c *gin.Context) { if published.SourceKind != deb.SourceLocalRepo { return nil } - e := collectionFactory.PublishedRepoCollection().LoadComplete(published, collectionFactory) - if e != nil { - return e - } for _, component := range published.Components() { - existingPackageRefs = existingPackageRefs.Merge(published.RefList(component), false, true) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, published.RefKey(component), reflistMigration) + if e != nil { + return e + } + + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) } return nil }) @@ -87,11 +97,20 @@ func apiDbCleanup(c *gin.Context) { return nil, err } + err = reflistMigration.Flush() + if err != nil { + return nil, err + } + if stats := reflistMigration.Stats(); stats.Reflists > 0 { + out.Printf("Split %d reflist(s) into %d bucket(s) (%d segment(s))", + stats.Reflists, stats.Buckets, stats.Segments) + } + // ... and compare it to the list of all packages out.Printf("Loading list of all packages...") allPackageRefs := collectionFactory.PackageCollection().AllPackageRefs() - toDelete := allPackageRefs.Subtract(existingPackageRefs) + toDelete := allPackageRefs.Subtract(existingPackageRefs.Flatten()) // delete packages that are no longer referenced out.Printf("Deleting unreferenced packages (%d)...", toDelete.Len()) @@ -112,6 +131,28 @@ func apiDbCleanup(c *gin.Context) { } } + bucketsToDelete, err := collectionFactory.RefListCollection().AllBucketDigests() + if err != nil { + return nil, err + } + + bucketsToDelete.RemoveAll(existingBuckets) + + out.Printf("Deleting unreferenced reflist buckets (%d)...", bucketsToDelete.Len()) + if bucketsToDelete.Len() > 0 { + batch := db.CreateBatch() + err := bucketsToDelete.ForEach(func(digest []byte) error { + return collectionFactory.RefListCollection().UnsafeDropBucket(digest, batch) + }) + if err != nil { + return nil, err + } + + if err := batch.Write(); err != nil { + return nil, err + } + } + // now, build a list of files that should be present in Repository (package pool) out.Printf("Building list of files referenced by packages...") referencedFiles := make([]string, 0, existingPackageRefs.Len()) diff --git a/api/metrics.go b/api/metrics.go index 94a9dc2525..875c3c196d 100644 --- a/api/metrics.go +++ b/api/metrics.go @@ -102,7 +102,7 @@ func countPackagesByRepos() { components := repo.Components() for _, c := range components { - count := float64(len(repo.RefList(c).Refs)) + count := float64(repo.RefList(c).Len()) apiReposPackageCountGauge.WithLabelValues(fmt.Sprintf("%s", (repo.SourceNames())), repo.Distribution, c).Set(count) } diff --git a/api/mirror.go b/api/mirror.go index 70ff715650..798cd750da 100644 --- a/api/mirror.go +++ b/api/mirror.go @@ -121,7 +121,7 @@ func apiMirrorsCreate(c *gin.Context) { return } - err = collection.Add(repo) + err = collection.Add(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to add mirror: %s", err)) return @@ -181,7 +181,7 @@ func apiMirrorsShow(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to show: %s", err)) } @@ -201,7 +201,7 @@ func apiMirrorsPackages(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to show: %s", err)) } @@ -395,12 +395,12 @@ func apiMirrorsUpdate(c *gin.Context) { e := context.ReOpenDatabase() if e == nil { remote.MarkAsIdle() - collection.Update(remote) + collection.Update(remote, collectionFactory.RefListCollection()) } }() remote.MarkAsUpdating() - err = collection.Update(remote) + err = collection.Update(remote, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to update: %s", err) } @@ -545,7 +545,7 @@ func apiMirrorsUpdate(c *gin.Context) { log.Info().Msgf("%s: Finalizing download\n", b.Name) remote.FinalizeDownload(collectionFactory, out) - err = collectionFactory.RemoteRepoCollection().Update(remote) + err = collectionFactory.RemoteRepoCollection().Update(remote, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to update: %s", err) } diff --git a/api/publish.go b/api/publish.go index 1cd0f009ee..f646e6ca2f 100644 --- a/api/publish.go +++ b/api/publish.go @@ -140,7 +140,7 @@ func apiPublishRepoOrSnapshot(c *gin.Context) { } resources = append(resources, string(snapshot.ResourceKey())) - err = snapshotCollection.LoadComplete(snapshot) + err = snapshotCollection.LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to publish: %s", err)) return @@ -164,7 +164,7 @@ func apiPublishRepoOrSnapshot(c *gin.Context) { } resources = append(resources, string(localRepo.Key())) - err = localCollection.LoadComplete(localRepo) + err = localCollection.LoadComplete(localRepo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to publish: %s", err)) } @@ -231,7 +231,7 @@ func apiPublishRepoOrSnapshot(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to publish: %s", err) } - err = collection.Add(published) + err = collection.Add(published, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save to DB: %s", err) } @@ -311,7 +311,7 @@ func apiPublishUpdateSwitch(c *gin.Context) { return } - err2 = snapshotCollection.LoadComplete(snapshot) + err2 = snapshotCollection.LoadComplete(snapshot, collectionFactory.RefListCollection()) if err2 != nil { AbortWithJSONError(c, 500, err2) return @@ -346,7 +346,7 @@ func apiPublishUpdateSwitch(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to update: %s", err) } - err = collection.Update(published) + err = collection.Update(published, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save to DB: %s", err) } diff --git a/api/repos.go b/api/repos.go index fe6206d36e..ca5d62f8c1 100644 --- a/api/repos.go +++ b/api/repos.go @@ -82,7 +82,7 @@ func apiReposCreate(c *gin.Context) { collectionFactory := context.NewCollectionFactory() collection := collectionFactory.LocalRepoCollection() - err := collection.Add(repo) + err := collection.Add(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 400, err) return @@ -132,7 +132,7 @@ func apiReposEdit(c *gin.Context) { repo.DefaultComponent = *b.DefaultComponent } - err = collection.Update(repo) + err = collection.Update(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -201,7 +201,7 @@ func apiReposPackagesShow(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -229,7 +229,7 @@ func apiReposPackagesAddDelete(c *gin.Context, taskNamePrefix string, cb func(li return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -261,9 +261,9 @@ func apiReposPackagesAddDelete(c *gin.Context, taskNamePrefix string, cb func(li } } - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save: %s", err) } @@ -320,7 +320,7 @@ func apiReposPackageFromDir(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -369,9 +369,9 @@ func apiReposPackageFromDir(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to import package files: %s", err) } - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save: %s", err) } @@ -489,7 +489,7 @@ func apiReposIncludePackageFromDir(c *gin.Context) { _, failedFiles2, err = deb.ImportChangesFiles( changesFiles, reporter, acceptUnsigned, ignoreSignature, forceReplace, noRemoveFiles, verifier, repoTemplate, context.Progress(), collectionFactory.LocalRepoCollection(), collectionFactory.PackageCollection(), - context.PackagePool(), collectionFactory.ChecksumCollection, nil, query.Parse) + collectionFactory.RefListCollection(), context.PackagePool(), collectionFactory.ChecksumCollection, nil, query.Parse) failedFiles = append(failedFiles, failedFiles2...) if err != nil { diff --git a/api/snapshot.go b/api/snapshot.go index af90522113..a58fe53b2b 100644 --- a/api/snapshot.go +++ b/api/snapshot.go @@ -69,7 +69,7 @@ func apiSnapshotsCreateFromMirror(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusConflict, Value: nil}, err } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -83,7 +83,7 @@ func apiSnapshotsCreateFromMirror(c *gin.Context) { snapshot.Description = b.Description } - err = snapshotCollection.Add(snapshot) + err = snapshotCollection.Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusBadRequest, Value: nil}, err } @@ -128,7 +128,7 @@ func apiSnapshotsCreate(c *gin.Context) { return } - err = snapshotCollection.LoadComplete(sources[i]) + err = snapshotCollection.LoadComplete(sources[i], collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -155,9 +155,9 @@ func apiSnapshotsCreate(c *gin.Context) { } } - snapshot = deb.NewSnapshotFromRefList(b.Name, sources, deb.NewPackageRefListFromPackageList(list), b.Description) + snapshot = deb.NewSnapshotFromRefList(b.Name, sources, deb.NewSplitRefListFromPackageList(list), b.Description) - err = snapshotCollection.Add(snapshot) + err = snapshotCollection.Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusBadRequest, Value: nil}, err } @@ -197,7 +197,7 @@ func apiSnapshotsCreateFromRepository(c *gin.Context) { resources := []string{string(repo.Key()), "S" + b.Name} taskName := fmt.Sprintf("Create snapshot of repo %s", name) maybeRunTaskInBackground(c, taskName, resources, func(out aptly.Progress, detail *task.Detail) (*task.ProcessReturnValue, error) { - err := collection.LoadComplete(repo) + err := collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -211,7 +211,7 @@ func apiSnapshotsCreateFromRepository(c *gin.Context) { snapshot.Description = b.Description } - err = snapshotCollection.Add(snapshot) + err = snapshotCollection.Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusBadRequest, Value: nil}, err } @@ -261,7 +261,7 @@ func apiSnapshotsUpdate(c *gin.Context) { snapshot.Description = b.Description } - err = collectionFactory.SnapshotCollection().Update(snapshot) + err = collectionFactory.SnapshotCollection().Update(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -280,7 +280,7 @@ func apiSnapshotsShow(c *gin.Context) { return } - err = collection.LoadComplete(snapshot) + err = collection.LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -347,20 +347,20 @@ func apiSnapshotsDiff(c *gin.Context) { return } - err = collection.LoadComplete(snapshotA) + err = collection.LoadComplete(snapshotA, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return } - err = collection.LoadComplete(snapshotB) + err = collection.LoadComplete(snapshotB, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return } // Calculate diff - diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection()) + diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection(), nil) if err != nil { AbortWithJSONError(c, 500, err) return @@ -390,7 +390,7 @@ func apiSnapshotsSearchPackages(c *gin.Context) { return } - err = collection.LoadComplete(snapshot) + err = collection.LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -441,7 +441,7 @@ func apiSnapshotsMerge(c *gin.Context) { return } - err = snapshotCollection.LoadComplete(sources[i]) + err = snapshotCollection.LoadComplete(sources[i], collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, http.StatusInternalServerError, err) return @@ -467,7 +467,7 @@ func apiSnapshotsMerge(c *gin.Context) { snapshot = deb.NewSnapshotFromRefList(body.Destination, sources, result, fmt.Sprintf("Merged from sources: %s", strings.Join(sourceDescription, ", "))) - err = collectionFactory.SnapshotCollection().Add(snapshot) + err = collectionFactory.SnapshotCollection().Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/cmd.go b/cmd/cmd.go index 14a0efd166..b70bf145c8 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -21,7 +21,7 @@ const ( ) // ListPackagesRefList shows list of packages in PackageRefList -func ListPackagesRefList(reflist *deb.PackageRefList, collectionFactory *deb.CollectionFactory) (err error) { +func ListPackagesRefList(reflist deb.AnyRefList, collectionFactory *deb.CollectionFactory) (err error) { fmt.Printf("Packages:\n") if reflist == nil { diff --git a/cmd/db_cleanup.go b/cmd/db_cleanup.go index 66fece677a..c63211be04 100644 --- a/cmd/db_cleanup.go +++ b/cmd/db_cleanup.go @@ -6,6 +6,7 @@ import ( "strings" "github.com/aptly-dev/aptly/aptly" + "github.com/aptly-dev/aptly/database" "github.com/aptly-dev/aptly/deb" "github.com/aptly-dev/aptly/utils" "github.com/smira/commander" @@ -24,12 +25,20 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { dryRun := context.Flags().Lookup("dry-run").Value.Get().(bool) collectionFactory := context.NewCollectionFactory() - // collect information about references packages... - existingPackageRefs := deb.NewPackageRefList() + // collect information about references packages and their reflistbuckets... + existingPackageRefs := deb.NewSplitRefList() + existingBuckets := deb.NewRefListDigestSet() // used only in verbose mode to report package use source packageRefSources := map[string][]string{} + var reflistMigration *deb.RefListMigration + if !dryRun { + reflistMigration = collectionFactory.RefListCollection().NewMigration() + } else { + reflistMigration = collectionFactory.RefListCollection().NewMigrationDryRun() + } + context.Progress().ColoredPrintf("@{w!}Loading mirrors, local repos, snapshots and published repos...@|") if verbose { context.Progress().ColoredPrintf("@{y}Loading mirrors:@|") @@ -39,20 +48,21 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { context.Progress().ColoredPrintf("- @{g}%s@|", repo.Name) } - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) - if verbose { - description := fmt.Sprintf("mirror %s", repo.Name) - repo.RefList().ForEach(func(key []byte) error { - packageRefSources[string(key)] = append(packageRefSources[string(key)], description) - return nil - }) - } + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) + + if verbose { + description := fmt.Sprintf("mirror %s", repo.Name) + sl.ForEach(func(key []byte) error { + packageRefSources[string(key)] = append(packageRefSources[string(key)], description) + return nil + }) } return nil @@ -71,21 +81,23 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { context.Progress().ColoredPrintf("- @{g}%s@|", repo.Name) } - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) - if verbose { - description := fmt.Sprintf("local repo %s", repo.Name) - repo.RefList().ForEach(func(key []byte) error { - packageRefSources[string(key)] = append(packageRefSources[string(key)], description) - return nil - }) - } + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + + if verbose { + description := fmt.Sprintf("local repo %s", repo.Name) + sl.ForEach(func(key []byte) error { + packageRefSources[string(key)] = append(packageRefSources[string(key)], description) + return nil + }) } return nil @@ -104,16 +116,18 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { context.Progress().ColoredPrintf("- @{g}%s@|", snapshot.Name) } - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, snapshot.RefKey(), reflistMigration) if e != nil { return e } - existingPackageRefs = existingPackageRefs.Merge(snapshot.RefList(), false, true) + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) if verbose { description := fmt.Sprintf("snapshot %s", snapshot.Name) - snapshot.RefList().ForEach(func(key []byte) error { + sl.ForEach(func(key []byte) error { packageRefSources[string(key)] = append(packageRefSources[string(key)], description) return nil }) @@ -136,17 +150,21 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { if published.SourceKind != deb.SourceLocalRepo { return nil } - e := collectionFactory.PublishedRepoCollection().LoadComplete(published, collectionFactory) - if e != nil { - return e - } for _, component := range published.Components() { - existingPackageRefs = existingPackageRefs.Merge(published.RefList(component), false, true) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, published.RefKey(component), reflistMigration) + if e != nil { + return e + } + + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) + if verbose { description := fmt.Sprintf("published repository %s:%s/%s component %s", published.Storage, published.Prefix, published.Distribution, component) - published.RefList(component).ForEach(func(key []byte) error { + sl.ForEach(func(key []byte) error { packageRefSources[string(key)] = append(packageRefSources[string(key)], description) return nil }) @@ -160,11 +178,27 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { collectionFactory.Flush() + err = reflistMigration.Flush() + if err != nil { + return err + } + + if stats := reflistMigration.Stats(); stats.Reflists > 0 { + if !dryRun { + context.Progress().ColoredPrintf("@{w!}Split %d reflist(s) into %d bucket(s) (%d segment(s)) @|", + stats.Reflists, stats.Buckets, stats.Segments) + } else { + context.Progress().ColoredPrintf( + "@{y!}Skipped splitting %d reflist(s) into %d bucket(s) (%d segment(s)), as -dry-run has been requested.@|", + stats.Reflists, stats.Buckets, stats.Segments) + } + } + // ... and compare it to the list of all packages context.Progress().ColoredPrintf("@{w!}Loading list of all packages...@|") allPackageRefs := collectionFactory.PackageCollection().AllPackageRefs() - toDelete := allPackageRefs.Subtract(existingPackageRefs) + toDelete := allPackageRefs.Subtract(existingPackageRefs.Flatten()) // delete packages that are no longer referenced context.Progress().ColoredPrintf("@{r!}Deleting unreferenced packages (%d)...@|", toDelete.Len()) @@ -202,6 +236,32 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { } } + bucketsToDelete, err := collectionFactory.RefListCollection().AllBucketDigests() + if err != nil { + return err + } + + bucketsToDelete.RemoveAll(existingBuckets) + + context.Progress().ColoredPrintf("@{r!}Deleting unreferenced reflist buckets (%d)...@|", bucketsToDelete.Len()) + if bucketsToDelete.Len() > 0 { + if !dryRun { + batch := db.CreateBatch() + err := bucketsToDelete.ForEach(func(digest []byte) error { + return collectionFactory.RefListCollection().UnsafeDropBucket(digest, batch) + }) + if err != nil { + return err + } + + if err := batch.Write(); err != nil { + return err + } + } else { + context.Progress().ColoredPrintf("@{y!}Skipped reflist deletion, as -dry-run has been requested.@|") + } + } + collectionFactory.Flush() // now, build a list of files that should be present in Repository (package pool) diff --git a/cmd/mirror_create.go b/cmd/mirror_create.go index 78d91b5823..eb2ba7cab8 100644 --- a/cmd/mirror_create.go +++ b/cmd/mirror_create.go @@ -65,7 +65,7 @@ func aptlyMirrorCreate(cmd *commander.Command, args []string) error { } collectionFactory := context.NewCollectionFactory() - err = collectionFactory.RemoteRepoCollection().Add(repo) + err = collectionFactory.RemoteRepoCollection().Add(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add mirror: %s", err) } diff --git a/cmd/mirror_edit.go b/cmd/mirror_edit.go index 86462c4c0a..93986c0623 100644 --- a/cmd/mirror_edit.go +++ b/cmd/mirror_edit.go @@ -75,7 +75,7 @@ func aptlyMirrorEdit(cmd *commander.Command, args []string) error { } } - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to edit: %s", err) } diff --git a/cmd/mirror_rename.go b/cmd/mirror_rename.go index 2ff9f92041..ff453b857f 100644 --- a/cmd/mirror_rename.go +++ b/cmd/mirror_rename.go @@ -37,7 +37,7 @@ func aptlyMirrorRename(cmd *commander.Command, args []string) error { } repo.Name = newName - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to rename: %s", err) } diff --git a/cmd/mirror_show.go b/cmd/mirror_show.go index 03179161a3..3c52d6e37d 100644 --- a/cmd/mirror_show.go +++ b/cmd/mirror_show.go @@ -38,7 +38,7 @@ func aptlyMirrorShowTxt(_ *commander.Command, args []string) error { return fmt.Errorf("unable to show: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -98,12 +98,13 @@ func aptlyMirrorShowJSON(_ *commander.Command, args []string) error { name := args[0] - repo, err := context.NewCollectionFactory().RemoteRepoCollection().ByName(name) + collectionFactory := context.NewCollectionFactory() + repo, err := collectionFactory.RemoteRepoCollection().ByName(name) if err != nil { return fmt.Errorf("unable to show: %s", err) } - err = context.NewCollectionFactory().RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -113,7 +114,7 @@ func aptlyMirrorShowJSON(_ *commander.Command, args []string) error { if withPackages { if repo.RefList() != nil { var list *deb.PackageList - list, err = deb.NewPackageListFromRefList(repo.RefList(), context.NewCollectionFactory().PackageCollection(), context.Progress()) + list, err = deb.NewPackageListFromRefList(repo.RefList(), collectionFactory.PackageCollection(), context.Progress()) if err != nil { return fmt.Errorf("unable to get package list: %s", err) } diff --git a/cmd/mirror_update.go b/cmd/mirror_update.go index 111590bbd6..d27ffe43a3 100644 --- a/cmd/mirror_update.go +++ b/cmd/mirror_update.go @@ -28,7 +28,7 @@ func aptlyMirrorUpdate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to update: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to update: %s", err) } @@ -96,12 +96,12 @@ func aptlyMirrorUpdate(cmd *commander.Command, args []string) error { err = context.ReOpenDatabase() if err == nil { repo.MarkAsIdle() - collectionFactory.RemoteRepoCollection().Update(repo) + collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) } }() repo.MarkAsUpdating() - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to update: %s", err) } @@ -236,7 +236,7 @@ func aptlyMirrorUpdate(cmd *commander.Command, args []string) error { } repo.FinalizeDownload(collectionFactory, context.Progress()) - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to update: %s", err) } diff --git a/cmd/package_show.go b/cmd/package_show.go index 37f07e9b04..6bbec98496 100644 --- a/cmd/package_show.go +++ b/cmd/package_show.go @@ -14,7 +14,7 @@ import ( func printReferencesTo(p *deb.Package, collectionFactory *deb.CollectionFactory) (err error) { err = collectionFactory.RemoteRepoCollection().ForEach(func(repo *deb.RemoteRepo) error { - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) + e := collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -30,7 +30,7 @@ func printReferencesTo(p *deb.Package, collectionFactory *deb.CollectionFactory) } err = collectionFactory.LocalRepoCollection().ForEach(func(repo *deb.LocalRepo) error { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -46,7 +46,7 @@ func printReferencesTo(p *deb.Package, collectionFactory *deb.CollectionFactory) } err = collectionFactory.SnapshotCollection().ForEach(func(snapshot *deb.Snapshot) error { - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + e := collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if e != nil { return e } diff --git a/cmd/publish_snapshot.go b/cmd/publish_snapshot.go index 322479aacc..a1e89ea199 100644 --- a/cmd/publish_snapshot.go +++ b/cmd/publish_snapshot.go @@ -49,7 +49,7 @@ func aptlyPublishSnapshotOrRepo(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to publish: %s", err) } @@ -85,7 +85,7 @@ func aptlyPublishSnapshotOrRepo(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(localRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(localRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to publish: %s", err) } @@ -171,7 +171,7 @@ func aptlyPublishSnapshotOrRepo(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.PublishedRepoCollection().Add(published) + err = collectionFactory.PublishedRepoCollection().Add(published, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save to DB: %s", err) } diff --git a/cmd/publish_switch.go b/cmd/publish_switch.go index 0784fba3fb..db98ed4da1 100644 --- a/cmd/publish_switch.go +++ b/cmd/publish_switch.go @@ -73,7 +73,7 @@ func aptlyPublishSwitch(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to switch: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to switch: %s", err) } @@ -105,7 +105,7 @@ func aptlyPublishSwitch(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.PublishedRepoCollection().Update(published) + err = collectionFactory.PublishedRepoCollection().Update(published, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save to DB: %s", err) } diff --git a/cmd/publish_update.go b/cmd/publish_update.go index fcdea8ed65..3e043666d3 100644 --- a/cmd/publish_update.go +++ b/cmd/publish_update.go @@ -69,7 +69,7 @@ func aptlyPublishUpdate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.PublishedRepoCollection().Update(published) + err = collectionFactory.PublishedRepoCollection().Update(published, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save to DB: %s", err) } diff --git a/cmd/repo_add.go b/cmd/repo_add.go index 8189e78342..0263879f31 100644 --- a/cmd/repo_add.go +++ b/cmd/repo_add.go @@ -28,7 +28,7 @@ func aptlyRepoAdd(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to add: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add: %s", err) } @@ -58,9 +58,9 @@ func aptlyRepoAdd(cmd *commander.Command, args []string) error { processedFiles = append(processedFiles, otherFiles...) - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } diff --git a/cmd/repo_create.go b/cmd/repo_create.go index 5fef46d922..0e3a1e52b2 100644 --- a/cmd/repo_create.go +++ b/cmd/repo_create.go @@ -36,7 +36,7 @@ func aptlyRepoCreate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load source snapshot: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load source snapshot: %s", err) } @@ -44,7 +44,7 @@ func aptlyRepoCreate(cmd *commander.Command, args []string) error { repo.UpdateRefList(snapshot.RefList()) } - err = collectionFactory.LocalRepoCollection().Add(repo) + err = collectionFactory.LocalRepoCollection().Add(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add local repo: %s", err) } diff --git a/cmd/repo_edit.go b/cmd/repo_edit.go index bc81dc4a12..c7fbc41964 100644 --- a/cmd/repo_edit.go +++ b/cmd/repo_edit.go @@ -22,7 +22,7 @@ func aptlyRepoEdit(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to edit: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to edit: %s", err) } @@ -53,7 +53,7 @@ func aptlyRepoEdit(cmd *commander.Command, args []string) error { } } - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to edit: %s", err) } diff --git a/cmd/repo_include.go b/cmd/repo_include.go index b84b96a389..4a750db1f9 100644 --- a/cmd/repo_include.go +++ b/cmd/repo_include.go @@ -64,7 +64,7 @@ func aptlyRepoInclude(cmd *commander.Command, args []string) error { _, failedFiles2, err = deb.ImportChangesFiles( changesFiles, reporter, acceptUnsigned, ignoreSignatures, forceReplace, noRemoveFiles, verifier, repoTemplate, context.Progress(), collectionFactory.LocalRepoCollection(), collectionFactory.PackageCollection(), - context.PackagePool(), collectionFactory.ChecksumCollection, + collectionFactory.RefListCollection(), context.PackagePool(), collectionFactory.ChecksumCollection, uploaders, query.Parse) failedFiles = append(failedFiles, failedFiles2...) diff --git a/cmd/repo_list.go b/cmd/repo_list.go index 9c4b0d47eb..f3ca4a8bae 100644 --- a/cmd/repo_list.go +++ b/cmd/repo_list.go @@ -36,7 +36,7 @@ func aptlyRepoListTxt(cmd *commander.Command, _ []string) error { if raw { repos[i] = repo.Name } else { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -77,7 +77,8 @@ func aptlyRepoListJSON(_ *commander.Command, _ []string) error { repos := make([]*deb.LocalRepo, context.NewCollectionFactory().LocalRepoCollection().Len()) i := 0 context.NewCollectionFactory().LocalRepoCollection().ForEach(func(repo *deb.LocalRepo) error { - e := context.NewCollectionFactory().LocalRepoCollection().LoadComplete(repo) + collectionFactory := context.NewCollectionFactory() + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } diff --git a/cmd/repo_move.go b/cmd/repo_move.go index 8be6698b6b..004b4af0a9 100644 --- a/cmd/repo_move.go +++ b/cmd/repo_move.go @@ -25,13 +25,13 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to %s: %s", command, err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(dstRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(dstRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to %s: %s", command, err) } var ( - srcRefList *deb.PackageRefList + srcRefList *deb.SplitRefList srcRepo *deb.LocalRepo ) @@ -45,7 +45,7 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to %s: source and destination are the same", command) } - err = collectionFactory.LocalRepoCollection().LoadComplete(srcRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(srcRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to %s: %s", command, err) } @@ -59,7 +59,7 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to %s: %s", command, err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(srcRemoteRepo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(srcRemoteRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to %s: %s", command, err) } @@ -150,17 +150,17 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { if context.Flags().Lookup("dry-run").Value.Get().(bool) { context.Progress().Printf("\nChanges not saved, as dry run has been requested.\n") } else { - dstRepo.UpdateRefList(deb.NewPackageRefListFromPackageList(dstList)) + dstRepo.UpdateRefList(deb.NewSplitRefListFromPackageList(dstList)) - err = collectionFactory.LocalRepoCollection().Update(dstRepo) + err = collectionFactory.LocalRepoCollection().Update(dstRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } if command == "move" { // nolint: goconst - srcRepo.UpdateRefList(deb.NewPackageRefListFromPackageList(srcList)) + srcRepo.UpdateRefList(deb.NewSplitRefListFromPackageList(srcList)) - err = collectionFactory.LocalRepoCollection().Update(srcRepo) + err = collectionFactory.LocalRepoCollection().Update(srcRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } diff --git a/cmd/repo_remove.go b/cmd/repo_remove.go index 93e8535c71..d3a1159ee3 100644 --- a/cmd/repo_remove.go +++ b/cmd/repo_remove.go @@ -24,7 +24,7 @@ func aptlyRepoRemove(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to remove: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to remove: %s", err) } @@ -59,9 +59,9 @@ func aptlyRepoRemove(cmd *commander.Command, args []string) error { if context.Flags().Lookup("dry-run").Value.Get().(bool) { context.Progress().Printf("\nChanges not saved, as dry run has been requested.\n") } else { - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } diff --git a/cmd/repo_rename.go b/cmd/repo_rename.go index 9234b7c722..459afcbbb3 100644 --- a/cmd/repo_rename.go +++ b/cmd/repo_rename.go @@ -32,7 +32,7 @@ func aptlyRepoRename(cmd *commander.Command, args []string) error { } repo.Name = newName - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to rename: %s", err) } diff --git a/cmd/repo_show.go b/cmd/repo_show.go index a61a5f1f92..741915d123 100644 --- a/cmd/repo_show.go +++ b/cmd/repo_show.go @@ -36,7 +36,7 @@ func aptlyRepoShowTxt(_ *commander.Command, args []string) error { return fmt.Errorf("unable to show: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -63,12 +63,13 @@ func aptlyRepoShowJSON(_ *commander.Command, args []string) error { name := args[0] - repo, err := context.NewCollectionFactory().LocalRepoCollection().ByName(name) + collectionFactory := context.NewCollectionFactory() + repo, err := collectionFactory.LocalRepoCollection().ByName(name) if err != nil { return fmt.Errorf("unable to show: %s", err) } - err = context.NewCollectionFactory().LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -79,7 +80,7 @@ func aptlyRepoShowJSON(_ *commander.Command, args []string) error { if withPackages { if repo.RefList() != nil { var list *deb.PackageList - list, err = deb.NewPackageListFromRefList(repo.RefList(), context.NewCollectionFactory().PackageCollection(), context.Progress()) + list, err = deb.NewPackageListFromRefList(repo.RefList(), collectionFactory.PackageCollection(), context.Progress()) if err == nil { packageList = list.FullNames() } diff --git a/cmd/snapshot_create.go b/cmd/snapshot_create.go index 000a78d9b9..6bc319de3a 100644 --- a/cmd/snapshot_create.go +++ b/cmd/snapshot_create.go @@ -30,7 +30,7 @@ func aptlySnapshotCreate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to create snapshot: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } @@ -50,7 +50,7 @@ func aptlySnapshotCreate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to create snapshot: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } @@ -71,7 +71,7 @@ func aptlySnapshotCreate(cmd *commander.Command, args []string) error { return commander.ErrCommandError } - err = collectionFactory.SnapshotCollection().Add(snapshot) + err = collectionFactory.SnapshotCollection().Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add snapshot: %s", err) } diff --git a/cmd/snapshot_diff.go b/cmd/snapshot_diff.go index ccbea32ee0..19da7fcc99 100644 --- a/cmd/snapshot_diff.go +++ b/cmd/snapshot_diff.go @@ -23,7 +23,7 @@ func aptlySnapshotDiff(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load snapshot A: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshotA) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshotA, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load snapshot A: %s", err) } @@ -34,13 +34,13 @@ func aptlySnapshotDiff(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load snapshot B: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshotB) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshotB, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load snapshot B: %s", err) } // Calculate diff - diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection()) + diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection(), nil) if err != nil { return fmt.Errorf("unable to calculate diff: %s", err) } diff --git a/cmd/snapshot_filter.go b/cmd/snapshot_filter.go index b81a9cfc23..5aed03b95b 100644 --- a/cmd/snapshot_filter.go +++ b/cmd/snapshot_filter.go @@ -27,7 +27,7 @@ func aptlySnapshotFilter(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to filter: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(source) + err = collectionFactory.SnapshotCollection().LoadComplete(source, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to filter: %s", err) } @@ -76,7 +76,7 @@ func aptlySnapshotFilter(cmd *commander.Command, args []string) error { destination := deb.NewSnapshotFromPackageList(args[1], []*deb.Snapshot{source}, result, fmt.Sprintf("Filtered '%s', query was: '%s'", source.Name, strings.Join(args[2:], " "))) - err = collectionFactory.SnapshotCollection().Add(destination) + err = collectionFactory.SnapshotCollection().Add(destination, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/snapshot_merge.go b/cmd/snapshot_merge.go index 0a319a5ab2..e9eb0c7793 100644 --- a/cmd/snapshot_merge.go +++ b/cmd/snapshot_merge.go @@ -24,7 +24,7 @@ func aptlySnapshotMerge(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load snapshot: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(sources[i]) + err = collectionFactory.SnapshotCollection().LoadComplete(sources[i], collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load snapshot: %s", err) } @@ -57,7 +57,7 @@ func aptlySnapshotMerge(cmd *commander.Command, args []string) error { destination := deb.NewSnapshotFromRefList(args[0], sources, result, fmt.Sprintf("Merged from sources: %s", strings.Join(sourceDescription, ", "))) - err = collectionFactory.SnapshotCollection().Add(destination) + err = collectionFactory.SnapshotCollection().Add(destination, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/snapshot_pull.go b/cmd/snapshot_pull.go index 884b50ffb2..50e8488b84 100644 --- a/cmd/snapshot_pull.go +++ b/cmd/snapshot_pull.go @@ -29,7 +29,7 @@ func aptlySnapshotPull(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to pull: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to pull: %s", err) } @@ -40,7 +40,7 @@ func aptlySnapshotPull(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to pull: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(source) + err = collectionFactory.SnapshotCollection().LoadComplete(source, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to pull: %s", err) } @@ -138,7 +138,7 @@ func aptlySnapshotPull(cmd *commander.Command, args []string) error { destination := deb.NewSnapshotFromPackageList(args[2], []*deb.Snapshot{snapshot, source}, packageList, fmt.Sprintf("Pulled into '%s' with '%s' as source, pull request was: '%s'", snapshot.Name, source.Name, strings.Join(args[3:], " "))) - err = collectionFactory.SnapshotCollection().Add(destination) + err = collectionFactory.SnapshotCollection().Add(destination, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/snapshot_rename.go b/cmd/snapshot_rename.go index b8ac74cf33..b13b7dca6b 100644 --- a/cmd/snapshot_rename.go +++ b/cmd/snapshot_rename.go @@ -32,7 +32,7 @@ func aptlySnapshotRename(cmd *commander.Command, args []string) error { } snapshot.Name = newName - err = collectionFactory.SnapshotCollection().Update(snapshot) + err = collectionFactory.SnapshotCollection().Update(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to rename: %s", err) } diff --git a/cmd/snapshot_search.go b/cmd/snapshot_search.go index d771af7ce5..c0244f11c0 100644 --- a/cmd/snapshot_search.go +++ b/cmd/snapshot_search.go @@ -25,7 +25,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error command := cmd.Parent.Name() collectionFactory := context.NewCollectionFactory() - var reflist *deb.PackageRefList + var reflist *deb.SplitRefList if command == "snapshot" { // nolint: goconst var snapshot *deb.Snapshot @@ -34,7 +34,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error return fmt.Errorf("unable to search: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to search: %s", err) } @@ -47,7 +47,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error return fmt.Errorf("unable to search: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to search: %s", err) } @@ -60,7 +60,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error return fmt.Errorf("unable to search: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to search: %s", err) } diff --git a/cmd/snapshot_show.go b/cmd/snapshot_show.go index e03a49e586..582b6a9e37 100644 --- a/cmd/snapshot_show.go +++ b/cmd/snapshot_show.go @@ -35,7 +35,7 @@ func aptlySnapshotShowTxt(_ *commander.Command, args []string) error { return fmt.Errorf("unable to show: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -86,16 +86,17 @@ func aptlySnapshotShowTxt(_ *commander.Command, args []string) error { } func aptlySnapshotShowJSON(_ *commander.Command, args []string) error { + collectionFactory := context.NewCollectionFactory() var err error name := args[0] - snapshot, err := context.NewCollectionFactory().SnapshotCollection().ByName(name) + snapshot, err := collectionFactory.SnapshotCollection().ByName(name) if err != nil { return fmt.Errorf("unable to show: %s", err) } - err = context.NewCollectionFactory().SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -105,14 +106,14 @@ func aptlySnapshotShowJSON(_ *commander.Command, args []string) error { for _, sourceID := range snapshot.SourceIDs { if snapshot.SourceKind == deb.SourceSnapshot { var source *deb.Snapshot - source, err = context.NewCollectionFactory().SnapshotCollection().ByUUID(sourceID) + source, err = collectionFactory.SnapshotCollection().ByUUID(sourceID) if err != nil { continue } snapshot.Snapshots = append(snapshot.Snapshots, source) } else if snapshot.SourceKind == deb.SourceLocalRepo { var source *deb.LocalRepo - source, err = context.NewCollectionFactory().LocalRepoCollection().ByUUID(sourceID) + source, err = collectionFactory.LocalRepoCollection().ByUUID(sourceID) if err != nil { continue } @@ -133,7 +134,7 @@ func aptlySnapshotShowJSON(_ *commander.Command, args []string) error { if withPackages { if snapshot.RefList() != nil { var list *deb.PackageList - list, err = deb.NewPackageListFromRefList(snapshot.RefList(), context.NewCollectionFactory().PackageCollection(), context.Progress()) + list, err = deb.NewPackageListFromRefList(snapshot.RefList(), collectionFactory.PackageCollection(), context.Progress()) if err != nil { return fmt.Errorf("unable to get package list: %s", err) } diff --git a/cmd/snapshot_verify.go b/cmd/snapshot_verify.go index f815f29ce6..fc566aae39 100644 --- a/cmd/snapshot_verify.go +++ b/cmd/snapshot_verify.go @@ -23,7 +23,7 @@ func aptlySnapshotVerify(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to verify: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshots[i]) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshots[i], collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to verify: %s", err) } diff --git a/database/database.go b/database/database.go index 709a1aa803..03d896b201 100644 --- a/database/database.go +++ b/database/database.go @@ -48,6 +48,8 @@ type Storage interface { CreateTemporary() (Storage, error) + GetRecommendedMaxKVSize() int + Open() error Close() error CompactDB() error diff --git a/database/goleveldb/database.go b/database/goleveldb/database.go index a2874a6e61..011681a6bf 100644 --- a/database/goleveldb/database.go +++ b/database/goleveldb/database.go @@ -9,10 +9,13 @@ import ( "github.com/aptly-dev/aptly/database" ) +const blockSize = 4 * 1024 + func internalOpen(path string, throttleCompaction bool) (*leveldb.DB, error) { o := &opt.Options{ Filter: filter.NewBloomFilter(10), OpenFilesCacheCapacity: 256, + BlockSize: blockSize, } if throttleCompaction { diff --git a/database/goleveldb/storage.go b/database/goleveldb/storage.go index 37acf3d830..1281f3fbc6 100644 --- a/database/goleveldb/storage.go +++ b/database/goleveldb/storage.go @@ -16,6 +16,17 @@ type storage struct { db *leveldb.DB } +func (s *storage) GetRecommendedMaxKVSize() int { + // The block size configured is not actually a *set* block size, but rather a + // *minimum*. LevelDB only checks if a block is full after a new key/value pair is + // written, meaning that blocks will tend to overflow a bit. + // Therefore, using the default block size as the max value size will ensure + // that a new block will only contain a single value and that the size will + // only ever be as large as around double the block size (if the block was + // nearly full before the new items were added). + return blockSize +} + // CreateTemporary creates new DB of the same type in temp dir func (s *storage) CreateTemporary() (database.Storage, error) { tempdir, err := os.MkdirTemp("", "aptly") diff --git a/deb/changes.go b/deb/changes.go index c264986abb..6c8bf8812f 100644 --- a/deb/changes.go +++ b/deb/changes.go @@ -291,7 +291,8 @@ func CollectChangesFiles(locations []string, reporter aptly.ResultReporter) (cha // ImportChangesFiles imports referenced files in changes files into local repository func ImportChangesFiles(changesFiles []string, reporter aptly.ResultReporter, acceptUnsigned, ignoreSignatures, forceReplace, noRemoveFiles bool, verifier pgp.Verifier, repoTemplate *template.Template, progress aptly.Progress, localRepoCollection *LocalRepoCollection, packageCollection *PackageCollection, - pool aptly.PackagePool, checksumStorageProvider aptly.ChecksumStorageProvider, uploaders *Uploaders, parseQuery parseQuery) (processedFiles []string, failedFiles []string, err error) { + reflistCollection *RefListCollection, pool aptly.PackagePool, checksumStorageProvider aptly.ChecksumStorageProvider, uploaders *Uploaders, + parseQuery parseQuery) (processedFiles []string, failedFiles []string, err error) { for _, path := range changesFiles { var changes *Changes @@ -359,7 +360,7 @@ func ImportChangesFiles(changesFiles []string, reporter aptly.ResultReporter, ac } } - err = localRepoCollection.LoadComplete(repo) + err = localRepoCollection.LoadComplete(repo, reflistCollection) if err != nil { return nil, nil, fmt.Errorf("unable to load repo: %s", err) } @@ -382,9 +383,9 @@ func ImportChangesFiles(changesFiles []string, reporter aptly.ResultReporter, ac return nil, nil, fmt.Errorf("unable to import package files: %s", err) } - repo.UpdateRefList(NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(NewSplitRefListFromPackageList(list)) - err = localRepoCollection.Update(repo) + err = localRepoCollection.Update(repo, reflistCollection) if err != nil { return nil, nil, fmt.Errorf("unable to save: %s", err) } diff --git a/deb/changes_test.go b/deb/changes_test.go index b7dc4d95d0..1d50d610a5 100644 --- a/deb/changes_test.go +++ b/deb/changes_test.go @@ -21,6 +21,7 @@ type ChangesSuite struct { db database.Storage localRepoCollection *LocalRepoCollection packageCollection *PackageCollection + reflistCollection *RefListCollection packagePool aptly.PackagePool checksumStorage aptly.ChecksumStorage progress aptly.Progress @@ -42,6 +43,7 @@ func (s *ChangesSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.localRepoCollection = NewLocalRepoCollection(s.db) s.packageCollection = NewPackageCollection(s.db) + s.reflistCollection = NewRefListCollection(s.db) s.checksumStorage = files.NewMockChecksumStorage() s.packagePool = files.NewPackagePool(s.Dir, false) @@ -88,7 +90,7 @@ func (s *ChangesSuite) TestCollectChangesFiles(c *C) { func (s *ChangesSuite) TestImportChangesFiles(c *C) { repo := NewLocalRepo("test", "Test Comment") - c.Assert(s.localRepoCollection.Add(repo), IsNil) + c.Assert(s.localRepoCollection.Add(repo, s.reflistCollection), IsNil) origFailedFiles := []string{ "testdata/changes/calamares.changes", @@ -124,7 +126,8 @@ func (s *ChangesSuite) TestImportChangesFiles(c *C) { processedFiles, failedFiles, err := ImportChangesFiles( append(changesFiles, "testdata/changes/notexistent.changes"), s.Reporter, true, true, false, false, &NullVerifier{}, - template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.packagePool, func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, + template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.reflistCollection, s.packagePool, + func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, nil, nil) c.Assert(err, IsNil) c.Check(failedFiles, DeepEquals, append(expectedFailedFiles, "testdata/changes/notexistent.changes")) @@ -133,7 +136,7 @@ func (s *ChangesSuite) TestImportChangesFiles(c *C) { func (s *ChangesSuite) TestImportDbgsymWithVersionedSourceField(c *C) { repo := NewLocalRepo("test", "Test Comment") - c.Assert(s.localRepoCollection.Add(repo), IsNil) + c.Assert(s.localRepoCollection.Add(repo, s.reflistCollection), IsNil) changesFiles, failedFiles := CollectChangesFiles( []string{"testdata/dbgsym-with-source-version"}, s.Reporter) @@ -142,7 +145,8 @@ func (s *ChangesSuite) TestImportDbgsymWithVersionedSourceField(c *C) { _, failedFiles, err := ImportChangesFiles( changesFiles, s.Reporter, true, true, false, true, &NullVerifier{}, - template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.packagePool, func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, + template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.reflistCollection, s.packagePool, + func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, nil, nil) c.Assert(err, IsNil) c.Check(failedFiles, IsNil) diff --git a/deb/collections.go b/deb/collections.go index 7dfe852350..ff711e2650 100644 --- a/deb/collections.go +++ b/deb/collections.go @@ -16,6 +16,7 @@ type CollectionFactory struct { snapshots *SnapshotCollection localRepos *LocalRepoCollection publishedRepos *PublishedRepoCollection + reflists *RefListCollection checksums *ChecksumCollection } @@ -91,6 +92,17 @@ func (factory *CollectionFactory) PublishedRepoCollection() *PublishedRepoCollec return factory.publishedRepos } +func (factory *CollectionFactory) RefListCollection() *RefListCollection { + factory.Lock() + defer factory.Unlock() + + if factory.reflists == nil { + factory.reflists = NewRefListCollection(factory.db) + } + + return factory.reflists +} + // ChecksumCollection returns (or creates) new ChecksumCollection func (factory *CollectionFactory) ChecksumCollection(db database.ReaderWriter) aptly.ChecksumStorage { factory.Lock() diff --git a/deb/graph.go b/deb/graph.go index 16a7ce8543..77421c3a57 100644 --- a/deb/graph.go +++ b/deb/graph.go @@ -33,7 +33,7 @@ func BuildGraph(collectionFactory *CollectionFactory, layout string) (gographviz existingNodes := map[string]bool{} err = collectionFactory.RemoteRepoCollection().ForEach(func(repo *RemoteRepo) error { - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) + e := collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -55,7 +55,7 @@ func BuildGraph(collectionFactory *CollectionFactory, layout string) (gographviz } err = collectionFactory.LocalRepoCollection().ForEach(func(repo *LocalRepo) error { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -81,7 +81,7 @@ func BuildGraph(collectionFactory *CollectionFactory, layout string) (gographviz }) err = collectionFactory.SnapshotCollection().ForEach(func(snapshot *Snapshot) error { - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + e := collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if e != nil { return e } diff --git a/deb/list.go b/deb/list.go index ec14bb982c..e59390f6be 100644 --- a/deb/list.go +++ b/deb/list.go @@ -90,7 +90,7 @@ func NewPackageListWithDuplicates(duplicates bool, capacity int) *PackageList { } // NewPackageListFromRefList loads packages list from PackageRefList -func NewPackageListFromRefList(reflist *PackageRefList, collection *PackageCollection, progress aptly.Progress) (*PackageList, error) { +func NewPackageListFromRefList(reflist AnyRefList, collection *PackageCollection, progress aptly.Progress) (*PackageList, error) { // empty reflist if reflist == nil { return NewPackageList(), nil diff --git a/deb/local.go b/deb/local.go index 2a15c734b2..acae46646f 100644 --- a/deb/local.go +++ b/deb/local.go @@ -26,7 +26,7 @@ type LocalRepo struct { // Uploaders configuration Uploaders *Uploaders `codec:"Uploaders,omitempty" json:"-"` // "Snapshot" of current list of packages - packageRefs *PackageRefList + packageRefs *SplitRefList } // NewLocalRepo creates new instance of Debian local repository @@ -55,13 +55,13 @@ func (repo *LocalRepo) NumPackages() int { } // RefList returns package list for repo -func (repo *LocalRepo) RefList() *PackageRefList { +func (repo *LocalRepo) RefList() *SplitRefList { return repo.packageRefs } // UpdateRefList changes package list for local repo -func (repo *LocalRepo) UpdateRefList(reflist *PackageRefList) { - repo.packageRefs = reflist +func (repo *LocalRepo) UpdateRefList(sl *SplitRefList) { + repo.packageRefs = sl } // Encode does msgpack encoding of LocalRepo @@ -140,14 +140,14 @@ func (collection *LocalRepoCollection) search(filter func(*LocalRepo) bool, uniq } // Add appends new repo to collection and saves it -func (collection *LocalRepoCollection) Add(repo *LocalRepo) error { +func (collection *LocalRepoCollection) Add(repo *LocalRepo, reflistCollection *RefListCollection) error { _, err := collection.ByName(repo.Name) if err == nil { return fmt.Errorf("local repo with name %s already exists", repo.Name) } - err = collection.Update(repo) + err = collection.Update(repo, reflistCollection) if err != nil { return err } @@ -157,27 +157,25 @@ func (collection *LocalRepoCollection) Add(repo *LocalRepo) error { } // Update stores updated information about repo in DB -func (collection *LocalRepoCollection) Update(repo *LocalRepo) error { +func (collection *LocalRepoCollection) Update(repo *LocalRepo, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(repo.Key(), repo.Encode()) if repo.packageRefs != nil { - batch.Put(repo.RefKey(), repo.packageRefs.Encode()) + bc := reflistCollection.NewBatch(batch) + reflistCollection.UpdateInBatch(repo.packageRefs, repo.RefKey(), bc) } return batch.Write() } // LoadComplete loads additional information for local repo -func (collection *LocalRepoCollection) LoadComplete(repo *LocalRepo) error { - encoded, err := collection.db.Get(repo.RefKey()) +func (collection *LocalRepoCollection) LoadComplete(repo *LocalRepo, reflistCollection *RefListCollection) error { + repo.packageRefs = NewSplitRefList() + err := reflistCollection.LoadComplete(repo.packageRefs, repo.RefKey()) if err == database.ErrNotFound { return nil } - if err != nil { - return err - } - repo.packageRefs = &PackageRefList{} - return repo.packageRefs.Decode(encoded) + return err } // ByName looks up repository by name diff --git a/deb/local_test.go b/deb/local_test.go index c9072b7dc0..b87b1b624a 100644 --- a/deb/local_test.go +++ b/deb/local_test.go @@ -12,7 +12,7 @@ import ( type LocalRepoSuite struct { db database.Storage list *PackageList - reflist *PackageRefList + reflist *SplitRefList repo *LocalRepo } @@ -24,7 +24,7 @@ func (s *LocalRepoSuite) SetUpTest(c *C) { s.list.Add(&Package{Name: "lib", Version: "1.7", Architecture: "i386"}) s.list.Add(&Package{Name: "app", Version: "1.9", Architecture: "amd64"}) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromPackageList(s.list) s.repo = NewLocalRepo("lrepo", "Super repo") s.repo.packageRefs = s.reflist @@ -75,10 +75,11 @@ func (s *LocalRepoSuite) TestRefKey(c *C) { } type LocalRepoCollectionSuite struct { - db database.Storage - collection *LocalRepoCollection - list *PackageList - reflist *PackageRefList + db database.Storage + collection *LocalRepoCollection + reflistCollection *RefListCollection + list *PackageList + reflist *SplitRefList } var _ = Suite(&LocalRepoCollectionSuite{}) @@ -86,12 +87,13 @@ var _ = Suite(&LocalRepoCollectionSuite{}) func (s *LocalRepoCollectionSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.collection = NewLocalRepoCollection(s.db) + s.reflistCollection = NewRefListCollection(s.db) s.list = NewPackageList() s.list.Add(&Package{Name: "lib", Version: "1.7", Architecture: "i386"}) s.list.Add(&Package{Name: "app", Version: "1.9", Architecture: "amd64"}) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromRefList(NewPackageRefListFromPackageList(s.list)) } func (s *LocalRepoCollectionSuite) TearDownTest(c *C) { @@ -103,8 +105,8 @@ func (s *LocalRepoCollectionSuite) TestAddByName(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo := NewLocalRepo("local1", "Comment 1") - c.Assert(s.collection.Add(repo), IsNil) - c.Assert(s.collection.Add(repo), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(repo, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(repo, s.reflistCollection), ErrorMatches, ".*already exists") r, err := s.collection.ByName("local1") c.Assert(err, IsNil) @@ -121,7 +123,7 @@ func (s *LocalRepoCollectionSuite) TestByUUID(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo := NewLocalRepo("local1", "Comment 1") - c.Assert(s.collection.Add(repo), IsNil) + c.Assert(s.collection.Add(repo, s.reflistCollection), IsNil) r, err := s.collection.ByUUID(repo.UUID) c.Assert(err, IsNil) @@ -135,7 +137,7 @@ func (s *LocalRepoCollectionSuite) TestByUUID(c *C) { func (s *LocalRepoCollectionSuite) TestUpdateLoadComplete(c *C) { repo := NewLocalRepo("local1", "Comment 1") - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.reflistCollection), IsNil) collection := NewLocalRepoCollection(s.db) r, err := collection.ByName("local1") @@ -143,20 +145,20 @@ func (s *LocalRepoCollectionSuite) TestUpdateLoadComplete(c *C) { c.Assert(r.packageRefs, IsNil) repo.packageRefs = s.reflist - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.reflistCollection), IsNil) collection = NewLocalRepoCollection(s.db) r, err = collection.ByName("local1") c.Assert(err, IsNil) c.Assert(r.packageRefs, IsNil) c.Assert(r.NumPackages(), Equals, 0) - c.Assert(s.collection.LoadComplete(r), IsNil) + c.Assert(s.collection.LoadComplete(r, s.reflistCollection), IsNil) c.Assert(r.NumPackages(), Equals, 2) } func (s *LocalRepoCollectionSuite) TestForEachAndLen(c *C) { repo := NewLocalRepo("local1", "Comment 1") - s.collection.Add(repo) + s.collection.Add(repo, s.reflistCollection) count := 0 err := s.collection.ForEach(func(*LocalRepo) error { @@ -178,10 +180,10 @@ func (s *LocalRepoCollectionSuite) TestForEachAndLen(c *C) { func (s *LocalRepoCollectionSuite) TestDrop(c *C) { repo1 := NewLocalRepo("local1", "Comment 1") - s.collection.Add(repo1) + s.collection.Add(repo1, s.reflistCollection) repo2 := NewLocalRepo("local2", "Comment 2") - s.collection.Add(repo2) + s.collection.Add(repo2, s.reflistCollection) r1, _ := s.collection.ByUUID(repo1.UUID) c.Check(r1, Equals, repo1) diff --git a/deb/publish.go b/deb/publish.go index 04c1d9c990..a09cbc1d4a 100644 --- a/deb/publish.go +++ b/deb/publish.go @@ -27,7 +27,7 @@ type repoSourceItem struct { // Pointer to local repo if SourceKind == "local" localRepo *LocalRepo // Package references is SourceKind == "local" - packageRefs *PackageRefList + packageRefs *SplitRefList } // PublishedRepo is a published for http/ftp representation of snapshot as Debian repository @@ -401,7 +401,7 @@ func (p *PublishedRepo) RefKey(component string) []byte { } // RefList returns list of package refs in local repo -func (p *PublishedRepo) RefList(component string) *PackageRefList { +func (p *PublishedRepo) RefList(component string) *SplitRefList { item := p.sourceItems[component] if p.SourceKind == SourceLocalRepo { return item.packageRefs @@ -948,14 +948,14 @@ func (collection *PublishedRepoCollection) loadList() { } // Add appends new repo to collection and saves it -func (collection *PublishedRepoCollection) Add(repo *PublishedRepo) error { +func (collection *PublishedRepoCollection) Add(repo *PublishedRepo, reflistCollection *RefListCollection) error { collection.loadList() if collection.CheckDuplicate(repo) != nil { return fmt.Errorf("published repo with storage/prefix/distribution %s/%s/%s already exists", repo.Storage, repo.Prefix, repo.Distribution) } - err := collection.Update(repo) + err := collection.Update(repo, reflistCollection) if err != nil { return err } @@ -978,13 +978,14 @@ func (collection *PublishedRepoCollection) CheckDuplicate(repo *PublishedRepo) * } // Update stores updated information about repo in DB -func (collection *PublishedRepoCollection) Update(repo *PublishedRepo) error { +func (collection *PublishedRepoCollection) Update(repo *PublishedRepo, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(repo.Key(), repo.Encode()) if repo.SourceKind == SourceLocalRepo { + rb := reflistCollection.NewBatch(batch) for component, item := range repo.sourceItems { - batch.Put(repo.RefKey(component), item.packageRefs.Encode()) + reflistCollection.UpdateInBatch(item.packageRefs, repo.RefKey(component), rb) } } return batch.Write() @@ -1017,7 +1018,7 @@ func (collection *PublishedRepoCollection) LoadShallow(repo *PublishedRepo, coll return } - item.packageRefs = &PackageRefList{} + item.packageRefs = NewSplitRefList() repo.sourceItems[component] = item } } else { @@ -1033,35 +1034,29 @@ func (collection *PublishedRepoCollection) LoadComplete(repo *PublishedRepo, col if repo.SourceKind == SourceSnapshot { for _, item := range repo.sourceItems { - err = collectionFactory.SnapshotCollection().LoadComplete(item.snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(item.snapshot, collectionFactory.RefListCollection()) if err != nil { return } } } else if repo.SourceKind == SourceLocalRepo { for component, item := range repo.sourceItems { - err = collectionFactory.LocalRepoCollection().LoadComplete(item.localRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(item.localRepo, collectionFactory.RefListCollection()) if err != nil { return } - var encoded []byte - encoded, err = collection.db.Get(repo.RefKey(component)) + err = collectionFactory.RefListCollection().LoadComplete(item.packageRefs, repo.RefKey(component)) if err != nil { // < 0.6 saving w/o component name if err == database.ErrNotFound && len(repo.Sources) == 1 { - encoded, err = collection.db.Get(repo.RefKey("")) + err = collectionFactory.RefListCollection().LoadComplete(item.packageRefs, repo.RefKey("")) } if err != nil { return } } - - err = item.packageRefs.Decode(encoded) - if err != nil { - return - } } } else { panic("unknown SourceKind") @@ -1166,6 +1161,11 @@ func (collection *PublishedRepoCollection) listReferencedFilesByComponent(prefix referencedFiles := map[string][]string{} processedComponentRefs := map[string]*PackageRefList{} + processedComponentBuckets := map[string]*RefListDigestSet{} + for _, component := range components { + processedComponentBuckets[component] = NewRefListDigestSet() + } + for _, r := range collection.list { if r.Prefix == prefix { matches := false @@ -1189,36 +1189,51 @@ func (collection *PublishedRepoCollection) listReferencedFilesByComponent(prefix for _, component := range components { if utils.StrSliceHasItem(repoComponents, component) { - unseenRefs := r.RefList(component) - processedRefs := processedComponentRefs[component] - if processedRefs != nil { - unseenRefs = unseenRefs.Subtract(processedRefs) - } else { - processedRefs = NewPackageRefList() - } + processedBuckets := processedComponentBuckets[component] - if unseenRefs.Len() == 0 { - continue - } - processedComponentRefs[component] = processedRefs.Merge(unseenRefs, false, true) + err := r.RefList(component).ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + if processedBuckets.Has(digest) { + return nil + } + processedBuckets.Add(digest) + + unseenRefs := bucket + processedRefs := processedComponentRefs[component] + if processedRefs != nil { + unseenRefs = unseenRefs.Subtract(processedRefs) + } else { + processedRefs = NewPackageRefList() + } - packageList, err := NewPackageListFromRefList(unseenRefs, collectionFactory.PackageCollection(), progress) - if err != nil { - return nil, err - } + if unseenRefs.Len() == 0 { + return nil + } + processedComponentRefs[component] = processedRefs.Merge(unseenRefs, false, true) - packageList.ForEach(func(p *Package) error { - poolDir, err := p.PoolDirectory() + packageList, err := NewPackageListFromRefList(unseenRefs, collectionFactory.PackageCollection(), progress) if err != nil { return err } - for _, f := range p.Files() { - referencedFiles[component] = append(referencedFiles[component], filepath.Join(poolDir, f.Filename)) - } + packageList.ForEach(func(p *Package) error { + poolDir, err := p.PoolDirectory() + if err != nil { + return err + } + + for _, f := range p.Files() { + referencedFiles[component] = append(referencedFiles[component], filepath.Join(poolDir, f.Filename)) + } + + return nil + }) return nil }) + + if err != nil { + return nil, err + } } } } diff --git a/deb/publish_bench_test.go b/deb/publish_bench_test.go index b135a8b8cb..f1d87e1ebe 100644 --- a/deb/publish_bench_test.go +++ b/deb/publish_bench_test.go @@ -31,6 +31,7 @@ func BenchmarkListReferencedFiles(b *testing.B) { packageCollection := factory.PackageCollection() repoCollection := factory.LocalRepoCollection() publishCollection := factory.PublishedRepoCollection() + reflistCollection := factory.RefListCollection() sharedRefs := NewPackageRefList() { @@ -91,14 +92,14 @@ func BenchmarkListReferencedFiles(b *testing.B) { repo := NewLocalRepo(fmt.Sprintf("repo%d", repoIndex), "comment") repo.DefaultDistribution = fmt.Sprintf("dist%d", repoIndex) repo.DefaultComponent = defaultComponent - repo.UpdateRefList(refs.Merge(sharedRefs, false, true)) - repoCollection.Add(repo) + repo.UpdateRefList(NewSplitRefListFromRefList(refs.Merge(sharedRefs, false, true))) + repoCollection.Add(repo, reflistCollection) publish, err := NewPublishedRepo("", "test", "", nil, []string{defaultComponent}, []interface{}{repo}, factory) if err != nil { b.Fatal(err) } - publishCollection.Add(publish) + publishCollection.Add(publish, reflistCollection) } db.CompactDB() diff --git a/deb/publish_test.go b/deb/publish_test.go index 860fa62bf6..1b6213da0c 100644 --- a/deb/publish_test.go +++ b/deb/publish_test.go @@ -82,6 +82,7 @@ type PublishedRepoSuite struct { db database.Storage factory *CollectionFactory packageCollection *PackageCollection + reflistCollection *RefListCollection } var _ = Suite(&PublishedRepoSuite{}) @@ -113,21 +114,22 @@ func (s *PublishedRepoSuite) SetUpTest(c *C) { s.p2.UpdateFiles(s.p1.Files()) s.p3.UpdateFiles(s.p1.Files()) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromPackageList(s.list) + s.reflistCollection = s.factory.RefListCollection() repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) repo.packageRefs = s.reflist - s.factory.RemoteRepoCollection().Add(repo) + s.factory.RemoteRepoCollection().Add(repo, s.reflistCollection) s.localRepo = NewLocalRepo("local1", "comment1") s.localRepo.packageRefs = s.reflist - s.factory.LocalRepoCollection().Add(s.localRepo) + s.factory.LocalRepoCollection().Add(s.localRepo, s.reflistCollection) s.snapshot, _ = NewSnapshotFromRepository("snap", repo) - s.factory.SnapshotCollection().Add(s.snapshot) + s.factory.SnapshotCollection().Add(s.snapshot, s.reflistCollection) s.snapshot2, _ = NewSnapshotFromRepository("snap", repo) - s.factory.SnapshotCollection().Add(s.snapshot2) + s.factory.SnapshotCollection().Add(s.snapshot2, s.reflistCollection) s.packageCollection = s.factory.PackageCollection() s.packageCollection.Update(s.p1) @@ -284,7 +286,7 @@ func (s *PublishedRepoSuite) TestDistributionComponentGuessing(c *C) { s.localRepo.DefaultDistribution = "precise" s.localRepo.DefaultComponent = "contrib" - s.factory.LocalRepoCollection().Update(s.localRepo) + s.factory.LocalRepoCollection().Update(s.localRepo, s.reflistCollection) repo, err = NewPublishedRepo("", "ppa", "", nil, []string{""}, []interface{}{s.localRepo}, s.factory) c.Check(err, IsNil) @@ -442,6 +444,7 @@ type PublishedRepoCollectionSuite struct { db database.Storage factory *CollectionFactory snapshotCollection *SnapshotCollection + reflistCollection *RefListCollection collection *PublishedRepoCollection snap1, snap2 *Snapshot localRepo *LocalRepo @@ -457,22 +460,23 @@ func (s *PublishedRepoCollectionSuite) SetUpTest(c *C) { s.factory = NewCollectionFactory(s.db) s.snapshotCollection = s.factory.SnapshotCollection() + s.reflistCollection = s.factory.RefListCollection() snap1Refs := NewPackageRefList() snap1Refs.Refs = [][]byte{s.p1.Key(""), s.p2.Key("")} sort.Sort(snap1Refs) - s.snap1 = NewSnapshotFromRefList("snap1", []*Snapshot{}, snap1Refs, "desc1") + s.snap1 = NewSnapshotFromRefList("snap1", []*Snapshot{}, NewSplitRefListFromRefList(snap1Refs), "desc1") snap2Refs := NewPackageRefList() snap2Refs.Refs = [][]byte{s.p3.Key("")} sort.Sort(snap2Refs) - s.snap2 = NewSnapshotFromRefList("snap2", []*Snapshot{}, snap2Refs, "desc2") + s.snap2 = NewSnapshotFromRefList("snap2", []*Snapshot{}, NewSplitRefListFromRefList(snap2Refs), "desc2") - s.snapshotCollection.Add(s.snap1) - s.snapshotCollection.Add(s.snap2) + s.snapshotCollection.Add(s.snap1, s.reflistCollection) + s.snapshotCollection.Add(s.snap2, s.reflistCollection) s.localRepo = NewLocalRepo("local1", "comment1") - s.factory.LocalRepoCollection().Add(s.localRepo) + s.factory.LocalRepoCollection().Add(s.localRepo, s.reflistCollection) s.repo1, _ = NewPublishedRepo("", "ppa", "anaconda", []string{}, []string{"main"}, []interface{}{s.snap1}, s.factory) s.repo2, _ = NewPublishedRepo("", "", "anaconda", []string{}, []string{"main", "contrib"}, []interface{}{s.snap2, s.snap1}, s.factory) @@ -491,14 +495,14 @@ func (s *PublishedRepoCollectionSuite) TestAddByStoragePrefixDistribution(c *C) _, err := s.collection.ByStoragePrefixDistribution("", "ppa", "anaconda") c.Assert(err, ErrorMatches, "*.not found") - c.Assert(s.collection.Add(s.repo1), IsNil) - c.Assert(s.collection.Add(s.repo1), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.repo1, s.reflistCollection), ErrorMatches, ".*already exists") c.Assert(s.collection.CheckDuplicate(s.repo2), IsNil) - c.Assert(s.collection.Add(s.repo2), IsNil) - c.Assert(s.collection.Add(s.repo3), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(s.repo2, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.repo3, s.reflistCollection), ErrorMatches, ".*already exists") c.Assert(s.collection.CheckDuplicate(s.repo3), Equals, s.repo1) - c.Assert(s.collection.Add(s.repo4), IsNil) - c.Assert(s.collection.Add(s.repo5), IsNil) + c.Assert(s.collection.Add(s.repo4, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.repo5, s.reflistCollection), IsNil) r, err := s.collection.ByStoragePrefixDistribution("", "ppa", "anaconda") c.Assert(err, IsNil) @@ -524,7 +528,7 @@ func (s *PublishedRepoCollectionSuite) TestByUUID(c *C) { _, err := s.collection.ByUUID(s.repo1.UUID) c.Assert(err, ErrorMatches, "*.not found") - c.Assert(s.collection.Add(s.repo1), IsNil) + c.Assert(s.collection.Add(s.repo1, s.reflistCollection), IsNil) r, err := s.collection.ByUUID(s.repo1.UUID) c.Assert(err, IsNil) @@ -535,8 +539,8 @@ func (s *PublishedRepoCollectionSuite) TestByUUID(c *C) { } func (s *PublishedRepoCollectionSuite) TestUpdateLoadComplete(c *C) { - c.Assert(s.collection.Update(s.repo1), IsNil) - c.Assert(s.collection.Update(s.repo4), IsNil) + c.Assert(s.collection.Update(s.repo1, s.reflistCollection), IsNil) + c.Assert(s.collection.Update(s.repo4, s.reflistCollection), IsNil) collection := NewPublishedRepoCollection(s.db) r, err := collection.ByStoragePrefixDistribution("", "ppa", "anaconda") @@ -584,7 +588,7 @@ func (s *PublishedRepoCollectionSuite) TestLoadPre0_6(c *C) { encoder.Encode(&old) c.Assert(s.db.Put(s.repo1.Key(), buf.Bytes()), IsNil) - c.Assert(s.db.Put(s.repo1.RefKey(""), s.localRepo.RefList().Encode()), IsNil) + c.Assert(s.db.Put(s.repo1.RefKey(""), NewPackageRefList().Encode()), IsNil) collection := NewPublishedRepoCollection(s.db) repo, err := collection.ByStoragePrefixDistribution("", "ppa", "anaconda") @@ -599,7 +603,7 @@ func (s *PublishedRepoCollectionSuite) TestLoadPre0_6(c *C) { } func (s *PublishedRepoCollectionSuite) TestForEachAndLen(c *C) { - s.collection.Add(s.repo1) + s.collection.Add(s.repo1, s.reflistCollection) count := 0 err := s.collection.ForEach(func(*PublishedRepo) error { @@ -620,17 +624,17 @@ func (s *PublishedRepoCollectionSuite) TestForEachAndLen(c *C) { } func (s *PublishedRepoCollectionSuite) TestBySnapshot(c *C) { - c.Check(s.collection.Add(s.repo1), IsNil) - c.Check(s.collection.Add(s.repo2), IsNil) + c.Check(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo2, s.reflistCollection), IsNil) c.Check(s.collection.BySnapshot(s.snap1), DeepEquals, []*PublishedRepo{s.repo1, s.repo2}) c.Check(s.collection.BySnapshot(s.snap2), DeepEquals, []*PublishedRepo{s.repo2}) } func (s *PublishedRepoCollectionSuite) TestByLocalRepo(c *C) { - c.Check(s.collection.Add(s.repo1), IsNil) - c.Check(s.collection.Add(s.repo4), IsNil) - c.Check(s.collection.Add(s.repo5), IsNil) + c.Check(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo4, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo5, s.reflistCollection), IsNil) c.Check(s.collection.ByLocalRepo(s.localRepo), DeepEquals, []*PublishedRepo{s.repo4, s.repo5}) } @@ -640,10 +644,10 @@ func (s *PublishedRepoCollectionSuite) TestListReferencedFiles(c *C) { c.Check(s.factory.PackageCollection().Update(s.p2), IsNil) c.Check(s.factory.PackageCollection().Update(s.p3), IsNil) - c.Check(s.collection.Add(s.repo1), IsNil) - c.Check(s.collection.Add(s.repo2), IsNil) - c.Check(s.collection.Add(s.repo4), IsNil) - c.Check(s.collection.Add(s.repo5), IsNil) + c.Check(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo2, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo4, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo5, s.reflistCollection), IsNil) files, err := s.collection.listReferencedFilesByComponent(".", []string{"main", "contrib"}, s.factory, nil) c.Assert(err, IsNil) @@ -659,12 +663,12 @@ func (s *PublishedRepoCollectionSuite) TestListReferencedFiles(c *C) { }) snap3 := NewSnapshotFromRefList("snap3", []*Snapshot{}, s.snap2.RefList(), "desc3") - s.snapshotCollection.Add(snap3) + s.snapshotCollection.Add(snap3, s.reflistCollection) // Ensure that adding a second publish point with matching files doesn't give duplicate results. repo3, err := NewPublishedRepo("", "", "anaconda-2", []string{}, []string{"main"}, []interface{}{snap3}, s.factory) c.Check(err, IsNil) - c.Check(s.collection.Add(repo3), IsNil) + c.Check(s.collection.Add(repo3, s.reflistCollection), IsNil) files, err = s.collection.listReferencedFilesByComponent(".", []string{"main", "contrib"}, s.factory, nil) c.Assert(err, IsNil) @@ -685,6 +689,7 @@ type PublishedRepoRemoveSuite struct { db database.Storage factory *CollectionFactory snapshotCollection *SnapshotCollection + reflistCollection *RefListCollection collection *PublishedRepoCollection root, root2 string provider *FakeStorageProvider @@ -700,10 +705,11 @@ func (s *PublishedRepoRemoveSuite) SetUpTest(c *C) { s.factory = NewCollectionFactory(s.db) s.snapshotCollection = s.factory.SnapshotCollection() + s.reflistCollection = s.factory.RefListCollection() s.snap1 = NewSnapshotFromPackageList("snap1", []*Snapshot{}, NewPackageList(), "desc1") - s.snapshotCollection.Add(s.snap1) + s.snapshotCollection.Add(s.snap1, s.reflistCollection) s.repo1, _ = NewPublishedRepo("", "ppa", "anaconda", []string{}, []string{"main"}, []interface{}{s.snap1}, s.factory) s.repo2, _ = NewPublishedRepo("", "", "anaconda", []string{}, []string{"main"}, []interface{}{s.snap1}, s.factory) @@ -712,11 +718,11 @@ func (s *PublishedRepoRemoveSuite) SetUpTest(c *C) { s.repo5, _ = NewPublishedRepo("files:other", "ppa", "osminog", []string{}, []string{"contrib"}, []interface{}{s.snap1}, s.factory) s.collection = s.factory.PublishedRepoCollection() - s.collection.Add(s.repo1) - s.collection.Add(s.repo2) - s.collection.Add(s.repo3) - s.collection.Add(s.repo4) - s.collection.Add(s.repo5) + s.collection.Add(s.repo1, s.reflistCollection) + s.collection.Add(s.repo2, s.reflistCollection) + s.collection.Add(s.repo3, s.reflistCollection) + s.collection.Add(s.repo4, s.reflistCollection) + s.collection.Add(s.repo5, s.reflistCollection) s.root = c.MkDir() s.publishedStorage = files.NewPublishedStorage(s.root, "", "") diff --git a/deb/reflist.go b/deb/reflist.go index 30396548cc..248443f86c 100644 --- a/deb/reflist.go +++ b/deb/reflist.go @@ -2,10 +2,15 @@ package deb import ( "bytes" + "crypto/sha256" + "encoding/base64" "encoding/json" + "fmt" "sort" "github.com/AlekSi/pointer" + "github.com/aptly-dev/aptly/database" + "github.com/cespare/xxhash/v2" "github.com/ugorji/go/codec" ) @@ -44,6 +49,13 @@ func NewPackageRefListFromPackageList(list *PackageList) *PackageRefList { return reflist } +func (l *PackageRefList) Clone() *PackageRefList { + clone := &PackageRefList{} + clone.Refs = make([][]byte, l.Len()) + copy(clone.Refs, l.Refs) + return clone +} + // Len returns number of refs func (l *PackageRefList) Len() int { return len(l.Refs) @@ -184,8 +196,12 @@ func (d PackageDiff) MarshalJSON() ([]byte, error) { type PackageDiffs []PackageDiff // Diff calculates difference between two reflists -func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageCollection) (result PackageDiffs, err error) { - result = make(PackageDiffs, 0, 128) +func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageCollection, result PackageDiffs) (PackageDiffs, error) { + var err error + + if result == nil { + result = make(PackageDiffs, 0, 128) + } // pointer to left and right reflists il, ir := 0, 0 @@ -258,7 +274,7 @@ func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageColle } } - return + return result, nil } // Merge merges reflist r into current reflist. If overrideMatching, merge @@ -391,3 +407,754 @@ func (l *PackageRefList) FilterLatestRefs() { lastArch, lastName, lastVer = arch, name, ver } } + +const ( + reflistBucketCount = 1 << 6 + reflistBucketMask = reflistBucketCount - 1 +) + +type reflistDigestArray [sha256.Size]byte + +func bucketRefPrefix(ref []byte) []byte { + const maxPrefixLen = 3 + + // Cut out the arch, leaving behind the package name and subsequent info. + _, ref, _ = bytes.Cut(ref, []byte{' '}) + + // Strip off the lib prefix, so that "libxyz" and "xyz", which are likely + // to be updated together, go in the same bucket. + libPrefix := []byte("lib") + if bytes.HasPrefix(ref, libPrefix) { + ref = ref[len(libPrefix):] + } + + prefixLen := min(maxPrefixLen, len(ref)) + prefix, _, _ := bytes.Cut(ref[:prefixLen], []byte{' '}) + return prefix +} + +func bucketIdxForRef(ref []byte) int { + return int(xxhash.Sum64(bucketRefPrefix(ref))) & reflistBucketMask +} + +// SplitRefList is a list of package refs, similar to a PackageRefList. However, +// instead of storing a linear array of refs, SplitRefList splits the refs into +// PackageRefList "buckets", based on a hash of the package name inside the ref. +// Each bucket has a digest of its contents that serves as its key in the database. +// +// When serialized, a SplitRefList just becomes an array of bucket digests, and +// the buckets themselves are stored separately. Because the buckets are then +// referenced by their digests, multiple independent reflists can share buckets, +// if their buckets have matching digests. +// +// Buckets themselves may not be confirmed to a single database value; instead, +// they're split into "segments", based on the database's preferred maximum +// value size. This prevents large buckets from slowing down the database. +type SplitRefList struct { + Buckets [][]byte + + bucketRefs []*PackageRefList +} + +// NewSplitRefList creates empty SplitRefList +func NewSplitRefList() *SplitRefList { + sl := &SplitRefList{} + sl.reset() + return sl +} + +// NewSplitRefListFromRefList creates SplitRefList from PackageRefList +func NewSplitRefListFromRefList(reflist *PackageRefList) *SplitRefList { + sl := NewSplitRefList() + sl.Replace(reflist) + return sl +} + +// NewSplitRefListFromRefList creates SplitRefList from PackageList +func NewSplitRefListFromPackageList(list *PackageList) *SplitRefList { + return NewSplitRefListFromRefList(NewPackageRefListFromPackageList(list)) +} + +func (sl *SplitRefList) reset() { + sl.Buckets = make([][]byte, reflistBucketCount) + sl.bucketRefs = make([]*PackageRefList, reflistBucketCount) +} + +// Has checks whether package is part of reflist +func (sl *SplitRefList) Has(p *Package) bool { + idx := bucketIdxForRef(p.Key("")) + if bucket := sl.bucketRefs[idx]; bucket != nil { + return bucket.Has(p) + } else { + return false + } +} + +// Len returns number of refs +func (sl *SplitRefList) Len() int { + total := 0 + for _, bucket := range sl.bucketRefs { + if bucket != nil { + total += bucket.Len() + } + } + return total +} + +func reflistDigest(l *PackageRefList) []byte { + // Different algorithms on PackageRefLists will sometimes return a nil slice + // of refs and other times return an empty slice. Regardless, they should + // both be treated identically and be given an empty digest. + if len(l.Refs) == 0 { + return nil + } + + h := sha256.New() + for _, ref := range l.Refs { + h.Write(ref) + h.Write([]byte{0}) + } + return h.Sum(nil) +} + +// Removes all the refs inside and replaces them with those in the given reflist +func (sl *SplitRefList) Replace(reflist *PackageRefList) { + sl.reset() + + for _, ref := range reflist.Refs { + idx := bucketIdxForRef(ref) + + bucket := sl.bucketRefs[idx] + if bucket == nil { + bucket = NewPackageRefList() + sl.bucketRefs[idx] = bucket + } + + bucket.Refs = append(bucket.Refs, ref) + } + + for idx, bucket := range sl.bucketRefs { + if bucket != nil { + sort.Sort(bucket) + sl.Buckets[idx] = reflistDigest(bucket) + } + } +} + +// Merge merges reflist r into current reflist (see PackageRefList.Merge) +func (l *SplitRefList) Merge(r *SplitRefList, overrideMatching, ignoreConflicting bool) (result *SplitRefList) { + result = NewSplitRefList() + + var empty PackageRefList + for idx, lbucket := range l.bucketRefs { + rbucket := r.bucketRefs[idx] + if lbucket == nil && rbucket == nil { + continue + } + + if lbucket == nil { + lbucket = &empty + } else if rbucket == nil { + rbucket = &empty + } + + result.bucketRefs[idx] = lbucket.Merge(rbucket, overrideMatching, ignoreConflicting) + result.Buckets[idx] = reflistDigest(result.bucketRefs[idx]) + } + + return +} + +// Subtract returns all packages in l that are not in r +func (l *SplitRefList) Subtract(r *SplitRefList) (result *SplitRefList) { + result = NewSplitRefList() + + for idx, lbucket := range l.bucketRefs { + rbucket := r.bucketRefs[idx] + if lbucket != nil { + if rbucket != nil { + result.bucketRefs[idx] = lbucket.Subtract(rbucket) + result.Buckets[idx] = reflistDigest(result.bucketRefs[idx]) + } else { + result.bucketRefs[idx] = lbucket.Clone() + result.Buckets[idx] = l.Buckets[idx] + } + } + } + + return +} + +// Diff calculates difference between two reflists +func (l *SplitRefList) Diff(r *SplitRefList, packageCollection *PackageCollection, result PackageDiffs) (PackageDiffs, error) { + var err error + + if result == nil { + result = make(PackageDiffs, 0, 128) + } + + var empty PackageRefList + for idx, lbucket := range l.bucketRefs { + rbucket := r.bucketRefs[idx] + if lbucket != nil { + if rbucket != nil { + result, err = lbucket.Diff(rbucket, packageCollection, result) + } else { + result, err = lbucket.Diff(&empty, packageCollection, result) + } + } else if rbucket != nil { + result, err = empty.Diff(rbucket, packageCollection, result) + } + + if err != nil { + return nil, err + } + } + + sort.Slice(result, func(i, j int) bool { + var ri, rj []byte + if result[i].Left != nil { + ri = result[i].Left.Key("") + } else { + ri = result[i].Right.Key("") + } + if result[j].Left != nil { + rj = result[j].Left.Key("") + } else { + rj = result[j].Right.Key("") + } + + return bytes.Compare(ri, rj) < 0 + }) + + return result, nil +} + +// FilterLatestRefs reduces a reflist to the latest of each package (see PackageRefList.FilterLatestRefs) +func (sl *SplitRefList) FilterLatestRefs() { + for idx, bucket := range sl.bucketRefs { + if bucket != nil { + bucket.FilterLatestRefs() + sl.Buckets[idx] = reflistDigest(bucket) + } + } +} + +// Flatten creates a flat PackageRefList containing all the refs in this reflist +func (sl *SplitRefList) Flatten() *PackageRefList { + reflist := NewPackageRefList() + sl.ForEach(func(ref []byte) error { + reflist.Refs = append(reflist.Refs, ref) + return nil + }) + sort.Sort(reflist) + return reflist +} + +// ForEachBucket calls handler for each bucket in list +func (sl *SplitRefList) ForEachBucket(handler func(digest []byte, bucket *PackageRefList) error) error { + for idx, digest := range sl.Buckets { + if len(digest) == 0 { + continue + } + + bucket := sl.bucketRefs[idx] + if bucket != nil { + if err := handler(digest, bucket); err != nil { + return err + } + } + } + + return nil +} + +// ForEach calls handler for each package ref in list +// +// IMPORTANT: unlike PackageRefList.ForEach, the order of handler invocations +// is *not* guaranteed to be sorted. +func (sl *SplitRefList) ForEach(handler func([]byte) error) error { + for idx, digest := range sl.Buckets { + if len(digest) == 0 { + continue + } + + bucket := sl.bucketRefs[idx] + if bucket != nil { + if err := bucket.ForEach(handler); err != nil { + return err + } + } + } + + return nil +} + +// RefListDigestSet is a set of SplitRefList bucket digests +type RefListDigestSet struct { + items map[reflistDigestArray]struct{} +} + +// NewRefListDigestSet creates empty RefListDigestSet +func NewRefListDigestSet() *RefListDigestSet { + return &RefListDigestSet{items: map[reflistDigestArray]struct{}{}} +} + +// Len returns number of digests in the set +func (set *RefListDigestSet) Len() int { + return len(set.items) +} + +// ForEach calls handler for each digest in the set +func (set *RefListDigestSet) ForEach(handler func(digest []byte) error) error { + for digest := range set.items { + if err := handler(digest[:]); err != nil { + return err + } + } + + return nil +} + +// Add adds digest to set, doing nothing if the digest was already present +func (set *RefListDigestSet) Add(digest []byte) { + set.items[reflistDigestArray(digest)] = struct{}{} +} + +// AddAllInRefList adds all the bucket digests in a SplitRefList to the set +func (set *RefListDigestSet) AddAllInRefList(sl *SplitRefList) { + for _, digest := range sl.Buckets { + if len(digest) > 0 { + set.Add(digest) + } + } +} + +// Has checks whether a digest is part of set +func (set *RefListDigestSet) Has(digest []byte) bool { + _, ok := set.items[reflistDigestArray(digest)] + return ok +} + +// Remove removes a digest from set +func (set *RefListDigestSet) Remove(digest []byte) { + delete(set.items, reflistDigestArray(digest)) +} + +// RemoveAll removes all the digests in other from the current set +func (set *RefListDigestSet) RemoveAll(other *RefListDigestSet) { + for digest := range other.items { + delete(set.items, digest) + } +} + +// RefListCollection does listing, updating/adding/deleting of SplitRefLists +type RefListCollection struct { + db database.Storage + + cache map[reflistDigestArray]*PackageRefList +} + +// NewRefListCollection creates a RefListCollection +func NewRefListCollection(db database.Storage) *RefListCollection { + return &RefListCollection{db: db, cache: make(map[reflistDigestArray]*PackageRefList)} +} + +type reflistStorageFormat int + +const ( + // (legacy format) all the refs are stored inline in a single value + reflistStorageFormatInline reflistStorageFormat = iota + // the refs are split into buckets that are stored externally from the value + reflistStorageFormatSplit +) + +// NoPadding is used because all digests are the same length, so the padding +// is useless and only serves to muddy the output. +var bucketDigestEncoding = base64.StdEncoding.WithPadding(base64.NoPadding) + +func segmentPrefix(encodedDigest string) []byte { + return []byte(fmt.Sprintf("F%s-", encodedDigest)) +} + +func segmentIndexKey(prefix []byte, idx int) []byte { + // Assume most buckets won't have more than 0xFFFF = ~65k segments (which + // would be an extremely large bucket!). + return append(bytes.Clone(prefix), []byte(fmt.Sprintf("%04x", idx))...) +} + +// AllBucketDigests returns a set of all the bucket digests in the database +func (collection *RefListCollection) AllBucketDigests() (*RefListDigestSet, error) { + digests := NewRefListDigestSet() + + err := collection.db.ProcessByPrefix([]byte("F"), func(key []byte, value []byte) error { + if !bytes.HasSuffix(key, []byte("-0000")) { + // Ignore additional segments for the same digest. + return nil + } + + encodedDigest, _, foundDash := bytes.Cut(key[1:], []byte("-")) + if !foundDash { + return fmt.Errorf("invalid key: %s", string(key)) + } + digest := make([]byte, bucketDigestEncoding.DecodedLen(len(encodedDigest))) + if _, err := bucketDigestEncoding.Decode(digest, encodedDigest); err != nil { + return fmt.Errorf("decoding key %s: %w", string(key), err) + } + + digests.Add(digest) + return nil + }) + + if err != nil { + return nil, err + } + return digests, nil +} + +// UnsafeDropBucket drops the bucket associated with digest from the database, +// doing so inside batch +// +// This is considered "unsafe" because no checks are performed to ensure that +// the bucket is no longer referenced by any saved reflists. +func (collection *RefListCollection) UnsafeDropBucket(digest []byte, batch database.Batch) error { + prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) + return collection.db.ProcessByPrefix(prefix, func(key []byte, value []byte) error { + return batch.Delete(key) + }) +} + +func (collection *RefListCollection) load(sl *SplitRefList, key []byte) (reflistStorageFormat, error) { + sl.reset() + + data, err := collection.db.Get(key) + if err != nil { + return 0, err + } + + var splitOrInlineRefList struct { + *SplitRefList + *PackageRefList + } + handle := &codec.MsgpackHandle{} + handle.ZeroCopy = true + decoder := codec.NewDecoderBytes(data, handle) + if err := decoder.Decode(&splitOrInlineRefList); err != nil { + return 0, err + } + + if splitOrInlineRefList.SplitRefList != nil { + sl.Buckets = splitOrInlineRefList.Buckets + } else if splitOrInlineRefList.PackageRefList != nil { + sl.Replace(splitOrInlineRefList.PackageRefList) + return reflistStorageFormatInline, nil + } + + return reflistStorageFormatSplit, nil +} + +func (collection *RefListCollection) loadBuckets(sl *SplitRefList, key []byte) error { + for idx := range sl.Buckets { + if sl.bucketRefs[idx] != nil { + continue + } + + var bucket *PackageRefList + + if digest := sl.Buckets[idx]; len(digest) > 0 { + cacheKey := reflistDigestArray(digest) + bucket = collection.cache[cacheKey] + if bucket == nil { + bucket = NewPackageRefList() + prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) + err := collection.db.ProcessByPrefix(prefix, func(digest []byte, value []byte) error { + var l PackageRefList + if err := l.Decode(append([]byte{}, value...)); err != nil { + return err + } + + bucket.Refs = append(bucket.Refs, l.Refs...) + return nil + }) + + if err != nil { + return err + } + + // The segments may not have been iterated in order, so make sure to re-sort + // here. + sort.Sort(bucket) + collection.cache[cacheKey] = bucket + } + + actualDigest := reflistDigest(bucket) + if !bytes.Equal(actualDigest, digest) { + return fmt.Errorf("corrupt reflist bucket %d: expected digest %s, got %s", + idx, + bucketDigestEncoding.EncodeToString(digest), + bucketDigestEncoding.EncodeToString(actualDigest)) + } + } + + sl.bucketRefs[idx] = bucket + } + + return nil +} + +// LoadComplete loads the reflist stored at the given key, as well as all the +// buckets referenced by a split reflist +func (collection *RefListCollection) LoadComplete(sl *SplitRefList, key []byte) error { + if _, err := collection.load(sl, key); err != nil { + return err + } + + return collection.loadBuckets(sl, key) +} + +// RefListBatch is a wrapper over a database.Batch that tracks already-written +// reflists to avoid writing them multiple times +// +// It is *not* safe to use the same underlying database.Batch that has already +// been given to UnsafeDropBucket. +type RefListBatch struct { + batch database.Batch + + alreadyWritten *RefListDigestSet +} + +// NewBatch creates a new RefListBatch wrapping the given database.Batch +func (collection *RefListCollection) NewBatch(batch database.Batch) *RefListBatch { + return &RefListBatch{ + batch: batch, + alreadyWritten: NewRefListDigestSet(), + } +} + +type reflistUpdateContext struct { + rb *RefListBatch + stats *RefListMigrationStats +} + +func clearSegmentRefs(reflist *PackageRefList, recommendedMaxKVSize int) { + avgRefsInSegment := recommendedMaxKVSize / 70 + reflist.Refs = make([][]byte, 0, avgRefsInSegment) +} + +func flushSegmentRefs(uctx *reflistUpdateContext, prefix []byte, segment int, reflist *PackageRefList) error { + encoded := reflist.Encode() + err := uctx.rb.batch.Put(segmentIndexKey(prefix, segment), encoded) + if err == nil && uctx.stats != nil { + uctx.stats.Segments++ + } + return err +} + +func (collection *RefListCollection) updateWithContext(sl *SplitRefList, key []byte, uctx *reflistUpdateContext) error { + if sl != nil { + recommendedMaxKVSize := collection.db.GetRecommendedMaxKVSize() + + for idx, digest := range sl.Buckets { + if len(digest) == 0 { + continue + } + + if uctx.rb.alreadyWritten.Has(digest) { + continue + } + + prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) + if collection.db.HasPrefix(prefix) { + continue + } + + // All the sizing information taken from the msgpack spec: + // https://github.com/msgpack/msgpack/blob/master/spec.md + + // Assume that a segment will have [16,2^16) elements, which would + // fit into an array 16 and thus have 3 bytes of overhead. + // (A database would need a massive recommendedMaxKVSize to pass + // that limit.) + size := len(segmentIndexKey(prefix, 0)) + 3 + segment := 0 + + var reflist PackageRefList + clearSegmentRefs(&reflist, recommendedMaxKVSize) + for _, ref := range sl.bucketRefs[idx].Refs { + // In order to determine the size of the ref in the database, + // we need to know how much overhead will be added with by msgpack + // encoding. + requiredSize := len(ref) + if requiredSize < 1<<5 { + requiredSize += 1 + } else if requiredSize < 1<<8 { + requiredSize += 2 + } else if requiredSize < 1<<16 { + requiredSize += 3 + } else { + requiredSize += 4 + } + if size+requiredSize > recommendedMaxKVSize { + if err := flushSegmentRefs(uctx, prefix, segment, &reflist); err != nil { + return err + } + clearSegmentRefs(&reflist, recommendedMaxKVSize) + segment++ + } + + reflist.Refs = append(reflist.Refs, ref) + size += requiredSize + } + + if len(reflist.Refs) > 0 { + if err := flushSegmentRefs(uctx, prefix, segment, &reflist); err != nil { + return err + } + } + + uctx.rb.alreadyWritten.Add(digest) + if uctx.stats != nil { + uctx.stats.Buckets++ + } + } + } + + var buf bytes.Buffer + encoder := codec.NewEncoder(&buf, &codec.MsgpackHandle{}) + encoder.Encode(sl) + err := uctx.rb.batch.Put(key, buf.Bytes()) + if err == nil && uctx.stats != nil { + uctx.stats.Reflists++ + } + return err +} + +// UpdateInBatch will save or update the SplitRefList at key, as well as save the buckets inside, +// as part of the given batch +func (collection *RefListCollection) UpdateInBatch(sl *SplitRefList, key []byte, batch *RefListBatch) error { + return collection.updateWithContext(sl, key, &reflistUpdateContext{rb: batch}) +} + +// Update will save or update the SplitRefList at key, as well as save the buckets inside +func (collection *RefListCollection) Update(sl *SplitRefList, key []byte) error { + rb := collection.NewBatch(collection.db.CreateBatch()) + err := collection.UpdateInBatch(sl, key, rb) + if err == nil { + err = rb.batch.Write() + } + return err +} + +// RefListMigrationStats counts a number of reflists, buckets, and segments +type RefListMigrationStats struct { + Reflists, Buckets, Segments int +} + +// RefListMigration wraps a RefListBatch for the purpose of migrating inline format +// reflists to split reflists +// +// Once the batch gets too large, it will automatically be flushed to the database, +// and a new batch will be created in its place. +type RefListMigration struct { + rb *RefListBatch + + dryRun bool + + // current number of reflists/buckets/segments queued in the current, unwritten batch + batchStats RefListMigrationStats + flushStats RefListMigrationStats +} + +// NewMigration creates an empty RefListMigration +func (collection *RefListCollection) NewMigration() *RefListMigration { + return &RefListMigration{} +} + +// NewMigrationDryRun creates an empty RefListMigration that will track the +// changes to make as usual but avoid actually writing to the db +func (collection *RefListCollection) NewMigrationDryRun() *RefListMigration { + return &RefListMigration{dryRun: true} +} + +// Stats returns statistics on the written values in the current migration +func (migration *RefListMigration) Stats() RefListMigrationStats { + return migration.flushStats +} + +// Flush will flush the current batch in the migration to the database +func (migration *RefListMigration) Flush() error { + if migration.batchStats.Segments > 0 { + if !migration.dryRun { + if err := migration.rb.batch.Write(); err != nil { + return err + } + + // It's important that we don't clear the batch on dry runs, because + // the batch is what contains the list of already-written buckets. + // If we're not writing to the database, and we clear that list, + // duplicate "writes" will occur. + migration.rb = nil + } + + migration.flushStats.Reflists += migration.batchStats.Reflists + migration.flushStats.Buckets += migration.batchStats.Buckets + migration.flushStats.Segments += migration.batchStats.Segments + migration.batchStats = RefListMigrationStats{} + } + + return nil +} + +// LoadCompleteAndMigrate will load the reflist and its buckets as RefListCollection.LoadComplete, +// migrating any inline reflists to split ones along the way +func (collection *RefListCollection) LoadCompleteAndMigrate(sl *SplitRefList, key []byte, migration *RefListMigration) error { + // Given enough reflists, the memory used by a batch starts to become massive, so + // make sure to flush the written segments periodically. Note that this is only + // checked *after* a migration of a full bucket (and all the segments inside) + // takes place, as splitting a single bucket write into multiple batches would + // be unsafe if an interruption occurs midway. + const maxMigratorBatch = 50000 + + format, err := collection.load(sl, key) + if err != nil { + return err + } + + switch format { + case reflistStorageFormatInline: + if migration.rb == nil { + migration.rb = collection.NewBatch(collection.db.CreateBatch()) + } + + collection.updateWithContext(sl, key, &reflistUpdateContext{ + rb: migration.rb, + stats: &migration.batchStats, + }) + + if migration.batchStats.Segments > maxMigratorBatch { + if err := migration.Flush(); err != nil { + return err + } + } + + return nil + case reflistStorageFormatSplit: + return collection.loadBuckets(sl, key) + default: + panic(fmt.Sprintf("unexpected format %v", format)) + } +} + +// AnyRefList is implemented by both PackageRefList and SplitRefList +type AnyRefList interface { + Has(p *Package) bool + Len() int + ForEach(handler func([]byte) error) error + FilterLatestRefs() +} + +// Check interface +var ( + _ AnyRefList = (*PackageRefList)(nil) + _ AnyRefList = (*SplitRefList)(nil) +) diff --git a/deb/reflist_bench_test.go b/deb/reflist_bench_test.go index b377574ce4..f81a84d0ae 100644 --- a/deb/reflist_bench_test.go +++ b/deb/reflist_bench_test.go @@ -45,3 +45,41 @@ func BenchmarkReflistDecode(b *testing.B) { (&PackageRefList{}).Decode(data) } } + +func BenchmarkSplitRefListCreationSmall(b *testing.B) { + const count = 400 + + l := NewPackageRefList() + + for i := 0; i < count; i++ { + l.Refs = append(l.Refs, []byte(fmt.Sprintf("Pamd64 %x %d", i, i))) + } + + sort.Sort(l) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for j := 0; j < 8; j++ { + NewSplitRefListFromRefList(l) + } + } +} + +func BenchmarkSplitRefListCreationLarge(b *testing.B) { + const count = 4096 + + l := NewPackageRefList() + + for i := 0; i < count; i++ { + l.Refs = append(l.Refs, []byte(fmt.Sprintf("Pamd64 %x %d", i, i))) + } + + sort.Sort(l) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for j := 0; j < 8; j++ { + NewSplitRefListFromRefList(l) + } + } +} diff --git a/deb/reflist_test.go b/deb/reflist_test.go index ec7ed09f7e..bfc6411312 100644 --- a/deb/reflist_test.go +++ b/deb/reflist_test.go @@ -1,7 +1,10 @@ package deb import ( + "bytes" + "encoding/hex" "errors" + "fmt" "github.com/aptly-dev/aptly/database/goleveldb" @@ -9,24 +12,83 @@ import ( ) type PackageRefListSuite struct { - // Simple list with "real" packages from stanzas - list *PackageList p1, p2, p3, p4, p5, p6 *Package } var _ = Suite(&PackageRefListSuite{}) -func toStrSlice(reflist *PackageRefList) (result []string) { +func verifyRefListIntegrity(c *C, rl AnyRefList) AnyRefList { + if rl, ok := rl.(*SplitRefList); ok { + for idx, bucket := range rl.bucketRefs { + if bucket == nil { + bucket = NewPackageRefList() + } + c.Check(rl.Buckets[idx], DeepEquals, reflistDigest(bucket)) + } + } + + return rl +} + +func getRefs(rl AnyRefList) (refs [][]byte) { + switch rl := rl.(type) { + case *PackageRefList: + refs = rl.Refs + case *SplitRefList: + refs = rl.Flatten().Refs + default: + panic(fmt.Sprintf("unexpected reflist type %t", rl)) + } + + // Hack so that passing getRefs-returned slices to DeepEquals won't fail given a nil + // slice and an empty slice. + if len(refs) == 0 { + refs = nil + } + return +} + +func toStrSlice(reflist AnyRefList) (result []string) { result = make([]string, reflist.Len()) - for i, r := range reflist.Refs { + for i, r := range getRefs(reflist) { result[i] = string(r) } return } -func (s *PackageRefListSuite) SetUpTest(c *C) { - s.list = NewPackageList() +type reflistFactory struct { + new func() AnyRefList + newFromRefs func(refs ...[]byte) AnyRefList + newFromPackageList func(list *PackageList) AnyRefList +} + +func forEachRefList(test func(f reflistFactory)) { + test(reflistFactory{ + new: func() AnyRefList { + return NewPackageRefList() + }, + newFromRefs: func(refs ...[]byte) AnyRefList { + return &PackageRefList{Refs: refs} + }, + newFromPackageList: func(list *PackageList) AnyRefList { + return NewPackageRefListFromPackageList(list) + }, + }) + test(reflistFactory{ + new: func() AnyRefList { + return NewSplitRefList() + }, + newFromRefs: func(refs ...[]byte) AnyRefList { + return NewSplitRefListFromRefList(&PackageRefList{Refs: refs}) + }, + newFromPackageList: func(list *PackageList) AnyRefList { + return NewSplitRefListFromPackageList(list) + }, + }) +} + +func (s *PackageRefListSuite) SetUpTest(c *C) { s.p1 = NewPackageFromControlFile(packageStanza.Copy()) s.p2 = NewPackageFromControlFile(packageStanza.Copy()) stanza := packageStanza.Copy() @@ -44,346 +106,600 @@ func (s *PackageRefListSuite) SetUpTest(c *C) { } func (s *PackageRefListSuite) TestNewPackageListFromRefList(c *C) { - db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) - coll.Update(s.p1) - coll.Update(s.p3) + forEachRefList(func(f reflistFactory) { + list := NewPackageList() - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) + coll.Update(s.p1) + coll.Update(s.p3) - reflist := NewPackageRefListFromPackageList(s.list) + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) - _, err := NewPackageListFromRefList(reflist, coll, nil) - c.Assert(err, ErrorMatches, "unable to load package with key.*") + reflist := f.newFromPackageList(list) - coll.Update(s.p5) - coll.Update(s.p6) + _, err := NewPackageListFromRefList(reflist, coll, nil) + c.Assert(err, ErrorMatches, "unable to load package with key.*") - list, err := NewPackageListFromRefList(reflist, coll, nil) - c.Assert(err, IsNil) - c.Check(list.Len(), Equals, 4) - c.Check(list.Add(s.p4), ErrorMatches, "conflict in package.*") + coll.Update(s.p5) + coll.Update(s.p6) - list, err = NewPackageListFromRefList(nil, coll, nil) - c.Assert(err, IsNil) - c.Check(list.Len(), Equals, 0) + list, err = NewPackageListFromRefList(reflist, coll, nil) + c.Assert(err, IsNil) + c.Check(list.Len(), Equals, 4) + c.Check(list.Add(s.p4), ErrorMatches, "conflict in package.*") + + list, err = NewPackageListFromRefList(nil, coll, nil) + c.Assert(err, IsNil) + c.Check(list.Len(), Equals, 0) + }) } func (s *PackageRefListSuite) TestNewPackageRefList(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) - - reflist := NewPackageRefListFromPackageList(s.list) - c.Assert(reflist.Len(), Equals, 4) - c.Check(reflist.Refs[0], DeepEquals, []byte(s.p1.Key(""))) - c.Check(reflist.Refs[1], DeepEquals, []byte(s.p6.Key(""))) - c.Check(reflist.Refs[2], DeepEquals, []byte(s.p5.Key(""))) - c.Check(reflist.Refs[3], DeepEquals, []byte(s.p3.Key(""))) - - reflist = NewPackageRefList() - c.Check(reflist.Len(), Equals, 0) + forEachRefList(func(f reflistFactory) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) + + reflist := f.newFromPackageList(list) + verifyRefListIntegrity(c, reflist) + c.Assert(reflist.Len(), Equals, 4) + refs := getRefs(reflist) + c.Check(refs[0], DeepEquals, []byte(s.p1.Key(""))) + c.Check(refs[1], DeepEquals, []byte(s.p6.Key(""))) + c.Check(refs[2], DeepEquals, []byte(s.p5.Key(""))) + c.Check(refs[3], DeepEquals, []byte(s.p3.Key(""))) + + reflist = f.new() + c.Check(reflist.Len(), Equals, 0) + }) } -func (s *PackageRefListSuite) TestPackageRefListEncodeDecode(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) +func (s *PackageRefListSuite) TestPackageRefListForeach(c *C) { + forEachRefList(func(f reflistFactory) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) - reflist := NewPackageRefListFromPackageList(s.list) + reflist := f.newFromPackageList(list) - reflist2 := &PackageRefList{} - err := reflist2.Decode(reflist.Encode()) - c.Assert(err, IsNil) - c.Check(reflist2.Len(), Equals, reflist.Len()) - c.Check(reflist2.Refs, DeepEquals, reflist.Refs) -} + Len := 0 + err := reflist.ForEach(func([]byte) error { + Len++ + return nil + }) -func (s *PackageRefListSuite) TestPackageRefListForeach(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) + c.Check(Len, Equals, 4) + c.Check(err, IsNil) - reflist := NewPackageRefListFromPackageList(s.list) + e := errors.New("b") - Len := 0 - err := reflist.ForEach(func([]byte) error { - Len++ - return nil - }) - - c.Check(Len, Equals, 4) - c.Check(err, IsNil) + err = reflist.ForEach(func([]byte) error { + return e + }) - e := errors.New("b") + c.Check(err, Equals, e) + }) +} - err = reflist.ForEach(func([]byte) error { - return e +func (s *PackageRefListSuite) TestHas(c *C) { + forEachRefList(func(f reflistFactory) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + reflist := f.newFromPackageList(list) + + c.Check(reflist.Has(s.p1), Equals, true) + c.Check(reflist.Has(s.p3), Equals, true) + c.Check(reflist.Has(s.p5), Equals, true) + c.Check(reflist.Has(s.p2), Equals, true) + c.Check(reflist.Has(s.p6), Equals, false) }) +} - c.Check(err, Equals, e) +func subtractRefLists(l, r AnyRefList) AnyRefList { + switch l := l.(type) { + case *PackageRefList: + return l.Subtract(r.(*PackageRefList)) + case *SplitRefList: + return l.Subtract(r.(*SplitRefList)) + default: + panic(fmt.Sprintf("unexpected reflist type %t", l)) + } } -func (s *PackageRefListSuite) TestHas(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - reflist := NewPackageRefListFromPackageList(s.list) - - c.Check(reflist.Has(s.p1), Equals, true) - c.Check(reflist.Has(s.p3), Equals, true) - c.Check(reflist.Has(s.p5), Equals, true) - c.Check(reflist.Has(s.p2), Equals, true) - c.Check(reflist.Has(s.p6), Equals, false) +func (s *PackageRefListSuite) TestSubtract(c *C) { + forEachRefList(func(f reflistFactory) { + r1 := []byte("Pall r1") + r2 := []byte("Pall r2") + r3 := []byte("Pall r3") + r4 := []byte("Pall r4") + r5 := []byte("Pall r5") + + empty := f.newFromRefs() + l1 := f.newFromRefs(r1, r2, r3, r4) + l2 := f.newFromRefs(r1, r3) + l3 := f.newFromRefs(r2, r4) + l4 := f.newFromRefs(r4, r5) + l5 := f.newFromRefs(r1, r2, r3) + + c.Check(getRefs(subtractRefLists(l1, empty)), DeepEquals, getRefs(l1)) + c.Check(getRefs(subtractRefLists(l1, l2)), DeepEquals, getRefs(l3)) + c.Check(getRefs(subtractRefLists(l1, l3)), DeepEquals, getRefs(l2)) + c.Check(getRefs(subtractRefLists(l1, l4)), DeepEquals, getRefs(l5)) + c.Check(getRefs(subtractRefLists(empty, l1)), DeepEquals, getRefs(empty)) + c.Check(getRefs(subtractRefLists(l2, l3)), DeepEquals, getRefs(l2)) + }) } -func (s *PackageRefListSuite) TestSubstract(c *C) { - r1 := []byte("r1") - r2 := []byte("r2") - r3 := []byte("r3") - r4 := []byte("r4") - r5 := []byte("r5") - - empty := &PackageRefList{Refs: [][]byte{}} - l1 := &PackageRefList{Refs: [][]byte{r1, r2, r3, r4}} - l2 := &PackageRefList{Refs: [][]byte{r1, r3}} - l3 := &PackageRefList{Refs: [][]byte{r2, r4}} - l4 := &PackageRefList{Refs: [][]byte{r4, r5}} - l5 := &PackageRefList{Refs: [][]byte{r1, r2, r3}} - - c.Check(l1.Subtract(empty), DeepEquals, l1) - c.Check(l1.Subtract(l2), DeepEquals, l3) - c.Check(l1.Subtract(l3), DeepEquals, l2) - c.Check(l1.Subtract(l4), DeepEquals, l5) - c.Check(empty.Subtract(l1), DeepEquals, empty) - c.Check(l2.Subtract(l3), DeepEquals, l2) +func diffRefLists(l, r AnyRefList, packageCollection *PackageCollection) (PackageDiffs, error) { + switch l := l.(type) { + case *PackageRefList: + return l.Diff(r.(*PackageRefList), packageCollection, nil) + case *SplitRefList: + return l.Diff(r.(*SplitRefList), packageCollection, nil) + default: + panic(fmt.Sprintf("unexpected reflist type %t", l)) + } } func (s *PackageRefListSuite) TestDiff(c *C) { - db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) - - packages := []*Package{ - {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 - {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 - {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 - {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 - {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 - {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 - {Name: "xyz", Version: "3.0", Architecture: "sparc"}, //6 - } + forEachRefList(func(f reflistFactory) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) + + packages := []*Package{ + {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 + {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 + {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 + {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 + {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 + {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 + {Name: "xyz", Version: "3.0", Architecture: "sparc"}, //6 + } + + for _, p := range packages { + coll.Update(p) + } + + listA := NewPackageList() + listA.Add(packages[0]) + listA.Add(packages[1]) + listA.Add(packages[2]) + listA.Add(packages[3]) + listA.Add(packages[6]) + + listB := NewPackageList() + listB.Add(packages[0]) + listB.Add(packages[2]) + listB.Add(packages[4]) + listB.Add(packages[5]) + + reflistA := f.newFromPackageList(listA) + reflistB := f.newFromPackageList(listB) + + diffAA, err := diffRefLists(reflistA, reflistA, coll) + c.Check(err, IsNil) + c.Check(diffAA, HasLen, 0) + + diffAB, err := diffRefLists(reflistA, reflistB, coll) + c.Check(err, IsNil) + c.Check(diffAB, HasLen, 4) + + c.Check(diffAB[0].Left, IsNil) + c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + + c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") + c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") + + c.Check(diffAB[2].Left.String(), Equals, "dpkg_1.7_i386") + c.Check(diffAB[2].Right, IsNil) + + c.Check(diffAB[3].Left.String(), Equals, "xyz_3.0_sparc") + c.Check(diffAB[3].Right, IsNil) + + diffBA, err := diffRefLists(reflistB, reflistA, coll) + c.Check(err, IsNil) + c.Check(diffBA, HasLen, 4) + + c.Check(diffBA[0].Right, IsNil) + c.Check(diffBA[0].Left.String(), Equals, "app_1.1~bp2_amd64") + + c.Check(diffBA[1].Right.String(), Equals, "app_1.1~bp1_i386") + c.Check(diffBA[1].Left.String(), Equals, "app_1.1~bp2_i386") + + c.Check(diffBA[2].Right.String(), Equals, "dpkg_1.7_i386") + c.Check(diffBA[2].Left, IsNil) + + c.Check(diffBA[3].Right.String(), Equals, "xyz_3.0_sparc") + c.Check(diffBA[3].Left, IsNil) + }) +} - for _, p := range packages { - coll.Update(p) - } +func (s *PackageRefListSuite) TestDiffCompactsAtEnd(c *C) { + forEachRefList(func(f reflistFactory) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) - listA := NewPackageList() - listA.Add(packages[0]) - listA.Add(packages[1]) - listA.Add(packages[2]) - listA.Add(packages[3]) - listA.Add(packages[6]) + packages := []*Package{ + {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //0 + {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //1 + {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //2 + } - listB := NewPackageList() - listB.Add(packages[0]) - listB.Add(packages[2]) - listB.Add(packages[4]) - listB.Add(packages[5]) + for _, p := range packages { + coll.Update(p) + } - reflistA := NewPackageRefListFromPackageList(listA) - reflistB := NewPackageRefListFromPackageList(listB) + listA := NewPackageList() + listA.Add(packages[0]) - diffAA, err := reflistA.Diff(reflistA, coll) - c.Check(err, IsNil) - c.Check(diffAA, HasLen, 0) + listB := NewPackageList() + listB.Add(packages[1]) + listB.Add(packages[2]) - diffAB, err := reflistA.Diff(reflistB, coll) - c.Check(err, IsNil) - c.Check(diffAB, HasLen, 4) + reflistA := f.newFromPackageList(listA) + reflistB := f.newFromPackageList(listB) - c.Check(diffAB[0].Left, IsNil) - c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + diffAB, err := diffRefLists(reflistA, reflistB, coll) + c.Check(err, IsNil) + c.Check(diffAB, HasLen, 2) + + c.Check(diffAB[0].Left, IsNil) + c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + + c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") + c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") + }) +} - c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") - c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") +func mergeRefLists(l, r AnyRefList, overrideMatching, ignoreConflicting bool) AnyRefList { + switch l := l.(type) { + case *PackageRefList: + return l.Merge(r.(*PackageRefList), overrideMatching, ignoreConflicting) + case *SplitRefList: + return l.Merge(r.(*SplitRefList), overrideMatching, ignoreConflicting) + default: + panic(fmt.Sprintf("unexpected reflist type %t", l)) + } +} - c.Check(diffAB[2].Left.String(), Equals, "dpkg_1.7_i386") - c.Check(diffAB[2].Right, IsNil) +func (s *PackageRefListSuite) TestMerge(c *C) { + forEachRefList(func(f reflistFactory) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) + + packages := []*Package{ + {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 + {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 + {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 + {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 + {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 + {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 + {Name: "dpkg", Version: "1.0", Architecture: "i386"}, //6 + {Name: "xyz", Version: "1.0", Architecture: "sparc"}, //7 + {Name: "dpkg", Version: "1.0", Architecture: "i386", FilesHash: 0x34445}, //8 + {Name: "app", Version: "1.1~bp2", Architecture: "i386", FilesHash: 0x44}, //9 + } + + for _, p := range packages { + p.V06Plus = true + coll.Update(p) + } + + listA := NewPackageList() + listA.Add(packages[0]) + listA.Add(packages[1]) + listA.Add(packages[2]) + listA.Add(packages[3]) + listA.Add(packages[7]) + + listB := NewPackageList() + listB.Add(packages[0]) + listB.Add(packages[2]) + listB.Add(packages[4]) + listB.Add(packages[5]) + listB.Add(packages[6]) + + listC := NewPackageList() + listC.Add(packages[0]) + listC.Add(packages[8]) + listC.Add(packages[9]) + + reflistA := f.newFromPackageList(listA) + reflistB := f.newFromPackageList(listB) + reflistC := f.newFromPackageList(listC) + + mergeAB := mergeRefLists(reflistA, reflistB, true, false) + mergeBA := mergeRefLists(reflistB, reflistA, true, false) + mergeAC := mergeRefLists(reflistA, reflistC, true, false) + mergeBC := mergeRefLists(reflistB, reflistC, true, false) + mergeCB := mergeRefLists(reflistC, reflistB, true, false) + + verifyRefListIntegrity(c, mergeAB) + verifyRefListIntegrity(c, mergeBA) + verifyRefListIntegrity(c, mergeAC) + verifyRefListIntegrity(c, mergeBC) + verifyRefListIntegrity(c, mergeCB) + + c.Check(toStrSlice(mergeAB), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeBA), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeAC), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeBC), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) + c.Check(toStrSlice(mergeCB), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000"}) + + mergeABall := mergeRefLists(reflistA, reflistB, false, false) + mergeBAall := mergeRefLists(reflistB, reflistA, false, false) + mergeACall := mergeRefLists(reflistA, reflistC, false, false) + mergeBCall := mergeRefLists(reflistB, reflistC, false, false) + mergeCBall := mergeRefLists(reflistC, reflistB, false, false) + + verifyRefListIntegrity(c, mergeABall) + verifyRefListIntegrity(c, mergeBAall) + verifyRefListIntegrity(c, mergeACall) + verifyRefListIntegrity(c, mergeBCall) + verifyRefListIntegrity(c, mergeCBall) + + c.Check(mergeABall, DeepEquals, mergeBAall) + c.Check(toStrSlice(mergeBAall), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000000", + "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + + c.Check(mergeBCall, Not(DeepEquals), mergeCBall) + c.Check(toStrSlice(mergeACall), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", + "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeBCall), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", + "Pi386 lib 1.0 00000000"}) + + mergeBCwithConflicts := mergeRefLists(reflistB, reflistC, false, true) + c.Check(toStrSlice(mergeBCwithConflicts), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", + "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) + }) +} - c.Check(diffAB[3].Left.String(), Equals, "xyz_3.0_sparc") - c.Check(diffAB[3].Right, IsNil) +func (s *PackageRefListSuite) TestFilterLatestRefs(c *C) { + forEachRefList(func(f reflistFactory) { + packages := []*Package{ + {Name: "lib", Version: "1.0", Architecture: "i386"}, + {Name: "lib", Version: "1.2~bp1", Architecture: "i386"}, + {Name: "lib", Version: "1.2", Architecture: "i386"}, + {Name: "dpkg", Version: "1.2", Architecture: "i386"}, + {Name: "dpkg", Version: "1.3", Architecture: "i386"}, + {Name: "dpkg", Version: "1.3~bp2", Architecture: "i386"}, + {Name: "dpkg", Version: "1.5", Architecture: "i386"}, + {Name: "dpkg", Version: "1.6", Architecture: "i386"}, + } + + rl := NewPackageList() + rl.Add(packages[0]) + rl.Add(packages[1]) + rl.Add(packages[2]) + rl.Add(packages[3]) + rl.Add(packages[4]) + rl.Add(packages[5]) + rl.Add(packages[6]) + rl.Add(packages[7]) + + result := f.newFromPackageList(rl) + result.FilterLatestRefs() + + verifyRefListIntegrity(c, result) + c.Check(toStrSlice(result), DeepEquals, + []string{"Pi386 dpkg 1.6", "Pi386 lib 1.2"}) + }) +} - diffBA, err := reflistB.Diff(reflistA, coll) - c.Check(err, IsNil) - c.Check(diffBA, HasLen, 4) +func (s *PackageRefListSuite) TestPackageRefListEncodeDecode(c *C) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) - c.Check(diffBA[0].Right, IsNil) - c.Check(diffBA[0].Left.String(), Equals, "app_1.1~bp2_amd64") + reflist := NewPackageRefListFromPackageList(list) - c.Check(diffBA[1].Right.String(), Equals, "app_1.1~bp1_i386") - c.Check(diffBA[1].Left.String(), Equals, "app_1.1~bp2_i386") + reflist2 := &PackageRefList{} + err := reflist2.Decode(reflist.Encode()) + c.Assert(err, IsNil) + c.Check(reflist2.Len(), Equals, reflist.Len()) + c.Check(reflist2.Refs, DeepEquals, reflist.Refs) +} - c.Check(diffBA[2].Right.String(), Equals, "dpkg_1.7_i386") - c.Check(diffBA[2].Left, IsNil) +func (s *PackageRefListSuite) TestRefListBucketPrefix(c *C) { + c.Check(bucketRefPrefix([]byte("Pall abcd 1.0")), DeepEquals, []byte("abc")) + c.Check(bucketRefPrefix([]byte("Pall libabcd 1.0")), DeepEquals, []byte("abc")) + c.Check(bucketRefPrefix([]byte("Pamd64 xy 1.0")), DeepEquals, []byte("xy")) +} - c.Check(diffBA[3].Right.String(), Equals, "xyz_3.0_sparc") - c.Check(diffBA[3].Left, IsNil) +func (s *PackageRefListSuite) TestRefListBucketIdx(c *C) { + c.Check(bucketIdxForRef(s.p1.Key("")), Equals, 46) + c.Check(bucketIdxForRef(s.p2.Key("")), Equals, 46) + c.Check(bucketIdxForRef(s.p3.Key("")), Equals, 26) + c.Check(bucketIdxForRef(s.p4.Key("")), Equals, 46) + c.Check(bucketIdxForRef(s.p5.Key("")), Equals, 4) + c.Check(bucketIdxForRef(s.p6.Key("")), Equals, 46) +} +func (s *PackageRefListSuite) TestSplitRefListBuckets(c *C) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + list.Add(s.p6) + + sl := NewSplitRefListFromPackageList(list) + verifyRefListIntegrity(c, sl) + + c.Check(hex.EncodeToString(sl.Buckets[4]), Equals, "7287aed32daad5d1aab4e89533bde135381d932e60548cfc00b882ca8858ae07") + c.Check(toStrSlice(sl.bucketRefs[4]), DeepEquals, []string{string(s.p5.Key(""))}) + c.Check(hex.EncodeToString(sl.Buckets[26]), Equals, "f31fc28e82368b63c8be47eefc64b8e217e2e5349c7e3827b98f80536b956f6e") + c.Check(toStrSlice(sl.bucketRefs[26]), DeepEquals, []string{string(s.p3.Key(""))}) + c.Check(hex.EncodeToString(sl.Buckets[46]), Equals, "55e70286393afc5da5046d68c632d35f98bec24781ae433bd1a1069b52853367") + c.Check(toStrSlice(sl.bucketRefs[46]), DeepEquals, []string{string(s.p1.Key("")), string(s.p6.Key(""))}) } -func (s *PackageRefListSuite) TestDiffCompactsAtEnd(c *C) { - db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) +func (s *PackageRefListSuite) TestRefListDigestSet(c *C) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + list.Add(s.p6) - packages := []*Package{ - {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //0 - {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //1 - {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //2 - } + sl := NewSplitRefListFromPackageList(list) - for _, p := range packages { - coll.Update(p) - } + set := NewRefListDigestSet() + c.Check(set.Len(), Equals, 0) - listA := NewPackageList() - listA.Add(packages[0]) + err := sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set.Has(digest), Equals, false) + return nil + }) + c.Assert(err, IsNil) - listB := NewPackageList() - listB.Add(packages[1]) - listB.Add(packages[2]) + set.AddAllInRefList(sl) + c.Check(set.Len(), Equals, 3) - reflistA := NewPackageRefListFromPackageList(listA) - reflistB := NewPackageRefListFromPackageList(listB) + err = sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set.Has(digest), Equals, true) + return nil + }) + c.Assert(err, IsNil) - diffAB, err := reflistA.Diff(reflistB, coll) - c.Check(err, IsNil) - c.Check(diffAB, HasLen, 2) + firstDigest := sl.Buckets[bucketIdxForRef(s.p1.Key(""))] + set.Remove(firstDigest) + c.Check(set.Len(), Equals, 2) - c.Check(diffAB[0].Left, IsNil) - c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + err = sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set.Has(digest), Equals, !bytes.Equal(digest, firstDigest)) + return nil + }) + c.Assert(err, IsNil) - c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") - c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") + set2 := NewRefListDigestSet() + set2.AddAllInRefList(sl) + set2.RemoveAll(set) + + err = sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set2.Has(digest), Equals, bytes.Equal(digest, firstDigest)) + return nil + }) + c.Assert(err, IsNil) } -func (s *PackageRefListSuite) TestMerge(c *C) { +func (s *PackageRefListSuite) TestRefListCollectionLoadSave(c *C) { db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) - - packages := []*Package{ - {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 - {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 - {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 - {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 - {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 - {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 - {Name: "dpkg", Version: "1.0", Architecture: "i386"}, //6 - {Name: "xyz", Version: "1.0", Architecture: "sparc"}, //7 - {Name: "dpkg", Version: "1.0", Architecture: "i386", FilesHash: 0x34445}, //8 - {Name: "app", Version: "1.1~bp2", Architecture: "i386", FilesHash: 0x44}, //9 - } + reflistCollection := NewRefListCollection(db) + packageCollection := NewPackageCollection(db) + + packageCollection.Update(s.p1) + packageCollection.Update(s.p2) + packageCollection.Update(s.p3) + packageCollection.Update(s.p4) + packageCollection.Update(s.p5) + packageCollection.Update(s.p6) + + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p2) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + + key := []byte("test") + + reflist := NewPackageRefListFromPackageList(list) + db.Put(key, reflist.Encode()) + + sl := NewSplitRefList() + err := reflistCollection.LoadComplete(sl, key) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(reflist)) - for _, p := range packages { - p.V06Plus = true - coll.Update(p) - } + list.Add(s.p6) + sl = NewSplitRefListFromPackageList(list) + err = reflistCollection.Update(sl, key) + c.Assert(err, IsNil) - listA := NewPackageList() - listA.Add(packages[0]) - listA.Add(packages[1]) - listA.Add(packages[2]) - listA.Add(packages[3]) - listA.Add(packages[7]) - - listB := NewPackageList() - listB.Add(packages[0]) - listB.Add(packages[2]) - listB.Add(packages[4]) - listB.Add(packages[5]) - listB.Add(packages[6]) - - listC := NewPackageList() - listC.Add(packages[0]) - listC.Add(packages[8]) - listC.Add(packages[9]) - - reflistA := NewPackageRefListFromPackageList(listA) - reflistB := NewPackageRefListFromPackageList(listB) - reflistC := NewPackageRefListFromPackageList(listC) - - mergeAB := reflistA.Merge(reflistB, true, false) - mergeBA := reflistB.Merge(reflistA, true, false) - mergeAC := reflistA.Merge(reflistC, true, false) - mergeBC := reflistB.Merge(reflistC, true, false) - mergeCB := reflistC.Merge(reflistB, true, false) - - c.Check(toStrSlice(mergeAB), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeBA), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeAC), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeBC), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) - c.Check(toStrSlice(mergeCB), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000"}) - - mergeABall := reflistA.Merge(reflistB, false, false) - mergeBAall := reflistB.Merge(reflistA, false, false) - mergeACall := reflistA.Merge(reflistC, false, false) - mergeBCall := reflistB.Merge(reflistC, false, false) - mergeCBall := reflistC.Merge(reflistB, false, false) - - c.Check(mergeABall, DeepEquals, mergeBAall) - c.Check(toStrSlice(mergeBAall), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000000", - "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - - c.Check(mergeBCall, Not(DeepEquals), mergeCBall) - c.Check(toStrSlice(mergeACall), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", - "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeBCall), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", - "Pi386 lib 1.0 00000000"}) - - mergeBCwithConflicts := reflistB.Merge(reflistC, false, true) - c.Check(toStrSlice(mergeBCwithConflicts), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", - "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) + sl = NewSplitRefList() + err = reflistCollection.LoadComplete(sl, key) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(NewPackageRefListFromPackageList(list))) } -func (s *PackageRefListSuite) TestFilterLatestRefs(c *C) { - packages := []*Package{ - {Name: "lib", Version: "1.0", Architecture: "i386"}, - {Name: "lib", Version: "1.2~bp1", Architecture: "i386"}, - {Name: "lib", Version: "1.2", Architecture: "i386"}, - {Name: "dpkg", Version: "1.2", Architecture: "i386"}, - {Name: "dpkg", Version: "1.3", Architecture: "i386"}, - {Name: "dpkg", Version: "1.3~bp2", Architecture: "i386"}, - {Name: "dpkg", Version: "1.5", Architecture: "i386"}, - {Name: "dpkg", Version: "1.6", Architecture: "i386"}, - } +func (s *PackageRefListSuite) TestRefListCollectionMigrate(c *C) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + reflistCollection := NewRefListCollection(db) + packageCollection := NewPackageCollection(db) + + packageCollection.Update(s.p1) + packageCollection.Update(s.p2) + packageCollection.Update(s.p3) + packageCollection.Update(s.p4) + packageCollection.Update(s.p5) + packageCollection.Update(s.p6) + + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p2) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + + key := []byte("test") + + reflist := NewPackageRefListFromPackageList(list) + db.Put(key, reflist.Encode()) + + sl := NewSplitRefList() + format, err := reflistCollection.load(sl, key) + c.Assert(err, IsNil) + c.Check(format, Equals, reflistStorageFormatInline) + + migrator := reflistCollection.NewMigration() + err = reflistCollection.LoadCompleteAndMigrate(sl, key, migrator) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(NewPackageRefListFromPackageList(list))) - rl := NewPackageList() - rl.Add(packages[0]) - rl.Add(packages[1]) - rl.Add(packages[2]) - rl.Add(packages[3]) - rl.Add(packages[4]) - rl.Add(packages[5]) - rl.Add(packages[6]) - rl.Add(packages[7]) - - result := NewPackageRefListFromPackageList(rl) - result.FilterLatestRefs() - - c.Check(toStrSlice(result), DeepEquals, - []string{"Pi386 dpkg 1.6", "Pi386 lib 1.2"}) + stats := migrator.Stats() + c.Check(stats.Reflists, Equals, 0) + c.Check(stats.Buckets, Equals, 0) + c.Check(stats.Segments, Equals, 0) + + err = migrator.Flush() + c.Assert(err, IsNil) + stats = migrator.Stats() + c.Check(stats.Reflists, Equals, 1) + c.Check(stats.Buckets, Not(Equals), 0) + c.Check(stats.Segments, Equals, stats.Segments) + + sl = NewSplitRefList() + err = reflistCollection.LoadComplete(sl, key) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(NewPackageRefListFromPackageList(list))) + + format, err = reflistCollection.load(sl, key) + c.Assert(err, IsNil) + c.Check(format, Equals, reflistStorageFormatSplit) } diff --git a/deb/remote.go b/deb/remote.go index 72deb7afda..ce54ea6365 100644 --- a/deb/remote.go +++ b/deb/remote.go @@ -73,7 +73,7 @@ type RemoteRepo struct { // Packages for json output Packages []string `codec:"-" json:",omitempty"` // "Snapshot" of current list of packages - packageRefs *PackageRefList + packageRefs *SplitRefList // Parsed archived root archiveRootURL *url.URL // Current list of packages (filled while updating mirror) @@ -171,7 +171,7 @@ func (repo *RemoteRepo) NumPackages() int { } // RefList returns package list for repo -func (repo *RemoteRepo) RefList() *PackageRefList { +func (repo *RemoteRepo) RefList() *SplitRefList { return repo.packageRefs } @@ -659,7 +659,7 @@ func (repo *RemoteRepo) FinalizeDownload(collectionFactory *CollectionFactory, p }) if err == nil { - repo.packageRefs = NewPackageRefListFromPackageList(repo.packageList) + repo.packageRefs = NewSplitRefListFromPackageList(repo.packageList) repo.packageList = nil } @@ -801,14 +801,14 @@ func (collection *RemoteRepoCollection) search(filter func(*RemoteRepo) bool, un } // Add appends new repo to collection and saves it -func (collection *RemoteRepoCollection) Add(repo *RemoteRepo) error { +func (collection *RemoteRepoCollection) Add(repo *RemoteRepo, reflistCollection *RefListCollection) error { _, err := collection.ByName(repo.Name) if err == nil { return fmt.Errorf("mirror with name %s already exists", repo.Name) } - err = collection.Update(repo) + err = collection.Update(repo, reflistCollection) if err != nil { return err } @@ -818,28 +818,26 @@ func (collection *RemoteRepoCollection) Add(repo *RemoteRepo) error { } // Update stores updated information about repo in DB -func (collection *RemoteRepoCollection) Update(repo *RemoteRepo) error { +func (collection *RemoteRepoCollection) Update(repo *RemoteRepo, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(repo.Key(), repo.Encode()) if repo.packageRefs != nil { - batch.Put(repo.RefKey(), repo.packageRefs.Encode()) + rb := reflistCollection.NewBatch(batch) + reflistCollection.UpdateInBatch(repo.packageRefs, repo.RefKey(), rb) } return batch.Write() } // LoadComplete loads additional information for remote repo -func (collection *RemoteRepoCollection) LoadComplete(repo *RemoteRepo) error { - encoded, err := collection.db.Get(repo.RefKey()) +func (collection *RemoteRepoCollection) LoadComplete(repo *RemoteRepo, reflistCollection *RefListCollection) error { + repo.packageRefs = NewSplitRefList() + err := reflistCollection.LoadComplete(repo.packageRefs, repo.RefKey()) if err == database.ErrNotFound { return nil } - if err != nil { - return err - } - repo.packageRefs = &PackageRefList{} - return repo.packageRefs.Decode(encoded) + return err } // ByName looks up repository by name diff --git a/deb/remote_test.go b/deb/remote_test.go index c331579f54..3e05ef7e56 100644 --- a/deb/remote_test.go +++ b/deb/remote_test.go @@ -52,7 +52,7 @@ func (n *NullVerifier) IsClearSigned(clearsign io.Reader) (bool, error) { type PackageListMixinSuite struct { p1, p2, p3 *Package list *PackageList - reflist *PackageRefList + reflist *SplitRefList } func (s *PackageListMixinSuite) SetUpPackages() { @@ -72,7 +72,7 @@ func (s *PackageListMixinSuite) SetUpPackages() { s.list.Add(s.p2) s.list.Add(s.p3) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromPackageList(s.list) } type RemoteRepoSuite struct { @@ -291,7 +291,7 @@ func (s *RemoteRepoSuite) TestDownload(c *C) { s.repo.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.repo.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") @@ -373,12 +373,12 @@ func (s *RemoteRepoSuite) TestDownloadWithInstaller(c *C) { s.repo.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.repo.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") - pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[1]) + pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[1]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "installer") } @@ -419,12 +419,12 @@ func (s *RemoteRepoSuite) TestDownloadWithSources(c *C) { s.repo.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.repo.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") - pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[1]) + pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[1]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "access-modifier-checker") @@ -503,7 +503,7 @@ func (s *RemoteRepoSuite) TestDownloadFlat(c *C) { s.flat.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.flat.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") @@ -593,12 +593,12 @@ func (s *RemoteRepoSuite) TestDownloadWithSourcesFlat(c *C) { s.flat.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.flat.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") - pkg, err = s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Refs[1]) + pkg, err = s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Flatten().Refs[1]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "access-modifier-checker") @@ -658,8 +658,9 @@ func (s *RemoteRepoSuite) TestDownloadWithSourcesFlat(c *C) { type RemoteRepoCollectionSuite struct { PackageListMixinSuite - db database.Storage - collection *RemoteRepoCollection + db database.Storage + collection *RemoteRepoCollection + refListCollection *RefListCollection } var _ = Suite(&RemoteRepoCollectionSuite{}) @@ -667,6 +668,7 @@ var _ = Suite(&RemoteRepoCollectionSuite{}) func (s *RemoteRepoCollectionSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.collection = NewRemoteRepoCollection(s.db) + s.refListCollection = NewRefListCollection(s.db) s.SetUpPackages() } @@ -679,8 +681,8 @@ func (s *RemoteRepoCollectionSuite) TestAddByName(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - c.Assert(s.collection.Add(repo), IsNil) - c.Assert(s.collection.Add(repo), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(repo, s.refListCollection), IsNil) + c.Assert(s.collection.Add(repo, s.refListCollection), ErrorMatches, ".*already exists") r, err := s.collection.ByName("yandex") c.Assert(err, IsNil) @@ -697,7 +699,7 @@ func (s *RemoteRepoCollectionSuite) TestByUUID(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - c.Assert(s.collection.Add(repo), IsNil) + c.Assert(s.collection.Add(repo, s.refListCollection), IsNil) r, err := s.collection.ByUUID(repo.UUID) c.Assert(err, IsNil) @@ -711,7 +713,7 @@ func (s *RemoteRepoCollectionSuite) TestByUUID(c *C) { func (s *RemoteRepoCollectionSuite) TestUpdateLoadComplete(c *C) { repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.refListCollection), IsNil) collection := NewRemoteRepoCollection(s.db) r, err := collection.ByName("yandex") @@ -719,20 +721,20 @@ func (s *RemoteRepoCollectionSuite) TestUpdateLoadComplete(c *C) { c.Assert(r.packageRefs, IsNil) repo.packageRefs = s.reflist - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.refListCollection), IsNil) collection = NewRemoteRepoCollection(s.db) r, err = collection.ByName("yandex") c.Assert(err, IsNil) c.Assert(r.packageRefs, IsNil) c.Assert(r.NumPackages(), Equals, 0) - c.Assert(s.collection.LoadComplete(r), IsNil) + c.Assert(s.collection.LoadComplete(r, s.refListCollection), IsNil) c.Assert(r.NumPackages(), Equals, 3) } func (s *RemoteRepoCollectionSuite) TestForEachAndLen(c *C) { repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - s.collection.Add(repo) + s.collection.Add(repo, s.refListCollection) count := 0 err := s.collection.ForEach(func(*RemoteRepo) error { @@ -754,10 +756,10 @@ func (s *RemoteRepoCollectionSuite) TestForEachAndLen(c *C) { func (s *RemoteRepoCollectionSuite) TestDrop(c *C) { repo1, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - s.collection.Add(repo1) + s.collection.Add(repo1, s.refListCollection) repo2, _ := NewRemoteRepo("tyndex", "http://mirror.yandex.ru/debian/", "wheezy", []string{"main"}, []string{}, false, false, false) - s.collection.Add(repo2) + s.collection.Add(repo2, s.refListCollection) r1, _ := s.collection.ByUUID(repo1.UUID) c.Check(r1, Equals, repo1) diff --git a/deb/snapshot.go b/deb/snapshot.go index f351b87f3b..a9a8655ceb 100644 --- a/deb/snapshot.go +++ b/deb/snapshot.go @@ -40,7 +40,7 @@ type Snapshot struct { NotAutomatic string ButAutomaticUpgrades string - packageRefs *PackageRefList + packageRefs *SplitRefList } // NewSnapshotFromRepository creates snapshot from current state of repository @@ -76,7 +76,7 @@ func NewSnapshotFromLocalRepo(name string, repo *LocalRepo) (*Snapshot, error) { } if snap.packageRefs == nil { - snap.packageRefs = NewPackageRefList() + snap.packageRefs = NewSplitRefList() } return snap, nil @@ -84,11 +84,13 @@ func NewSnapshotFromLocalRepo(name string, repo *LocalRepo) (*Snapshot, error) { // NewSnapshotFromPackageList creates snapshot from PackageList func NewSnapshotFromPackageList(name string, sources []*Snapshot, list *PackageList, description string) *Snapshot { - return NewSnapshotFromRefList(name, sources, NewPackageRefListFromPackageList(list), description) + sl := NewSplitRefList() + sl.Replace(NewPackageRefListFromPackageList(list)) + return NewSnapshotFromRefList(name, sources, sl, description) } -// NewSnapshotFromRefList creates snapshot from PackageRefList -func NewSnapshotFromRefList(name string, sources []*Snapshot, list *PackageRefList, description string) *Snapshot { +// NewSnapshotFromRefList creates snapshot from SplitRefList +func NewSnapshotFromRefList(name string, sources []*Snapshot, list *SplitRefList, description string) *Snapshot { sourceUUIDs := make([]string, len(sources)) for i := range sources { sourceUUIDs[i] = sources[i].UUID @@ -116,7 +118,7 @@ func (s *Snapshot) NumPackages() int { } // RefList returns list of package refs in snapshot -func (s *Snapshot) RefList() *PackageRefList { +func (s *Snapshot) RefList() *SplitRefList { return s.packageRefs } @@ -209,13 +211,13 @@ func NewSnapshotCollection(db database.Storage) *SnapshotCollection { } // Add appends new repo to collection and saves it -func (collection *SnapshotCollection) Add(snapshot *Snapshot) error { +func (collection *SnapshotCollection) Add(snapshot *Snapshot, reflistCollection *RefListCollection) error { _, err := collection.ByName(snapshot.Name) if err == nil { return fmt.Errorf("snapshot with name %s already exists", snapshot.Name) } - err = collection.Update(snapshot) + err = collection.Update(snapshot, reflistCollection) if err != nil { return err } @@ -225,26 +227,22 @@ func (collection *SnapshotCollection) Add(snapshot *Snapshot) error { } // Update stores updated information about snapshot in DB -func (collection *SnapshotCollection) Update(snapshot *Snapshot) error { +func (collection *SnapshotCollection) Update(snapshot *Snapshot, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(snapshot.Key(), snapshot.Encode()) if snapshot.packageRefs != nil { - batch.Put(snapshot.RefKey(), snapshot.packageRefs.Encode()) + rb := reflistCollection.NewBatch(batch) + reflistCollection.UpdateInBatch(snapshot.packageRefs, snapshot.RefKey(), rb) } return batch.Write() } // LoadComplete loads additional information about snapshot -func (collection *SnapshotCollection) LoadComplete(snapshot *Snapshot) error { - encoded, err := collection.db.Get(snapshot.RefKey()) - if err != nil { - return err - } - - snapshot.packageRefs = &PackageRefList{} - return snapshot.packageRefs.Decode(encoded) +func (collection *SnapshotCollection) LoadComplete(snapshot *Snapshot, reflistCollection *RefListCollection) error { + snapshot.packageRefs = NewSplitRefList() + return reflistCollection.LoadComplete(snapshot.packageRefs, snapshot.RefKey()) } func (collection *SnapshotCollection) search(filter func(*Snapshot) bool, unique bool) []*Snapshot { diff --git a/deb/snapshot_bench_test.go b/deb/snapshot_bench_test.go index c6bb94a2c6..4475ca57b5 100644 --- a/deb/snapshot_bench_test.go +++ b/deb/snapshot_bench_test.go @@ -18,10 +18,11 @@ func BenchmarkSnapshotCollectionForEach(b *testing.B) { defer db.Close() collection := NewSnapshotCollection(db) + reflistCollection := NewRefListCollection(db) for i := 0; i < count; i++ { - snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewPackageRefList(), fmt.Sprintf("Snapshot number %d", i)) - if collection.Add(snapshot) != nil { + snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewSplitRefList(), fmt.Sprintf("Snapshot number %d", i)) + if collection.Add(snapshot, reflistCollection) != nil { b.FailNow() } } @@ -47,11 +48,12 @@ func BenchmarkSnapshotCollectionByUUID(b *testing.B) { defer db.Close() collection := NewSnapshotCollection(db) + reflistCollection := NewRefListCollection(db) uuids := []string{} for i := 0; i < count; i++ { - snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewPackageRefList(), fmt.Sprintf("Snapshot number %d", i)) - if collection.Add(snapshot) != nil { + snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewSplitRefList(), fmt.Sprintf("Snapshot number %d", i)) + if collection.Add(snapshot, reflistCollection) != nil { b.FailNow() } uuids = append(uuids, snapshot.UUID) @@ -78,10 +80,11 @@ func BenchmarkSnapshotCollectionByName(b *testing.B) { defer db.Close() collection := NewSnapshotCollection(db) + reflistCollection := NewRefListCollection(db) for i := 0; i < count; i++ { - snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewPackageRefList(), fmt.Sprintf("Snapshot number %d", i)) - if collection.Add(snapshot) != nil { + snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewSplitRefList(), fmt.Sprintf("Snapshot number %d", i)) + if collection.Add(snapshot, reflistCollection) != nil { b.FailNow() } } diff --git a/deb/snapshot_test.go b/deb/snapshot_test.go index d27c422690..805ccc8e5b 100644 --- a/deb/snapshot_test.go +++ b/deb/snapshot_test.go @@ -109,6 +109,7 @@ type SnapshotCollectionSuite struct { snapshot1, snapshot2 *Snapshot snapshot3, snapshot4 *Snapshot collection *SnapshotCollection + reflistCollection *RefListCollection } var _ = Suite(&SnapshotCollectionSuite{}) @@ -116,6 +117,7 @@ var _ = Suite(&SnapshotCollectionSuite{}) func (s *SnapshotCollectionSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.collection = NewSnapshotCollection(s.db) + s.reflistCollection = NewRefListCollection(s.db) s.SetUpPackages() s.repo1, _ = NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) @@ -143,10 +145,10 @@ func (s *SnapshotCollectionSuite) TestAddByNameByUUID(c *C) { _, err := s.collection.ByName("snap1") c.Assert(err, ErrorMatches, "*.not found") - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot1), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), ErrorMatches, ".*already exists") - c.Assert(s.collection.Add(s.snapshot2), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) snapshot, err := s.collection.ByName("snap1") c.Assert(err, IsNil) @@ -167,20 +169,20 @@ func (s *SnapshotCollectionSuite) TestAddByNameByUUID(c *C) { } func (s *SnapshotCollectionSuite) TestUpdateLoadComplete(c *C) { - c.Assert(s.collection.Update(s.snapshot1), IsNil) + c.Assert(s.collection.Update(s.snapshot1, s.reflistCollection), IsNil) collection := NewSnapshotCollection(s.db) snapshot, err := collection.ByName("snap1") c.Assert(err, IsNil) c.Assert(snapshot.packageRefs, IsNil) - c.Assert(s.collection.LoadComplete(snapshot), IsNil) + c.Assert(s.collection.LoadComplete(snapshot, s.reflistCollection), IsNil) c.Assert(snapshot.NumPackages(), Equals, 3) } func (s *SnapshotCollectionSuite) TestForEachAndLen(c *C) { - s.collection.Add(s.snapshot1) - s.collection.Add(s.snapshot2) + s.collection.Add(s.snapshot1, s.reflistCollection) + s.collection.Add(s.snapshot2, s.reflistCollection) count := 0 err := s.collection.ForEach(func(*Snapshot) error { @@ -200,10 +202,10 @@ func (s *SnapshotCollectionSuite) TestForEachAndLen(c *C) { } func (s *SnapshotCollectionSuite) TestForEachSorted(c *C) { - s.collection.Add(s.snapshot2) - s.collection.Add(s.snapshot1) - s.collection.Add(s.snapshot4) - s.collection.Add(s.snapshot3) + s.collection.Add(s.snapshot2, s.reflistCollection) + s.collection.Add(s.snapshot1, s.reflistCollection) + s.collection.Add(s.snapshot4, s.reflistCollection) + s.collection.Add(s.snapshot3, s.reflistCollection) names := []string{} @@ -217,8 +219,8 @@ func (s *SnapshotCollectionSuite) TestForEachSorted(c *C) { } func (s *SnapshotCollectionSuite) TestFindByRemoteRepoSource(c *C) { - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot2), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) c.Check(s.collection.ByRemoteRepoSource(s.repo1), DeepEquals, []*Snapshot{s.snapshot1}) c.Check(s.collection.ByRemoteRepoSource(s.repo2), DeepEquals, []*Snapshot{s.snapshot2}) @@ -229,10 +231,10 @@ func (s *SnapshotCollectionSuite) TestFindByRemoteRepoSource(c *C) { } func (s *SnapshotCollectionSuite) TestFindByLocalRepoSource(c *C) { - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot2), IsNil) - c.Assert(s.collection.Add(s.snapshot3), IsNil) - c.Assert(s.collection.Add(s.snapshot4), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot3, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot4, s.reflistCollection), IsNil) c.Check(s.collection.ByLocalRepoSource(s.lrepo1), DeepEquals, []*Snapshot{s.snapshot3}) c.Check(s.collection.ByLocalRepoSource(s.lrepo2), DeepEquals, []*Snapshot{s.snapshot4}) @@ -247,11 +249,11 @@ func (s *SnapshotCollectionSuite) TestFindSnapshotSource(c *C) { snapshot4 := NewSnapshotFromRefList("snap4", []*Snapshot{s.snapshot1}, s.reflist, "desc2") snapshot5 := NewSnapshotFromRefList("snap5", []*Snapshot{snapshot3}, s.reflist, "desc3") - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot2), IsNil) - c.Assert(s.collection.Add(snapshot3), IsNil) - c.Assert(s.collection.Add(snapshot4), IsNil) - c.Assert(s.collection.Add(snapshot5), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(snapshot3, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(snapshot4, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(snapshot5, s.reflistCollection), IsNil) list := s.collection.BySnapshotSource(s.snapshot1) sorter, _ := newSnapshotSorter("name", list) @@ -263,8 +265,8 @@ func (s *SnapshotCollectionSuite) TestFindSnapshotSource(c *C) { } func (s *SnapshotCollectionSuite) TestDrop(c *C) { - s.collection.Add(s.snapshot1) - s.collection.Add(s.snapshot2) + s.collection.Add(s.snapshot1, s.reflistCollection) + s.collection.Add(s.snapshot2, s.reflistCollection) snap, _ := s.collection.ByUUID(s.snapshot1.UUID) c.Check(snap, Equals, s.snapshot1) diff --git a/go.mod b/go.mod index e1ce930c09..8b9194b718 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/DisposaBoy/JsonConfigReader v0.0.0-20201129172854-99cf318d67e7 github.com/awalterschulze/gographviz v2.0.3+incompatible github.com/cavaliergopher/grab/v3 v3.0.1 + github.com/cespare/xxhash/v2 v2.2.0 github.com/cheggaaa/pb v1.0.29 github.com/gin-gonic/gin v1.9.1 github.com/go-playground/validator/v10 v10.15.4 // indirect @@ -59,7 +60,6 @@ require ( github.com/aws/aws-sdk-go-v2/service/sts v1.23.2 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bytedance/sonic v1.10.1 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/chenzhuoyu/base64x v0.0.0-20230717121745-296ad89f973d // indirect github.com/chenzhuoyu/iasm v0.9.0 // indirect github.com/cloudflare/circl v1.3.3 // indirect