mirror of
https://github.com/project-zot/zot.git
synced 2026-06-17 21:17:58 +08:00
fix: GetNextRepository to use a list of already scanned repositories as input (#3230)
Using just the last repository is not enough: when that repository is deleted (either by GC or in some other way), GetNextRepository returns an empty string, causing the generator to be marked as completed without any errors. An alternative would have been to start over from the first repository, but this can take hours if multiple repositories need to be deleted, not to mention the processing power, I/O, and S3 load it would incur. Signed-off-by: Andrei Aaron <aaaron@luxoft.com>
This commit is contained in:
+15
-12
@@ -67,9 +67,12 @@ CleanImageStorePeriodically runs a periodic garbage collect on the ImageStore pr
|
||||
given an interval and a Scheduler.
|
||||
*/
|
||||
func (gc GarbageCollect) CleanImageStorePeriodically(interval time.Duration, sch *scheduler.Scheduler) {
|
||||
processedRepos := make(map[string]struct{})
|
||||
|
||||
generator := &GCTaskGenerator{
|
||||
imgStore: gc.imgStore,
|
||||
gc: gc,
|
||||
imgStore: gc.imgStore,
|
||||
gc: gc,
|
||||
processedRepos: processedRepos,
|
||||
}
|
||||
|
||||
sch.SubmitGenerator(generator, interval, scheduler.MediumPriority)
|
||||
@@ -798,12 +801,12 @@ and it will execute garbage collection for each repository by creating a task
|
||||
for each repository and pushing it to the task scheduler.
|
||||
*/
|
||||
type GCTaskGenerator struct {
|
||||
imgStore types.ImageStore
|
||||
gc GarbageCollect
|
||||
lastRepo string
|
||||
nextRun time.Time
|
||||
done bool
|
||||
rand *rand.Rand
|
||||
imgStore types.ImageStore
|
||||
gc GarbageCollect
|
||||
processedRepos map[string]struct{}
|
||||
nextRun time.Time
|
||||
done bool
|
||||
rand *rand.Rand
|
||||
}
|
||||
|
||||
func (gen *GCTaskGenerator) getRandomDelay() int {
|
||||
@@ -817,7 +820,7 @@ func (gen *GCTaskGenerator) Name() string {
|
||||
}
|
||||
|
||||
func (gen *GCTaskGenerator) Next() (scheduler.Task, error) {
|
||||
if gen.lastRepo == "" && gen.nextRun.IsZero() {
|
||||
if len(gen.processedRepos) == 0 && gen.nextRun.IsZero() {
|
||||
gen.rand = rand.New(rand.NewSource(time.Now().UTC().UnixNano())) //nolint: gosec
|
||||
}
|
||||
|
||||
@@ -825,7 +828,7 @@ func (gen *GCTaskGenerator) Next() (scheduler.Task, error) {
|
||||
|
||||
gen.nextRun = time.Now().Add(time.Duration(delay) * time.Second)
|
||||
|
||||
repo, err := gen.imgStore.GetNextRepository(gen.lastRepo)
|
||||
repo, err := gen.imgStore.GetNextRepository(gen.processedRepos)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -836,7 +839,7 @@ func (gen *GCTaskGenerator) Next() (scheduler.Task, error) {
|
||||
return nil, nil //nolint:nilnil
|
||||
}
|
||||
|
||||
gen.lastRepo = repo
|
||||
gen.processedRepos[repo] = struct{}{}
|
||||
|
||||
return NewGCTask(gen.imgStore, gen.gc, repo), nil
|
||||
}
|
||||
@@ -850,7 +853,7 @@ func (gen *GCTaskGenerator) IsReady() bool {
|
||||
}
|
||||
|
||||
func (gen *GCTaskGenerator) Reset() {
|
||||
gen.lastRepo = ""
|
||||
gen.processedRepos = make(map[string]struct{})
|
||||
gen.done = false
|
||||
gen.nextRun = time.Time{}
|
||||
}
|
||||
|
||||
@@ -563,6 +563,73 @@ func TestGarbageCollectAndRetentionMetaDB(t *testing.T) {
|
||||
So(repos, ShouldContain, "retention")
|
||||
})
|
||||
|
||||
Convey("gc all tags, untagged, and afterwards referrers using GetNextRepository()", func() {
|
||||
gc := gc.NewGarbageCollect(imgStore, metaDB, gc.Options{
|
||||
Delay: 1 * time.Millisecond,
|
||||
ImageRetention: config.ImageRetention{
|
||||
Delay: 1 * time.Millisecond,
|
||||
Policies: []config.RetentionPolicy{
|
||||
{
|
||||
Repositories: []string{"gc-test1", "gc-test3"},
|
||||
DeleteReferrers: true,
|
||||
DeleteUntagged: &trueVal,
|
||||
KeepTags: []config.KeepTagsPolicy{
|
||||
{
|
||||
Patterns: []string{"v1"}, // should not match any tag
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}, audit, log)
|
||||
|
||||
processedRepos := make(map[string]struct{})
|
||||
expectedRepos := []string{"gc-test1", "gc-test2", "gc-test3", "gc-test4", "retention"}
|
||||
|
||||
for i := range 10 {
|
||||
t.Logf("index %d, processed repos %v", i, processedRepos)
|
||||
|
||||
// we need to check if GetNextRepository returns each repository just once, and empty string afterwards
|
||||
repo, err := imgStore.GetNextRepository(processedRepos)
|
||||
t.Logf("index %d, repo '%s'", i, repo)
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
if i >= 5 {
|
||||
So(repo, ShouldEqual, "")
|
||||
|
||||
continue
|
||||
} else {
|
||||
So(repo, ShouldEqual, expectedRepos[i])
|
||||
}
|
||||
|
||||
processedRepos[repo] = struct{}{}
|
||||
|
||||
// run cleanRepo, this should not impact the list of calls necessary for
|
||||
// GetNextRepository to iterate through every repo
|
||||
err = gc.CleanRepo(ctx, repo)
|
||||
So(err, ShouldBeNil)
|
||||
}
|
||||
|
||||
// verify one more time the returned values
|
||||
So(len(processedRepos), ShouldEqual, len(expectedRepos))
|
||||
|
||||
for _, repo := range expectedRepos {
|
||||
So(processedRepos, ShouldContainKey, repo)
|
||||
}
|
||||
|
||||
_, _, _, err = imgStore.GetImageManifest("gc-test1", gcUntagged1.DigestStr())
|
||||
So(err, ShouldNotBeNil)
|
||||
|
||||
// now repos should get gc'ed
|
||||
repos, err := imgStore.GetRepositories()
|
||||
So(err, ShouldBeNil)
|
||||
So(repos, ShouldNotContain, "gc-test1")
|
||||
So(repos, ShouldContain, "gc-test2")
|
||||
So(repos, ShouldNotContain, "gc-test3")
|
||||
So(repos, ShouldContain, "gc-test4")
|
||||
So(repos, ShouldContain, "retention")
|
||||
})
|
||||
|
||||
Convey("gc with dry-run all tags, untagged, and afterwards referrers", func() {
|
||||
gc := gc.NewGarbageCollect(imgStore, metaDB, gc.Options{
|
||||
Delay: 1 * time.Millisecond,
|
||||
@@ -1924,6 +1991,73 @@ func TestGarbageCollectAndRetentionNoMetaDB(t *testing.T) {
|
||||
So(repos, ShouldContain, "retention")
|
||||
})
|
||||
|
||||
Convey("gc all tags, untagged, and afterwards referrers using GetNextRepository()", func() {
|
||||
gc := gc.NewGarbageCollect(imgStore, metaDB, gc.Options{
|
||||
Delay: 1 * time.Millisecond,
|
||||
ImageRetention: config.ImageRetention{
|
||||
Delay: 1 * time.Millisecond,
|
||||
Policies: []config.RetentionPolicy{
|
||||
{
|
||||
Repositories: []string{"gc-test1", "gc-test3"},
|
||||
DeleteReferrers: true,
|
||||
DeleteUntagged: &trueVal,
|
||||
KeepTags: []config.KeepTagsPolicy{
|
||||
{
|
||||
Patterns: []string{"v1"}, // should not match any tag
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}, audit, log)
|
||||
|
||||
processedRepos := make(map[string]struct{})
|
||||
expectedRepos := []string{"gc-test1", "gc-test2", "gc-test3", "gc-test4", "retention"}
|
||||
|
||||
for i := range 10 {
|
||||
t.Logf("index %d, processed repos %v", i, processedRepos)
|
||||
|
||||
// we need to check if GetNextRepository returns each repository just once, and empty string afterwards
|
||||
repo, err := imgStore.GetNextRepository(processedRepos)
|
||||
t.Logf("index %d, repo '%s'", i, repo)
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
if i >= 5 {
|
||||
So(repo, ShouldEqual, "")
|
||||
|
||||
continue
|
||||
} else {
|
||||
So(repo, ShouldEqual, expectedRepos[i])
|
||||
}
|
||||
|
||||
processedRepos[repo] = struct{}{}
|
||||
|
||||
// run cleanRepo, this should not impact the list of calls necessary for
|
||||
// GetNextRepository to iterate through every repo
|
||||
err = gc.CleanRepo(ctx, repo)
|
||||
So(err, ShouldBeNil)
|
||||
}
|
||||
|
||||
// verify one more time the returned values
|
||||
So(len(processedRepos), ShouldEqual, len(expectedRepos))
|
||||
|
||||
for _, repo := range expectedRepos {
|
||||
So(processedRepos, ShouldContainKey, repo)
|
||||
}
|
||||
|
||||
_, _, _, err = imgStore.GetImageManifest("gc-test1", gcUntagged1.DigestStr())
|
||||
So(err, ShouldNotBeNil)
|
||||
|
||||
// now repos should get gc'ed
|
||||
repos, err := imgStore.GetRepositories()
|
||||
So(err, ShouldBeNil)
|
||||
So(repos, ShouldNotContain, "gc-test1")
|
||||
So(repos, ShouldContain, "gc-test2")
|
||||
So(repos, ShouldNotContain, "gc-test3")
|
||||
So(repos, ShouldContain, "gc-test4")
|
||||
So(repos, ShouldContain, "retention")
|
||||
})
|
||||
|
||||
Convey("gc with dry-run all tags, untagged, and afterwards referrers", func() {
|
||||
gc := gc.NewGarbageCollect(imgStore, metaDB, gc.Options{
|
||||
Delay: 1 * time.Millisecond,
|
||||
|
||||
Reference in New Issue
Block a user