fix: GetNextRepository to use a list of already scanned repositories as input (#3230)

Using just the last repository is not enough: if that repository is deleted
(either by GC or some other way), GetNextRepository returns an empty string,
causing the generator to be marked completed without any errors.

An alternative would have been to start over from the first repository,
but this can take hours if multiple repositories need to be deleted,
not to mention the processing power, I/O, and S3 load this could incur.

Signed-off-by: Andrei Aaron <aaaron@luxoft.com>
This commit is contained in:
Andrei Aaron
2025-07-04 19:12:18 +03:00
committed by GitHub
parent e8ac21c001
commit 80081bb012
10 changed files with 244 additions and 75 deletions
+19 -11
View File
@@ -28,18 +28,26 @@ func EnableScrubExtension(config *config.Config, log log.Logger, storeController
log.Warn().Msg("scrub interval set to too-short interval < 2h, changing scrub duration to 2 hours and continuing.") //nolint:lll // gofumpt conflicts with lll
}
processedRepos := make(map[string]struct{})
generator := &taskGenerator{
imgStore: storeController.DefaultStore,
log: log,
imgStore: storeController.DefaultStore,
log: log,
processedRepos: processedRepos,
}
sch.SubmitGenerator(generator, config.Extensions.Scrub.Interval, scheduler.LowPriority)
if config.Storage.SubPaths != nil {
for route := range config.Storage.SubPaths {
processedRepos := make(map[string]struct{})
generator := &taskGenerator{
imgStore: storeController.SubStore[route],
log: log,
imgStore: storeController.SubStore[route],
log: log,
processedRepos: processedRepos,
}
sch.SubmitGenerator(generator, config.Extensions.Scrub.Interval, scheduler.LowPriority)
}
}
@@ -49,10 +57,10 @@ func EnableScrubExtension(config *config.Config, log log.Logger, storeController
}
type taskGenerator struct {
imgStore storageTypes.ImageStore
log log.Logger
lastRepo string
done bool
imgStore storageTypes.ImageStore
log log.Logger
processedRepos map[string]struct{}
done bool
}
func (gen *taskGenerator) Name() string {
@@ -60,7 +68,7 @@ func (gen *taskGenerator) Name() string {
}
func (gen *taskGenerator) Next() (scheduler.Task, error) {
repo, err := gen.imgStore.GetNextRepository(gen.lastRepo)
repo, err := gen.imgStore.GetNextRepository(gen.processedRepos)
if err != nil {
return nil, err
}
@@ -71,7 +79,7 @@ func (gen *taskGenerator) Next() (scheduler.Task, error) {
return nil, nil //nolint:nilnil
}
gen.lastRepo = repo
gen.processedRepos[repo] = struct{}{}
return scrub.NewTask(gen.imgStore, repo, gen.log), nil
}
@@ -85,6 +93,6 @@ func (gen *taskGenerator) IsReady() bool {
}
func (gen *taskGenerator) Reset() {
gen.lastRepo = ""
gen.processedRepos = make(map[string]struct{})
gen.done = false
}
+7 -6
View File
@@ -467,12 +467,13 @@ func TestPopulateStorageMetrics(t *testing.T) {
sch := scheduler.NewScheduler(conf, metrics, ctlr.Log)
sch.RunScheduler()
generator := &common.StorageMetricsInitGenerator{
ImgStore: ctlr.StoreController.DefaultStore,
Metrics: ctlr.Metrics,
Log: ctlr.Log,
MaxDelay: 1, // maximum delay between jobs (each job computes repo's storage size)
}
generator := common.NewStorageMetricsInitGenerator(
ctlr.StoreController.DefaultStore,
ctlr.Metrics,
ctlr.Log,
)
generator.MaxDelay = 1 // maximum delay between jobs (each job computes repo's storage size)
sch.SubmitGenerator(generator, time.Duration(0), scheduler.LowPriority)