mirror of
https://github.com/project-zot/zot.git
synced 2026-06-17 21:17:58 +08:00
feat(metrics): add Prometheus GC metrics (#3863)
* feat(metrics): add Prometheus GC metrics Track garbage collection activity with three new metrics: - zot_gc_runs_total (counter, label: error) — GC run count - zot_gc_duration_seconds (summary) — GC run duration - zot_gc_deleted_total (counter, label: type) — items deleted by type: blob, manifest, upload MetricServer is added to GarbageCollect and wired through all callers (controller, verify-feature retention, tests). Signed-off-by: Benoit Tigeot <benoit.tigeot@lifen.fr> * fix(test): add missing metrics var in GCS GC tests TestGCSGarbageCollectImageIndex and TestGCSGarbageCollectChainedImageIndexes were missing the metrics variable required by NewGarbageCollect after the MetricServer parameter was added. Signed-off-by: Benoit Tigeot <benoit.tigeot@lifen.fr> * fix(test): add defer metrics.Stop() in GC tests Prevent goroutine/port leaks by stopping MetricsServer in storage_test.go (3 functions) and gcs_test.go (also add missing metrics declaration in TestGCSGarbageCollectImageManifest). Signed-off-by: Benoit Tigeot <benoit.tigeot@lifen.fr> * fix(test): cover `CleanRepo` error path Add test that exercises the error branch in `CleanRepo` where `cleanRepo` fails, covering the metrics calls and log lines flagged by Codecov. Signed-off-by: Benoit Tigeot <benoit.tigeot@lifen.fr> * test: Cover GC error paths for codecov Add three tests in gc_internal_test.go to cover previously untested error branches in `removeBlobUploads` and `removeUnreferencedBlobs`: `ListBlobUploads` failure, `addIndexBlobsToReferences` failure, and `PathNotFoundError` from `GetAllBlobs`. Signed-off-by: Benoit Tigeot <benoit.tigeot@lifen.fr> * test(gc): cover remaining error paths Cover `StatBlobUpload`, `digest.Validate()`, `isBlobOlderThan`, and `CleanupRepo` error branches in `removeBlobUploads` and `removeUnreferencedBlobs`. `removeUnreferencedBlobs` now at 100% coverage, `removeBlobUploads` from 78.3% to 91.3%. Signed-off-by: Benoit Tigeot <benoit.tigeot@lifen.fr> * test: cover `sanityChecks` label name mismatch Try to avoid -0.09% coverage regression on `minimal.go` by exercising the uncovered branch in `sanityChecks` where label names have correct count but wrong values. Signed-off-by: Benoit Tigeot <benoit.tigeot@lifen.fr> * test(gc): exercise real GC path in metrics test TestGCMetrics was calling metric helpers directly instead of running actual garbage collection, so it couldn't catch wiring regressions where `CleanRepo` stops recording metrics. Now uploads an orphaned blob and runs `gc.CleanRepo` end-to-end, verifying metrics appear on the Prometheus endpoint. Suggestion from Copilot: https://github.com/project-zot/zot/pull/3863#discussion_r3129324719 Signed-off-by: Benoit Tigeot <benoit.tigeot@lifen.fr> * fix(gc): skip deletion metrics when DryRun is enabled https://github.com/project-zot/zot/pull/3863#discussion_r3129324684 Signed-off-by: Benoit Tigeot <benoit.tigeot@lifen.fr> * fix(test): stop leaked MetricsServer goroutines in GCS tests https://github.com/project-zot/zot/pull/3863#discussion_r3129324657 Signed-off-by: Benoit Tigeot <benoit.tigeot@lifen.fr> * refactor(test): drop unnecessary zlog import alias Signed-off-by: Benoit Tigeot <benoit.tigeot@lifen.fr> * fix(monitoring): expose metric types outside build tag `MetricsCopy` and related types were only visible under `\!metrics`, causing a typecheck failure when golangci-lint runs with `-tags metrics`. Moving the type definitions to `common.go` makes them unconditionally available. Signed-off-by: Benoit Tigeot <benoit.tigeot@lifen.fr> * fix(monitoring): remove extra blank line for gci Signed-off-by: Benoit Tigeot <benoit.tigeot@lifen.fr> * test(gc): cover both dry-run and real deletion metrics And fix issue with build tag with metrics Signed-off-by: Benoit Tigeot <benoit.tigeot@lifen.fr> * Satisfy testpackage linter for gc metrics test The `testpackage` linter allows `package gc` only in files named `*_internal_test.go`; rename to follow that convention. Signed-off-by: Benoit Tigeot <benoit.tigeot@lifen.fr> --------- Signed-off-by: Benoit Tigeot <benoit.tigeot@lifen.fr>
This commit is contained in:
@@ -557,7 +557,7 @@ func (c *Controller) StartBackgroundTasks() {
|
||||
}
|
||||
|
||||
// Run GC and retention tasks
|
||||
RunGCTasks(c.Config, c.StoreController, c.MetaDB, c.taskScheduler, c.Log, c.Audit)
|
||||
RunGCTasks(c.Config, c.StoreController, c.MetaDB, c.taskScheduler, c.Log, c.Audit, c.Metrics)
|
||||
|
||||
// Enable running dedupe blobs both ways (dedupe or restore deduped blobs)
|
||||
c.StoreController.DefaultStore.RunDedupeBlobs(time.Duration(0), c.taskScheduler)
|
||||
@@ -617,7 +617,7 @@ func (c *Controller) StartBackgroundTasks() {
|
||||
|
||||
// RunGCTasks runs minimal GC and retention tasks without full controller.
|
||||
func RunGCTasks(conf *config.Config, storeController storage.StoreController, metaDB mTypes.MetaDB,
|
||||
taskScheduler *scheduler.Scheduler, logger log.Logger, audit *log.Logger,
|
||||
taskScheduler *scheduler.Scheduler, logger log.Logger, audit *log.Logger, metrics monitoring.MetricServer,
|
||||
) {
|
||||
// Enable running garbage-collect periodically for DefaultStore
|
||||
storageConfig := conf.CopyStorageConfig()
|
||||
@@ -626,7 +626,7 @@ func RunGCTasks(conf *config.Config, storeController storage.StoreController, me
|
||||
Delay: storageConfig.GCDelay,
|
||||
ImageRetention: storageConfig.Retention,
|
||||
MaxSchedulerDelay: storageConfig.GCMaxSchedulerDelay,
|
||||
}, audit, logger)
|
||||
}, audit, logger, metrics)
|
||||
|
||||
gc.CleanImageStorePeriodically(storageConfig.GCInterval, taskScheduler)
|
||||
}
|
||||
@@ -641,7 +641,7 @@ func RunGCTasks(conf *config.Config, storeController storage.StoreController, me
|
||||
Delay: subStorageConfig.GCDelay,
|
||||
ImageRetention: subStorageConfig.Retention,
|
||||
MaxSchedulerDelay: subStorageConfig.GCMaxSchedulerDelay,
|
||||
}, audit, logger)
|
||||
}, audit, logger, metrics)
|
||||
|
||||
gc.CleanImageStorePeriodically(subStorageConfig.GCInterval, taskScheduler)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user