fix: more accurate storage metrics after zot restart (#1972)

Signed-off-by: Alexei Dodon <adodon@cisco.com>
This commit is contained in:
Alexei Dodon
2023-11-01 18:09:21 +02:00
committed by GitHub
parent 3e6053e1db
commit a79d79a03a
10 changed files with 195 additions and 20 deletions
+72
View File
@@ -6,8 +6,10 @@ import (
"encoding/json"
"errors"
"fmt"
"math/rand"
"path"
"strings"
"time"
"github.com/docker/distribution/registry/storage/driver"
godigest "github.com/opencontainers/go-digest"
@@ -18,6 +20,7 @@ import (
zerr "zotregistry.io/zot/errors"
zcommon "zotregistry.io/zot/pkg/common"
"zotregistry.io/zot/pkg/extensions/monitoring"
zlog "zotregistry.io/zot/pkg/log"
"zotregistry.io/zot/pkg/scheduler"
storageConstants "zotregistry.io/zot/pkg/storage/constants"
@@ -1052,3 +1055,72 @@ func (dt *dedupeTask) DoWork(ctx context.Context) error {
return err
}
type StorageMetricsInitGenerator struct {
ImgStore storageTypes.ImageStore
done bool
Metrics monitoring.MetricServer
lastRepo string
nextRun time.Time
rand *rand.Rand
Log zlog.Logger
MaxDelay int
}
func (gen *StorageMetricsInitGenerator) Next() (scheduler.Task, error) {
if gen.lastRepo == "" && gen.nextRun.IsZero() {
gen.rand = rand.New(rand.NewSource(time.Now().UTC().UnixNano())) //nolint: gosec
}
delay := gen.rand.Intn(gen.MaxDelay)
gen.nextRun = time.Now().Add(time.Duration(delay) * time.Second)
repo, err := gen.ImgStore.GetNextRepository(gen.lastRepo)
if err != nil {
return nil, err
}
gen.Log.Debug().Str("repo", repo).Int("randomDelay", delay).Msg("StorageMetricsInitGenerator")
if repo == "" {
gen.done = true
return nil, nil
}
gen.lastRepo = repo
return NewStorageMetricsTask(gen.ImgStore, gen.Metrics, repo), nil
}
func (gen *StorageMetricsInitGenerator) IsDone() bool {
return gen.done
}
func (gen *StorageMetricsInitGenerator) IsReady() bool {
return time.Now().After(gen.nextRun)
}
func (gen *StorageMetricsInitGenerator) Reset() {
gen.lastRepo = ""
gen.done = false
gen.nextRun = time.Time{}
}
type smTask struct {
imgStore storageTypes.ImageStore
metrics monitoring.MetricServer
repo string
}
func NewStorageMetricsTask(imgStore storageTypes.ImageStore, metrics monitoring.MetricServer, repo string,
) *smTask {
return &smTask{imgStore, metrics, repo}
}
func (smt *smTask) DoWork(ctx context.Context) error {
// run task
monitoring.SetStorageUsage(smt.metrics, smt.imgStore.RootDir(), smt.repo)
return nil
}
+20 -2
View File
@@ -488,7 +488,10 @@ func (is *ImageStore) PutImageManifest(repo, reference, mediaType string, //noli
is.Unlock(&lockLatency)
if err == nil {
monitoring.SetStorageUsage(is.metrics, is.rootDir, repo)
if is.storeDriver.Name() == storageConstants.LocalStorageDriverName {
monitoring.SetStorageUsage(is.metrics, is.rootDir, repo)
}
monitoring.IncUploadCounter(is.metrics, repo)
}
}()
@@ -621,7 +624,11 @@ func (is *ImageStore) DeleteImageManifest(repo, reference string, detectCollisio
}
func (is *ImageStore) deleteImageManifest(repo, reference string, detectCollisions bool) error {
defer monitoring.SetStorageUsage(is.metrics, is.rootDir, repo)
defer func() {
if is.storeDriver.Name() == storageConstants.LocalStorageDriverName {
monitoring.SetStorageUsage(is.metrics, is.rootDir, repo)
}
}()
index, err := common.GetIndex(is, repo, is.log)
if err != nil {
@@ -1929,6 +1936,17 @@ func (is *ImageStore) RunDedupeBlobs(interval time.Duration, sch *scheduler.Sche
sch.SubmitGenerator(generator, interval, scheduler.MediumPriority)
}
func (is *ImageStore) PopulateStorageMetrics(interval time.Duration, sch *scheduler.Scheduler) {
generator := &common.StorageMetricsInitGenerator{
ImgStore: is,
Metrics: is.metrics,
Log: is.log,
MaxDelay: 15, //nolint:gomnd
}
sch.SubmitGenerator(generator, interval, scheduler.LowPriority)
}
type blobStream struct {
reader io.Reader
closer io.Closer
+1
View File
@@ -61,6 +61,7 @@ type ImageStore interface { //nolint:interfacebloat
RunDedupeForDigest(digest godigest.Digest, dedupe bool, duplicateBlobs []string) error
GetNextDigestWithBlobPaths(repos []string, lastDigests []godigest.Digest) (godigest.Digest, []string, error)
GetAllBlobs(repo string) ([]string, error)
PopulateStorageMetrics(interval time.Duration, sch *scheduler.Scheduler)
}
type Driver interface { //nolint:interfacebloat