mirror of
https://github.com/project-zot/zot.git
synced 2026-06-18 05:28:07 +08:00
dfb5d1df54
* fix: make config read/write thread safe and fix some other similar issues 1. The config has a lock, and safe methods to update and read the attributes 2. The config has methods to retrieve copies of specific attributes, such as the extensions config, the auth config, and the authz config. These are needed, as the config object may mutate in the middle of an auth/authz request, and we avoid partial configuration being applied for that request. 3. Fix an issue with the monitoring server not stopping when the controller is shut down. 4. Fix an issue with the HTPasswdWatcher not stopping when the background tasks are supposed to finish. 5. Fix some tests using hardcoded ports. Moved some of the methods which were on the main config to the auth, access control and extension configs Signed-off-by: Andrei Aaron <andreifdaaron@gmail.com>
297 lines
7.8 KiB
Go
297 lines
7.8 KiB
Go
//go:build metrics
|
|
// +build metrics
|
|
|
|
package monitoring
|
|
|
|
import (
|
|
"path"
|
|
"time"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/promauto"
|
|
|
|
"zotregistry.dev/zot/v2/errors"
|
|
"zotregistry.dev/zot/v2/pkg/log"
|
|
)
|
|
|
|
// metricsNamespace is the Namespace value set on every Prometheus metric declared in this file.
const metricsNamespace = "zot"
|
|
|
|
var (
|
|
httpConnRequests = promauto.NewCounterVec( //nolint: gochecknoglobals
|
|
prometheus.CounterOpts{
|
|
Namespace: metricsNamespace,
|
|
Name: "http_requests_total",
|
|
Help: "Total number of http request in zot",
|
|
},
|
|
[]string{"method", "code"},
|
|
)
|
|
httpRepoLatency = promauto.NewSummaryVec( //nolint: gochecknoglobals
|
|
prometheus.SummaryOpts{
|
|
Namespace: metricsNamespace,
|
|
Name: "http_repo_latency_seconds",
|
|
Help: "Latency of serving HTTP requests",
|
|
},
|
|
[]string{"repo"},
|
|
)
|
|
httpMethodLatency = promauto.NewHistogramVec( //nolint: gochecknoglobals
|
|
prometheus.HistogramOpts{
|
|
Namespace: metricsNamespace,
|
|
Name: "http_method_latency_seconds",
|
|
Help: "Latency of serving HTTP requests",
|
|
Buckets: GetDefaultBuckets(),
|
|
},
|
|
[]string{"method"},
|
|
)
|
|
repoStorageBytes = promauto.NewGaugeVec( //nolint: gochecknoglobals
|
|
prometheus.GaugeOpts{
|
|
Namespace: metricsNamespace,
|
|
Name: "repo_storage_bytes",
|
|
Help: "Storage used per zot repo",
|
|
},
|
|
[]string{"repo"},
|
|
)
|
|
uploadCounter = promauto.NewCounterVec( //nolint: gochecknoglobals
|
|
prometheus.CounterOpts{
|
|
Namespace: metricsNamespace,
|
|
Name: "repo_uploads_total",
|
|
Help: "Total number times an image was uploaded",
|
|
},
|
|
[]string{"repo"},
|
|
)
|
|
downloadCounter = promauto.NewCounterVec( //nolint: gochecknoglobals
|
|
prometheus.CounterOpts{
|
|
Namespace: metricsNamespace,
|
|
Name: "repo_downloads_total",
|
|
Help: "Total number times an image was downloaded",
|
|
},
|
|
[]string{"repo"},
|
|
)
|
|
serverInfo = promauto.NewGaugeVec( //nolint: gochecknoglobals
|
|
prometheus.GaugeOpts{
|
|
Namespace: metricsNamespace,
|
|
Name: "info",
|
|
Help: "Server general information",
|
|
},
|
|
[]string{"commit", "binaryType", "goVersion", "version"},
|
|
)
|
|
storageLockLatency = promauto.NewHistogramVec( //nolint: gochecknoglobals
|
|
prometheus.HistogramOpts{
|
|
Namespace: metricsNamespace,
|
|
Name: "storage_lock_latency_seconds",
|
|
Help: "Latency of serving HTTP requests",
|
|
Buckets: GetStorageLatencyBuckets(),
|
|
},
|
|
[]string{"storageName", "lockType"},
|
|
)
|
|
schedulerGenerators = promauto.NewCounter( //nolint: gochecknoglobals
|
|
prometheus.CounterOpts{
|
|
Namespace: metricsNamespace,
|
|
Name: "scheduler_generators_total",
|
|
Help: "Total number of generators registered in scheduler",
|
|
},
|
|
)
|
|
schedulerGeneratorsStatus = promauto.NewGaugeVec( //nolint: gochecknoglobals
|
|
prometheus.GaugeOpts{
|
|
Namespace: metricsNamespace,
|
|
Name: "scheduler_generators_status",
|
|
Help: "Scheduler generators by priority & state",
|
|
},
|
|
[]string{"priority", "state"},
|
|
)
|
|
schedulerNumWorkers = promauto.NewGauge( //nolint: gochecknoglobals
|
|
prometheus.GaugeOpts{ //nolint: promlinter
|
|
Namespace: metricsNamespace,
|
|
Name: "scheduler_workers_total",
|
|
Help: "Total number of available workers to perform scheduler tasks",
|
|
},
|
|
)
|
|
schedulerWorkers = promauto.NewGaugeVec( //nolint: gochecknoglobals
|
|
prometheus.GaugeOpts{
|
|
Namespace: metricsNamespace,
|
|
Name: "scheduler_workers",
|
|
Help: "Scheduler workers state",
|
|
},
|
|
[]string{"state"},
|
|
)
|
|
schedulerTasksQueue = promauto.NewGaugeVec( //nolint: gochecknoglobals
|
|
prometheus.GaugeOpts{
|
|
Namespace: metricsNamespace,
|
|
Name: "scheduler_tasksqueue_length",
|
|
Help: "Number of tasks waiting in the queue to pe processed by scheduler workers",
|
|
},
|
|
[]string{"priority"},
|
|
)
|
|
workersTasksDuration = promauto.NewHistogramVec( //nolint: gochecknoglobals
|
|
prometheus.HistogramOpts{
|
|
Namespace: metricsNamespace,
|
|
Name: "scheduler_workers_tasks_duration_seconds",
|
|
Help: "How long it takes for a worker to execute a task",
|
|
Buckets: GetDefaultBuckets(),
|
|
},
|
|
[]string{"name"},
|
|
)
|
|
)
|
|
|
|
type metricServer struct {
|
|
enabled bool
|
|
log log.Logger
|
|
}
|
|
|
|
// Stop gracefully shuts down the metrics server (no-op for this implementation).
|
|
func (ms *metricServer) Stop() {
|
|
// This is a no-op implementation for the disabled metrics server
|
|
}
|
|
|
|
// GetDefaultBuckets returns the histogram bucket upper bounds used by the
// httpMethodLatency and workersTasksDuration histograms. A new slice is
// returned on every call.
func GetDefaultBuckets() []float64 {
	buckets := []float64{0.05, 0.5, 1, 5, 30, 60, 600}

	return buckets
}
|
|
|
|
// GetStorageLatencyBuckets returns the histogram bucket upper bounds used by
// the storageLockLatency histogram. A new slice is returned on every call.
func GetStorageLatencyBuckets() []float64 {
	buckets := []float64{0.001, 0.01, 0.1, 1, 5, 10, 15, 30, 60}

	return buckets
}
|
|
|
|
func NewMetricsServer(enabled bool, log log.Logger) MetricServer {
|
|
return &metricServer{
|
|
enabled: enabled,
|
|
log: log,
|
|
}
|
|
}
|
|
|
|
// implementing the MetricServer interface.
|
|
func (ms *metricServer) SendMetric(mfunc interface{}) {
|
|
if ms.enabled {
|
|
mfn, ok := mfunc.(func())
|
|
if !ok {
|
|
ms.log.Error().Err(errors.ErrInvalidMetric).
|
|
Msgf("failed to cast type, expected '%T' but got '%T'", func() {}, mfunc)
|
|
|
|
return
|
|
}
|
|
|
|
mfn()
|
|
}
|
|
}
|
|
|
|
func (ms *metricServer) ForceSendMetric(mfunc interface{}) {
|
|
mfn, ok := mfunc.(func())
|
|
if !ok {
|
|
ms.log.Error().Err(errors.ErrInvalidMetric).
|
|
Msgf("failed to cast type, expected '%T' but got '%T'", func() {}, mfunc)
|
|
|
|
return
|
|
}
|
|
|
|
mfn()
|
|
}
|
|
|
|
func (ms *metricServer) ReceiveMetrics() interface{} {
|
|
return nil
|
|
}
|
|
|
|
func (ms *metricServer) IsEnabled() bool {
|
|
return ms.enabled
|
|
}
|
|
|
|
func IncHTTPConnRequests(ms MetricServer, lvalues ...string) {
|
|
ms.SendMetric(func() {
|
|
httpConnRequests.WithLabelValues(lvalues...).Inc()
|
|
})
|
|
}
|
|
|
|
func ObserveHTTPRepoLatency(ms MetricServer, path string, latency time.Duration) {
|
|
ms.SendMetric(func() {
|
|
match := re.FindStringSubmatch(path)
|
|
|
|
if len(match) > 1 {
|
|
httpRepoLatency.WithLabelValues(match[1]).Observe(latency.Seconds())
|
|
} else {
|
|
httpRepoLatency.WithLabelValues("N/A").Observe(latency.Seconds())
|
|
}
|
|
})
|
|
}
|
|
|
|
func ObserveHTTPMethodLatency(ms MetricServer, method string, latency time.Duration) {
|
|
ms.SendMetric(func() {
|
|
httpMethodLatency.WithLabelValues(method).Observe(latency.Seconds())
|
|
})
|
|
}
|
|
|
|
func IncDownloadCounter(ms MetricServer, repo string) {
|
|
ms.SendMetric(func() {
|
|
downloadCounter.WithLabelValues(repo).Inc()
|
|
})
|
|
}
|
|
|
|
func SetStorageUsage(ms MetricServer, rootDir, repo string) {
|
|
ms.ForceSendMetric(func() {
|
|
dir := path.Join(rootDir, repo)
|
|
|
|
repoSize, err := GetDirSize(dir)
|
|
if err == nil {
|
|
repoStorageBytes.WithLabelValues(repo).Set(float64(repoSize))
|
|
}
|
|
})
|
|
}
|
|
|
|
func IncUploadCounter(ms MetricServer, repo string) {
|
|
ms.SendMetric(func() {
|
|
uploadCounter.WithLabelValues(repo).Inc()
|
|
})
|
|
}
|
|
|
|
func SetServerInfo(ms MetricServer, lvalues ...string) {
|
|
ms.ForceSendMetric(func() {
|
|
serverInfo.WithLabelValues(lvalues...).Set(0)
|
|
})
|
|
}
|
|
|
|
func ObserveStorageLockLatency(ms MetricServer, latency time.Duration, storageName, lockType string) {
|
|
ms.SendMetric(func() {
|
|
storageLockLatency.WithLabelValues(storageName, lockType).Observe(latency.Seconds())
|
|
})
|
|
}
|
|
|
|
func IncSchedulerGenerators(ms MetricServer) {
|
|
ms.ForceSendMetric(func() {
|
|
schedulerGenerators.Inc()
|
|
})
|
|
}
|
|
|
|
func SetSchedulerGenerators(ms MetricServer, gen map[string]map[string]uint64) {
|
|
ms.SendMetric(func() {
|
|
for priority, states := range gen {
|
|
for state, value := range states {
|
|
schedulerGeneratorsStatus.WithLabelValues(priority, state).Set(float64(value))
|
|
}
|
|
}
|
|
})
|
|
}
|
|
|
|
func SetSchedulerNumWorkers(ms MetricServer, total int) {
|
|
ms.SendMetric(func() {
|
|
schedulerNumWorkers.Set(float64(total))
|
|
})
|
|
}
|
|
|
|
func SetSchedulerWorkers(ms MetricServer, w map[string]int) {
|
|
ms.SendMetric(func() {
|
|
for state, value := range w {
|
|
schedulerWorkers.WithLabelValues(state).Set(float64(value))
|
|
}
|
|
})
|
|
}
|
|
|
|
func SetSchedulerTasksQueue(ms MetricServer, tq map[string]int) {
|
|
ms.SendMetric(func() {
|
|
for priority, value := range tq {
|
|
schedulerTasksQueue.WithLabelValues(priority).Set(float64(value))
|
|
}
|
|
})
|
|
}
|
|
|
|
func ObserveWorkersTasksDuration(ms MetricServer, taskName string, duration time.Duration) {
|
|
ms.SendMetric(func() {
|
|
workersTasksDuration.WithLabelValues(taskName).Observe(duration.Seconds())
|
|
})
|
|
}
|