mirror of
https://github.com/project-zot/zot.git
synced 2026-06-17 21:17:58 +08:00
feat: add zot subcommand to enable testing retention policy settings (#3449)
feat: add verify-feature retention subcommand with comprehensive testing and validation Add a `verify-feature retention` subcommand that allows users to preview and validate retention policy changes without running the actual Zot server. The command runs GC and retention tasks in dry-run mode for immediate feedback. - Run verify-feature retention standalone without starting the server - Preview retention policy decisions in dry-run mode - Configurable GC interval override via command-line flag - Optional timeout for task completion - Configurable log output (stdout or file) Basic usage: ```bash zot verify-feature retention <config-file> ``` With log file output: ```bash zot verify-feature retention -l /var/log/zot-retention-check.log <config-file> ``` With GC interval override (runs GC tasks every 30 seconds): ```bash zot verify-feature retention -i 30s <config-file> ``` With timeout (wait up to 5 minutes for tasks to complete): ```bash zot verify-feature retention -t 5m <config-file> ``` Combined flags: ```bash zot verify-feature retention -l /var/log/zot-retention-check.log -i 1m -t 10m <config-file> ``` The command supports overriding GC settings from the config: - `-i, --gc-interval`: Override the GC interval setting (applies to all storage paths including subpaths) - Refactored `RunGCTasks` from `controller.go` to be reusable - Added `checkServerRunning` validation to prevent conflicts - Implemented signal handling for graceful shutdown - Added configuration sanitization and logging - Set GCMaxSchedulerDelay programmatically (not user-configurable) Added tests for coverage on main function: - Negative test cases (no args, bad config, GC disabled, server running) - Both BoltDB and Redis - Retention enabled scenarios with complex image setups - Retention disabled scenarios - Delete referrers functionality - Subpaths configuration - GC interval override validation Run the verify-feature retention tests: ```bash go test -v ./pkg/cli/server -run TestRetentionCheck ``` Signed-off-by: Andrei Aaron <andreifdaaron@gmail.com>
This commit is contained in:
@@ -36,6 +36,10 @@ type StorageConfig struct {
|
||||
Retention ImageRetention
|
||||
StorageDriver map[string]interface{} `mapstructure:",omitempty"`
|
||||
CacheDriver map[string]interface{} `mapstructure:",omitempty"`
|
||||
|
||||
// GCMaxSchedulerDelay is the maximum random delay for GC task scheduling
|
||||
// This field is not configurable by the end user
|
||||
GCMaxSchedulerDelay time.Duration `yaml:"-"`
|
||||
}
|
||||
|
||||
type ImageRetention struct {
|
||||
|
||||
+38
-24
@@ -460,47 +460,27 @@ func (c *Controller) StartBackgroundTasks() {
|
||||
c.HTPasswdWatcher.Run()
|
||||
}
|
||||
|
||||
// Enable running garbage-collect periodically for DefaultStore
|
||||
storageConfig := c.Config.CopyStorageConfig()
|
||||
if storageConfig.GC {
|
||||
gc := gc.NewGarbageCollect(c.StoreController.DefaultStore, c.MetaDB, gc.Options{
|
||||
Delay: storageConfig.GCDelay,
|
||||
ImageRetention: storageConfig.Retention,
|
||||
}, c.Audit, c.Log)
|
||||
|
||||
gc.CleanImageStorePeriodically(storageConfig.GCInterval, c.taskScheduler)
|
||||
}
|
||||
// Run GC and retention tasks
|
||||
RunGCTasks(c.Config, c.StoreController, c.MetaDB, c.taskScheduler, c.Log, c.Audit)
|
||||
|
||||
// Enable running dedupe blobs both ways (dedupe or restore deduped blobs)
|
||||
c.StoreController.DefaultStore.RunDedupeBlobs(time.Duration(0), c.taskScheduler)
|
||||
|
||||
// Enable extensions if extension config is provided for DefaultStore
|
||||
extensionsConfig := c.Config.CopyExtensionsConfig()
|
||||
|
||||
// Always call EnableSearchExtension to ensure proper logging, even when search is disabled
|
||||
ext.EnableSearchExtension(c.Config, c.StoreController, c.MetaDB, c.taskScheduler, c.CveScanner, c.Log)
|
||||
|
||||
// Always call EnableMetricsExtension to ensure proper logging, even when metrics is disabled
|
||||
storageConfig := c.Config.CopyStorageConfig()
|
||||
ext.EnableMetricsExtension(c.Config, c.Log, storageConfig.RootDirectory)
|
||||
|
||||
// runs once if metrics are enabled & imagestore is local
|
||||
extensionsConfig := c.Config.CopyExtensionsConfig()
|
||||
if extensionsConfig.IsMetricsEnabled() && storageConfig.StorageDriver == nil {
|
||||
c.StoreController.DefaultStore.PopulateStorageMetrics(time.Duration(0), c.taskScheduler)
|
||||
}
|
||||
|
||||
if storageConfig.SubPaths != nil {
|
||||
for route, subStorageConfig := range storageConfig.SubPaths {
|
||||
// Enable running garbage-collect periodically for subImageStore
|
||||
if subStorageConfig.GC {
|
||||
gc := gc.NewGarbageCollect(c.StoreController.SubStore[route], c.MetaDB,
|
||||
gc.Options{
|
||||
Delay: subStorageConfig.GCDelay,
|
||||
ImageRetention: subStorageConfig.Retention,
|
||||
}, c.Audit, c.Log)
|
||||
|
||||
gc.CleanImageStorePeriodically(subStorageConfig.GCInterval, c.taskScheduler)
|
||||
}
|
||||
|
||||
// Enable extensions if extension config is provided for subImageStore
|
||||
ext.EnableMetricsExtension(c.Config, c.Log, subStorageConfig.RootDirectory)
|
||||
|
||||
@@ -539,6 +519,40 @@ func (c *Controller) StartBackgroundTasks() {
|
||||
ext.EnableScheduledTasks(c.Config, c.taskScheduler, c.MetaDB, c.Log) //nolint: contextcheck
|
||||
}
|
||||
|
||||
// RunGCTasks runs minimal GC and retention tasks without full controller.
|
||||
func RunGCTasks(conf *config.Config, storeController storage.StoreController, metaDB mTypes.MetaDB,
|
||||
taskScheduler *scheduler.Scheduler, logger log.Logger, audit *log.Logger,
|
||||
) {
|
||||
// Enable running garbage-collect periodically for DefaultStore
|
||||
storageConfig := conf.CopyStorageConfig()
|
||||
if storageConfig.GC {
|
||||
gc := gc.NewGarbageCollect(storeController.DefaultStore, metaDB, gc.Options{
|
||||
Delay: storageConfig.GCDelay,
|
||||
ImageRetention: storageConfig.Retention,
|
||||
MaxSchedulerDelay: storageConfig.GCMaxSchedulerDelay,
|
||||
}, audit, logger)
|
||||
|
||||
gc.CleanImageStorePeriodically(storageConfig.GCInterval, taskScheduler)
|
||||
}
|
||||
|
||||
// Handle subpaths
|
||||
if storageConfig.SubPaths != nil {
|
||||
for route, subStorageConfig := range storageConfig.SubPaths {
|
||||
// Enable running garbage-collect periodically for subImageStore
|
||||
if subStorageConfig.GC {
|
||||
gc := gc.NewGarbageCollect(storeController.SubStore[route], metaDB,
|
||||
gc.Options{
|
||||
Delay: subStorageConfig.GCDelay,
|
||||
ImageRetention: subStorageConfig.Retention,
|
||||
MaxSchedulerDelay: subStorageConfig.GCMaxSchedulerDelay,
|
||||
}, audit, logger)
|
||||
|
||||
gc.CleanImageStorePeriodically(subStorageConfig.GCInterval, taskScheduler)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type SyncOnDemand interface {
|
||||
SyncImage(ctx context.Context, repo, reference string) error
|
||||
SyncReferrers(ctx context.Context, repo string, subjectDigestStr string, referenceTypes []string) error
|
||||
|
||||
@@ -189,6 +189,19 @@ func newVerifyCmd(conf *config.Config) *cobra.Command {
|
||||
return verifyCmd
|
||||
}
|
||||
|
||||
func newVerifyFeatureCmd(conf *config.Config) *cobra.Command {
|
||||
verifyFeatureCmd := &cobra.Command{
|
||||
Use: "verify-feature",
|
||||
Short: "`verify-feature` validates specific zot features",
|
||||
Long: "`verify-feature` validates specific zot features",
|
||||
}
|
||||
|
||||
// Add subcommands
|
||||
verifyFeatureCmd.AddCommand(newVerifyFeatureRetentionCmd(conf))
|
||||
|
||||
return verifyFeatureCmd
|
||||
}
|
||||
|
||||
// "zot" - registry server.
|
||||
func NewServerRootCmd() *cobra.Command {
|
||||
showVersion := false
|
||||
@@ -220,6 +233,8 @@ func NewServerRootCmd() *cobra.Command {
|
||||
rootCmd.AddCommand(newVerifyCmd(conf))
|
||||
// "scrub"
|
||||
rootCmd.AddCommand(newScrubCmd(conf))
|
||||
// "verify-feature"
|
||||
rootCmd.AddCommand(newVerifyFeatureCmd(conf))
|
||||
// "version"
|
||||
rootCmd.Flags().BoolVarP(&showVersion, "version", "v", false, "show the version and exit")
|
||||
|
||||
|
||||
@@ -0,0 +1,244 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
zerr "zotregistry.dev/zot/v2/errors"
|
||||
"zotregistry.dev/zot/v2/pkg/api"
|
||||
"zotregistry.dev/zot/v2/pkg/api/config"
|
||||
"zotregistry.dev/zot/v2/pkg/extensions/monitoring"
|
||||
zlog "zotregistry.dev/zot/v2/pkg/log"
|
||||
"zotregistry.dev/zot/v2/pkg/meta"
|
||||
mTypes "zotregistry.dev/zot/v2/pkg/meta/types"
|
||||
"zotregistry.dev/zot/v2/pkg/scheduler"
|
||||
"zotregistry.dev/zot/v2/pkg/storage"
|
||||
)
|
||||
|
||||
func newVerifyFeatureRetentionCmd(conf *config.Config) *cobra.Command {
|
||||
// "verify-feature retention"
|
||||
retentionCheckCmd := &cobra.Command{
|
||||
Use: "retention <config>",
|
||||
Short: "`verify-feature retention` runs garbage collection and retention tasks",
|
||||
Long: "`verify-feature retention` runs garbage collection and retention tasks " +
|
||||
"based on the provided configuration.\n\n" +
|
||||
"WARNING: If retention settings are enabled in the config, the server metadata database needs to be accessed, " +
|
||||
"which means the zot server must be stopped before running this command.",
|
||||
Args: cobra.ExactArgs(1),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Use stdout by default, or the specified log file
|
||||
logFile, err := cmd.PersistentFlags().GetString("log-file")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get log-file flag: %w", err)
|
||||
}
|
||||
|
||||
logOutput := ""
|
||||
if logFile != "" {
|
||||
logOutput = logFile
|
||||
}
|
||||
logger := zlog.NewLogger("info", logOutput)
|
||||
|
||||
if len(args) > 0 {
|
||||
if err := LoadConfiguration(conf, args[0]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Do not show usage on errors which are not related to command line arguments
|
||||
cmd.SilenceUsage = true
|
||||
|
||||
// Check if GC is enabled in config
|
||||
if !conf.Storage.GC {
|
||||
logger.Error().Msgf("failed to run verify-feature retention, garbage collection is disabled in config")
|
||||
|
||||
return fmt.Errorf("%w: %s", zerr.ErrBadConfig, "verify-feature retention requires GC to be enabled")
|
||||
}
|
||||
|
||||
// Set short delay for verify-feature retention command
|
||||
conf.Storage.GCMaxSchedulerDelay = 5 * time.Millisecond
|
||||
|
||||
// Override GC interval if specified
|
||||
gcInterval, err := cmd.PersistentFlags().GetDuration("gc-interval")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get gc-interval flag: %w", err)
|
||||
}
|
||||
|
||||
if gcInterval > 0 {
|
||||
conf.Storage.GCInterval = gcInterval
|
||||
}
|
||||
|
||||
// Process subpaths for GC interval override
|
||||
if conf.Storage.SubPaths != nil {
|
||||
for route, storageConfig := range conf.Storage.SubPaths {
|
||||
storageConfig.GCMaxSchedulerDelay = 5 * time.Millisecond
|
||||
if gcInterval > 0 {
|
||||
storageConfig.GCInterval = gcInterval
|
||||
}
|
||||
conf.Storage.SubPaths[route] = storageConfig
|
||||
}
|
||||
}
|
||||
|
||||
// Log entire configuration after all overrides
|
||||
logger.Info().Interface("params", conf.Sanitize()).
|
||||
Msg("configuration settings (after applying overrides)")
|
||||
|
||||
// Check if server is running BEFORE initializing storage (to avoid database lock)
|
||||
if !isRemoteCacheEnabled(conf) {
|
||||
logger.Warn().Msg("local storage detected - the zot server must be stopped to access the storage database")
|
||||
|
||||
if err := checkServerRunning(conf, logger); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize metrics server
|
||||
metricsServer := monitoring.NewMetricsServer(false, logger)
|
||||
|
||||
// Initialize store controller
|
||||
storeController, err := storage.New(conf, nil, metricsServer, logger, nil)
|
||||
if err != nil {
|
||||
msg := "failed to initialize store controller"
|
||||
logger.Error().Err(err).Msg(msg)
|
||||
|
||||
return fmt.Errorf("%s: %w", msg, err)
|
||||
}
|
||||
|
||||
// Initialize MetaDB only if retention policies are configured
|
||||
var metaDB mTypes.MetaDB
|
||||
if conf.IsRetentionEnabled() {
|
||||
// Enable retention dry-run mode only when retention is enabled
|
||||
conf.Storage.Retention.DryRun = true
|
||||
|
||||
// Process subpaths for retention dry-run
|
||||
if conf.Storage.SubPaths != nil {
|
||||
for route, storageConfig := range conf.Storage.SubPaths {
|
||||
storageConfig.Retention.DryRun = true
|
||||
conf.Storage.SubPaths[route] = storageConfig
|
||||
}
|
||||
}
|
||||
|
||||
driver, err := meta.New(conf.Storage.StorageConfig, logger)
|
||||
if err != nil {
|
||||
msg := "failed to initialize metadata database"
|
||||
logger.Error().Err(err).Msg(msg)
|
||||
|
||||
return fmt.Errorf("%s: %w", msg, err)
|
||||
}
|
||||
|
||||
err = meta.ParseStorage(driver, storeController, logger)
|
||||
if err != nil {
|
||||
msg := "failed to parse storage"
|
||||
logger.Error().Err(err).Msg(msg)
|
||||
|
||||
return fmt.Errorf("%s: %w", msg, err)
|
||||
}
|
||||
|
||||
metaDB = driver
|
||||
logger.Info().Msg("retention policies are configured - retention rules will be applied")
|
||||
} else {
|
||||
metaDB = nil
|
||||
logger.Info().Msg("no retention policies are configured - garbage collection will run with default settings")
|
||||
}
|
||||
|
||||
// Initialize scheduler
|
||||
taskScheduler := scheduler.NewScheduler(conf, metricsServer, logger)
|
||||
taskScheduler.RunScheduler()
|
||||
defer taskScheduler.Shutdown()
|
||||
|
||||
logger.Info().Msg("garbage collection and retention tasks will be submitted to the scheduler")
|
||||
|
||||
// Run GC and retention tasks
|
||||
api.RunGCTasks(conf, storeController, metaDB, taskScheduler, logger, nil)
|
||||
|
||||
// Wait for tasks to complete with optional timeout
|
||||
timeout, err := cmd.PersistentFlags().GetDuration("timeout")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get timeout flag: %w", err)
|
||||
}
|
||||
|
||||
var waitCtx context.Context
|
||||
var cancel context.CancelFunc
|
||||
|
||||
if timeout > 0 {
|
||||
logger.Info().Dur("timeout", timeout).Msg("waiting for garbage collection tasks to complete...")
|
||||
waitCtx, cancel = context.WithTimeout(context.Background(), timeout)
|
||||
} else {
|
||||
logger.Info().Msg("waiting for garbage collection tasks to complete indefinitely " +
|
||||
"(can be interrupted by SIGINT/SIGTERM)...")
|
||||
waitCtx, cancel = context.WithCancel(cmd.Context())
|
||||
}
|
||||
defer cancel()
|
||||
|
||||
// Set up signal handling for graceful shutdown
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
||||
|
||||
// Wait for either context cancellation or signal
|
||||
select {
|
||||
case <-waitCtx.Done():
|
||||
logger.Info().Msg("retention check completed successfully")
|
||||
case sig := <-sigChan:
|
||||
logger.Info().Str("signal", sig.String()).Msg("received interrupt signal, stopping retention check")
|
||||
logger.Info().Msg("retention check stopped gracefully")
|
||||
}
|
||||
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
retentionCheckCmd.PersistentFlags().StringP("log-file", "l", "", "log file location (default: stdout)")
|
||||
retentionCheckCmd.PersistentFlags().DurationP("gc-interval", "i", 0,
|
||||
"override GC interval (default: use config value)")
|
||||
retentionCheckCmd.PersistentFlags().DurationP("timeout", "t", 0,
|
||||
"timeout for waiting for tasks to complete (default: wait indefinitely)")
|
||||
|
||||
return retentionCheckCmd
|
||||
}
|
||||
|
||||
// checkServerRunning checks if a Zot server is already running on the configured address/port.
|
||||
func checkServerRunning(conf *config.Config, logger zlog.Logger) error {
|
||||
req, err := http.NewRequestWithContext(context.Background(),
|
||||
http.MethodGet,
|
||||
fmt.Sprintf("http://%s/v2", net.JoinHostPort(conf.HTTP.Address, conf.HTTP.Port)),
|
||||
nil)
|
||||
if err != nil {
|
||||
msg := "failed to create http request"
|
||||
logger.Error().Err(err).Msg(msg)
|
||||
|
||||
return fmt.Errorf("%s: %w", msg, err)
|
||||
}
|
||||
|
||||
response, err := http.DefaultClient.Do(req)
|
||||
if err == nil {
|
||||
response.Body.Close()
|
||||
logger.Warn().Err(zerr.ErrServerIsRunning).
|
||||
Msg("server is running, in order to perform the verify-feature retention command the server should be shut down")
|
||||
|
||||
return zerr.ErrServerIsRunning
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// isRemoteCacheEnabled checks if the remote cache is enabled for the global and subpaths storage configs.
|
||||
func isRemoteCacheEnabled(conf *config.Config) bool {
|
||||
if conf == nil || !conf.Storage.RemoteCache {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, subStorageConfig := range conf.Storage.SubPaths {
|
||||
if !subStorageConfig.RemoteCache {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
+21
-4
@@ -37,6 +37,10 @@ type Options struct {
|
||||
// will garbage collect blobs older than Delay
|
||||
Delay time.Duration
|
||||
|
||||
// MaxSchedulerDelay is the maximum random delay for GC task scheduling
|
||||
// Defaults to 30 seconds if not specified
|
||||
MaxSchedulerDelay time.Duration
|
||||
|
||||
ImageRetention config.ImageRetention
|
||||
}
|
||||
|
||||
@@ -69,10 +73,16 @@ given an interval and a Scheduler.
|
||||
func (gc GarbageCollect) CleanImageStorePeriodically(interval time.Duration, sch *scheduler.Scheduler) {
|
||||
processedRepos := make(map[string]struct{})
|
||||
|
||||
maxDelay := gc.opts.MaxSchedulerDelay
|
||||
if maxDelay <= 0 {
|
||||
maxDelay = 30 * time.Second // default value
|
||||
}
|
||||
|
||||
generator := &GCTaskGenerator{
|
||||
imgStore: gc.imgStore,
|
||||
gc: gc,
|
||||
processedRepos: processedRepos,
|
||||
maxDelay: maxDelay,
|
||||
}
|
||||
|
||||
sch.SubmitGenerator(generator, interval, scheduler.MediumPriority)
|
||||
@@ -808,12 +818,19 @@ type GCTaskGenerator struct {
|
||||
nextRun time.Time
|
||||
done bool
|
||||
rand *rand.Rand
|
||||
maxDelay time.Duration
|
||||
}
|
||||
|
||||
func (gen *GCTaskGenerator) getRandomDelay() int {
|
||||
maxDelay := 30
|
||||
func (gen *GCTaskGenerator) getRandomDelay() time.Duration {
|
||||
maxDelay := gen.maxDelay
|
||||
if maxDelay <= 0 {
|
||||
maxDelay = 30 * time.Second // default fallback
|
||||
}
|
||||
|
||||
return gen.rand.Intn(maxDelay)
|
||||
// Generate random delay with nanosecond precision by working directly with
|
||||
// time.Duration's internal representation (nanoseconds as int64).
|
||||
// This supports sub-second delays (milliseconds, microseconds).
|
||||
return time.Duration(gen.rand.Int63n(int64(maxDelay)))
|
||||
}
|
||||
|
||||
func (gen *GCTaskGenerator) Name() string {
|
||||
@@ -827,7 +844,7 @@ func (gen *GCTaskGenerator) Next() (scheduler.Task, error) {
|
||||
|
||||
delay := gen.getRandomDelay()
|
||||
|
||||
gen.nextRun = time.Now().Add(time.Duration(delay) * time.Second)
|
||||
gen.nextRun = time.Now().Add(delay)
|
||||
|
||||
repo, err := gen.imgStore.GetNextRepository(gen.processedRepos)
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user