From 41e10d4fe98bc27cbd8cbf2d8ddc8a1d629a7b9f Mon Sep 17 00:00:00 2001 From: Andrei Aaron Date: Tue, 28 Oct 2025 22:36:59 +0200 Subject: [PATCH] feat: add zot subcommand to enable testing retention policy settings (#3449) feat: add verify-feature retention subcommand with comprehensive testing and validation Add a `verify-feature retention` subcommand that allows users to preview and validate retention policy changes without running the actual Zot server. The command runs GC and retention tasks in dry-run mode for immediate feedback. - Run verify-feature retention standalone without starting the server - Preview retention policy decisions in dry-run mode - Configurable GC interval override via command-line flag - Optional timeout for task completion - Configurable log output (stdout or file) Basic usage: ```bash zot verify-feature retention ``` With log file output: ```bash zot verify-feature retention -l /var/log/zot-retention-check.log ``` With GC interval override (runs GC tasks every 30 seconds): ```bash zot verify-feature retention -i 30s ``` With timeout (wait up to 5 minutes for tasks to complete): ```bash zot verify-feature retention -t 5m ``` Combined flags: ```bash zot verify-feature retention -l /var/log/zot-retention-check.log -i 1m -t 10m ``` The command supports overriding GC settings from the config: - `-i, --gc-interval`: Override the GC interval setting (applies to all storage paths including subpaths) - Refactored `RunGCTasks` from `controller.go` to be reusable - Added `checkServerRunning` validation to prevent conflicts - Implemented signal handling for graceful shutdown - Added configuration sanitization and logging - Set GCMaxSchedulerDelay programmatically (not user-configurable) Added tests for coverage on main function: - Negative test cases (no args, bad config, GC disabled, server running) - Both BoltDB and Redis - Retention enabled scenarios with complex image setups - Retention disabled scenarios - Delete referrers functionality - 
Subpaths configuration - GC interval override validation Run the verify-feature retention tests: ```bash go test -v ./pkg/cli/server -run TestRetentionCheck ``` Signed-off-by: Andrei Aaron --- pkg/api/config/config.go | 4 + pkg/api/controller.go | 62 +- pkg/cli/server/root.go | 15 + pkg/cli/server/verify_retention.go | 244 ++++ pkg/cli/server/verify_retention_test.go | 1744 +++++++++++++++++++++++ pkg/storage/gc/gc.go | 25 +- 6 files changed, 2066 insertions(+), 28 deletions(-) create mode 100644 pkg/cli/server/verify_retention.go create mode 100644 pkg/cli/server/verify_retention_test.go diff --git a/pkg/api/config/config.go b/pkg/api/config/config.go index a718a029..e7cea046 100644 --- a/pkg/api/config/config.go +++ b/pkg/api/config/config.go @@ -36,6 +36,10 @@ type StorageConfig struct { Retention ImageRetention StorageDriver map[string]interface{} `mapstructure:",omitempty"` CacheDriver map[string]interface{} `mapstructure:",omitempty"` + + // GCMaxSchedulerDelay is the maximum random delay for GC task scheduling + // This field is not configurable by the end user + GCMaxSchedulerDelay time.Duration `yaml:"-"` } type ImageRetention struct { diff --git a/pkg/api/controller.go b/pkg/api/controller.go index 30666a7f..69ec6086 100644 --- a/pkg/api/controller.go +++ b/pkg/api/controller.go @@ -460,47 +460,27 @@ func (c *Controller) StartBackgroundTasks() { c.HTPasswdWatcher.Run() } - // Enable running garbage-collect periodically for DefaultStore - storageConfig := c.Config.CopyStorageConfig() - if storageConfig.GC { - gc := gc.NewGarbageCollect(c.StoreController.DefaultStore, c.MetaDB, gc.Options{ - Delay: storageConfig.GCDelay, - ImageRetention: storageConfig.Retention, - }, c.Audit, c.Log) - - gc.CleanImageStorePeriodically(storageConfig.GCInterval, c.taskScheduler) - } + // Run GC and retention tasks + RunGCTasks(c.Config, c.StoreController, c.MetaDB, c.taskScheduler, c.Log, c.Audit) // Enable running dedupe blobs both ways (dedupe or restore deduped blobs) 
c.StoreController.DefaultStore.RunDedupeBlobs(time.Duration(0), c.taskScheduler) - // Enable extensions if extension config is provided for DefaultStore - extensionsConfig := c.Config.CopyExtensionsConfig() - // Always call EnableSearchExtension to ensure proper logging, even when search is disabled ext.EnableSearchExtension(c.Config, c.StoreController, c.MetaDB, c.taskScheduler, c.CveScanner, c.Log) // Always call EnableMetricsExtension to ensure proper logging, even when metrics is disabled + storageConfig := c.Config.CopyStorageConfig() ext.EnableMetricsExtension(c.Config, c.Log, storageConfig.RootDirectory) // runs once if metrics are enabled & imagestore is local + extensionsConfig := c.Config.CopyExtensionsConfig() if extensionsConfig.IsMetricsEnabled() && storageConfig.StorageDriver == nil { c.StoreController.DefaultStore.PopulateStorageMetrics(time.Duration(0), c.taskScheduler) } if storageConfig.SubPaths != nil { for route, subStorageConfig := range storageConfig.SubPaths { - // Enable running garbage-collect periodically for subImageStore - if subStorageConfig.GC { - gc := gc.NewGarbageCollect(c.StoreController.SubStore[route], c.MetaDB, - gc.Options{ - Delay: subStorageConfig.GCDelay, - ImageRetention: subStorageConfig.Retention, - }, c.Audit, c.Log) - - gc.CleanImageStorePeriodically(subStorageConfig.GCInterval, c.taskScheduler) - } - // Enable extensions if extension config is provided for subImageStore ext.EnableMetricsExtension(c.Config, c.Log, subStorageConfig.RootDirectory) @@ -539,6 +519,40 @@ func (c *Controller) StartBackgroundTasks() { ext.EnableScheduledTasks(c.Config, c.taskScheduler, c.MetaDB, c.Log) //nolint: contextcheck } +// RunGCTasks runs minimal GC and retention tasks without full controller. 
+func RunGCTasks(conf *config.Config, storeController storage.StoreController, metaDB mTypes.MetaDB, + taskScheduler *scheduler.Scheduler, logger log.Logger, audit *log.Logger, +) { + // Enable running garbage-collect periodically for DefaultStore + storageConfig := conf.CopyStorageConfig() + if storageConfig.GC { + gc := gc.NewGarbageCollect(storeController.DefaultStore, metaDB, gc.Options{ + Delay: storageConfig.GCDelay, + ImageRetention: storageConfig.Retention, + MaxSchedulerDelay: storageConfig.GCMaxSchedulerDelay, + }, audit, logger) + + gc.CleanImageStorePeriodically(storageConfig.GCInterval, taskScheduler) + } + + // Handle subpaths + if storageConfig.SubPaths != nil { + for route, subStorageConfig := range storageConfig.SubPaths { + // Enable running garbage-collect periodically for subImageStore + if subStorageConfig.GC { + gc := gc.NewGarbageCollect(storeController.SubStore[route], metaDB, + gc.Options{ + Delay: subStorageConfig.GCDelay, + ImageRetention: subStorageConfig.Retention, + MaxSchedulerDelay: subStorageConfig.GCMaxSchedulerDelay, + }, audit, logger) + + gc.CleanImageStorePeriodically(subStorageConfig.GCInterval, taskScheduler) + } + } + } +} + type SyncOnDemand interface { SyncImage(ctx context.Context, repo, reference string) error SyncReferrers(ctx context.Context, repo string, subjectDigestStr string, referenceTypes []string) error diff --git a/pkg/cli/server/root.go b/pkg/cli/server/root.go index a9b1316d..3af97d97 100644 --- a/pkg/cli/server/root.go +++ b/pkg/cli/server/root.go @@ -189,6 +189,19 @@ func newVerifyCmd(conf *config.Config) *cobra.Command { return verifyCmd } +func newVerifyFeatureCmd(conf *config.Config) *cobra.Command { + verifyFeatureCmd := &cobra.Command{ + Use: "verify-feature", + Short: "`verify-feature` validates specific zot features", + Long: "`verify-feature` validates specific zot features", + } + + // Add subcommands + verifyFeatureCmd.AddCommand(newVerifyFeatureRetentionCmd(conf)) + + return verifyFeatureCmd +} + 
// "zot" - registry server. func NewServerRootCmd() *cobra.Command { showVersion := false @@ -220,6 +233,8 @@ func NewServerRootCmd() *cobra.Command { rootCmd.AddCommand(newVerifyCmd(conf)) // "scrub" rootCmd.AddCommand(newScrubCmd(conf)) + // "verify-feature" + rootCmd.AddCommand(newVerifyFeatureCmd(conf)) // "version" rootCmd.Flags().BoolVarP(&showVersion, "version", "v", false, "show the version and exit") diff --git a/pkg/cli/server/verify_retention.go b/pkg/cli/server/verify_retention.go new file mode 100644 index 00000000..95984e86 --- /dev/null +++ b/pkg/cli/server/verify_retention.go @@ -0,0 +1,244 @@ +package server + +import ( + "context" + "fmt" + "net" + "net/http" + "os" + "os/signal" + "syscall" + "time" + + "github.com/spf13/cobra" + + zerr "zotregistry.dev/zot/v2/errors" + "zotregistry.dev/zot/v2/pkg/api" + "zotregistry.dev/zot/v2/pkg/api/config" + "zotregistry.dev/zot/v2/pkg/extensions/monitoring" + zlog "zotregistry.dev/zot/v2/pkg/log" + "zotregistry.dev/zot/v2/pkg/meta" + mTypes "zotregistry.dev/zot/v2/pkg/meta/types" + "zotregistry.dev/zot/v2/pkg/scheduler" + "zotregistry.dev/zot/v2/pkg/storage" +) + +func newVerifyFeatureRetentionCmd(conf *config.Config) *cobra.Command { + // "verify-feature retention" + retentionCheckCmd := &cobra.Command{ + Use: "retention ", + Short: "`verify-feature retention` runs garbage collection and retention tasks", + Long: "`verify-feature retention` runs garbage collection and retention tasks " + + "based on the provided configuration.\n\n" + + "WARNING: If retention settings are enabled in the config, the server metadata database needs to be accessed, " + + "which means the zot server must be stopped before running this command.", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + // Use stdout by default, or the specified log file + logFile, err := cmd.PersistentFlags().GetString("log-file") + if err != nil { + return fmt.Errorf("failed to get log-file flag: %w", err) + } + + 
logOutput := "" + if logFile != "" { + logOutput = logFile + } + logger := zlog.NewLogger("info", logOutput) + + if len(args) > 0 { + if err := LoadConfiguration(conf, args[0]); err != nil { + return err + } + } + + // Do not show usage on errors which are not related to command line arguments + cmd.SilenceUsage = true + + // Check if GC is enabled in config + if !conf.Storage.GC { + logger.Error().Msgf("failed to run verify-feature retention, garbage collection is disabled in config") + + return fmt.Errorf("%w: %s", zerr.ErrBadConfig, "verify-feature retention requires GC to be enabled") + } + + // Set short delay for verify-feature retention command + conf.Storage.GCMaxSchedulerDelay = 5 * time.Millisecond + + // Override GC interval if specified + gcInterval, err := cmd.PersistentFlags().GetDuration("gc-interval") + if err != nil { + return fmt.Errorf("failed to get gc-interval flag: %w", err) + } + + if gcInterval > 0 { + conf.Storage.GCInterval = gcInterval + } + + // Process subpaths for GC interval override + if conf.Storage.SubPaths != nil { + for route, storageConfig := range conf.Storage.SubPaths { + storageConfig.GCMaxSchedulerDelay = 5 * time.Millisecond + if gcInterval > 0 { + storageConfig.GCInterval = gcInterval + } + conf.Storage.SubPaths[route] = storageConfig + } + } + + // Log entire configuration after all overrides + logger.Info().Interface("params", conf.Sanitize()). 
+ Msg("configuration settings (after applying overrides)") + + // Check if server is running BEFORE initializing storage (to avoid database lock) + if !isRemoteCacheEnabled(conf) { + logger.Warn().Msg("local storage detected - the zot server must be stopped to access the storage database") + + if err := checkServerRunning(conf, logger); err != nil { + return err + } + } + + // Initialize metrics server + metricsServer := monitoring.NewMetricsServer(false, logger) + + // Initialize store controller + storeController, err := storage.New(conf, nil, metricsServer, logger, nil) + if err != nil { + msg := "failed to initialize store controller" + logger.Error().Err(err).Msg(msg) + + return fmt.Errorf("%s: %w", msg, err) + } + + // Initialize MetaDB only if retention policies are configured + var metaDB mTypes.MetaDB + if conf.IsRetentionEnabled() { + // Enable retention dry-run mode only when retention is enabled + conf.Storage.Retention.DryRun = true + + // Process subpaths for retention dry-run + if conf.Storage.SubPaths != nil { + for route, storageConfig := range conf.Storage.SubPaths { + storageConfig.Retention.DryRun = true + conf.Storage.SubPaths[route] = storageConfig + } + } + + driver, err := meta.New(conf.Storage.StorageConfig, logger) + if err != nil { + msg := "failed to initialize metadata database" + logger.Error().Err(err).Msg(msg) + + return fmt.Errorf("%s: %w", msg, err) + } + + err = meta.ParseStorage(driver, storeController, logger) + if err != nil { + msg := "failed to parse storage" + logger.Error().Err(err).Msg(msg) + + return fmt.Errorf("%s: %w", msg, err) + } + + metaDB = driver + logger.Info().Msg("retention policies are configured - retention rules will be applied") + } else { + metaDB = nil + logger.Info().Msg("no retention policies are configured - garbage collection will run with default settings") + } + + // Initialize scheduler + taskScheduler := scheduler.NewScheduler(conf, metricsServer, logger) + taskScheduler.RunScheduler() + defer 
taskScheduler.Shutdown() + + logger.Info().Msg("garbage collection and retention tasks will be submitted to the scheduler") + + // Run GC and retention tasks + api.RunGCTasks(conf, storeController, metaDB, taskScheduler, logger, nil) + + // Wait for tasks to complete with optional timeout + timeout, err := cmd.PersistentFlags().GetDuration("timeout") + if err != nil { + return fmt.Errorf("failed to get timeout flag: %w", err) + } + + var waitCtx context.Context + var cancel context.CancelFunc + + if timeout > 0 { + logger.Info().Dur("timeout", timeout).Msg("waiting for garbage collection tasks to complete...") + waitCtx, cancel = context.WithTimeout(context.Background(), timeout) + } else { + logger.Info().Msg("waiting for garbage collection tasks to complete indefinitely " + + "(can be interrupted by SIGINT/SIGTERM)...") + waitCtx, cancel = context.WithCancel(cmd.Context()) + } + defer cancel() + + // Set up signal handling for graceful shutdown + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) + + // Wait for either context cancellation or signal + select { + case <-waitCtx.Done(): + logger.Info().Msg("retention check completed successfully") + case sig := <-sigChan: + logger.Info().Str("signal", sig.String()).Msg("received interrupt signal, stopping retention check") + logger.Info().Msg("retention check stopped gracefully") + } + + return nil + }, + } + + retentionCheckCmd.PersistentFlags().StringP("log-file", "l", "", "log file location (default: stdout)") + retentionCheckCmd.PersistentFlags().DurationP("gc-interval", "i", 0, + "override GC interval (default: use config value)") + retentionCheckCmd.PersistentFlags().DurationP("timeout", "t", 0, + "timeout for waiting for tasks to complete (default: wait indefinitely)") + + return retentionCheckCmd +} + +// checkServerRunning checks if a Zot server is already running on the configured address/port. 
+func checkServerRunning(conf *config.Config, logger zlog.Logger) error { + req, err := http.NewRequestWithContext(context.Background(), + http.MethodGet, + fmt.Sprintf("http://%s/v2", net.JoinHostPort(conf.HTTP.Address, conf.HTTP.Port)), + nil) + if err != nil { + msg := "failed to create http request" + logger.Error().Err(err).Msg(msg) + + return fmt.Errorf("%s: %w", msg, err) + } + + response, err := http.DefaultClient.Do(req) + if err == nil { + response.Body.Close() + logger.Warn().Err(zerr.ErrServerIsRunning). + Msg("server is running, in order to perform the verify-feature retention command the server should be shut down") + + return zerr.ErrServerIsRunning + } + + return nil +} + +// isRemoteCacheEnabled checks if the remote cache is enabled for the global and subpaths storage configs. +func isRemoteCacheEnabled(conf *config.Config) bool { + if conf == nil || !conf.Storage.RemoteCache { + return false + } + + for _, subStorageConfig := range conf.Storage.SubPaths { + if !subStorageConfig.RemoteCache { + return false + } + } + + return true +} diff --git a/pkg/cli/server/verify_retention_test.go b/pkg/cli/server/verify_retention_test.go new file mode 100644 index 00000000..7307deaf --- /dev/null +++ b/pkg/cli/server/verify_retention_test.go @@ -0,0 +1,1744 @@ +package server_test + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path" + "strings" + "testing" + "time" + + "github.com/alicebob/miniredis/v2" + goredis "github.com/redis/go-redis/v9" + . 
"github.com/smartystreets/goconvey/convey" + + zerr "zotregistry.dev/zot/v2/errors" + "zotregistry.dev/zot/v2/pkg/api" + "zotregistry.dev/zot/v2/pkg/api/config" + cli "zotregistry.dev/zot/v2/pkg/cli/server" + "zotregistry.dev/zot/v2/pkg/extensions/monitoring" + zlog "zotregistry.dev/zot/v2/pkg/log" + "zotregistry.dev/zot/v2/pkg/meta" + "zotregistry.dev/zot/v2/pkg/meta/boltdb" + "zotregistry.dev/zot/v2/pkg/meta/redis" + "zotregistry.dev/zot/v2/pkg/storage" + "zotregistry.dev/zot/v2/pkg/storage/local" + storageTypes "zotregistry.dev/zot/v2/pkg/storage/types" + . "zotregistry.dev/zot/v2/pkg/test/common" + . "zotregistry.dev/zot/v2/pkg/test/image-utils" +) + +const ( + decisionKeep = "keep" + decisionDelete = "delete" + retentionTestRepo = "retention-test-repo" + retentionTestRepoSubpath = "a/retention-test-repo" + testGCDelay = "1ms" +) + +func TestRetentionCheckNegative(t *testing.T) { + oldArgs := os.Args + + defer func() { os.Args = oldArgs }() + + Convey("Test verify-feature retention no args", t, func(c C) { + os.Args = []string{"cli_test", "verify-feature", "retention"} + err := cli.NewServerRootCmd().Execute() + So(err, ShouldNotBeNil) + }) + + Convey("non-existent config", t, func(c C) { + os.Args = []string{"cli_test", "verify-feature", "retention", path.Join(os.TempDir(), "/x.yaml")} + err := cli.NewServerRootCmd().Execute() + So(err, ShouldNotBeNil) + }) + + Convey("unknown config", t, func(c C) { + os.Args = []string{"cli_test", "verify-feature", "retention", path.Join(os.TempDir(), "/x")} + err := cli.NewServerRootCmd().Execute() + So(err, ShouldNotBeNil) + }) + + Convey("bad config", t, func(c C) { + testDir := t.TempDir() + configFile := path.Join(testDir, "zot-config.json") + + content := []byte(`{"log":{}}`) + err := os.WriteFile(configFile, content, 0o600) + So(err, ShouldBeNil) + + os.Args = []string{"cli_test", "verify-feature", "retention", "-t", "30s", configFile} + err = cli.NewServerRootCmd().Execute() + So(err, ShouldNotBeNil) + }) + + 
Convey("config with GC disabled", t, func(c C) { + testDir := t.TempDir() + configFile := path.Join(testDir, "zot-config.json") + logFile := path.Join(testDir, "retention-check.log") + port := GetFreePort() + + content := []byte(fmt.Sprintf(`{ + "distSpecVersion": "1.1.1", + "storage": { + "rootDirectory": "%s", + "gc": false + }, + "http": { + "address": "127.0.0.1", + "port": "%s" + } + }`, testDir, port)) + err := os.WriteFile(configFile, content, 0o600) + So(err, ShouldBeNil) + + os.Args = []string{"cli_test", "verify-feature", "retention", "-l", logFile, "-t", "30s", configFile} + err = cli.NewServerRootCmd().Execute() + + // Verify the specific error + So(err, ShouldNotBeNil) + So(err.Error(), ShouldEqual, + fmt.Sprintf("%s: %s", zerr.ErrBadConfig.Error(), "verify-feature retention requires GC to be enabled")) + + // Verify error message is logged to the log file + logContent, err := os.ReadFile(logFile) + So(err, ShouldBeNil) + logStr := string(logContent) + So(logStr, ShouldContainSubstring, + "failed to run verify-feature retention, garbage collection is disabled in config") + }) + + Convey("server is running", t, func(c C) { + port := GetFreePort() + config := config.New() + config.HTTP.Port = port + controller := api.NewController(config) + + testDir := t.TempDir() + storageDir := path.Join(testDir, "storage") + configFile := path.Join(testDir, "zot-config.json") + logFile := path.Join(testDir, "retention-check.log") + + controller.Config.Storage.RootDirectory = storageDir + controller.Config.Storage.GC = true + ctrlManager := NewControllerManager(controller) + ctrlManager.StartAndWait(port) + + defer ctrlManager.StopServer() + + content := []byte(fmt.Sprintf(`{ + "storage": { + "rootDirectory": "%s", + "gc": true, + "retention": { + "delay": "1ms", + "policies": [ + { + "repositories": ["**"], + "keepTags": [ + { + "patterns": [".*"], + "mostRecentlyPulledCount": 5 + } + ] + } + ] + } + }, + "http": { + "port": %s + }, + "log": { + "level": "debug" + } 
+ } + `, storageDir, port)) + err := os.WriteFile(configFile, content, 0o600) + So(err, ShouldBeNil) + + os.Args = []string{"cli_test", "verify-feature", "retention", "-l", logFile, "-t", "30s", configFile} + err = cli.NewServerRootCmd().Execute() + So(err, ShouldNotBeNil) + So(err, ShouldEqual, zerr.ErrServerIsRunning) + + // Verify warning messages are logged to the log file + logContent, err := os.ReadFile(logFile) + So(err, ShouldBeNil) + So(string(logContent), ShouldContainSubstring, + "local storage detected - the zot server must be stopped to access the storage database") + So(string(logContent), ShouldContainSubstring, + "server is running, in order to perform the verify-feature retention command the server should be shut down") + }) + + Convey("invalid log-file flag", t, func(c C) { + testCases := []struct { + name string + logFile string + }{ + {"invalid log file path (parent directory doesn't exist)", "/invalid/directory/logfile.log"}, + {"invalid log file path (null bytes)", "logfile\x00.log"}, + } + + for _, testCase := range testCases { + Convey(testCase.name, func() { + testDir := t.TempDir() + configFile := path.Join(testDir, "zot-config.json") + port := GetFreePort() + + content := []byte(fmt.Sprintf(`{ + "distSpecVersion": "1.1.1", + "storage": { + "rootDirectory": "%s", + "gc": true + }, + "http": { + "address": "127.0.0.1", + "port": "%s" + } + }`, testDir, port)) + err := os.WriteFile(configFile, content, 0o600) + So(err, ShouldBeNil) + + os.Args = []string{"cli_test", "verify-feature", "retention", "-l", testCase.logFile, "-t", "30s", configFile} + // This panics during logger initialization due to invalid log file location + So(func() { + _ = cli.NewServerRootCmd().Execute() + }, ShouldPanic) + }) + } + }) + + Convey("invalid duration flags", t, func(c C) { + testCases := []struct { + name string + flag string + flagValue string + }{ + {"invalid gc-interval flag", "-i", "invalid-duration"}, + {"invalid timeout flag", "-t", 
"invalid-duration"}, + } + + for _, testCase := range testCases { + Convey(testCase.name, func() { + testDir := t.TempDir() + configFile := path.Join(testDir, "zot-config.json") + logFile := path.Join(testDir, "retention-check.log") + port := GetFreePort() + + content := []byte(fmt.Sprintf(`{ + "distSpecVersion": "1.1.1", + "storage": { + "rootDirectory": "%s", + "gc": true + }, + "http": { + "address": "127.0.0.1", + "port": "%s" + } + }`, testDir, port)) + err := os.WriteFile(configFile, content, 0o600) + So(err, ShouldBeNil) + + args := []string{ + "cli_test", "verify-feature", "retention", "-l", logFile, + testCase.flag, testCase.flagValue, + } + + if testCase.flag == "-i" { + args = append(args, "-t", "30s") + } + + args = append(args, configFile) + os.Args = args + + err = cli.NewServerRootCmd().Execute() + // Flag parsing should fail before reaching RunE + So(err, ShouldNotBeNil) + So(err.Error(), ShouldContainSubstring, "invalid duration") + }) + } + }) +} + +func TestRetentionCheckWithRetentionEnabledAndRedisDriver(t *testing.T) { + oldArgs := os.Args + + defer func() { os.Args = oldArgs }() + + Convey("server is running with Redis driver", t, func(c C) { + miniRedis := miniredis.RunT(t) + port := GetFreePort() + testDir := t.TempDir() + storageDir := path.Join(testDir, "storage") + configFile := path.Join(testDir, "zot-config.json") + logFile := path.Join(testDir, "retention-check.log") + + content := []byte(fmt.Sprintf(`{ + "distSpecVersion": "1.1.1", + "storage": { + "rootDirectory": "%s", + "gc": true, + "remoteCache": true, + "gcDelay": %q, + "gcInterval": "1m", + "cacheDriver": { + "name": "redis", + "url": "redis://%s" + }, + "retention": { + "delay": "1ms", + "policies": [ + { + "repositories": ["**"], + "keepTags": [ + { + "patterns": [".*"], + "mostRecentlyPulledCount": 2 + } + ] + } + ] + } + }, + "http": { + "address": "127.0.0.1", + "port": "%s" + }, + "log": { + "level": "debug" + } + } + `, storageDir, testGCDelay, miniRedis.Addr(), port)) + 
err := os.WriteFile(configFile, content, 0o600) + So(err, ShouldBeNil) + + // Create complex image setup before running verify-feature retention + conf := config.New() + err = cli.LoadConfiguration(conf, configFile) + So(err, ShouldBeNil) + + // Initialize storage and metaDB using the same approach as gc tests + metricsServer := monitoring.NewMetricsServer(false, zlog.NewLogger("info", "")) + // Create ImageStore directly (like gc tests) + imgStore := local.NewImageStore(storageDir, false, false, zlog.NewLogger("info", ""), metricsServer, + nil, nil, nil, nil) + // Initialize metaDB with Redis + redisClient := goredis.NewClient(&goredis.Options{ + Addr: miniRedis.Addr(), + }) + params := redis.DBDriverParameters{KeyPrefix: "zot"} + metaDB, err := redis.New(redisClient, params, zlog.NewLogger("info", "")) + So(err, ShouldBeNil) + // Create store controller + storeController := storage.StoreController{} + storeController.DefaultStore = imgStore + err = meta.ParseStorage(metaDB, storeController, zlog.NewLogger("info", "")) + So(err, ShouldBeNil) + + // Create test repositories with different image types for retention testing + // Repository 1: Multiple tagged images (some old, some recent) + repo1 := retentionTestRepo + + // Old image (should be deleted by retention - keeping only 2 most recent) + oldImage := CreateRandomImage() + err = WriteImageToFileSystem(oldImage, repo1, "old-tag", storeController) + So(err, ShouldBeNil) + + // Recent images (should be kept) + recentImage1 := CreateRandomImage() + err = WriteImageToFileSystem(recentImage1, repo1, "recent-tag-1", storeController) + So(err, ShouldBeNil) + + recentImage2 := CreateRandomImage() + err = WriteImageToFileSystem(recentImage2, repo1, "recent-tag-2", storeController) + So(err, ShouldBeNil) + + // Multiarch image + multiarchImage := CreateRandomMultiarch() + err = WriteMultiArchImageToFileSystem(multiarchImage, repo1, "multiarch-tag", storeController) + So(err, ShouldBeNil) + + // Untagged image (should be 
cleaned up by GC) + untaggedImage := CreateRandomImage() + err = WriteImageToFileSystem(untaggedImage, repo1, untaggedImage.DigestStr(), storeController) + So(err, ShouldBeNil) + + // Repository 2: Referrers + repo2 := "referrer-test-repo" + + // Base image + baseImage := CreateRandomImage() + err = WriteImageToFileSystem(baseImage, repo2, "base-tag", storeController) + So(err, ShouldBeNil) + + // Referrer pointing to base image + referrer := CreateRandomImageWith().Subject(baseImage.DescriptorRef()).Build() + err = WriteImageToFileSystem(referrer, repo2, referrer.DigestStr(), storeController) + So(err, ShouldBeNil) + + // Referrer pointing to non-existent subject (should be deleted) + nonExistentSubject := CreateRandomImage() // Create but don't write to storage + referrerWithInvalidSubject := CreateRandomImageWith().Subject(nonExistentSubject.DescriptorRef()).Build() + err = WriteImageToFileSystem(referrerWithInvalidSubject, repo2, + referrerWithInvalidSubject.DigestStr(), storeController) + So(err, ShouldBeNil) + + // Re-parse storage after creating images to update metadata + err = meta.ParseStorage(metaDB, storeController, zlog.NewLogger("info", "")) + So(err, ShouldBeNil) + + // Update metadata with timestamps for retention testing + // Set old timestamps for images that should be deleted + repoMeta1, err := metaDB.GetRepoMeta(context.Background(), repo1) + So(err, ShouldBeNil) + + // Old images (should be deleted by retention - keeping only 2 most recent) + oldImageStats := repoMeta1.Statistics[oldImage.DigestStr()] + oldImageStats.PushTimestamp = time.Now().Add(-10 * 24 * time.Hour) + oldImageStats.LastPullTimestamp = time.Now().Add(-10 * 24 * time.Hour) + repoMeta1.Statistics[oldImage.DigestStr()] = oldImageStats + + // Recent images (should be kept) + recentImage1Stats := repoMeta1.Statistics[recentImage1.DigestStr()] + recentImage1Stats.PushTimestamp = time.Now().Add(-1 * 24 * time.Hour) + recentImage1Stats.LastPullTimestamp = time.Now().Add(-1 * 24 * 
time.Hour) + repoMeta1.Statistics[recentImage1.DigestStr()] = recentImage1Stats + + recentImage2Stats := repoMeta1.Statistics[recentImage2.DigestStr()] + recentImage2Stats.PushTimestamp = time.Now().Add(-2 * 24 * time.Hour) + recentImage2Stats.LastPullTimestamp = time.Now().Add(-2 * 24 * time.Hour) + repoMeta1.Statistics[recentImage2.DigestStr()] = recentImage2Stats + + multiarchStats := repoMeta1.Statistics[multiarchImage.DigestStr()] + multiarchStats.PushTimestamp = time.Now().Add(-3 * 24 * time.Hour) + multiarchStats.LastPullTimestamp = time.Now().Add(-3 * 24 * time.Hour) + repoMeta1.Statistics[multiarchImage.DigestStr()] = multiarchStats + + err = metaDB.SetRepoMeta(repo1, repoMeta1) + So(err, ShouldBeNil) + + // Set timestamps for referrer repo + repoMeta2, err := metaDB.GetRepoMeta(context.Background(), repo2) + So(err, ShouldBeNil) + + baseImageStats := repoMeta2.Statistics[baseImage.DigestStr()] + baseImageStats.PushTimestamp = time.Now().Add(-5 * 24 * time.Hour) + baseImageStats.LastPullTimestamp = time.Now().Add(-5 * 24 * time.Hour) + repoMeta2.Statistics[baseImage.DigestStr()] = baseImageStats + + referrerStats := repoMeta2.Statistics[referrer.DigestStr()] + referrerStats.PushTimestamp = time.Now().Add(-4 * 24 * time.Hour) + referrerStats.LastPullTimestamp = time.Now().Add(-4 * 24 * time.Hour) + repoMeta2.Statistics[referrer.DigestStr()] = referrerStats + + err = metaDB.SetRepoMeta(repo2, repoMeta2) + So(err, ShouldBeNil) + + // Close metaDB to release database lock before running verify-feature retention + err = metaDB.Close() + So(err, ShouldBeNil) + + gcDelay, _ := time.ParseDuration(testGCDelay) + time.Sleep(gcDelay + 50*time.Millisecond) // wait for GC delay to pass + + // Start a controller using the same config to test running verify-feature retention while server is running + controller := api.NewController(conf) + ctrlManager := NewControllerManager(controller) + ctrlManager.StartAndWait(port) + + defer ctrlManager.StopServer() + + os.Args = 
[]string{"cli_test", "verify-feature", "retention", "-l", logFile, "-t", "1s", configFile} + err = cli.NewServerRootCmd().Execute() + So(err, ShouldBeNil) + + // Verify success messages are logged to the log file + logContent, err := os.ReadFile(logFile) + So(err, ShouldBeNil) + logStr := string(logContent) + + // Dump log content to stdout on test failure + defer func() { + if t.Failed() { + t.Logf("Retention check log content:\n%s", logStr) + } + }() + + // Verify basic verify-feature retention and GC messages + So(logStr, ShouldContainSubstring, "configuration settings (after applying overrides)") + // Verify GC configuration values are present in the log + So(logStr, ShouldContainSubstring, "\"GCInterval\":60000000000") // 1m = 60s in nanoseconds + So(logStr, ShouldContainSubstring, "\"GCDelay\":1000000") // 1ms in nanoseconds + So(logStr, ShouldContainSubstring, "\"GCMaxSchedulerDelay\":5000000") // 5ms + So(logStr, ShouldContainSubstring, + "garbage collection and retention tasks will be submitted to the scheduler") + So(logStr, ShouldContainSubstring, "waiting for garbage collection tasks to complete...") + So(logStr, ShouldContainSubstring, "executing gc of orphaned blobs") + So(logStr, ShouldContainSubstring, "garbage collected blobs") + So(logStr, ShouldContainSubstring, "gc successfully completed") + So(logStr, ShouldContainSubstring, "retention check completed successfully") + + // No need to build expectedResults - we only need counts for concurrent scenario + + // In concurrent scenarios (controller + verify-feature retention running together), + // we just verify that the command completes successfully. The actual retention + // policy validation is tested in the non-concurrent test cases. 
+ actualDecisions := parseRetentionDecisions([]byte(logStr)) + + // Count KEEP decisions to verify tag retention policies work + keepCount := 0 + + for _, decision := range actualDecisions { + if decision.Decision == decisionKeep { + keepCount++ + } + } + + // Validate KEEP decisions exactly (base-tag, recent-tag-1, recent-tag-2) + So(keepCount, ShouldEqual, 3) + }) +} + +func TestRetentionCheckWithRetentionEnabled(t *testing.T) { + oldArgs := os.Args + + defer func() { os.Args = oldArgs }() + + Convey("valid config with retention enabled", t, func(c C) { + port := GetFreePort() + testDir := t.TempDir() + storageDir := path.Join(testDir, "storage") + configFile := path.Join(testDir, "zot-config.json") + logFile := path.Join(testDir, "retention-check.log") + + content := []byte(fmt.Sprintf(`{ + "distSpecVersion": "1.1.1", + "storage": { + "rootDirectory": "%s", + "gc": true, + "gcDelay": %q, + "gcInterval": "1m", + "retention": { + "delay": "1ms", + "policies": [ + { + "repositories": ["**"], + "keepTags": [ + { + "patterns": [".*"], + "mostRecentlyPulledCount": 2 + } + ] + } + ] + } + }, + "http": { + "address": "127.0.0.1", + "port": "%s" + }, + "log": { + "level": "debug" + } + } + `, storageDir, testGCDelay, port)) + err := os.WriteFile(configFile, content, 0o600) + So(err, ShouldBeNil) + + // Create complex image setup before running verify-feature retention + conf := config.New() + err = cli.LoadConfiguration(conf, configFile) + So(err, ShouldBeNil) + + // Initialize storage and metaDB using the same approach as gc tests + metricsServer := monitoring.NewMetricsServer(false, zlog.NewLogger("info", "")) + // Create ImageStore directly (like gc tests) + imgStore := local.NewImageStore(storageDir, false, false, zlog.NewLogger("info", ""), metricsServer, + nil, nil, nil, nil) + // Initialize metaDB directly (like gc tests) + params := boltdb.DBParameters{ + RootDir: storageDir, + } + boltDriver, err := boltdb.GetBoltDriver(params) + So(err, ShouldBeNil) + metaDB, 
err := boltdb.New(boltDriver, zlog.NewLogger("info", "")) + So(err, ShouldBeNil) + // Create store controller + storeController := storage.StoreController{} + storeController.DefaultStore = imgStore + err = meta.ParseStorage(metaDB, storeController, zlog.NewLogger("info", "")) + So(err, ShouldBeNil) + + // Create test repositories with different image types for retention testing + // Repository 1: Multiple tagged images (some old, some recent) + repo1 := retentionTestRepo + + // Old images (should be deleted by retention - keeping only 2 most recent) + oldImage1 := CreateRandomImage() + err = WriteImageToFileSystem(oldImage1, repo1, "old-tag-1", storeController) + So(err, ShouldBeNil) + + oldImage2 := CreateRandomImage() + err = WriteImageToFileSystem(oldImage2, repo1, "old-tag-2", storeController) + So(err, ShouldBeNil) + + // Recent images (should be kept) + recentImage1 := CreateRandomImage() + err = WriteImageToFileSystem(recentImage1, repo1, "recent-tag-1", storeController) + So(err, ShouldBeNil) + + recentImage2 := CreateRandomImage() + err = WriteImageToFileSystem(recentImage2, repo1, "recent-tag-2", storeController) + So(err, ShouldBeNil) + + // Multiarch image + multiarchImage := CreateRandomMultiarch() + err = WriteMultiArchImageToFileSystem(multiarchImage, repo1, "multiarch-tag", storeController) + So(err, ShouldBeNil) + + // Untagged images (should be cleaned up by GC) + untaggedImage1 := CreateRandomImage() + err = WriteImageToFileSystem(untaggedImage1, repo1, untaggedImage1.DigestStr(), storeController) + So(err, ShouldBeNil) + + // Repository 2: Referrers and referrers of referrers + repo2 := "referrer-test-repo" + + // Base image + baseImage := CreateRandomImage() + err = WriteImageToFileSystem(baseImage, repo2, "base-tag", storeController) + So(err, ShouldBeNil) + + // Referrer pointing to base image + referrer1 := CreateRandomImageWith().Subject(baseImage.DescriptorRef()).Build() + err = WriteImageToFileSystem(referrer1, repo2, 
referrer1.DigestStr(), storeController) + So(err, ShouldBeNil) + + // Referrer pointing to referrer + referrerOfReferrer := CreateRandomImageWith().Subject(referrer1.DescriptorRef()).Build() + err = WriteImageToFileSystem(referrerOfReferrer, repo2, referrerOfReferrer.DigestStr(), storeController) + So(err, ShouldBeNil) + + // Referrer pointing to non-existent subject (should be deleted) + nonExistentSubject := CreateRandomImage() // Create but don't write to storage + referrerWithInvalidSubject := CreateRandomImageWith().Subject(nonExistentSubject.DescriptorRef()).Build() + err = WriteImageToFileSystem(referrerWithInvalidSubject, repo2, + referrerWithInvalidSubject.DigestStr(), storeController) + So(err, ShouldBeNil) + + // Re-parse storage after creating images to update metadata + err = meta.ParseStorage(metaDB, storeController, zlog.NewLogger("info", "")) + So(err, ShouldBeNil) + + // Update metadata with timestamps for retention testing + // Set old timestamps for images that should be deleted + repoMeta1, err := metaDB.GetRepoMeta(context.Background(), repo1) + So(err, ShouldBeNil) + + // Old images (should be deleted by retention - keeping only 2 most recent) + oldImage1Stats := repoMeta1.Statistics[oldImage1.DigestStr()] + oldImage1Stats.PushTimestamp = time.Now().Add(-10 * 24 * time.Hour) + oldImage1Stats.LastPullTimestamp = time.Now().Add(-10 * 24 * time.Hour) + repoMeta1.Statistics[oldImage1.DigestStr()] = oldImage1Stats + + oldImage2Stats := repoMeta1.Statistics[oldImage2.DigestStr()] + oldImage2Stats.PushTimestamp = time.Now().Add(-11 * 24 * time.Hour) + oldImage2Stats.LastPullTimestamp = time.Now().Add(-11 * 24 * time.Hour) + repoMeta1.Statistics[oldImage2.DigestStr()] = oldImage2Stats + + // Recent images (should be kept) + recentImage1Stats := repoMeta1.Statistics[recentImage1.DigestStr()] + recentImage1Stats.PushTimestamp = time.Now().Add(-1 * 24 * time.Hour) + recentImage1Stats.LastPullTimestamp = time.Now().Add(-1 * 24 * time.Hour) + 
repoMeta1.Statistics[recentImage1.DigestStr()] = recentImage1Stats + + recentImage2Stats := repoMeta1.Statistics[recentImage2.DigestStr()] + recentImage2Stats.PushTimestamp = time.Now().Add(-2 * 24 * time.Hour) + recentImage2Stats.LastPullTimestamp = time.Now().Add(-2 * 24 * time.Hour) + repoMeta1.Statistics[recentImage2.DigestStr()] = recentImage2Stats + + multiarchStats := repoMeta1.Statistics[multiarchImage.DigestStr()] + multiarchStats.PushTimestamp = time.Now().Add(-3 * 24 * time.Hour) + multiarchStats.LastPullTimestamp = time.Now().Add(-3 * 24 * time.Hour) + repoMeta1.Statistics[multiarchImage.DigestStr()] = multiarchStats + + err = metaDB.SetRepoMeta(repo1, repoMeta1) + So(err, ShouldBeNil) + + // Set timestamps for referrer repo + repoMeta2, err := metaDB.GetRepoMeta(context.Background(), repo2) + So(err, ShouldBeNil) + + baseImageStats := repoMeta2.Statistics[baseImage.DigestStr()] + baseImageStats.PushTimestamp = time.Now().Add(-5 * 24 * time.Hour) + baseImageStats.LastPullTimestamp = time.Now().Add(-5 * 24 * time.Hour) + repoMeta2.Statistics[baseImage.DigestStr()] = baseImageStats + + referrer1Stats := repoMeta2.Statistics[referrer1.DigestStr()] + referrer1Stats.PushTimestamp = time.Now().Add(-4 * 24 * time.Hour) + referrer1Stats.LastPullTimestamp = time.Now().Add(-4 * 24 * time.Hour) + repoMeta2.Statistics[referrer1.DigestStr()] = referrer1Stats + + referrerOfReferrerStats := repoMeta2.Statistics[referrerOfReferrer.DigestStr()] + referrerOfReferrerStats.PushTimestamp = time.Now().Add(-3 * 24 * time.Hour) + referrerOfReferrerStats.LastPullTimestamp = time.Now().Add(-3 * 24 * time.Hour) + repoMeta2.Statistics[referrerOfReferrer.DigestStr()] = referrerOfReferrerStats + + err = metaDB.SetRepoMeta(repo2, repoMeta2) + So(err, ShouldBeNil) + + // Close metaDB to release database lock before running verify-feature retention + err = metaDB.Close() + So(err, ShouldBeNil) + + gcDelay, _ := time.ParseDuration(testGCDelay) + time.Sleep(gcDelay + 50*time.Millisecond) 
// wait for GC delay to pass + + os.Args = []string{"cli_test", "verify-feature", "retention", "-l", logFile, "-t", "1s", configFile} + err = cli.NewServerRootCmd().Execute() + So(err, ShouldBeNil) + + // Verify success messages are logged to the log file + logContent, err := os.ReadFile(logFile) + So(err, ShouldBeNil) + logStr := string(logContent) + + // Dump log content to stdout on test failure + defer func() { + if t.Failed() { + t.Logf("Retention check log content:\n%s", logStr) + } + }() + + // Verify basic verify-feature retention and GC messages + So(logStr, ShouldContainSubstring, + "local storage detected - the zot server must be stopped to access the storage database") + So(logStr, ShouldContainSubstring, "configuration settings (after applying overrides)") + // Verify GC configuration values are present in the log + So(logStr, ShouldContainSubstring, "\"GCInterval\":60000000000") // 1m = 60s in nanoseconds + So(logStr, ShouldContainSubstring, "\"GCDelay\":1000000") // 1ms in nanoseconds + So(logStr, ShouldContainSubstring, "\"GCMaxSchedulerDelay\":5000000") // 5ms + So(logStr, ShouldContainSubstring, + "garbage collection and retention tasks will be submitted to the scheduler") + So(logStr, ShouldContainSubstring, "waiting for garbage collection tasks to complete...") + So(logStr, ShouldContainSubstring, "executing gc of orphaned blobs") + So(logStr, ShouldContainSubstring, "garbage collected blobs") + So(logStr, ShouldContainSubstring, "gc successfully completed") + So(logStr, ShouldContainSubstring, "retention check completed successfully") + + // Validate specific retention decisions by parsing log entries + expectedResults := []ExpectedRetentionResult{ + { + Tag: "base-tag", Repository: "referrer-test-repo", Decision: decisionKeep, + Reason: "retained by mostRecentlyPulledCount", + }, + { + Tag: "recent-tag-1", Repository: repo1, Decision: decisionKeep, + Reason: "retained by mostRecentlyPulledCount", + }, + { + Tag: "recent-tag-2", Repository: 
repo1, Decision: decisionKeep, + Reason: "retained by mostRecentlyPulledCount", + }, + { + Tag: "old-tag-1", Repository: repo1, Decision: decisionDelete, + Reason: "didn't meet any tag retention rule", + }, + { + Tag: "old-tag-2", Repository: repo1, Decision: decisionDelete, + Reason: "didn't meet any tag retention rule", + }, + { + Tag: "multiarch-tag", Repository: repo1, Decision: decisionDelete, + Reason: "didn't meet any tag retention rule", + }, + // Untagged manifest deletions - original untagged image + deleted tagged images + // (old-tag-1, old-tag-2, multiarch-tag) plus single-image manifests from the multiarch image + // (which become untagged when the multiarch-tag is deleted) + { + Tag: "", Repository: repo1, Decision: decisionDelete, + Reason: "deleteUntagged", Digest: untaggedImage1.DigestStr(), IsUntagged: true, + }, + { + Tag: "", Repository: repo1, Decision: decisionDelete, + Reason: "deleteUntagged", Digest: oldImage1.DigestStr(), IsUntagged: true, + }, + { + Tag: "", Repository: repo1, Decision: decisionDelete, + Reason: "deleteUntagged", Digest: oldImage2.DigestStr(), IsUntagged: true, + }, + { + Tag: "", Repository: repo1, Decision: decisionDelete, + Reason: "deleteUntagged", Digest: multiarchImage.DigestStr(), IsUntagged: true, + }, + // Single-image manifests from multiarch image (they become untagged when multiarch-tag is deleted) + { + Tag: "", Repository: repo1, Decision: decisionDelete, + Reason: "deleteUntagged", Digest: multiarchImage.Images[0].DigestStr(), IsUntagged: true, + }, + { + Tag: "", Repository: repo1, Decision: decisionDelete, + Reason: "deleteUntagged", Digest: multiarchImage.Images[1].DigestStr(), IsUntagged: true, + }, + { + Tag: "", Repository: repo1, Decision: decisionDelete, + Reason: "deleteUntagged", Digest: multiarchImage.Images[2].DigestStr(), IsUntagged: true, + }, + } + + validateRetentionDecisions(t, logContent, expectedResults) + }) +} + +func TestRetentionCheckWithDeleteReferrers(t *testing.T) { + oldArgs := 
os.Args + + defer func() { os.Args = oldArgs }() + + Convey("valid config with deleteReferrers enabled", t, func(c C) { + port := GetFreePort() + testDir := t.TempDir() + storageDir := path.Join(testDir, "storage") + configFile := path.Join(testDir, "zot-config.json") + logFile := path.Join(testDir, "retention-check.log") + + content := []byte(fmt.Sprintf(`{ + "distSpecVersion": "1.1.1", + "storage": { + "rootDirectory": "%s", + "gc": true, + "gcDelay": %q, + "gcInterval": "1m", + "retention": { + "delay": "1ms", + "policies": [ + { + "repositories": ["**"], + "keepTags": [ + { + "patterns": [".*"], + "mostRecentlyPulledCount": 1 + } + ], + "deleteReferrers": true + } + ] + } + }, + "http": { + "address": "127.0.0.1", + "port": "%s" + }, + "log": { + "level": "debug" + } + } + `, storageDir, testGCDelay, port)) + err := os.WriteFile(configFile, content, 0o600) + So(err, ShouldBeNil) + + // Create image setup before running verify-feature retention + conf := config.New() + err = cli.LoadConfiguration(conf, configFile) + So(err, ShouldBeNil) + + // Initialize storage and metaDB + metricsServer := monitoring.NewMetricsServer(false, zlog.NewLogger("info", "")) + imgStore := local.NewImageStore(storageDir, false, false, zlog.NewLogger("info", ""), metricsServer, + nil, nil, nil, nil) + params := boltdb.DBParameters{ + RootDir: storageDir, + } + boltDriver, err := boltdb.GetBoltDriver(params) + So(err, ShouldBeNil) + metaDB, err := boltdb.New(boltDriver, zlog.NewLogger("info", "")) + So(err, ShouldBeNil) + storeController := storage.StoreController{} + storeController.DefaultStore = imgStore + err = meta.ParseStorage(metaDB, storeController, zlog.NewLogger("info", "")) + So(err, ShouldBeNil) + + // Repository with images and referrers + repo := retentionTestRepo + + // Old image (should be deleted by retention - keeping only 1 most recent) + oldImage := CreateRandomImage() + err = WriteImageToFileSystem(oldImage, repo, "old-tag", storeController) + So(err, ShouldBeNil) + 
+ // Recent image (should be kept) + recentImage := CreateRandomImage() + err = WriteImageToFileSystem(recentImage, repo, "recent-tag", storeController) + So(err, ShouldBeNil) + + // Referrer pointing to old image (should be deleted when old image is deleted) + referrerToOldImage := CreateRandomImageWith().Subject(oldImage.DescriptorRef()).Build() + err = WriteImageToFileSystem(referrerToOldImage, repo, referrerToOldImage.DigestStr(), storeController) + So(err, ShouldBeNil) + + // Referrer pointing to recent image (should be kept) + referrerToRecentImage := CreateRandomImageWith().Subject(recentImage.DescriptorRef()).Build() + err = WriteImageToFileSystem(referrerToRecentImage, repo, referrerToRecentImage.DigestStr(), storeController) + So(err, ShouldBeNil) + + // Re-parse storage after creating images to update metadata + err = meta.ParseStorage(metaDB, storeController, zlog.NewLogger("info", "")) + So(err, ShouldBeNil) + + // Update metadata with timestamps for retention testing + repoMeta, err := metaDB.GetRepoMeta(context.Background(), repo) + So(err, ShouldBeNil) + + // Old image (should be deleted by retention) + oldImageStats := repoMeta.Statistics[oldImage.DigestStr()] + oldImageStats.PushTimestamp = time.Now().Add(-10 * 24 * time.Hour) + oldImageStats.LastPullTimestamp = time.Now().Add(-10 * 24 * time.Hour) + repoMeta.Statistics[oldImage.DigestStr()] = oldImageStats + + // Recent image (should be kept) + recentImageStats := repoMeta.Statistics[recentImage.DigestStr()] + recentImageStats.PushTimestamp = time.Now().Add(-1 * 24 * time.Hour) + recentImageStats.LastPullTimestamp = time.Now().Add(-1 * 24 * time.Hour) + repoMeta.Statistics[recentImage.DigestStr()] = recentImageStats + + err = metaDB.SetRepoMeta(repo, repoMeta) + So(err, ShouldBeNil) + + // Close metaDB to release database lock before running verify-feature retention + err = metaDB.Close() + So(err, ShouldBeNil) + + gcDelay, _ := time.ParseDuration(testGCDelay) + time.Sleep(gcDelay + 
50*time.Millisecond) // wait for GC delay to pass + + os.Args = []string{"cli_test", "verify-feature", "retention", "-l", logFile, "-t", "1s", configFile} + err = cli.NewServerRootCmd().Execute() + So(err, ShouldBeNil) + + // Verify success messages are logged to the log file + logContent, err := os.ReadFile(logFile) + So(err, ShouldBeNil) + logStr := string(logContent) + + // Dump log content to stdout on test failure + defer func() { + if t.Failed() { + t.Logf("Retention check log content:\n%s", logStr) + } + }() + + // Verify basic verify-feature retention and GC messages + So(logStr, ShouldContainSubstring, + "local storage detected - the zot server must be stopped to access the storage database") + So(logStr, ShouldContainSubstring, "configuration settings (after applying overrides)") + // Verify GC configuration values are present in the log + So(logStr, ShouldContainSubstring, "\"GCInterval\":60000000000") // 1m = 60s in nanoseconds + So(logStr, ShouldContainSubstring, "\"GCDelay\":1000000") // 1ms in nanoseconds + So(logStr, ShouldContainSubstring, "\"GCMaxSchedulerDelay\":5000000") // 5ms + So(logStr, ShouldContainSubstring, + "garbage collection and retention tasks will be submitted to the scheduler") + So(logStr, ShouldContainSubstring, "waiting for garbage collection tasks to complete...") + So(logStr, ShouldContainSubstring, "executing gc of orphaned blobs") + So(logStr, ShouldContainSubstring, "garbage collected blobs") + So(logStr, ShouldContainSubstring, "gc successfully completed") + So(logStr, ShouldContainSubstring, "retention check completed successfully") + + // Validate specific retention decisions by parsing log entries + expectedResults := []ExpectedRetentionResult{ + // Tagged images + { + Tag: "recent-tag", Repository: repo, Decision: decisionKeep, + Reason: "retained by mostRecentlyPulledCount", + }, + { + Tag: "old-tag", Repository: repo, Decision: decisionDelete, + Reason: "didn't meet any tag retention rule", + }, + // Untagged 
manifest deletions (old-tag image becomes untagged) + { + Tag: "", Repository: repo, Decision: decisionDelete, + Reason: "deleteUntagged", Digest: oldImage.DigestStr(), IsUntagged: true, + }, + // Referrer deletions - with deleteReferrers=true, only referrer to deleted subject is deleted + { + Tag: "", Repository: repo, Decision: decisionDelete, + Reason: "deleteReferrers", Digest: referrerToOldImage.DigestStr(), IsReferrer: true, Subject: oldImage.DigestStr(), + }, + // Note: referrerToRecentImage is kept because its subject (recentImage) is retained + } + + validateRetentionDecisions(t, logContent, expectedResults) + }) +} + +func TestRetentionCheckWithRetentionDisabled(t *testing.T) { + oldArgs := os.Args + + defer func() { os.Args = oldArgs }() + + Convey("valid config with retention disabled", t, func(c C) { + port := GetFreePort() + testDir := t.TempDir() + storageDir := path.Join(testDir, "storage") + configFile := path.Join(testDir, "zot-config.json") + logFile := path.Join(testDir, "retention-check.log") + + content := []byte(fmt.Sprintf(`{ + "distSpecVersion": "1.1.1", + "storage": { + "rootDirectory": "%s", + "gc": true, + "gcDelay": %q, + "gcInterval": "1m" + }, + "http": { + "address": "127.0.0.1", + "port": "%s" + }, + "log": { + "level": "debug" + } + } + `, storageDir, testGCDelay, port)) + err := os.WriteFile(configFile, content, 0o600) + So(err, ShouldBeNil) + + // Create image setup for GC testing (no retention, no MetaDB needed) + conf := config.New() + err = cli.LoadConfiguration(conf, configFile) + So(err, ShouldBeNil) + + // Initialize storage only (no MetaDB needed when retention is disabled) + metricsServer := monitoring.NewMetricsServer(false, zlog.NewLogger("info", "")) + imgStore := local.NewImageStore(storageDir, false, false, zlog.NewLogger("info", ""), metricsServer, + nil, nil, nil, nil) + storeController := storage.StoreController{} + storeController.DefaultStore = imgStore + + // Create test repositories with various image types 
for GC testing + // Repository 1: Tagged and untagged images + repo1 := "gc-test-repo" + + // Tagged image (should be kept) + taggedImage := CreateRandomImage() + err = WriteImageToFileSystem(taggedImage, repo1, "tagged-1", storeController) + So(err, ShouldBeNil) + + // Untagged image (should be cleaned up by GC) + untaggedImage1 := CreateRandomImage() + err = WriteImageToFileSystem(untaggedImage1, repo1, untaggedImage1.DigestStr(), storeController) + So(err, ShouldBeNil) + + // Repository 2: Multiarch images + repo2 := "multiarch-test-repo" + + // Tagged multiarch (should be kept) + multiarchImage := CreateRandomMultiarch() + err = WriteMultiArchImageToFileSystem(multiarchImage, repo2, "multiarch-tag-1", storeController) + So(err, ShouldBeNil) + + // Untagged multiarch (should be cleaned up) + untaggedMultiarch := CreateRandomMultiarch() + err = WriteMultiArchImageToFileSystem(untaggedMultiarch, repo2, untaggedMultiarch.DigestStr(), storeController) + So(err, ShouldBeNil) + + // Repository 3: Referrers + repo3 := "referrer-gc-repo" + + // Base image + baseImage := CreateRandomImage() + err = WriteImageToFileSystem(baseImage, repo3, "base-tag", storeController) + So(err, ShouldBeNil) + + // Referrer pointing to base image (should be kept) + referrer1 := CreateRandomImageWith().Subject(baseImage.DescriptorRef()).Build() + err = WriteImageToFileSystem(referrer1, repo3, referrer1.DigestStr(), storeController) + So(err, ShouldBeNil) + + gcDelay, _ := time.ParseDuration(testGCDelay) + time.Sleep(gcDelay + 50*time.Millisecond) // wait for GC delay to pass + + os.Args = []string{"cli_test", "verify-feature", "retention", "-l", logFile, "-t", "1s", configFile} + err = cli.NewServerRootCmd().Execute() + So(err, ShouldBeNil) + + // Verify warning and success messages are logged to the log file + logContent, err := os.ReadFile(logFile) + So(err, ShouldBeNil) + logStr := string(logContent) + + // Dump log content to stdout on test failure + defer func() { + if t.Failed() { + 
t.Logf("Retention check log content:\n%s", logStr) + } + }() + + // Verify basic verify-feature retention messages + So(logStr, ShouldContainSubstring, + "no retention policies are configured - garbage collection will run with default settings") + So(logStr, ShouldContainSubstring, "configuration settings (after applying overrides)") + // Verify GC configuration values are present in the log + So(logStr, ShouldContainSubstring, "\"GCInterval\":60000000000") // 1m = 60s in nanoseconds + So(logStr, ShouldContainSubstring, "\"GCDelay\":1000000") // 1ms in nanoseconds + So(logStr, ShouldContainSubstring, "\"GCMaxSchedulerDelay\":5000000") // 5ms + So(logStr, ShouldContainSubstring, + "garbage collection and retention tasks will be submitted to the scheduler") + So(logStr, ShouldContainSubstring, "waiting for garbage collection tasks to complete...") + So(logStr, ShouldContainSubstring, "executing gc of orphaned blobs") + So(logStr, ShouldContainSubstring, "garbage collected blobs") + So(logStr, ShouldContainSubstring, "gc successfully completed") + So(logStr, ShouldContainSubstring, "retention check completed successfully") + + // Validate retention decisions - untagged manifests should be cleaned up by default + expectedResults := []ExpectedRetentionResult{ + // gc-test-repo: 1 untagged manifest deleted + { + Tag: "", Repository: "gc-test-repo", Decision: decisionDelete, + Reason: "deleteUntagged", Digest: untaggedImage1.DigestStr(), IsUntagged: true, + }, + + // multiarch-test-repo: 4 untagged manifests deleted (multiarch index + 3 single-image manifests) + { + Tag: "", Repository: "multiarch-test-repo", Decision: decisionDelete, + Reason: "deleteUntagged", Digest: untaggedMultiarch.DigestStr(), IsUntagged: true, + }, + { + Tag: "", Repository: "multiarch-test-repo", Decision: decisionDelete, + Reason: "deleteUntagged", Digest: untaggedMultiarch.Images[0].DigestStr(), IsUntagged: true, + }, + { + Tag: "", Repository: "multiarch-test-repo", Decision: decisionDelete, + 
Reason: "deleteUntagged", Digest: untaggedMultiarch.Images[1].DigestStr(), IsUntagged: true, + }, + { + Tag: "", Repository: "multiarch-test-repo", Decision: decisionDelete, + Reason: "deleteUntagged", Digest: untaggedMultiarch.Images[2].DigestStr(), IsUntagged: true, + }, + } + + validateRetentionDecisions(t, logContent, expectedResults) + + // Verify that tagged images are NOT logged for deletion (they should be kept) + // Check that no tagged images appear in deletion logs + So(logStr, ShouldNotContainSubstring, "\"tag\":\"tagged-1\"") + So(logStr, ShouldNotContainSubstring, "\"tag\":\"multiarch-tag-1\"") + So(logStr, ShouldNotContainSubstring, "\"tag\":\"base-tag\"") + }) +} + +func TestRetentionCheckWithSubpaths(t *testing.T) { + oldArgs := os.Args + + defer func() { os.Args = oldArgs }() + + Convey("config with subpaths", t, func(c C) { + port := GetFreePort() + testDir := t.TempDir() + storageDir := path.Join(testDir, "storage") + configFile := path.Join(testDir, "zot-config.json") + logFile := path.Join(testDir, "retention-check.log") + + content := []byte(fmt.Sprintf(`{ + "distSpecVersion": "1.1.1", + "storage": { + "rootDirectory": "%s", + "gc": true, + "gcDelay": %q, + "gcInterval": "1m", + "retention": { + "delay": "1ms", + "policies": [ + { + "repositories": ["**"], + "keepTags": [ + { + "patterns": [".*"], + "mostRecentlyPulledCount": 2 + } + ], + "deleteReferrers": true + } + ] + }, + "subPaths": { + "/a": { + "rootDirectory": "%s/a", + "gc": true, + "gcDelay": %q, + "gcInterval": "1m", + "retention": { + "delay": "1ms", + "policies": [ + { + "repositories": ["**"], + "keepTags": [ + { + "patterns": [".*"], + "mostRecentlyPulledCount": 2 + } + ], + "deleteReferrers": true + } + ] + } + } + } + }, + "http": { + "address": "127.0.0.1", + "port": "%s" + }, + "log": { + "level": "debug" + } + } + `, storageDir, testGCDelay, storageDir, testGCDelay, port)) + err := os.WriteFile(configFile, content, 0o600) + So(err, ShouldBeNil) + + // Create image setup 
before running verify-feature retention + conf := config.New() + err = cli.LoadConfiguration(conf, configFile) + So(err, ShouldBeNil) + + // Initialize storage and metaDB + metricsServer := monitoring.NewMetricsServer(false, zlog.NewLogger("info", "")) + imgStore := local.NewImageStore(storageDir, false, false, zlog.NewLogger("info", ""), metricsServer, + nil, nil, nil, nil) + subpathStore := local.NewImageStore(path.Join(storageDir, "a"), false, false, + zlog.NewLogger("info", ""), metricsServer, nil, nil, nil, nil) + params := boltdb.DBParameters{ + RootDir: storageDir, + } + boltDriver, err := boltdb.GetBoltDriver(params) + So(err, ShouldBeNil) + metaDB, err := boltdb.New(boltDriver, zlog.NewLogger("info", "")) + So(err, ShouldBeNil) + storeController := storage.StoreController{} + storeController.DefaultStore = imgStore + storeController.SubStore = map[string]storageTypes.ImageStore{ + "/a": subpathStore, + } + err = meta.ParseStorage(metaDB, storeController, zlog.NewLogger("info", "")) + So(err, ShouldBeNil) + + // Create simplified image setup for retention testing + repo1 := retentionTestRepo + + // Old image (should be deleted by retention - keeping only 1 most recent) + oldImage := CreateRandomImage() + err = WriteImageToFileSystem(oldImage, repo1, "old-tag", storeController) + So(err, ShouldBeNil) + + // Recent image (should be kept) + recentImage := CreateRandomImage() + err = WriteImageToFileSystem(recentImage, repo1, "recent-tag", storeController) + So(err, ShouldBeNil) + + // Multiarch image (should be deleted by retention) + multiarchImage := CreateRandomMultiarch() + err = WriteMultiArchImageToFileSystem(multiarchImage, repo1, "multiarch-tag", storeController) + So(err, ShouldBeNil) + + // Untagged image (should be cleaned up by GC) + untaggedImage := CreateRandomImage() + err = WriteImageToFileSystem(untaggedImage, repo1, untaggedImage.DigestStr(), storeController) + So(err, ShouldBeNil) + + // Referrer pointing to oldImage (subject will be 
deleted, so referrer should be deleted) + referrerToOldImage := CreateRandomImageWith().Subject(oldImage.DescriptorRef()).Build() + err = WriteImageToFileSystem(referrerToOldImage, repo1, referrerToOldImage.DigestStr(), storeController) + So(err, ShouldBeNil) + + // Images in subpath /a/retention-test-repo + repo2 := retentionTestRepoSubpath + + subpathOldImage := CreateRandomImage() + err = WriteImageToFileSystem(subpathOldImage, repo2, "old-tag", storeController) + So(err, ShouldBeNil) + + subpathRecentImage := CreateRandomImage() + err = WriteImageToFileSystem(subpathRecentImage, repo2, "recent-tag", storeController) + So(err, ShouldBeNil) + + subpathMultiarchImage := CreateRandomMultiarch() + err = WriteMultiArchImageToFileSystem(subpathMultiarchImage, repo2, "multiarch-tag", storeController) + So(err, ShouldBeNil) + + subpathUntaggedImage := CreateRandomImage() + err = WriteImageToFileSystem(subpathUntaggedImage, repo2, subpathUntaggedImage.DigestStr(), storeController) + So(err, ShouldBeNil) + + // Referrer pointing to subpathOldImage (subject will be deleted, so referrer should be deleted) + subpathReferrerToOldImage := CreateRandomImageWith().Subject(subpathOldImage.DescriptorRef()).Build() + err = WriteImageToFileSystem(subpathReferrerToOldImage, repo2, subpathReferrerToOldImage.DigestStr(), storeController) + So(err, ShouldBeNil) + + // Re-parse storage after creating images to update metadata + err = meta.ParseStorage(metaDB, storeController, zlog.NewLogger("info", "")) + So(err, ShouldBeNil) + + // Update metadata with timestamps for retention testing + repoMeta1, err := metaDB.GetRepoMeta(context.Background(), repo1) + So(err, ShouldBeNil) + + // Old image (should be deleted by retention) + oldImageStats := repoMeta1.Statistics[oldImage.DigestStr()] + oldImageStats.PushTimestamp = time.Now().Add(-10 * 24 * time.Hour) + oldImageStats.LastPullTimestamp = time.Now().Add(-10 * 24 * time.Hour) + repoMeta1.Statistics[oldImage.DigestStr()] = oldImageStats + + 
// Recent image (should be kept) + recentImageStats := repoMeta1.Statistics[recentImage.DigestStr()] + recentImageStats.PushTimestamp = time.Now().Add(-1 * 24 * time.Hour) + recentImageStats.LastPullTimestamp = time.Now().Add(-1 * 24 * time.Hour) + repoMeta1.Statistics[recentImage.DigestStr()] = recentImageStats + + // Multiarch image (should be deleted by retention) + multiarchStats := repoMeta1.Statistics[multiarchImage.DigestStr()] + multiarchStats.PushTimestamp = time.Now().Add(-3 * 24 * time.Hour) + multiarchStats.LastPullTimestamp = time.Now().Add(-3 * 24 * time.Hour) + repoMeta1.Statistics[multiarchImage.DigestStr()] = multiarchStats + + err = metaDB.SetRepoMeta(repo1, repoMeta1) + So(err, ShouldBeNil) + + // Update metadata for subpath repository + repoMeta3, err := metaDB.GetRepoMeta(context.Background(), repo2) + So(err, ShouldBeNil) + + subpathOldImageStats := repoMeta3.Statistics[subpathOldImage.DigestStr()] + subpathOldImageStats.PushTimestamp = time.Now().Add(-10 * 24 * time.Hour) + subpathOldImageStats.LastPullTimestamp = time.Now().Add(-10 * 24 * time.Hour) + repoMeta3.Statistics[subpathOldImage.DigestStr()] = subpathOldImageStats + + subpathRecentImageStats := repoMeta3.Statistics[subpathRecentImage.DigestStr()] + subpathRecentImageStats.PushTimestamp = time.Now().Add(-1 * 24 * time.Hour) + subpathRecentImageStats.LastPullTimestamp = time.Now().Add(-1 * 24 * time.Hour) + repoMeta3.Statistics[subpathRecentImage.DigestStr()] = subpathRecentImageStats + + subpathMultiarchStats := repoMeta3.Statistics[subpathMultiarchImage.DigestStr()] + subpathMultiarchStats.PushTimestamp = time.Now().Add(-3 * 24 * time.Hour) + subpathMultiarchStats.LastPullTimestamp = time.Now().Add(-3 * 24 * time.Hour) + repoMeta3.Statistics[subpathMultiarchImage.DigestStr()] = subpathMultiarchStats + + err = metaDB.SetRepoMeta(repo2, repoMeta3) + So(err, ShouldBeNil) + + // Close metaDB to release database lock before running verify-feature retention + err = metaDB.Close() + 
So(err, ShouldBeNil) + + gcDelay, _ := time.ParseDuration(testGCDelay) + time.Sleep(gcDelay + 50*time.Millisecond) // wait for GC delay to pass + + os.Args = []string{"cli_test", "verify-feature", "retention", "-l", logFile, "-t", "1s", configFile} + err = cli.NewServerRootCmd().Execute() + So(err, ShouldBeNil) + + // Verify log file was created and contains expected messages + logContent, err := os.ReadFile(logFile) + So(err, ShouldBeNil) + logStr := string(logContent) + + // Dump log content to stdout on test failure + defer func() { + if t.Failed() { + t.Logf("Retention check log content:\n%s", logStr) + } + }() + + // Verify basic verify-feature retention and GC messages + So(logStr, ShouldContainSubstring, + "local storage detected - the zot server must be stopped to access the storage database") + So(logStr, ShouldContainSubstring, "configuration settings (after applying overrides)") + // Verify GC configuration values are present in the log + So(logStr, ShouldContainSubstring, "\"GCInterval\":60000000000") // 1m = 60s in nanoseconds + So(logStr, ShouldContainSubstring, "\"GCDelay\":1000000") // 1ms in nanoseconds + So(logStr, ShouldContainSubstring, "\"GCMaxSchedulerDelay\":5000000") // 5ms + So(logStr, ShouldContainSubstring, + "garbage collection and retention tasks will be submitted to the scheduler") + So(logStr, ShouldContainSubstring, "waiting for garbage collection tasks to complete...") + So(logStr, ShouldContainSubstring, "executing gc of orphaned blobs") + So(logStr, ShouldContainSubstring, "garbage collected blobs") + So(logStr, ShouldContainSubstring, "gc successfully completed") + So(logStr, ShouldContainSubstring, "retention check completed successfully") + + // Validate specific retention decisions by parsing log entries + expectedResults := []ExpectedRetentionResult{ + // Default path repositories + { + Tag: "recent-tag", Repository: repo1, Decision: decisionKeep, + Reason: "retained by mostRecentlyPulledCount", + }, + { + Tag: 
"multiarch-tag", Repository: repo1, Decision: decisionKeep,
+				Reason: "retained by mostRecentlyPulledCount",
+			},
+			{
+				Tag: "old-tag", Repository: repo1, Decision: decisionDelete,
+				Reason: "didn't meet any tag retention rule",
+			},
+			// Untagged manifest deletions (only untaggedImage and oldImage, multiarch is kept)
+			{
+				Tag: "", Repository: repo1, Decision: decisionDelete,
+				Reason: "deleteUntagged", Digest: untaggedImage.DigestStr(), IsUntagged: true,
+			},
+			{
+				Tag: "", Repository: repo1, Decision: decisionDelete,
+				Reason: "deleteUntagged", Digest: oldImage.DigestStr(), IsUntagged: true,
+			},
+			// Referrer deletion (subject oldImage is deleted)
+			{
+				Tag: "", Repository: repo1, Decision: decisionDelete,
+				Reason: "deleteReferrers", Digest: referrerToOldImage.DigestStr(), IsReferrer: true, Subject: oldImage.DigestStr(),
+			},
+			// Subpath repositories
+			{
+				Tag: "recent-tag", Repository: repo2, Decision: decisionKeep,
+				Reason: "retained by mostRecentlyPulledCount",
+			},
+			{
+				Tag: "multiarch-tag", Repository: repo2, Decision: decisionKeep,
+				Reason: "retained by mostRecentlyPulledCount",
+			},
+			{
+				Tag: "old-tag", Repository: repo2, Decision: decisionDelete,
+				Reason: "didn't meet any tag retention rule",
+			},
+			// Untagged manifest deletions in subpath
+			{
+				Tag: "", Repository: repo2, Decision: decisionDelete,
+				Reason: "deleteUntagged", Digest: subpathUntaggedImage.DigestStr(), IsUntagged: true,
+			},
+			{
+				Tag: "", Repository: repo2, Decision: decisionDelete,
+				Reason: "deleteUntagged", Digest: subpathOldImage.DigestStr(), IsUntagged: true,
+			},
+			// Referrer deletion in subpath (subject subpathOldImage is deleted)
+			{
+				Tag: "", Repository: repo2, Decision: decisionDelete,
+				Reason: "deleteReferrers", Digest: subpathReferrerToOldImage.DigestStr(),
+				IsReferrer: true, Subject: subpathOldImage.DigestStr(),
+			},
+		}
+
+		validateRetentionDecisions(t, logContent, expectedResults)
+	})
+}
+
+// TestRetentionCheckWithGCIntervalOverride runs "verify-feature retention" with "-i 30s"
+// against a config that sets gcInterval to 1m for both the default store and the "/a"
+// subpath, then checks in the logged configuration that the override reached both.
+func TestRetentionCheckWithGCIntervalOverride(t *testing.T) {
+	oldArgs := os.Args
+
+	// The command reads os.Args, so restore the process-wide value when the test ends.
+	defer func() { os.Args = oldArgs }()
+
+	Convey("config with gc-interval override", t, func(c C) {
+		testDir := t.TempDir()
+		storageDir := path.Join(testDir, "storage")
+		configFile := path.Join(testDir, "zot-config.json")
+		logFile := path.Join(testDir, "retention-check.log")
+		port := GetFreePort()
+
+		content := []byte(fmt.Sprintf(`{
+			"distSpecVersion": "1.1.1",
+			"storage": {
+				"rootDirectory": "%s",
+				"gc": true,
+				"gcDelay": %q,
+				"gcInterval": "1m",
+				"subPaths": {
+					"/a": {
+						"rootDirectory": "%s/a",
+						"gc": true,
+						"gcDelay": %q,
+						"gcInterval": "1m"
+					}
+				}
+			},
+			"http": {
+				"address": "127.0.0.1",
+				"port": "%s"
+			},
+			"log": {
+				"level": "debug"
+			}
+		}
+		`, storageDir, testGCDelay, storageDir, testGCDelay, port))
+		err := os.WriteFile(configFile, content, 0o600)
+		So(err, ShouldBeNil)
+
+		// A malformed testGCDelay would otherwise silently turn into a zero delay.
+		gcDelay, err := time.ParseDuration(testGCDelay)
+		So(err, ShouldBeNil)
+		time.Sleep(gcDelay + 50*time.Millisecond) // wait for GC delay to pass
+
+		// Override GC interval from 1m to 30s using -i flag
+		os.Args = []string{"cli_test", "verify-feature", "retention", "-l", logFile, "-i", "30s", "-t", "5ms", configFile}
+		err = cli.NewServerRootCmd().Execute()
+		So(err, ShouldBeNil)
+
+		// Verify log file was created and contains expected messages
+		logContent, err := os.ReadFile(logFile)
+		So(err, ShouldBeNil)
+		logStr := string(logContent)
+
+		// Verify the local storage warning message is logged
+		So(logStr, ShouldContainSubstring,
+			"local storage detected - the zot server must be stopped to access the storage database")
+
+		// Locate the first log line that reports the effective configuration
+		var configLogLine string
+
+		for _, line := range strings.Split(logStr, "\n") {
+			if strings.Contains(line, "configuration settings (after applying overrides)") {
+				configLogLine = line
+
+				break
+			}
+		}
+
+		So(configLogLine, ShouldNotBeEmpty)
+
+		// Durations appear in the log as integer nanoseconds, hence the int64 fields.
+		//nolint:tagliatelle // JSON field names match Go struct names
+		type SubPathConfig struct {
+			GCInterval          int64 `json:"GCInterval"`
+			GCDelay             int64 `json:"GCDelay"`
+			GCMaxSchedulerDelay int64 `json:"GCMaxSchedulerDelay"`
+		}
+
+		// Parse the JSON log line; subpaths decode straight into SubPathConfig values.
+		//nolint:tagliatelle // JSON field names match Go struct names
+		type ConfigParams struct {
+			Storage struct {
+				GCInterval          int64                    `json:"GCInterval"`
+				GCDelay             int64                    `json:"GCDelay"`
+				GCMaxSchedulerDelay int64                    `json:"GCMaxSchedulerDelay"`
+				SubPaths            map[string]SubPathConfig `json:"SubPaths"`
+			} `json:"Storage"`
+		}
+
+		type ConfigLog struct {
+			Params ConfigParams `json:"params"`
+		}
+
+		var configLog ConfigLog
+		err = json.Unmarshal([]byte(configLogLine), &configLog)
+		So(err, ShouldBeNil)
+
+		// Verify default storage configuration
+		So(configLog.Params.Storage.GCInterval, ShouldEqual, 30000000000)  // 30s in nanoseconds
+		So(configLog.Params.Storage.GCDelay, ShouldEqual, 1000000)         // 1ms in nanoseconds
+		So(configLog.Params.Storage.GCMaxSchedulerDelay, ShouldEqual, 5000000) // 5ms
+
+		// Verify subpaths configuration
+		So(configLog.Params.Storage.SubPaths, ShouldNotBeNil)
+		subpathConfig, exists := configLog.Params.Storage.SubPaths["/a"]
+		So(exists, ShouldBeTrue)
+
+		// Verify subpath GC interval was also overridden
+		So(subpathConfig.GCInterval, ShouldEqual, 30000000000)  // 30s in nanoseconds
+		So(subpathConfig.GCDelay, ShouldEqual, 1000000)         // 1ms in nanoseconds
+		So(subpathConfig.GCMaxSchedulerDelay, ShouldEqual, 5000000) // 5ms
+
+		// Verify other expected log messages
+		So(logStr, ShouldContainSubstring,
+			"no retention policies are configured - garbage collection will run with default settings")
+		So(logStr, ShouldContainSubstring,
+			"garbage collection and retention tasks will be submitted to the scheduler")
+		So(logStr, ShouldContainSubstring, "waiting for garbage collection tasks to complete...")
+		So(logStr,
ShouldContainSubstring, "retention check completed successfully")
+	})
+}
+
+// ExpectedRetentionResult represents the expected outcome for a specific tag, untagged image, or referrer.
+// Decision is compared for equality with the logged decision, while Reason only has to be
+// a substring of the logged reason.
+type ExpectedRetentionResult struct {
+	Tag        string
+	Repository string
+	Decision   string
+	Reason     string
+	Digest     string // For untagged images and referrers, this will be the digest
+	IsUntagged bool   // Flag to indicate if this is an untagged image
+	IsReferrer bool   // Flag to indicate if this is a referrer
+	Subject    string // For referrers, this is the subject digest
+}
+
+// RetentionDecision represents a parsed retention decision from logs.
+type RetentionDecision struct {
+	Message    string `json:"message"`
+	Repository string `json:"repository"`
+	Tag        string `json:"tag"`
+	Decision   string `json:"decision"`
+	Reason     string `json:"reason"`
+	Reference  string `json:"reference"` // For untagged images and referrers, this contains the digest
+	Subject    string `json:"subject"`   // For referrers, this contains the subject digest
+}
+
+// retentionLogDigestLen is the number of leading digest bytes shown in test log summaries.
+const retentionLogDigestLen = 12
+
+// abbreviateDigest returns at most the first retentionLogDigestLen bytes of digest.
+// Shorter (or empty) values are returned unchanged instead of panicking on the slice.
+func abbreviateDigest(digest string) string {
+	if len(digest) <= retentionLogDigestLen {
+		return digest
+	}
+
+	return digest[:retentionLogDigestLen]
+}
+
+// parseRetentionDecisions scans newline-separated JSON log entries and returns, in log
+// order, one RetentionDecision for every line reporting an applied retention policy,
+// a removed untagged manifest, or a removed referrer. Lines that match a marker but are
+// not valid JSON are skipped.
+func parseRetentionDecisions(logContent []byte) []RetentionDecision {
+	var actualDecisions []RetentionDecision
+
+	// collect decodes one log line and records it; clearTag blanks the tag for entries
+	// that describe manifests addressed by digest.
+	collect := func(line string, clearTag bool) {
+		var decision RetentionDecision
+
+		if err := json.Unmarshal([]byte(line), &decision); err != nil {
+			return
+		}
+
+		if clearTag {
+			decision.Tag = ""
+		}
+
+		actualDecisions = append(actualDecisions, decision)
+	}
+
+	for _, line := range strings.Split(string(logContent), "\n") {
+		// The markers are tested independently, so a line is recorded once per marker it contains.
+
+		// Parse retention policy decisions
+		if strings.Contains(line, "applied policy") && strings.Contains(line, "decision") {
+			collect(line, false)
+		}
+
+		// Parse untagged manifest cleanup: the digest is in the "reference" field
+		if strings.Contains(line, "removed untagged manifest") {
+			collect(line, true)
+		}
+
+		// Parse referrer cleanup: digest in "reference", subject digest in "subject"
+		if strings.Contains(line, "removed manifest without reference") {
+			collect(line, true)
+		}
+	}
+
+	return actualDecisions
+}
+
+// getExpectedKey builds the lookup key for an expected result:
+// "<repo>:untagged:<digest>", "<repo>:referrer:<digest>" or "<repo>:tag:<tag>".
+// IsUntagged takes precedence over IsReferrer when both are set.
+func getExpectedKey(expected ExpectedRetentionResult) string {
+	switch {
+	case expected.IsUntagged:
+		return expected.Repository + ":untagged:" + expected.Digest
+	case expected.IsReferrer:
+		return expected.Repository + ":referrer:" + expected.Digest
+	default:
+		return expected.Repository + ":tag:" + expected.Tag
+	}
+}
+
+// getActualKey builds the lookup key for a parsed decision in the same format as
+// getExpectedKey. The kind is inferred from the populated fields: no tag plus a reference
+// and a subject is a referrer, no tag plus a reference is an untagged image, anything
+// else is treated as a tagged image.
+func getActualKey(actual RetentionDecision) string {
+	switch {
+	case actual.Tag == "" && actual.Reference != "" && actual.Subject != "":
+		// This is a referrer
+		return actual.Repository + ":referrer:" + actual.Reference
+	case actual.Tag == "" && actual.Reference != "":
+		// This is an untagged image
+		return actual.Repository + ":untagged:" + actual.Reference
+	default:
+		// This is a tagged image
+		return actual.Repository + ":tag:" + actual.Tag
+	}
+}
+
+// validateRetentionDecisions parses the retention decisions found in logContent and asserts
+// that they match expectedResults exactly: same count, every expected entry present with
+// the expected decision, reason substring and (for referrers) subject, and no decision in
+// the log that was not expected. It must be called from within a Convey context.
+func validateRetentionDecisions(t *testing.T, logContent []byte, expectedResults []ExpectedRetentionResult) {
+	t.Helper()
+
+	actualDecisions := parseRetentionDecisions(logContent)
+
+	logRetentionDecisions(t, actualDecisions)
+	logExpectedResults(t, expectedResults)
+
+	// Validate that we have the expected number of decisions
+	So(len(actualDecisions), ShouldEqual, len(expectedResults))
+
+	// Create maps for easy lookup
+	expectedMap := make(map[string]ExpectedRetentionResult, len(expectedResults))
+
+	for _, expected := range expectedResults {
+		expectedMap[getExpectedKey(expected)] = expected
+	}
+
+	actualMap := make(map[string]RetentionDecision, len(actualDecisions))
+
+	for _, actual := range actualDecisions {
+		actualMap[getActualKey(actual)] = actual
+	}
+
+	// Validate each expected result
+	for _, expected := range expectedResults {
+		key := getExpectedKey(expected)
+		actual, exists := actualMap[key]
+
+		So(exists, ShouldBeTrue)
+		So(actual.Decision, ShouldEqual, expected.Decision)
+		So(actual.Reason, ShouldContainSubstring, expected.Reason)
+
+		// For referrers, also validate the subject
+		if expected.IsReferrer {
+			So(actual.Subject, ShouldEqual, expected.Subject)
+		}
+	}
+
+	// Validate that we don't have unexpected decisions
+	for _, actual := range actualDecisions {
+		key := getActualKey(actual)
+		_, exists := expectedMap[key]
+		So(exists, ShouldBeTrue)
+	}
+}
+
+// logRetentionDecisions writes a summary of the parsed keep/delete decisions to the test
+// log. Entries without a tag are labelled "untagged:<digest>" or "referrer:<digest>"
+// (abbreviated digest), matching the labels used by logExpectedResults.
+func logRetentionDecisions(t *testing.T, actualDecisions []RetentionDecision) {
+	t.Helper()
+
+	keepTags := make([]string, 0)
+	deleteTags := make([]string, 0)
+
+	for _, decision := range actualDecisions {
+		label := decision.Tag
+
+		// Same classification as getActualKey: a subject marks a referrer.
+		if label == "" && decision.Reference != "" {
+			kind := "untagged:"
+			if decision.Subject != "" {
+				kind = "referrer:"
+			}
+
+			label = kind + abbreviateDigest(decision.Reference)
+		}
+
+		switch decision.Decision {
+		case decisionKeep:
+			keepTags = append(keepTags, label)
+		case decisionDelete:
+			deleteTags = append(deleteTags, label)
+		}
+	}
+
+	t.Logf("KEEP decisions (%d): %v", len(keepTags), keepTags)
+	t.Logf("DELETE decisions (%d): %v", len(deleteTags), deleteTags)
+}
+
+// logExpectedResults writes a summary of the expected keep/delete outcomes to the test log.
+// Keep entries without a tag are omitted; delete entries without a tag are labelled
+// "untagged:<digest>" or "referrer:<digest>" with an abbreviated digest.
+func logExpectedResults(t *testing.T, expectedResults []ExpectedRetentionResult) {
+	t.Helper()
+
+	keepTags := make([]string, 0)
+	deleteTags := make([]string, 0)
+
+	for _, expected := range expectedResults {
+		switch expected.Decision {
+		case decisionKeep:
+			if expected.Tag != "" {
+				keepTags = append(keepTags, expected.Tag)
+			}
+		case decisionDelete:
+			switch {
+			case expected.Tag != "":
+				deleteTags = append(deleteTags, expected.Tag)
+			case expected.IsUntagged:
+				deleteTags = append(deleteTags, "untagged:"+abbreviateDigest(expected.Digest))
+			case expected.IsReferrer:
+				deleteTags = append(deleteTags, "referrer:"+abbreviateDigest(expected.Digest))
+			}
+		}
+	}
+
+	t.Logf("EXPECTED KEEP decisions (%d): %v", len(keepTags), keepTags)
+	t.Logf("EXPECTED DELETE decisions (%d): %v", len(deleteTags), deleteTags)
+}
diff --git a/pkg/storage/gc/gc.go b/pkg/storage/gc/gc.go
index 79f15714..c658365e 100644
--- a/pkg/storage/gc/gc.go
+++ b/pkg/storage/gc/gc.go
@@ -37,6 +37,10 @@ type Options struct {
 	// will
garbage collect blobs older than Delay Delay time.Duration + // MaxSchedulerDelay is the maximum random delay for GC task scheduling + // Defaults to 30 seconds if not specified + MaxSchedulerDelay time.Duration + ImageRetention config.ImageRetention } @@ -69,10 +73,16 @@ given an interval and a Scheduler. func (gc GarbageCollect) CleanImageStorePeriodically(interval time.Duration, sch *scheduler.Scheduler) { processedRepos := make(map[string]struct{}) + maxDelay := gc.opts.MaxSchedulerDelay + if maxDelay <= 0 { + maxDelay = 30 * time.Second // default value + } + generator := &GCTaskGenerator{ imgStore: gc.imgStore, gc: gc, processedRepos: processedRepos, + maxDelay: maxDelay, } sch.SubmitGenerator(generator, interval, scheduler.MediumPriority) @@ -808,12 +818,19 @@ type GCTaskGenerator struct { nextRun time.Time done bool rand *rand.Rand + maxDelay time.Duration } -func (gen *GCTaskGenerator) getRandomDelay() int { - maxDelay := 30 +func (gen *GCTaskGenerator) getRandomDelay() time.Duration { + maxDelay := gen.maxDelay + if maxDelay <= 0 { + maxDelay = 30 * time.Second // default fallback + } - return gen.rand.Intn(maxDelay) + // Generate random delay with nanosecond precision by working directly with + // time.Duration's internal representation (nanoseconds as int64). + // This supports sub-second delays (milliseconds, microseconds). + return time.Duration(gen.rand.Int63n(int64(maxDelay))) } func (gen *GCTaskGenerator) Name() string { @@ -827,7 +844,7 @@ func (gen *GCTaskGenerator) Next() (scheduler.Task, error) { delay := gen.getRandomDelay() - gen.nextRun = time.Now().Add(time.Duration(delay) * time.Second) + gen.nextRun = time.Now().Add(delay) repo, err := gen.imgStore.GetNextRepository(gen.processedRepos) if err != nil {