metadb: add optional fast restart path that skips storage walk when (version + commit + storage config) matches metaDB stamp (#4026)

* chore(metadb): add writer version to interface

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* chore(metadb): add writer version to db mock

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* chore(metadb): implement writer version for bolt, redis, and dynamodb

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* feat(metadb): add optional fast restart path that skips storage walk when binary identity matches metaDB stamp

Binary identity is determined by the current release tag/commit and is stored in metaDB after a successful storage parse. When fast restart is enabled, the next startup skips the parse if the stored identity matches the current binary.

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* chore(cli): serve: add a way to force reparse storage

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* refactor(meta): version: split to avoid global state mutation in tests

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* fix(meta): version: include commit in writerVersion to distinguish retags

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* chore(config): add IsFastRestartEnabled() test

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* fix(meta): skip writer-version stamp when storage parse is incomplete

ParseStorage returns nil even when individual repos fail to parse or are only partially parsed (a missing manifest blob), so MaybeParseStorage would stamp a partially-populated metaDB as good. On the next restart fastRestart trusts the stamp, skips the storage walk, and never recovers.

Track per-repo outcomes via parseStats and stamp only when the walk fully populated the metaDB; otherwise, log and continue so the next restart reparses.

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* fix(docs): readme: remove trailing comma from JSON config

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* fix(meta): dynamodb: use context.Background instead of context.TODO

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* fix(meta): invalidate fast restart on storage config changes

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* chore(meta): dynamodb: use context.Background() instead of context.TODO()

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* docs(meta): dynamodb: add comment about nil AttributeValue handling in GetWriterVersion

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* chore: rename writer-version stamp to fast-restart stamp

Also changes the version/commit tracking to use BinaryVersion instead of WriterVersion.

This should make things clearer.

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* fix(config): ensure FastRestart is on GlobalStorageConfig

This is not a per-subpath setting

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* fix(metadb): redis: tests: ensure clients are closed

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

---------

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>
This commit is contained in:
Jacob McSwain
2026-06-09 12:47:20 -05:00
committed by GitHub
parent d480380ef7
commit 273b15364b
19 changed files with 1103 additions and 44 deletions
+87
View File
@@ -13,6 +13,75 @@ import (
syncconf "zotregistry.dev/zot/v2/pkg/extensions/config/sync"
)
// TestStorageFingerprint exercises (*config.Config).StorageFingerprint: it
// checks the nil-receiver result, that equal storage settings give equal
// fingerprints, and which individual field changes do or do not alter it.
func TestStorageFingerprint(t *testing.T) {
	// baseline returns a fresh default config with a fixed root directory so
	// every case starts from the same storage settings.
	baseline := func() *config.Config {
		cfg := config.New()
		cfg.Storage.RootDirectory = "/var/lib/registry"

		return cfg
	}

	// mutated returns a fresh baseline config after applying a single change.
	mutated := func(change func(cfg *config.Config)) *config.Config {
		cfg := baseline()
		change(cfg)

		return cfg
	}

	Convey("StorageFingerprint", t, func() {
		Convey("nil config yields an empty fingerprint", func() {
			So((*config.Config)(nil).StorageFingerprint(), ShouldEqual, "")
		})

		Convey("identical storage config yields an identical, non-empty fingerprint", func() {
			reference := baseline().StorageFingerprint()
			So(reference, ShouldNotEqual, "")
			So(baseline().StorageFingerprint(), ShouldEqual, reference)
		})

		Convey("changing a storage field changes the fingerprint", func() {
			reference := baseline().StorageFingerprint()

			withDedupe := mutated(func(cfg *config.Config) {
				cfg.Storage.Dedupe = !cfg.Storage.Dedupe
			})
			So(withDedupe.StorageFingerprint(), ShouldNotEqual, reference)

			withRootDir := mutated(func(cfg *config.Config) {
				cfg.Storage.RootDirectory = "/different"
			})
			So(withRootDir.StorageFingerprint(), ShouldNotEqual, reference)

			withDriver := mutated(func(cfg *config.Config) {
				cfg.Storage.StorageDriver = map[string]any{"name": "s3"}
			})
			So(withDriver.StorageFingerprint(), ShouldNotEqual, reference)

			withSubPaths := mutated(func(cfg *config.Config) {
				cfg.Storage.SubPaths = map[string]config.StorageConfig{"/a": {RootDirectory: "/data/a"}}
			})
			So(withSubPaths.StorageFingerprint(), ShouldNotEqual, reference)
		})

		Convey("changing a non-storage field keeps the fingerprint", func() {
			reference := baseline().StorageFingerprint()

			withPort := mutated(func(cfg *config.Config) {
				cfg.HTTP.Port = "9999"
			})
			So(withPort.StorageFingerprint(), ShouldEqual, reference)

			withLogLevel := mutated(func(cfg *config.Config) {
				cfg.Log = &config.LogConfig{Level: "debug"}
			})
			So(withLogLevel.StorageFingerprint(), ShouldEqual, reference)
		})

		Convey("FastRestart and GCMaxSchedulerDelay are excluded from the fingerprint", func() {
			reference := baseline().StorageFingerprint()

			withFastRestart := mutated(func(cfg *config.Config) {
				on := true
				cfg.Storage.FastRestart = &on
			})
			So(withFastRestart.StorageFingerprint(), ShouldEqual, reference)

			withSchedDelay := mutated(func(cfg *config.Config) {
				cfg.Storage.GCMaxSchedulerDelay = 5 * time.Minute
			})
			So(withSchedDelay.StorageFingerprint(), ShouldEqual, reference)
		})
	})
}
func TestConfig(t *testing.T) {
Convey("Test config utils", t, func() {
firstStorageConfig := config.StorageConfig{
@@ -605,6 +674,24 @@ func TestConfig(t *testing.T) {
So(conf.IsRetentionEnabled(), ShouldBeFalse)
})
// IsFastRestartEnabled is expected to report true only when
// Storage.FastRestart points at true; nil receiver, unset and false all
// report false.
Convey("Test IsFastRestartEnabled()", t, func() {
	// Calling through a nil *Config must return false rather than panic.
	// The zero value of a pointer is already nil, so no initializer is needed.
	var nilConf *config.Config
	So(nilConf.IsFastRestartEnabled(), ShouldBeFalse)

	// Default config leaves FastRestart unset.
	conf := config.New()
	So(conf.IsFastRestartEnabled(), ShouldBeFalse)

	// Explicitly disabled.
	disabled := false
	conf.Storage.FastRestart = &disabled
	So(conf.IsFastRestartEnabled(), ShouldBeFalse)

	// Explicitly enabled.
	enabled := true
	conf.Storage.FastRestart = &enabled
	So(conf.IsFastRestartEnabled(), ShouldBeTrue)
})
Convey("Test IsEventRecorderEnabled()", t, func() {
conf := config.New()
extensionsConfig := conf.CopyExtensionsConfig()