mirror of
https://github.com/project-zot/zot.git
synced 2026-06-17 21:17:58 +08:00
metadb: add optional fast restart path that skips storage walk when (version + commit + storage config) matches metaDB stamp (#4026)
* chore(metadb): add writer version to interface Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * chore(metadb): add writer version to db mock Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * chore(metadb): implement writer version for bolt, redis, and dynamodb Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * feat(metadb): add optional fast restart path that skips storage walk when binary identity matches metaDB stamp. Binary identity is determined by the current release tag/commit and stored in metaDB after a successful storage parse. When fast restart is enabled, the next startup will skip the parse if the stored identity matches the current binary. Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * chore(cli): serve: add a way to force reparse storage Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * refactor(meta): version: split to avoid global state mutation in tests Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * fix(meta): version: include commit in writerVersion to distinguish retags Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * chore(config): add IsFastRestartEnabled() test Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * fix(meta): skip writer-version stamp when storage parse is incomplete. ParseStorage returns nil even when individual repos fail to parse or are only partially parsed (e.g. a missing manifest blob), so MaybeParseStorage would stamp a partially-populated metaDB as good. On the next restart, fastRestart trusts the stamp, skips the storage walk, and never recovers.
Track per-repo outcomes via parseStats and stamp only when the walk fully populated the metaDB; otherwise log and continue so the next restart reparses. Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * fix(docs): readme: remove trailing comma from JSON config Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * fix(meta): dynamodb: use context.Background instead of context.TODO Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * fix(meta): invalidate fast restart on storage config changes Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * chore(meta): dynamodb: use context.Background() instead of context.TODO() Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * docs(meta): dynamodb: add comment about nil AttributeValue handling in GetWriterVersion Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * chore: rename writer-version stamp to fast-restart stamp. Also replaces the version/commit tracking to use BinaryVersion instead of WriterVersion. This should make things clearer. Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * fix(config): ensure FastRestart is on GlobalStorageConfig. This is not a per-subpath setting. Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * fix(metadb): redis: tests: ensure clients are closed Signed-off-by: Jacob McSwain <jacob@mcswain.dev> --------- Signed-off-by: Jacob McSwain <jacob@mcswain.dev>
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"maps"
|
||||
"os"
|
||||
@@ -498,6 +500,14 @@ type GlobalStorageConfig struct {
|
||||
StorageConfig `mapstructure:",squash"`
|
||||
|
||||
SubPaths map[string]StorageConfig
|
||||
|
||||
// FastRestart lets the controller skip the startup storage walk when neither
|
||||
// the Zot binary nor the storage config has changed since the last run. This
|
||||
// avoids re-reading all metadata from storage on every restart, at the cost
|
||||
// of not detecting out-of-band changes to storage; any storage-config change
|
||||
// forces a full reparse. It is a top-level storage setting only and is not
|
||||
// honored under subPaths. Defaults to false.
|
||||
FastRestart *bool `mapstructure:",omitempty"`
|
||||
}
|
||||
|
||||
type AccessControlConfig struct {
|
||||
@@ -1270,6 +1280,61 @@ func (c *Config) GetRealm() string {
|
||||
return c.HTTP.Realm
|
||||
}
|
||||
|
||||
// IsFastRestartEnabled reports whether the controller may skip the startup
|
||||
// storage walk when the metaDB fast-restart stamp matches the current binary
|
||||
// and storage config. Defaults to false when unset.
|
||||
func (c *Config) IsFastRestartEnabled() bool {
|
||||
if c == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
c.mu.RLock()
|
||||
defer c.mu.RUnlock()
|
||||
|
||||
if c.Storage.FastRestart == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return *c.Storage.FastRestart
|
||||
}
|
||||
|
||||
// StorageFingerprint returns a stable SHA-256 of the storage config that influences the
|
||||
// storage->metaDB walk. It is combined with this binary's identity (see meta.FastRestartStamp)
|
||||
// into the fast-restart stamp: when it changes, the metaDB may no longer match storage and a full
|
||||
// reparse is forced. FastRestart and the runtime-only GCMaxSchedulerDelay are excluded so
|
||||
// toggling them never spuriously invalidates the stamp.
|
||||
func (c *Config) StorageFingerprint() string {
|
||||
if c == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
c.mu.RLock()
|
||||
defer c.mu.RUnlock()
|
||||
|
||||
var norm GlobalStorageConfig
|
||||
if err := DeepCopy(c.Storage, &norm); err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
norm.FastRestart = nil
|
||||
norm.GCMaxSchedulerDelay = 0
|
||||
|
||||
for name, subPath := range norm.SubPaths {
|
||||
subPath.GCMaxSchedulerDelay = 0
|
||||
norm.SubPaths[name] = subPath
|
||||
}
|
||||
|
||||
// encoding/json sorts map keys, so the serialization is deterministic across restarts.
|
||||
blob, err := json.Marshal(norm)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
sum := sha256.Sum256(blob)
|
||||
|
||||
return hex.EncodeToString(sum[:])
|
||||
}
|
||||
|
||||
// GetCompat returns a copy of the compatibility config.
|
||||
func (c *Config) GetCompat() []compat.MediaCompatibility {
|
||||
if c == nil {
|
||||
|
||||
@@ -13,6 +13,75 @@ import (
|
||||
syncconf "zotregistry.dev/zot/v2/pkg/extensions/config/sync"
|
||||
)
|
||||
|
||||
func TestStorageFingerprint(t *testing.T) {
|
||||
newConf := func() *config.Config {
|
||||
conf := config.New()
|
||||
conf.Storage.RootDirectory = "/var/lib/registry"
|
||||
|
||||
return conf
|
||||
}
|
||||
|
||||
Convey("StorageFingerprint", t, func() {
|
||||
Convey("nil config yields an empty fingerprint", func() {
|
||||
var nilConf *config.Config
|
||||
|
||||
So(nilConf.StorageFingerprint(), ShouldEqual, "")
|
||||
})
|
||||
|
||||
Convey("identical storage config yields an identical, non-empty fingerprint", func() {
|
||||
fingerprint := newConf().StorageFingerprint()
|
||||
|
||||
So(fingerprint, ShouldNotEqual, "")
|
||||
So(newConf().StorageFingerprint(), ShouldEqual, fingerprint)
|
||||
})
|
||||
|
||||
Convey("changing a storage field changes the fingerprint", func() {
|
||||
base := newConf().StorageFingerprint()
|
||||
|
||||
dedupe := newConf()
|
||||
dedupe.Storage.Dedupe = !dedupe.Storage.Dedupe
|
||||
So(dedupe.StorageFingerprint(), ShouldNotEqual, base)
|
||||
|
||||
rootDir := newConf()
|
||||
rootDir.Storage.RootDirectory = "/different"
|
||||
So(rootDir.StorageFingerprint(), ShouldNotEqual, base)
|
||||
|
||||
driver := newConf()
|
||||
driver.Storage.StorageDriver = map[string]any{"name": "s3"}
|
||||
So(driver.StorageFingerprint(), ShouldNotEqual, base)
|
||||
|
||||
subPaths := newConf()
|
||||
subPaths.Storage.SubPaths = map[string]config.StorageConfig{"/a": {RootDirectory: "/data/a"}}
|
||||
So(subPaths.StorageFingerprint(), ShouldNotEqual, base)
|
||||
})
|
||||
|
||||
Convey("changing a non-storage field keeps the fingerprint", func() {
|
||||
base := newConf().StorageFingerprint()
|
||||
|
||||
port := newConf()
|
||||
port.HTTP.Port = "9999"
|
||||
So(port.StorageFingerprint(), ShouldEqual, base)
|
||||
|
||||
logLevel := newConf()
|
||||
logLevel.Log = &config.LogConfig{Level: "debug"}
|
||||
So(logLevel.StorageFingerprint(), ShouldEqual, base)
|
||||
})
|
||||
|
||||
Convey("FastRestart and GCMaxSchedulerDelay are excluded from the fingerprint", func() {
|
||||
base := newConf().StorageFingerprint()
|
||||
|
||||
enabled := true
|
||||
fastRestart := newConf()
|
||||
fastRestart.Storage.FastRestart = &enabled
|
||||
So(fastRestart.StorageFingerprint(), ShouldEqual, base)
|
||||
|
||||
schedDelay := newConf()
|
||||
schedDelay.Storage.GCMaxSchedulerDelay = 5 * time.Minute
|
||||
So(schedDelay.StorageFingerprint(), ShouldEqual, base)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func TestConfig(t *testing.T) {
|
||||
Convey("Test config utils", t, func() {
|
||||
firstStorageConfig := config.StorageConfig{
|
||||
@@ -605,6 +674,24 @@ func TestConfig(t *testing.T) {
|
||||
So(conf.IsRetentionEnabled(), ShouldBeFalse)
|
||||
})
|
||||
|
||||
Convey("Test IsFastRestartEnabled()", t, func() {
|
||||
var nilConf *config.Config = nil
|
||||
|
||||
So(nilConf.IsFastRestartEnabled(), ShouldBeFalse)
|
||||
|
||||
// Default config leaves FastRestart unset
|
||||
conf := config.New()
|
||||
So(conf.IsFastRestartEnabled(), ShouldBeFalse)
|
||||
|
||||
disabled := false
|
||||
conf.Storage.FastRestart = &disabled
|
||||
So(conf.IsFastRestartEnabled(), ShouldBeFalse)
|
||||
|
||||
enabled := true
|
||||
conf.Storage.FastRestart = &enabled
|
||||
So(conf.IsFastRestartEnabled(), ShouldBeTrue)
|
||||
})
|
||||
|
||||
Convey("Test IsEventRecorderEnabled()", t, func() {
|
||||
conf := config.New()
|
||||
extensionsConfig := conf.CopyExtensionsConfig()
|
||||
|
||||
@@ -28,6 +28,7 @@ import (
|
||||
log "zotregistry.dev/zot/v2/pkg/log"
|
||||
meta "zotregistry.dev/zot/v2/pkg/meta"
|
||||
mTypes "zotregistry.dev/zot/v2/pkg/meta/types"
|
||||
version "zotregistry.dev/zot/v2/pkg/meta/version"
|
||||
scheduler "zotregistry.dev/zot/v2/pkg/scheduler"
|
||||
storage "zotregistry.dev/zot/v2/pkg/storage"
|
||||
gc "zotregistry.dev/zot/v2/pkg/storage/gc"
|
||||
@@ -442,7 +443,8 @@ func (c *Controller) InitMetaDB() error {
|
||||
return err
|
||||
}
|
||||
|
||||
err = meta.ParseStorage(driver, c.StoreController, c.Log) //nolint: contextcheck
|
||||
err = meta.MaybeParseStorage(driver, c.StoreController, c.Config.IsFastRestartEnabled(),
|
||||
meta.FastRestartStamp(version.CurrentBinaryVersion(), c.Config.StorageFingerprint()), c.Log)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user