metadb: add optional fast restart path that skips storage walk when (version + commit + storage config) matches metaDB stamp (#4026)

* chore(metadb): add writer version to interface

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* chore(metadb): add writer version to db mock

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* chore(metadb): implement writer version for bolt, redis, and dynamodb

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* feat(metadb): add optional fast restart path that skips storage walk when binary identity matches metaDB stamp

binary identity is determined by the current release tag/commit and stored in metaDB after a successful storage parse. When fast restart is enabled, the next startup will skip the parse if the stored identity matches the current binary

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* chore(cli): serve: add a way to force reparse storage

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* refactor(meta): version: split to avoid global state mutation in tests

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* fix(meta): version: include commit in writerVersion to distinguish retags

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* chore(config): add IsFastRestartEnabled() test

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* fix(meta): skip writer-version stamp when storage parse is incomplete

ParseStorage returns nil even when individual repos fail to parse or are only partially parsed (a missing manifest blob), so MaybeParseStorage would stamp a partially-populated metaDB as good. On the next restart fastRestart trusts the stamp, skips the storage walk, and never recovers.

Track per-repo outcomes via parseStats and stamp only when the walk fully populated the metaDB, otherwise log and continue so the next restart reparses

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* fix(docs): readme: remove trailing comma from JSON config

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* fix(meta): dynamodb: use context.Background instead of context.TODO

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* fix(meta): invalidate fast restart on storage config changes

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* chore(meta): dynamodb: use context.Background() instead of context.TODO()

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* docs(meta): dynamodb: add comment about nil AttributeValue handling in GetWriterVersion

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* chore: rename writer-version stamp to fast-restart stamp

also replaces the version/commit tracking to use BinaryVersion instead of WriterVersion

This should make things more clear

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* fix(config): ensure FastRestart is on GlobalStorageConfig

This is not a per-subpath setting

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

* fix(metadb): redis: tests: ensure clients are closed

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>

---------

Signed-off-by: Jacob McSwain <jacob@mcswain.dev>
This commit is contained in:
Jacob McSwain
2026-06-09 12:47:20 -05:00
committed by GitHub
parent d480380ef7
commit 273b15364b
19 changed files with 1103 additions and 44 deletions
+125 -10
View File
@@ -24,14 +24,36 @@ const (
NotationType = "notation"
)
// parseStats tracks per-repo outcomes of a storage walk.
type parseStats struct {
failedRepos int // skipped on a StatIndex or ParseRepo error
partialRepos int // parsed, but a manifest blob was missing
}
// complete reports whether the walk fully populated the metaDB.
func (s parseStats) complete() bool {
return s.failedRepos == 0 && s.partialRepos == 0
}
// ParseStorage will sync all repos found in the rootdirectory of the oci layout that zot was deployed on with the
// ParseStorage database.
func ParseStorage(metaDB mTypes.MetaDB, storeController stypes.StoreController, log log.Logger) error {
_, err := parseStorage(metaDB, storeController, log)
return err
}
// parseStorage runs the storage walk, returning per-repo outcomes in parseStats.
// Per-repo failures are logged and skipped. Only enumeration or deletion errors
// abort the walk and return a non-nil error.
func parseStorage(metaDB mTypes.MetaDB, storeController stypes.StoreController, log log.Logger) (parseStats, error) {
log.Info().Str("component", "metadb").Msg("parsing storage and initializing")
var stats parseStats
allStorageRepos, err := getAllRepos(storeController, log)
if err != nil {
return err
return parseStats{}, err
}
allMetaDBRepos, err := metaDB.GetAllRepoNames()
@@ -40,7 +62,7 @@ func ParseStorage(metaDB mTypes.MetaDB, storeController stypes.StoreController,
log.Error().Err(err).Str("component", "metadb").Str("rootDir", rootDir).
Msg("failed to get all repo names present under rootDir")
return err
return parseStats{}, err
}
for _, repo := range getReposToBeDeleted(allStorageRepos, allMetaDBRepos) {
@@ -49,7 +71,7 @@ func ParseStorage(metaDB mTypes.MetaDB, storeController stypes.StoreController,
log.Error().Err(err).Str("rootDir", storeController.GetImageStore(repo).RootDir()).Str("component", "metadb").
Str("repo", repo).Msg("failed to delete repo meta")
return err
return parseStats{}, err
}
}
@@ -64,6 +86,8 @@ func ParseStorage(metaDB mTypes.MetaDB, storeController stypes.StoreController,
log.Error().Err(err).Str("rootDir", imgStore.RootDir()).
Str("repo", repo).Msg("failed to sync repo")
stats.failedRepos++
continue
}
@@ -75,16 +99,93 @@ func ParseStorage(metaDB mTypes.MetaDB, storeController stypes.StoreController,
continue
}
err = ParseRepo(repo, metaDB, storeController, log)
partial, err := parseRepo(repo, metaDB, storeController, log)
if err != nil {
log.Error().Err(err).Str("repo", repo).Str("rootDir", imgStore.RootDir()).Msg("failed to sync repo")
stats.failedRepos++
continue
}
if partial {
stats.partialRepos++
}
}
log.Info().Str("component", "metadb").Msg("successfully initialized")
return stats, nil
}
// FastRestartStamp combines this binary's identity (binaryVersion, from version.CurrentBinaryVersion)
// with a fingerprint of the storage config into the stamp used to gate a fast restart.
func FastRestartStamp(binaryVersion, storageFingerprint string) string {
if binaryVersion == "" || storageFingerprint == "" {
return ""
}
return binaryVersion + "|" + storageFingerprint
}
// MaybeParseStorage conditionally runs ParseStorage based on a fast-restart stamp stored in metaDB.
// When fastRestart is true and the metaDB carries a stamp matching this binary and storage config,
// the full walk is skipped under the assumption that metaDB is consistent with storage from the
// previous run.
func MaybeParseStorage(metaDB mTypes.MetaDB, storeController stypes.StoreController,
fastRestart bool, fastRestartStamp string, log log.Logger,
) error {
if fastRestart {
if fastRestartStamp == "" {
log.Info().Str("component", "metadb").
Msg("fast-restart enabled but no stamp is available; falling back to full parse")
} else {
storedStamp, err := metaDB.GetFastRestartStamp()
switch {
case err != nil:
log.Warn().Err(err).Str("component", "metadb").
Msg("failed to read fast-restart stamp, falling back to full parse")
case storedStamp == fastRestartStamp:
log.Info().Str("component", "metadb").Str("fastRestartStamp", storedStamp).
Msg("metaDB fast-restart stamp matches, skipping full storage parse")
return nil
case storedStamp == "":
log.Info().Str("component", "metadb").
Msg("metaDB has no fast-restart stamp, running full parse")
default:
log.Info().Str("component", "metadb").
Str("storedStamp", storedStamp).Str("currentStamp", fastRestartStamp).
Msg("metaDB fast-restart stamp differs, running full parse")
}
}
}
stats, err := parseStorage(metaDB, storeController, log)
if err != nil {
return err
}
if fastRestartStamp == "" {
// go run/go test builds have no stamp, so always reparse.
return nil
}
// Leave the stamp untouched on an incomplete walk so the next restart
// reparses and can recover.
if !stats.complete() {
log.Warn().Str("component", "metadb").
Int("failedRepos", stats.failedRepos).Int("partialRepos", stats.partialRepos).
Msg("storage parse incomplete; skipping fast-restart stamp so the next restart reparses")
return nil
}
if err := metaDB.SetFastRestartStamp(fastRestartStamp); err != nil {
log.Warn().Err(err).Str("component", "metadb").
Msg("failed to write fast-restart stamp; next restart will reparse")
}
return nil
}
@@ -109,6 +210,16 @@ func getReposToBeDeleted(allStorageRepos []string, allMetaDBRepos []string) []st
// ParseRepo reads the contents of a repo and syncs all images and signatures found.
func ParseRepo(repo string, metaDB mTypes.MetaDB, storeController stypes.StoreController, log log.Logger) error {
_, err := parseRepo(repo, metaDB, storeController, log)
return err
}
// parseRepo syncs all images and signatures in a repo. It returns partial=true
// when a manifest was skipped because its blob is missing, so the caller knows
// the metaDB is incomplete even though no error was returned.
func parseRepo(repo string, metaDB mTypes.MetaDB, storeController stypes.StoreController, log log.Logger,
) (bool, error) {
imageStore := storeController.GetImageStore(repo)
var lockLatency time.Time
@@ -120,7 +231,7 @@ func ParseRepo(repo string, metaDB mTypes.MetaDB, storeController stypes.StoreCo
if err != nil {
log.Error().Err(err).Str("repository", repo).Msg("failed to read index.json for repo")
return err
return false, err
}
var indexContent ispec.Index
@@ -129,7 +240,7 @@ func ParseRepo(repo string, metaDB mTypes.MetaDB, storeController stypes.StoreCo
if err != nil {
log.Error().Err(err).Str("repository", repo).Msg("failed to unmarshal index.json for repo")
return err
return false, err
}
// Collect tags that exist in storage to preserve them
@@ -146,9 +257,11 @@ func ParseRepo(repo string, metaDB mTypes.MetaDB, storeController stypes.StoreCo
if err != nil && !errors.Is(err, zerr.ErrRepoMetaNotFound) {
log.Error().Err(err).Str("repository", repo).Msg("failed to reset tag field in RepoMetadata for repo")
return err
return false, err
}
partial := false
for _, manifest := range indexContent.Manifests {
tag := manifest.Annotations[ispec.AnnotationRefName]
@@ -164,13 +277,15 @@ func ParseRepo(repo string, metaDB mTypes.MetaDB, storeController stypes.StoreCo
log.Warn().Err(err).Str("repository", repo).Str("digest", manifest.Digest.String()).
Msg("skipping missing manifest blob, continuing repo parse")
partial = true
continue
}
log.Error().Err(err).Str("repository", repo).Str("digest", manifest.Digest.String()).
Msg("failed to get blob for image")
return err
return false, err
}
reference := tag
@@ -185,11 +300,11 @@ func ParseRepo(repo string, metaDB mTypes.MetaDB, storeController stypes.StoreCo
log.Error().Err(err).Str("repository", repo).Str("tag", tag).
Msg("failed to set metadata for image")
return err
return false, err
}
}
return nil
return partial, nil
}
func getAllRepos(storeController stypes.StoreController, log log.Logger) ([]string, error) {