mirror of
https://github.com/project-zot/zot.git
synced 2026-06-18 05:28:07 +08:00
metadb: add optional fast restart path that skips storage walk when (version + commit + storage config) matches metaDB stamp (#4026)
* chore(metadb): add writer version to interface Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * chore(metadb): add writer version to db mock Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * chore(metadb): implement writer version for bolt, redis, and dynamodb Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * feat(metadb): add optional fast restart path that skips storage walk when binary identity matches metaDB stamp binary identity is determined by the current release tag/commit and stored in metaDB after a successful storage parse. When fast restart is enabled, the next startup will skip the parse if the stored identity matches the current binary Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * chore(cli): serve: add a way to force reparse storage Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * refactor(meta): version: split to avoid global state mutation in tests Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * fix(meta): version: include commit in writerVersion to distinguish retags Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * chore(config): add IsFastRestartEnabled() test Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * fix(meta): skip writer-version stamp when storage parse is incomplete ParseStorage returns nil even when individual repos fail to parse or are only partially parsed (a missing manifest blob), so MaybeParseStorage would stamp a partially-populated metaDB as good. On the next restart fastRestart trusts the stamp, skips the storage walk, and never recovers. 
Track per-repo outcomes via parseStats and stamp only when the walk fully populated the metaDB, otherwise log and continue so the next restart reparses Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * fix(docs): readme: remove trailing comma from JSON config Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * fix(meta): dynamodb: use context.Background instead of context.TODO Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * fix(meta): invalidate fast restart on storage config changes Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * chore(meta): dynamodb: use context.Background() instead of context.TODO() Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * docs(meta): dynamodb: add comment about nil AttributeValue handling in GetWriterVersion Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * chore: rename writer-version stamp to fast-restart stamp also replaces the version/commit tracking to use BinaryVersion instead of WriterVersion This should make things more clear Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * fix(config): ensure FastRestart is on GlobalStorageConfig This is not a per-subpath setting Signed-off-by: Jacob McSwain <jacob@mcswain.dev> * fix(metadb): redis: tests: ensure clients are closed Signed-off-by: Jacob McSwain <jacob@mcswain.dev> --------- Signed-off-by: Jacob McSwain <jacob@mcswain.dev>
This commit is contained in:
+125
-10
@@ -24,14 +24,36 @@ const (
|
||||
NotationType = "notation"
|
||||
)
|
||||
|
||||
// parseStats tracks per-repo outcomes of a storage walk.
//
// The zero value describes a walk in which no repo failed and no repo was
// only partially parsed.
type parseStats struct {
	failedRepos  int // repos skipped on a StatIndex or ParseRepo error
	partialRepos int // repos parsed, but a manifest blob was missing
}

// complete reports whether the walk fully populated the metaDB, i.e. no repo
// failed and no repo was only partially parsed.
func (s parseStats) complete() bool {
	return s.failedRepos == 0 && s.partialRepos == 0
}
|
||||
|
||||
// ParseStorage will sync all repos found in the rootdirectory of the oci layout that zot was deployed on with the
|
||||
// ParseStorage database.
|
||||
func ParseStorage(metaDB mTypes.MetaDB, storeController stypes.StoreController, log log.Logger) error {
|
||||
_, err := parseStorage(metaDB, storeController, log)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// parseStorage runs the storage walk, returning per-repo outcomes in parseStats.
|
||||
// Per-repo failures are logged and skipped. Only enumeration or deletion errors
|
||||
// abort the walk and return a non-nil error.
|
||||
func parseStorage(metaDB mTypes.MetaDB, storeController stypes.StoreController, log log.Logger) (parseStats, error) {
|
||||
log.Info().Str("component", "metadb").Msg("parsing storage and initializing")
|
||||
|
||||
var stats parseStats
|
||||
|
||||
allStorageRepos, err := getAllRepos(storeController, log)
|
||||
if err != nil {
|
||||
return err
|
||||
return parseStats{}, err
|
||||
}
|
||||
|
||||
allMetaDBRepos, err := metaDB.GetAllRepoNames()
|
||||
@@ -40,7 +62,7 @@ func ParseStorage(metaDB mTypes.MetaDB, storeController stypes.StoreController,
|
||||
log.Error().Err(err).Str("component", "metadb").Str("rootDir", rootDir).
|
||||
Msg("failed to get all repo names present under rootDir")
|
||||
|
||||
return err
|
||||
return parseStats{}, err
|
||||
}
|
||||
|
||||
for _, repo := range getReposToBeDeleted(allStorageRepos, allMetaDBRepos) {
|
||||
@@ -49,7 +71,7 @@ func ParseStorage(metaDB mTypes.MetaDB, storeController stypes.StoreController,
|
||||
log.Error().Err(err).Str("rootDir", storeController.GetImageStore(repo).RootDir()).Str("component", "metadb").
|
||||
Str("repo", repo).Msg("failed to delete repo meta")
|
||||
|
||||
return err
|
||||
return parseStats{}, err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -64,6 +86,8 @@ func ParseStorage(metaDB mTypes.MetaDB, storeController stypes.StoreController,
|
||||
log.Error().Err(err).Str("rootDir", imgStore.RootDir()).
|
||||
Str("repo", repo).Msg("failed to sync repo")
|
||||
|
||||
stats.failedRepos++
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -75,16 +99,93 @@ func ParseStorage(metaDB mTypes.MetaDB, storeController stypes.StoreController,
|
||||
continue
|
||||
}
|
||||
|
||||
err = ParseRepo(repo, metaDB, storeController, log)
|
||||
partial, err := parseRepo(repo, metaDB, storeController, log)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Str("repo", repo).Str("rootDir", imgStore.RootDir()).Msg("failed to sync repo")
|
||||
|
||||
stats.failedRepos++
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
if partial {
|
||||
stats.partialRepos++
|
||||
}
|
||||
}
|
||||
|
||||
log.Info().Str("component", "metadb").Msg("successfully initialized")
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// FastRestartStamp combines this binary's identity (binaryVersion, from version.CurrentBinaryVersion)
// with a fingerprint of the storage config into the stamp used to gate a fast restart.
//
// The result has the form "<binaryVersion>|<storageFingerprint>". If either input is empty the
// function returns "", which MaybeParseStorage treats as "no stamp available".
func FastRestartStamp(binaryVersion, storageFingerprint string) string {
	if binaryVersion == "" || storageFingerprint == "" {
		return ""
	}

	return binaryVersion + "|" + storageFingerprint
}
|
||||
|
||||
// MaybeParseStorage conditionally runs ParseStorage based on a fast-restart stamp stored in metaDB.
|
||||
// When fastRestart is true and the metaDB carries a stamp matching this binary and storage config,
|
||||
// the full walk is skipped under the assumption that metaDB is consistent with storage from the
|
||||
// previous run.
|
||||
func MaybeParseStorage(metaDB mTypes.MetaDB, storeController stypes.StoreController,
|
||||
fastRestart bool, fastRestartStamp string, log log.Logger,
|
||||
) error {
|
||||
if fastRestart {
|
||||
if fastRestartStamp == "" {
|
||||
log.Info().Str("component", "metadb").
|
||||
Msg("fast-restart enabled but no stamp is available; falling back to full parse")
|
||||
} else {
|
||||
storedStamp, err := metaDB.GetFastRestartStamp()
|
||||
switch {
|
||||
case err != nil:
|
||||
log.Warn().Err(err).Str("component", "metadb").
|
||||
Msg("failed to read fast-restart stamp, falling back to full parse")
|
||||
case storedStamp == fastRestartStamp:
|
||||
log.Info().Str("component", "metadb").Str("fastRestartStamp", storedStamp).
|
||||
Msg("metaDB fast-restart stamp matches, skipping full storage parse")
|
||||
|
||||
return nil
|
||||
case storedStamp == "":
|
||||
log.Info().Str("component", "metadb").
|
||||
Msg("metaDB has no fast-restart stamp, running full parse")
|
||||
default:
|
||||
log.Info().Str("component", "metadb").
|
||||
Str("storedStamp", storedStamp).Str("currentStamp", fastRestartStamp).
|
||||
Msg("metaDB fast-restart stamp differs, running full parse")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stats, err := parseStorage(metaDB, storeController, log)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if fastRestartStamp == "" {
|
||||
// go run/go test builds have no stamp, so always reparse.
|
||||
return nil
|
||||
}
|
||||
|
||||
// Leave the stamp untouched on an incomplete walk so the next restart
|
||||
// reparses and can recover.
|
||||
if !stats.complete() {
|
||||
log.Warn().Str("component", "metadb").
|
||||
Int("failedRepos", stats.failedRepos).Int("partialRepos", stats.partialRepos).
|
||||
Msg("storage parse incomplete; skipping fast-restart stamp so the next restart reparses")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := metaDB.SetFastRestartStamp(fastRestartStamp); err != nil {
|
||||
log.Warn().Err(err).Str("component", "metadb").
|
||||
Msg("failed to write fast-restart stamp; next restart will reparse")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -109,6 +210,16 @@ func getReposToBeDeleted(allStorageRepos []string, allMetaDBRepos []string) []st
|
||||
|
||||
// ParseRepo reads the contents of a repo and syncs all images and signatures found.
|
||||
func ParseRepo(repo string, metaDB mTypes.MetaDB, storeController stypes.StoreController, log log.Logger) error {
|
||||
_, err := parseRepo(repo, metaDB, storeController, log)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// parseRepo syncs all images and signatures in a repo. It returns partial=true
|
||||
// when a manifest was skipped because its blob is missing, so the caller knows
|
||||
// the metaDB is incomplete even though no error was returned.
|
||||
func parseRepo(repo string, metaDB mTypes.MetaDB, storeController stypes.StoreController, log log.Logger,
|
||||
) (bool, error) {
|
||||
imageStore := storeController.GetImageStore(repo)
|
||||
|
||||
var lockLatency time.Time
|
||||
@@ -120,7 +231,7 @@ func ParseRepo(repo string, metaDB mTypes.MetaDB, storeController stypes.StoreCo
|
||||
if err != nil {
|
||||
log.Error().Err(err).Str("repository", repo).Msg("failed to read index.json for repo")
|
||||
|
||||
return err
|
||||
return false, err
|
||||
}
|
||||
|
||||
var indexContent ispec.Index
|
||||
@@ -129,7 +240,7 @@ func ParseRepo(repo string, metaDB mTypes.MetaDB, storeController stypes.StoreCo
|
||||
if err != nil {
|
||||
log.Error().Err(err).Str("repository", repo).Msg("failed to unmarshal index.json for repo")
|
||||
|
||||
return err
|
||||
return false, err
|
||||
}
|
||||
|
||||
// Collect tags that exist in storage to preserve them
|
||||
@@ -146,9 +257,11 @@ func ParseRepo(repo string, metaDB mTypes.MetaDB, storeController stypes.StoreCo
|
||||
if err != nil && !errors.Is(err, zerr.ErrRepoMetaNotFound) {
|
||||
log.Error().Err(err).Str("repository", repo).Msg("failed to reset tag field in RepoMetadata for repo")
|
||||
|
||||
return err
|
||||
return false, err
|
||||
}
|
||||
|
||||
partial := false
|
||||
|
||||
for _, manifest := range indexContent.Manifests {
|
||||
tag := manifest.Annotations[ispec.AnnotationRefName]
|
||||
|
||||
@@ -164,13 +277,15 @@ func ParseRepo(repo string, metaDB mTypes.MetaDB, storeController stypes.StoreCo
|
||||
log.Warn().Err(err).Str("repository", repo).Str("digest", manifest.Digest.String()).
|
||||
Msg("skipping missing manifest blob, continuing repo parse")
|
||||
|
||||
partial = true
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
log.Error().Err(err).Str("repository", repo).Str("digest", manifest.Digest.String()).
|
||||
Msg("failed to get blob for image")
|
||||
|
||||
return err
|
||||
return false, err
|
||||
}
|
||||
|
||||
reference := tag
|
||||
@@ -185,11 +300,11 @@ func ParseRepo(repo string, metaDB mTypes.MetaDB, storeController stypes.StoreCo
|
||||
log.Error().Err(err).Str("repository", repo).Str("tag", tag).
|
||||
Msg("failed to set metadata for image")
|
||||
|
||||
return err
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
return partial, nil
|
||||
}
|
||||
|
||||
func getAllRepos(storeController stypes.StoreController, log log.Logger) ([]string, error) {
|
||||
|
||||
Reference in New Issue
Block a user