fix: gracefully handle manifests missing from storage (prepare for sparse indexes) (#3503)

GC and scrub should not stop if a manifest or index is missing from storage.
Other similar changes are also included.

With respect to MetaDB, manifests that are missing from storage cannot be added to it,
so the results MetaDB returns do not include descriptors for those manifests.

Signed-off-by: Andrei Aaron <andreifdaaron@gmail.com>
This commit is contained in:
Andrei Aaron
2025-11-13 19:26:18 +02:00
committed by GitHub
parent 2b6fba7059
commit 008527b7bb
20 changed files with 1240 additions and 138 deletions
+5
View File
@@ -384,6 +384,11 @@ func TestScanGeneratorWithMockedData(t *testing.T) { //nolint: gocyclo
return false, err
}
// If all manifests are missing (e.g., from an index), Manifests will be empty
if len(manifestData.Manifests) == 0 {
return false, nil
}
for _, imageLayer := range manifestData.Manifests[0].Manifest.Layers {
switch imageLayer.MediaType {
case ispec.MediaTypeImageLayerGzip, ispec.MediaTypeImageLayer, string(regTypes.DockerLayer):
+92
View File
@@ -7406,6 +7406,98 @@ type repoRef struct {
Tag string
}
// TestSearchWithMissingManifest writes a two-manifest multiarch image to disk,
// removes one manifest blob before the controller is started, and then checks
// that a GlobalSearch for the tag still returns the image with only the
// surviving manifest listed.
func TestSearchWithMissingManifest(t *testing.T) {
	Convey("Search with missing manifest", t, func() {
		rootDir := t.TempDir()

		// Step 1: lay down a multiarch image (exactly 2 manifests) as testrepo:latest.
		testLog := log.NewTestLogger()
		storeCtlr := ociutils.GetDefaultStoreController(rootDir, testLog)
		multiarch := CreateMultiarchWith().RandomImages(2).Build()
		So(WriteMultiArchImageToFileSystem(multiarch, "testrepo", "latest", storeCtlr), ShouldBeNil)

		// Read the repository index back so the manifest digests are known.
		indexBlob, err := storeCtlr.GetDefaultImageStore().GetIndexContent("testrepo")
		So(err, ShouldBeNil)

		var repoIndex ispec.Index

		So(json.Unmarshal(indexBlob, &repoIndex), ShouldBeNil)
		So(len(repoIndex.Manifests), ShouldBeGreaterThanOrEqualTo, 2)

		// Entry 0 is the one removed below; entry 1 is expected to stay valid.
		removedDigest := repoIndex.Manifests[0].Digest
		keptDigest := repoIndex.Manifests[1].Digest

		// Step 2: delete the first manifest's blob directly from the filesystem.
		removedBlobPath := path.Join(rootDir, "testrepo", "blobs", "sha256", removedDigest.Encoded())
		So(os.Remove(removedBlobPath), ShouldBeNil)

		// Step 3: bring up the controller with search enabled and CVE scanning unset
		// (per the original note, MetaDB parsing would be done in the background).
		port := GetFreePort()
		baseURL := GetBaseURL(port)
		searchEnabled := true

		conf := config.New()
		conf.HTTP.Port = port
		conf.Storage.RootDirectory = rootDir
		conf.Extensions = &extconf.ExtensionConfig{
			Search: &extconf.SearchConfig{
				BaseConfig: extconf.BaseConfig{Enable: &searchEnabled},
			},
		}
		conf.Extensions.Search.CVE = nil

		ctlrManager := NewControllerManager(api.NewController(conf))
		ctlrManager.StartAndWait(port)

		defer ctlrManager.StopServer()

		// Step 4: query GlobalSearch for the tag that was written above.
		query := `
{
GlobalSearch(query:"testrepo:latest"){
Images {
RepoName Tag
Manifests {
Digest
}
}
}
}`
		searchURL := baseURL + graphqlQueryPrefix + "?query=" + url.QueryEscape(query)

		resp, err := resty.R().Get(searchURL)
		So(resp, ShouldNotBeNil)
		So(err, ShouldBeNil)
		So(resp.StatusCode(), ShouldEqual, http.StatusOK)

		var searchResp zcommon.GlobalSearchResultResp

		So(json.Unmarshal(resp.Body(), &searchResp), ShouldBeNil)

		// The image itself must still be discoverable.
		images := searchResp.GlobalSearchResult.GlobalSearch.Images
		So(images, ShouldNotBeEmpty)

		firstHit := images[0]
		So(firstHit.RepoName, ShouldEqual, "testrepo")
		So(firstHit.Tag, ShouldEqual, "latest")

		// Only the manifest whose blob is still on disk should be reported; the
		// removed one is expected to have been skipped by ParseStorage.
		So(len(firstHit.Manifests), ShouldEqual, 1)
		So(firstHit.Manifests[0].Digest, ShouldEqual, keptDigest.String())
	})
}
func deleteUsedImages(repoTags []repoRef, baseURL string) {
for _, image := range repoTags {
status, err := DeleteImage(image.Repo, image.Tag, baseURL)
+22
View File
@@ -12,6 +12,7 @@ import (
"path"
"strings"
"github.com/distribution/distribution/v3/registry/storage/driver"
godigest "github.com/opencontainers/go-digest"
ispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/regclient/regclient/types/mediatype"
@@ -227,11 +228,27 @@ func (registry *DestinationRegistry) copyManifest(repo string, desc ispec.Descri
return err
}
var firstMissingErr error
for _, manifest := range indexManifest.Manifests {
reference := GetDescriptorReference(manifest)
manifestBuf, err := tempImageStore.GetBlobContent(repo, manifest.Digest)
if err != nil {
// Handle missing manifest blobs gracefully - log warning and continue with other manifests
var pathNotFoundErr driver.PathNotFoundError
if errors.Is(err, zerr.ErrBlobNotFound) || errors.As(err, &pathNotFoundErr) {
if firstMissingErr == nil {
firstMissingErr = err
}
registry.log.Warn().Err(err).Str("dir", path.Join(tempImageStore.RootDir(), repo)).
Str("digest", manifest.Digest.String()).
Msg("skipping missing manifest blob in image index, continuing sync with other manifests")
continue
}
registry.log.Error().Str("errorType", common.TypeOf(err)).
Err(err).Str("dir", path.Join(tempImageStore.RootDir(), repo)).Str("digest", manifest.Digest.String()).
Msg("failed find manifest which is part of an image index")
@@ -254,6 +271,11 @@ func (registry *DestinationRegistry) copyManifest(repo string, desc ispec.Descri
}
}
// Return error if we encountered any missing manifests
if firstMissingErr != nil {
return firstMissingErr
}
_, _, err := imageStore.PutImageManifest(repo, reference, desc.MediaType, manifestContent)
if err != nil {
registry.log.Error().Str("errorType", common.TypeOf(err)).Str("repo", repo).Str("reference", reference).