From b64add7715b65c85614325564cd5f4281681baf7 Mon Sep 17 00:00:00 2001 From: Andrei Aaron Date: Mon, 10 Nov 2025 21:08:47 +0200 Subject: [PATCH] fix: deduplicate entries in referrers responses (#3524) See: https://github.com/project-zot/zot/issues/2506 Note we are not loosing anything functionality-wise by making this change. Initially we considered the tags are in the annotations present in the referrers but the only annotations we set on referrers are the ones inside the manifests themselves, not the ones in the manifest descriptors, so the tags were not presetn anyway. Signed-off-by: Andrei Aaron --- pkg/meta/boltdb/boltdb.go | 7 +++ pkg/meta/dynamodb/dynamodb.go | 7 +++ pkg/meta/meta_test.go | 52 ++++++++++++++++++++ pkg/meta/redis/redis.go | 7 +++ pkg/storage/common/common.go | 9 ++++ pkg/storage/common/common_test.go | 80 +++++++++++++++++++++++++++++++ 6 files changed, 162 insertions(+) diff --git a/pkg/meta/boltdb/boltdb.go b/pkg/meta/boltdb/boltdb.go index cbf6bb4f..87e74f17 100644 --- a/pkg/meta/boltdb/boltdb.go +++ b/pkg/meta/boltdb/boltdb.go @@ -1187,12 +1187,19 @@ func (bdw *BoltDB) GetReferrersInfo(repo string, referredDigest godigest.Digest, } referrersInfo := protoRepoMeta.Referrers[referredDigest.String()].List + seenDigests := make(map[string]struct{}) for i := range referrersInfo { if !common.MatchesArtifactTypes(referrersInfo[i].ArtifactType, artifactTypes) { continue } + if _, seen := seenDigests[referrersInfo[i].Digest]; seen { + continue + } + + seenDigests[referrersInfo[i].Digest] = struct{}{} + referrersInfoResult = append(referrersInfoResult, mTypes.ReferrerInfo{ Digest: referrersInfo[i].Digest, MediaType: referrersInfo[i].MediaType, diff --git a/pkg/meta/dynamodb/dynamodb.go b/pkg/meta/dynamodb/dynamodb.go index a1d316fb..428c67dd 100644 --- a/pkg/meta/dynamodb/dynamodb.go +++ b/pkg/meta/dynamodb/dynamodb.go @@ -1117,12 +1117,19 @@ func (dwr *DynamoDB) GetReferrersInfo(repo string, referredDigest godigest.Diges referrersInfo := repoMeta.Referrers[referredDigest.String()] filteredResults := make([]mTypes.ReferrerInfo, 0, len(referrersInfo)) + seenDigests := make(map[string]struct{}) for _, referrerInfo := range referrersInfo { if !common.MatchesArtifactTypes(referrerInfo.ArtifactType, artifactTypes) { continue } + if _, seen := seenDigests[referrerInfo.Digest]; seen { + continue + } + + seenDigests[referrerInfo.Digest] = struct{}{} + filteredResults = append(filteredResults, referrerInfo) } diff --git a/pkg/meta/meta_test.go b/pkg/meta/meta_test.go index 93606df8..f1b49c62 100644 --- a/pkg/meta/meta_test.go +++ b/pkg/meta/meta_test.go @@ -2410,6 +2410,58 @@ func RunMetaDBTests(t *testing.T, metaDB mTypes.MetaDB, preparationFuncs ...func So(referrerInfo[0].Digest, ShouldResemble, referrerWantedType.DigestStr()) }) + Convey("GetReferrersInfo deduplication", func() { + image := CreateRandomImage() + referrer := CreateRandomImageWith().Subject(image.DescriptorRef()).Build() + + err := metaDB.SetRepoReference(ctx, "repo", "tag", image.AsImageMeta()) + So(err, ShouldBeNil) + + // Add same referrer multiple times with different tags (simulating duplicates) + err = metaDB.SetRepoReference(ctx, "repo", "ref-tag1", referrer.AsImageMeta()) + So(err, ShouldBeNil) + + err = metaDB.SetRepoReference(ctx, "repo", "ref-tag2", referrer.AsImageMeta()) + So(err, ShouldBeNil) + + err = metaDB.SetRepoReference(ctx, "repo", referrer.DigestStr(), referrer.AsImageMeta()) + So(err, ShouldBeNil) + + // GetReferrersInfo should return only one instance despite multiple tags + referrers, err := metaDB.GetReferrersInfo("repo", image.Digest(), []string{}) + So(err, ShouldBeNil) + So(len(referrers), ShouldEqual, 1) + So(referrers[0].Digest, ShouldEqual, referrer.DigestStr()) + + // Now manually add duplicate entries to test the deduplication logic in GetReferrersInfo + // This simulates a scenario where duplicates might exist (e.g., from migration or data corruption) + repoMeta, err := metaDB.GetRepoMeta(ctx, "repo") + So(err, ShouldBeNil) + + // Add duplicate referrer entries manually + duplicateReferrer := mTypes.ReferrerInfo{ + Digest: referrer.DigestStr(), + MediaType: referrer.Manifest.MediaType, + ArtifactType: zcommon.GetManifestArtifactType(referrer.Manifest), + Size: int(referrer.ManifestDescriptor.Size), + Annotations: referrer.Manifest.Annotations, + } + repoMeta.Referrers[image.Digest().String()] = append( + repoMeta.Referrers[image.Digest().String()], + duplicateReferrer, + duplicateReferrer, + ) + + err = metaDB.SetRepoMeta("repo", repoMeta) + So(err, ShouldBeNil) + + // GetReferrersInfo should still return only one instance despite duplicates in the data + referrers, err = metaDB.GetReferrersInfo("repo", image.Digest(), []string{}) + So(err, ShouldBeNil) + So(len(referrers), ShouldEqual, 1) + So(referrers[0].Digest, ShouldEqual, referrer.DigestStr()) + }) + Convey("FilterImageMeta", func() { repo := "repo" tag := "tag" diff --git a/pkg/meta/redis/redis.go b/pkg/meta/redis/redis.go index b01cd268..a3223b3a 100644 --- a/pkg/meta/redis/redis.go +++ b/pkg/meta/redis/redis.go @@ -1704,12 +1704,19 @@ func (rc *RedisDB) GetReferrersInfo(repo string, referredDigest godigest.Digest, } referrersInfo := protoRepoMeta.Referrers[referredDigest.String()].List + seenDigests := make(map[string]struct{}) for i := range referrersInfo { if !common.MatchesArtifactTypes(referrersInfo[i].ArtifactType, artifactTypes) { continue } + if _, seen := seenDigests[referrersInfo[i].Digest]; seen { + continue + } + + seenDigests[referrersInfo[i].Digest] = struct{}{} + referrersInfoResult = append(referrersInfoResult, mTypes.ReferrerInfo{ Digest: referrersInfo[i].Digest, MediaType: referrersInfo[i].MediaType, diff --git a/pkg/storage/common/common.go b/pkg/storage/common/common.go index 3a6b0a4f..3487e6dc 100644 --- a/pkg/storage/common/common.go +++ b/pkg/storage/common/common.go @@ -666,12 +666,21 @@ func GetReferrers(imgStore storageTypes.ImageStore, repo string, gdigest godiges } result := []ispec.Descriptor{} + seenDigests := make(map[godigest.Digest]struct{}) for _, descriptor := range index.Manifests { if descriptor.Digest == gdigest { continue } + // Skip if we've already processed this digest + if _, seen := seenDigests[descriptor.Digest]; seen { + continue + } + + // Mark as seen early to avoid processing duplicates + seenDigests[descriptor.Digest] = struct{}{} + buf, err := imgStore.GetBlobContent(repo, descriptor.Digest) if err != nil { log.Error().Err(err).Str("blob", imgStore.BlobPath(repo, descriptor.Digest)).Msg("failed to read manifest") diff --git a/pkg/storage/common/common_test.go b/pkg/storage/common/common_test.go index 989222b3..17e8c1da 100644 --- a/pkg/storage/common/common_test.go +++ b/pkg/storage/common/common_test.go @@ -373,6 +373,86 @@ func TestGetReferrersErrors(t *testing.T) { }) } +func TestGetReferrersDeduplication(t *testing.T) { + Convey("Test GetReferrers deduplication", t, func(c C) { + dir := t.TempDir() + + log := log.NewTestLogger() + metrics := monitoring.NewMetricsServer(false, log) + + defer metrics.Stop() // Clean up metrics server to prevent resource leaks + cacheDriver, _ := storage.Create("boltdb", cache.BoltDBDriverParameters{ + RootDir: dir, + Name: "cache", + UseRelPaths: true, + }, log) + + imgStore := local.NewImageStore(dir, false, true, log, metrics, nil, cacheDriver, nil, nil) + storageCtlr := storage.StoreController{DefaultStore: imgStore} + + // Create subject image + subjectImage := CreateDefaultImage() + err := WriteImageToFileSystem(subjectImage, "test-repo", "subject-tag", storageCtlr) + So(err, ShouldBeNil) + + subjectDigest := subjectImage.Digest() + + // Create referrer image using builder pattern + referrerImage := CreateImageWith(). + DefaultLayers(). + DefaultConfig(). + Subject(subjectImage.DescriptorRef()). + Annotations(map[string]string{ + "test": "referrer", + }). + Build() + + // Write referrer image to filesystem (this will add it to index once) + err = WriteImageToFileSystem(referrerImage, "test-repo", referrerImage.DigestStr(), storageCtlr) + So(err, ShouldBeNil) + + referrerDigest := referrerImage.Digest() + + // Add referrer manifest to index multiple times (simulating tagging) + index, err := common.GetIndex(imgStore, "test-repo", log) + So(err, ShouldBeNil) + + // Add same referrer with different tags (simulating duplicates) + desc1 := ispec.Descriptor{ + MediaType: ispec.MediaTypeImageManifest, + Digest: referrerDigest, + Size: referrerImage.ManifestDescriptor.Size, + Annotations: map[string]string{ + ispec.AnnotationRefName: "tag1", + }, + } + desc2 := ispec.Descriptor{ + MediaType: ispec.MediaTypeImageManifest, + Digest: referrerDigest, + Size: referrerImage.ManifestDescriptor.Size, + Annotations: map[string]string{ + ispec.AnnotationRefName: "tag2", + }, + } + desc3 := ispec.Descriptor{ + MediaType: ispec.MediaTypeImageManifest, + Digest: referrerDigest, + Size: referrerImage.ManifestDescriptor.Size, + } + + index.Manifests = append(index.Manifests, desc1, desc2, desc3) + + err = imgStore.PutIndexContent("test-repo", index) + So(err, ShouldBeNil) + + // Get referrers - should return only one instance despite multiple entries + referrers, err := common.GetReferrers(imgStore, "test-repo", subjectDigest, []string{}, log) + So(err, ShouldBeNil) + So(len(referrers.Manifests), ShouldEqual, 1) + So(referrers.Manifests[0].Digest, ShouldEqual, referrerDigest) + }) +} + func TestGetImageIndexErrors(t *testing.T) { log := log.NewTestLogger()