fix: remove inline GC and schedule a background task instead (#1610)

* fix: remove inline GC and set a default value of gc interval

- remove inline GC
- add a default value for the GC interval
- run GC periodically by default, using the default interval when none is provided
- generate GC tasks with a random delay (0-30s) between them
- add IsReady() method to scheduler.TaskGenerator interface

Signed-off-by: Andreea-Lupu <andreealupu1470@yahoo.com>

* ci: add test for gc with short interval

Signed-off-by: Andreea-Lupu <andreealupu1470@yahoo.com>

---------

Signed-off-by: Andreea-Lupu <andreealupu1470@yahoo.com>
This commit is contained in:
Andreea Lupu
2023-08-07 22:55:19 +03:00
committed by GitHub
parent fce9a02ed5
commit 76277f5ebd
24 changed files with 411 additions and 151 deletions
+82
View File
@@ -4,8 +4,11 @@ import (
"bytes"
"encoding/json"
"errors"
"io"
"math/rand"
"path"
"strings"
"time"
notreg "github.com/notaryproject/notation-go/registry"
godigest "github.com/opencontainers/go-digest"
@@ -853,6 +856,10 @@ func (gen *DedupeTaskGenerator) IsDone() bool {
return gen.done
}
// IsReady always returns true: the dedupe generator never asks the caller to
// wait before requesting its next task.
func (gen *DedupeTaskGenerator) IsReady() bool {
	return true
}
func (gen *DedupeTaskGenerator) Reset() {
gen.lastDigests = []godigest.Digest{}
gen.duplicateBlobs = []string{}
@@ -886,3 +893,78 @@ func (dt *dedupeTask) DoWork() error {
return err
}
/*
GCTaskGenerator takes all repositories found in the storage.imagestore
and it will execute garbage collection for each repository by creating a task
for each repository and pushing it to the task scheduler.

Every call to Next also moves nextRun a random number of seconds into the
future; IsReady reports false until that moment has passed.
*/
type GCTaskGenerator struct {
// ImgStore is the image store whose repositories are iterated and collected.
ImgStore storageTypes.ImageStore
// lastRepo is the repository handed out by the previous Next call; it is
// passed to GetNextRepository as the cursor and is "" after Reset.
lastRepo string
// nextRun is the instant after which IsReady returns true; the zero value
// (set by Reset) makes the generator ready immediately.
nextRun time.Time
// done is set once GetNextRepository yields no further repository.
done bool
// rand produces the per-task delay; it is created inside Next.
rand *rand.Rand
}
// getRandomDelay draws a pseudo-random integer in [0, 30) from gen.rand.
// Next interprets the result as a number of seconds. gen.rand must already be
// initialised when this is called.
func (gen *GCTaskGenerator) getRandomDelay() int {
	const maxDelaySeconds = 30

	return gen.rand.Intn(maxDelaySeconds)
}
// Next returns a GC task for the repository that follows lastRepo in the image
// store. When no repository is left it marks the generator as done and returns
// (nil, nil). Any error from GetNextRepository other than io.EOF is returned
// unchanged.
//
// As a side effect every call sets nextRun to now plus a random delay, which
// is what IsReady checks against.
func (gen *GCTaskGenerator) Next() (scheduler.Task, error) {
	// A pristine (or freshly Reset) generator gets a newly seeded RNG.
	pristine := gen.lastRepo == "" && gen.nextRun.IsZero()
	if pristine {
		gen.rand = rand.New(rand.NewSource(time.Now().UTC().UnixNano())) //nolint: gosec
	}

	wait := time.Duration(gen.getRandomDelay()) * time.Second
	gen.nextRun = time.Now().Add(wait)

	nextRepo, err := gen.ImgStore.GetNextRepository(gen.lastRepo)

	switch {
	case err != nil && !errors.Is(err, io.EOF):
		return nil, err
	case nextRepo == "":
		// Nothing left to iterate over.
		gen.done = true

		return nil, nil
	}

	gen.lastRepo = nextRepo

	return NewGCTask(gen.ImgStore, nextRepo), nil
}
// IsDone returns the generator's done flag.
func (gen *GCTaskGenerator) IsDone() bool {
	return gen.done
}
// IsReady reports whether nextRun lies strictly in the past, i.e. whether the
// delay recorded in nextRun has elapsed.
func (gen *GCTaskGenerator) IsReady() bool {
	return gen.nextRun.Before(time.Now())
}
// Reset returns the generator to its initial iteration state: no pending
// delay, not done, and no last repository.
func (gen *GCTaskGenerator) Reset() {
	gen.nextRun = time.Time{}
	gen.done = false
	gen.lastRepo = ""
}
// gcTask is a unit of work that garbage-collects a single repository.
type gcTask struct {
// imgStore is the store on which RunGCRepo is invoked.
imgStore storageTypes.ImageStore
// repo is the name of the repository passed to RunGCRepo.
repo string
}
// NewGCTask builds a gcTask bound to the given image store and repository.
func NewGCTask(imgStore storageTypes.ImageStore, repo string) *gcTask {
	return &gcTask{
		imgStore: imgStore,
		repo:     repo,
	}
}
// DoWork calls RunGCRepo on the task's image store for the task's repository
// and returns whatever error that call produces.
func (gct *gcTask) DoWork() error {
	err := gct.imgStore.RunGCRepo(gct.repo)

	return err
}
-30
View File
@@ -77,36 +77,6 @@ func TestValidateManifest(t *testing.T) {
So(err, ShouldNotBeNil)
})
Convey("bad config blob", func() {
manifest := ispec.Manifest{
Config: ispec.Descriptor{
MediaType: ispec.MediaTypeImageConfig,
Digest: cdigest,
Size: int64(len(cblob)),
},
Layers: []ispec.Descriptor{
{
MediaType: ispec.MediaTypeImageLayer,
Digest: digest,
Size: int64(len(content)),
},
},
}
manifest.SchemaVersion = 2
configBlobPath := imgStore.BlobPath("test", cdigest)
err := os.WriteFile(configBlobPath, []byte("bad config blob"), 0o000)
So(err, ShouldBeNil)
body, err := json.Marshal(manifest)
So(err, ShouldBeNil)
_, _, err = imgStore.PutImageManifest("test", "1.0", ispec.MediaTypeImageManifest, body)
So(err, ShouldNotBeNil)
})
Convey("manifest with non-distributable layers", func() {
content := []byte("this blob doesn't exist")
digest := godigest.FromBytes(content)
+1
View File
@@ -20,5 +20,6 @@ const (
BoltdbName = "cache"
DynamoDBDriverName = "dynamodb"
DefaultGCDelay = 1 * time.Hour
DefaultGCInterval = 1 * time.Hour
S3StorageDriverName = "s3"
)
+2 -60
View File
@@ -593,12 +593,6 @@ func (is *ImageStoreLocal) PutImageManifest(repo, reference, mediaType string, /
return "", "", err
}
if is.gc {
if err := is.garbageCollect(dir, repo); err != nil {
return "", "", err
}
}
return desc.Digest, subjectDigest, nil
}
@@ -650,12 +644,6 @@ func (is *ImageStoreLocal) DeleteImageManifest(repo, reference string, detectCol
return err
}
if is.gc {
if err := is.garbageCollect(dir, repo); err != nil {
return err
}
}
// Delete blob only when blob digest not present in manifest entry.
// e.g. 1.0.1 & 1.0.2 have same blob digest so if we delete 1.0.1, blob should not be removed.
toDelete := true
@@ -1812,58 +1800,12 @@ func (is *ImageStoreLocal) RunGCRepo(repo string) error {
}
func (is *ImageStoreLocal) RunGCPeriodically(interval time.Duration, sch *scheduler.Scheduler) {
generator := &taskGenerator{
imgStore: is,
generator := &common.GCTaskGenerator{
ImgStore: is,
}
sch.SubmitGenerator(generator, interval, scheduler.MediumPriority)
}
// taskGenerator iterates over the repositories of a local image store and
// produces one GC task per repository.
type taskGenerator struct {
// imgStore is the local image store being iterated.
imgStore *ImageStoreLocal
// lastRepo is the repository returned by the previous Next call, used as the
// cursor for GetNextRepository.
lastRepo string
// done is set once no further repository is available.
done bool
}
// Next returns a GC task for the repository following lastRepo. When no
// repository is left it marks the generator as done and returns (nil, nil).
// Errors from GetNextRepository other than io.EOF are returned unchanged.
func (gen *taskGenerator) Next() (scheduler.Task, error) {
	nextRepo, err := gen.imgStore.GetNextRepository(gen.lastRepo)

	switch {
	case err != nil && !errors.Is(err, io.EOF):
		return nil, err
	case nextRepo == "":
		gen.done = true

		return nil, nil
	}

	gen.lastRepo = nextRepo

	return newGCTask(gen.imgStore, nextRepo), nil
}
// IsDone returns the generator's done flag.
func (gen *taskGenerator) IsDone() bool {
	return gen.done
}
// Reset clears the done flag and the repository cursor so iteration starts
// over from the first repository.
func (gen *taskGenerator) Reset() {
	gen.done = false
	gen.lastRepo = ""
}
// gcTask is a unit of work that garbage-collects one repository of a local
// image store.
type gcTask struct {
// imgStore is the local store on which RunGCRepo is invoked.
imgStore *ImageStoreLocal
// repo is the name of the repository passed to RunGCRepo.
repo string
}
// newGCTask builds a gcTask bound to the given local image store and
// repository.
func newGCTask(imgStore *ImageStoreLocal, repo string) *gcTask {
	return &gcTask{
		imgStore: imgStore,
		repo:     repo,
	}
}
// DoWork calls RunGCRepo on the task's image store for the task's repository
// and returns whatever error that call produces.
func (gcT *gcTask) DoWork() error {
	err := gcT.imgStore.RunGCRepo(gcT.repo)

	return err
}
func (is *ImageStoreLocal) GetNextDigestWithBlobPaths(lastDigests []godigest.Digest,
) (godigest.Digest, []string, error) {
var lockLatency time.Time
+166 -1
View File
@@ -2040,6 +2040,98 @@ func TestInjectWriteFile(t *testing.T) {
})
}
// TestGCInjectFailure covers the error paths of RunGCRepo. It uploads a layer
// blob, a config blob and a manifest referencing both into a fresh local image
// store, then calls RunGCRepo twice, each time right after inject.InjectFailure.
// When InjectFailure reports that injection is active the GC call must fail;
// otherwise it must succeed.
func TestGCInjectFailure(t *testing.T) {
Convey("code coverage: error inside garbageCollect method of img store", t, func() {
dir := t.TempDir()
// Errors from CreateTemp and storage.Create below are deliberately ignored here.
logFile, _ := os.CreateTemp("", "zot-log*.txt")
defer os.Remove(logFile.Name()) // clean up
log := log.NewLogger("debug", logFile.Name())
metrics := monitoring.NewMetricsServer(false, log)
cacheDriver, _ := storage.Create("boltdb", cache.BoltDBDriverParameters{
RootDir: dir,
Name: "cache",
UseRelPaths: true,
}, log)
// NOTE(review): the boolean arguments presumably enable GC/dedupe/etc. — confirm
// against local.NewImageStore's signature.
imgStore := local.NewImageStore(dir, true, storageConstants.DefaultGCDelay,
true, true, log, metrics, nil, cacheDriver)
repoName := "test-gc"
// Upload the layer blob in a single chunk and finish the upload.
upload, err := imgStore.NewBlobUpload(repoName)
So(err, ShouldBeNil)
So(upload, ShouldNotBeEmpty)
content := []byte("test-data1")
buf := bytes.NewBuffer(content)
buflen := buf.Len()
bdigest := godigest.FromBytes(content)
blob, err := imgStore.PutBlobChunk(repoName, upload, 0, int64(buflen), buf)
So(err, ShouldBeNil)
So(blob, ShouldEqual, buflen)
err = imgStore.FinishBlobUpload(repoName, upload, buf, bdigest)
So(err, ShouldBeNil)
// tag is defined outside this function (not visible in this chunk).
annotationsMap := make(map[string]string)
annotationsMap[ispec.AnnotationRefName] = tag
// Upload a random image config and verify the store reports it as present.
cblob, cdigest := test.GetRandomImageConfig()
_, clen, err := imgStore.FullBlobUpload(repoName, bytes.NewReader(cblob), cdigest)
So(err, ShouldBeNil)
So(clen, ShouldEqual, len(cblob))
hasBlob, _, err := imgStore.CheckBlob(repoName, cdigest)
So(err, ShouldBeNil)
So(hasBlob, ShouldEqual, true)
// Build a manifest that references the config and layer uploaded above.
manifest := ispec.Manifest{
Config: ispec.Descriptor{
MediaType: "application/vnd.oci.image.config.v1+json",
Digest: cdigest,
Size: int64(len(cblob)),
},
Layers: []ispec.Descriptor{
{
MediaType: "application/vnd.oci.image.layer.v1.tar",
Digest: bdigest,
Size: int64(buflen),
},
},
Annotations: annotationsMap,
}
manifest.SchemaVersion = 2
manifestBuf, err := json.Marshal(manifest)
So(err, ShouldBeNil)
_, _, err = imgStore.PutImageManifest(repoName, tag, ispec.MediaTypeImageManifest, manifestBuf)
So(err, ShouldBeNil)
// First injected failure, intended to hit the umoci.OpenLayout error path.
// NOTE(review): the argument presumably selects which injection point fails,
// and the return value presumably is false in builds without injection
// support — confirm against the inject package.
injected := inject.InjectFailure(0)
err = imgStore.RunGCRepo(repoName)
if injected {
So(err, ShouldNotBeNil)
} else {
So(err, ShouldBeNil)
}
// Second injected failure, intended to hit the oci.GC error path.
injected = inject.InjectFailure(1)
err = imgStore.RunGCRepo(repoName)
if injected {
So(err, ShouldNotBeNil)
} else {
So(err, ShouldBeNil)
}
})
}
func TestGarbageCollect(t *testing.T) {
Convey("Repo layout", t, func(c C) {
dir := t.TempDir()
@@ -2108,6 +2200,9 @@ func TestGarbageCollect(t *testing.T) {
_, _, err = imgStore.PutImageManifest(repoName, tag, ispec.MediaTypeImageManifest, manifestBuf)
So(err, ShouldBeNil)
err = imgStore.RunGCRepo(repoName)
So(err, ShouldBeNil)
hasBlob, _, err = imgStore.CheckBlob(repoName, bdigest)
So(err, ShouldBeNil)
So(hasBlob, ShouldEqual, true)
@@ -2115,6 +2210,9 @@ func TestGarbageCollect(t *testing.T) {
err = imgStore.DeleteImageManifest(repoName, digest.String(), false)
So(err, ShouldBeNil)
err = imgStore.RunGCRepo(repoName)
So(err, ShouldBeNil)
hasBlob, _, err = imgStore.CheckBlob(repoName, bdigest)
So(err, ShouldBeNil)
So(hasBlob, ShouldEqual, true)
@@ -2201,6 +2299,9 @@ func TestGarbageCollect(t *testing.T) {
_, _, err = imgStore.PutImageManifest(repoName, tag, ispec.MediaTypeImageManifest, manifestBuf)
So(err, ShouldBeNil)
err = imgStore.RunGCRepo(repoName)
So(err, ShouldBeNil)
hasBlob, _, err = imgStore.CheckBlob(repoName, odigest)
So(err, ShouldNotBeNil)
So(hasBlob, ShouldEqual, false)
@@ -2223,6 +2324,9 @@ func TestGarbageCollect(t *testing.T) {
err = imgStore.DeleteImageManifest(repoName, digest.String(), false)
So(err, ShouldBeNil)
err = imgStore.RunGCRepo(repoName)
So(err, ShouldBeNil)
hasBlob, _, err = imgStore.CheckBlob(repoName, bdigest)
So(err, ShouldNotBeNil)
So(hasBlob, ShouldEqual, false)
@@ -2360,7 +2464,7 @@ func TestGarbageCollect(t *testing.T) {
So(err, ShouldBeNil)
So(hasBlob, ShouldEqual, true)
// immediately upload any other image to second repo which should invoke GC inline, but expect layers to persist
// immediately upload any other image to second repo and run GC, but expect layers to persist
upload, err = imgStore.NewBlobUpload(repo2Name)
So(err, ShouldBeNil)
@@ -2413,6 +2517,9 @@ func TestGarbageCollect(t *testing.T) {
_, _, err = imgStore.PutImageManifest(repo2Name, tag, ispec.MediaTypeImageManifest, manifestBuf)
So(err, ShouldBeNil)
err = imgStore.RunGCRepo(repo2Name)
So(err, ShouldBeNil)
// original blob should exist
hasBlob, _, err = imgStore.CheckBlob(repo2Name, tdigest)
@@ -2494,6 +2601,64 @@ func TestGarbageCollectForImageStore(t *testing.T) {
fmt.Sprintf("error while running GC for %s", path.Join(imgStore.RootDir(), repoName)))
So(os.Chmod(path.Join(dir, repoName, "index.json"), 0o755), ShouldBeNil)
})
Convey("Garbage collect - the manifest which the reference points to can be found", func() {
logFile, _ := os.CreateTemp("", "zot-log*.txt")
defer os.Remove(logFile.Name()) // clean up
log := log.NewLogger("debug", logFile.Name())
metrics := monitoring.NewMetricsServer(false, log)
cacheDriver, _ := storage.Create("boltdb", cache.BoltDBDriverParameters{
RootDir: dir,
Name: "cache",
UseRelPaths: true,
}, log)
imgStore := local.NewImageStore(dir, true, 1*time.Second, true, true, log, metrics, nil, cacheDriver)
repoName := "gc-sig"
storeController := storage.StoreController{DefaultStore: imgStore}
img := test.CreateRandomImage()
err := test.WriteImageToFileSystem(img, repoName, "tag1", storeController)
So(err, ShouldBeNil)
// add fake signature for tag1
cosignTag, err := test.GetCosignSignatureTagForManifest(img.Manifest)
So(err, ShouldBeNil)
cosignSig := test.CreateRandomImage()
So(err, ShouldBeNil)
err = test.WriteImageToFileSystem(cosignSig, repoName, cosignTag, storeController)
So(err, ShouldBeNil)
// add sbom
manifestBlob, err := json.Marshal(img.Manifest)
So(err, ShouldBeNil)
manifestDigest := godigest.FromBytes(manifestBlob)
sbomTag := fmt.Sprintf("sha256-%s.%s", manifestDigest.Encoded(), "sbom")
So(err, ShouldBeNil)
sbomImg := test.CreateRandomImage()
So(err, ShouldBeNil)
err = test.WriteImageToFileSystem(sbomImg, repoName, sbomTag, storeController)
So(err, ShouldBeNil)
// add fake signature for tag1
notationSig := test.CreateImageWith().
RandomLayers(1, 10).
ArtifactConfig("application/vnd.cncf.notary.signature").
Subject(img.DescriptorRef()).Build()
err = test.WriteImageToFileSystem(notationSig, repoName, "notation", storeController)
So(err, ShouldBeNil)
err = imgStore.RunGCRepo(repoName)
So(err, ShouldBeNil)
})
})
}