fix: Updating a repository should not result in a corrupted index.json file if disk is full (#3963)

See https://github.com/project-zot/zot/issues/3924

Signed-off-by: Andrei Aaron <andreifdaaron@gmail.com>
This commit is contained in:
Andrei Aaron
2026-04-14 08:59:25 +03:00
committed by GitHub
parent ba8575d960
commit 9991821295
7 changed files with 248 additions and 10 deletions
+1 -1
View File
@@ -80,6 +80,7 @@ require (
go.etcd.io/bbolt v1.4.3
golang.org/x/crypto v0.50.0
golang.org/x/oauth2 v0.36.0
golang.org/x/sys v0.43.0
google.golang.org/protobuf v1.36.11
gopkg.in/resty.v1 v1.12.0
gopkg.in/yaml.v3 v3.0.1
@@ -537,7 +538,6 @@ require (
golang.org/x/mod v0.34.0 // indirect
golang.org/x/net v0.53.0 // indirect
golang.org/x/sync v0.20.0 // indirect
golang.org/x/sys v0.43.0 // indirect
golang.org/x/term v0.42.0 // indirect
golang.org/x/text v0.36.0 // indirect
golang.org/x/time v0.15.0 // indirect
+25 -2
View File
@@ -1772,8 +1772,31 @@ func (is *ImageStore) PutIndexContent(repo string, index ispec.Index) error {
return err
}
if _, err = is.storeDriver.WriteFile(indexPath, buf); err != nil {
is.log.Error().Err(err).Str("file", indexPath).Msg("failed to write")
// Write to a unique file under .uploads (same layout as blob uploads), then rename into place.
// Stale files are picked up by the same blob-upload GC path as ordinary uploads.
// This avoids truncating/removing index.json on failure (e.g. ENOSPC) — see local Driver.WriteFile + Cancel.
stagingUUID, err := guuid.NewV4()
if err != nil {
is.log.Error().Err(err).Str("repository", repo).Msg("failed to generate staging UUID")
return err
}
stagingID := stagingUUID.String()
tmpPath := is.BlobUploadPath(repo, stagingID)
if _, err = is.storeDriver.WriteFile(tmpPath, buf); err != nil {
is.log.Error().Err(err).Str("file", tmpPath).Msg("failed to write staging index")
_ = is.storeDriver.Delete(tmpPath)
return err
}
if err := is.storeDriver.Move(tmpPath, indexPath); err != nil {
is.log.Error().Err(err).Str("from", tmpPath).Str("to", indexPath).Msg("failed to replace index.json")
_ = is.storeDriver.Delete(tmpPath)
return err
}
+3 -1
View File
@@ -255,7 +255,9 @@ func (driver *Driver) Move(sourcePath string, destPath string) error {
return driver.formatErr(err)
}
return driver.formatErr(os.Rename(sourcePath, destPath))
// Use renameReplace so an existing destination is replaced (POSIX rename); on Windows,
// os.Rename does not overwrite an existing file — see driver_unix.go / driver_windows.go.
return driver.formatErr(renameReplace(sourcePath, destPath, driver.commit))
}
func (driver *Driver) SameFile(path1, path2 string) bool {
+10 -6
View File
@@ -197,23 +197,27 @@ func TestMove(t *testing.T) {
So(storageErr.DriverName, ShouldEqual, "local")
})
Convey("Test Move() with os.Rename error to trigger formatErr", func() {
Convey("Test Move() replaces an existing destination (atomic replace)", func() {
srcFile := path.Join(rootDir, "source.txt")
destFile := path.Join(rootDir, "dest.txt")
// Create source file
err := os.WriteFile(srcFile, []byte("test content"), 0o600)
err := os.WriteFile(srcFile, []byte("source wins"), 0o600)
So(err, ShouldBeNil)
// Create destination file to cause rename conflict
err = os.WriteFile(destFile, []byte("existing content"), 0o600)
So(err, ShouldBeNil)
// Move should return a formatted error (rename conflict)
err = driver.Move(srcFile, destFile)
// Note: On some systems, os.Rename might succeed by overwriting
// So we just verify it doesn't panic and handle the result appropriately
_ = err
So(err, ShouldBeNil)
got, err := os.ReadFile(destFile)
So(err, ShouldBeNil)
So(string(got), ShouldEqual, "source wins")
_, err = os.Stat(srcFile)
So(err, ShouldNotBeNil)
})
})
}
+11
View File
@@ -0,0 +1,11 @@
//go:build !windows
package local
import "os"
// renameReplace moves src to dst, replacing dst if it already exists (POSIX rename).
// commit is ignored on Unix; durability for committed writers is handled via fsync in fileWriter.Close.
func renameReplace(src, dst string, _ bool) error {
return os.Rename(src, dst)
}
+30
View File
@@ -0,0 +1,30 @@
//go:build windows
package local
import (
"golang.org/x/sys/windows"
)
// renameReplace moves src to dst, replacing dst if it already exists.
// When commit is true, MOVEFILE_WRITE_THROUGH is set so the rename is flushed to disk, aligning
// with the local driver's commit semantics on WriteFile/Close. When commit is false, only
// MOVEFILE_REPLACE_EXISTING is used so large moves are not forced fully synchronous.
func renameReplace(src, dst string, commit bool) error {
from, err := windows.UTF16PtrFromString(src)
if err != nil {
return err
}
to, err := windows.UTF16PtrFromString(dst)
if err != nil {
return err
}
var flags uint32 = windows.MOVEFILE_REPLACE_EXISTING
if commit {
flags |= windows.MOVEFILE_WRITE_THROUGH
}
return windows.MoveFileEx(from, to, flags)
}
+168
View File
@@ -10,9 +10,11 @@ import (
"io"
"os"
"path"
"path/filepath"
"slices"
"strings"
"sync"
"syscall"
"testing"
"time"
@@ -176,6 +178,40 @@ func newLocalImageStoreWithEventRecorder(t *testing.T, recorder events.Recorder)
storeDriver, cacheDriver, nil, recorder)
}
// newLocalImageStoreWithDriver builds a filesystem-backed image store for tests with a configurable
// storage driver (nil means local.New(true)). The returned cleanup stops the metrics server and must be deferred.
func newLocalImageStoreWithDriver(t *testing.T, storeDriver storageTypes.Driver) (
string, storageTypes.ImageStore, func(),
) {
t.Helper()
rootDir := t.TempDir()
log := zlog.NewTestLogger()
metrics := monitoring.NewMetricsServer(false, log)
cleanup := func() { metrics.Stop() }
cacheDriver, err := storage.Create("boltdb", cache.BoltDBDriverParameters{
RootDir: rootDir,
Name: "cache",
UseRelPaths: true,
}, log)
if err != nil {
t.Fatal(err)
}
if storeDriver == nil {
storeDriver = local.New(true)
}
imgStore := imagestore.NewImageStore(rootDir, rootDir, true, true, log, metrics, nil,
storeDriver, cacheDriver, nil, nil)
if imgStore == nil {
t.Fatal("NewImageStore returned nil")
}
return rootDir, imgStore, cleanup
}
//nolint:gochecknoglobals
var testCases = []struct {
testCaseName string
@@ -3901,3 +3937,135 @@ func DumpKeys(t *testing.T, redisURL string) {
}
}
}
// putIndexHookDriver wraps the local driver so PutIndexContent tests can inject WriteFile / Move failures.
type putIndexHookDriver struct {
*local.Driver
writeFileHook func(filePath string, content []byte) (n int, err error, handled bool)
moveHook func(src, dst string) (err error, handled bool)
}
func (h *putIndexHookDriver) WriteFile(filePath string, content []byte) (int, error) {
if h.writeFileHook != nil {
if n, err, ok := h.writeFileHook(filePath, content); ok {
return n, err
}
}
return h.Driver.WriteFile(filePath, content)
}
func (h *putIndexHookDriver) Move(src, dst string) error {
if h.moveHook != nil {
if err, ok := h.moveHook(src, dst); ok {
return err
}
}
return h.Driver.Move(src, dst)
}
func TestPutIndexContent_atomicReplace(t *testing.T) {
Convey("PutIndexContent stages under .uploads then renames (atomic replace)", t, func() {
const repo = "r1"
Convey("staging WriteFile failure leaves index.json unchanged", func() {
hookDriver := &putIndexHookDriver{
Driver: local.New(true),
writeFileHook: func(filePath string, content []byte) (int, error, bool) {
if filepath.Base(filepath.Dir(filePath)) == storageConstants.BlobUploadDir {
return -1, syscall.ENOSPC, true
}
return 0, nil, false
},
}
root, imgStore, cleanup := newLocalImageStoreWithDriver(t, hookDriver)
defer cleanup()
So(imgStore.InitRepo(repo), ShouldBeNil)
before, err := os.ReadFile(path.Join(root, repo, ispec.ImageIndexFile))
So(err, ShouldBeNil)
So(len(before), ShouldBeGreaterThan, 0)
var idx ispec.Index
So(json.Unmarshal(before, &idx), ShouldBeNil)
idx.SchemaVersion = 999
So(imgStore.PutIndexContent(repo, idx), ShouldNotBeNil)
after, err := os.ReadFile(path.Join(root, repo, ispec.ImageIndexFile))
So(err, ShouldBeNil)
So(string(after), ShouldEqual, string(before))
uploadOrphans, err := filepath.Glob(path.Join(root, repo, storageConstants.BlobUploadDir, "*"))
So(err, ShouldBeNil)
So(uploadOrphans, ShouldBeEmpty)
})
Convey("Move into index.json failure leaves index.json unchanged", func() {
hookDriver := &putIndexHookDriver{
Driver: local.New(true),
moveHook: func(src, dst string) (error, bool) {
if filepath.Base(dst) == ispec.ImageIndexFile {
//nolint: err113
return errors.New("forced move failure"), true
}
return nil, false
},
}
root, imgStore, cleanup := newLocalImageStoreWithDriver(t, hookDriver)
defer cleanup()
So(imgStore.InitRepo(repo), ShouldBeNil)
before, err := os.ReadFile(path.Join(root, repo, ispec.ImageIndexFile))
So(err, ShouldBeNil)
var idx ispec.Index
So(json.Unmarshal(before, &idx), ShouldBeNil)
idx.SchemaVersion = 42
So(imgStore.PutIndexContent(repo, idx), ShouldNotBeNil)
after, err := os.ReadFile(path.Join(root, repo, ispec.ImageIndexFile))
So(err, ShouldBeNil)
So(string(after), ShouldEqual, string(before))
uploadOrphans, err := filepath.Glob(path.Join(root, repo, storageConstants.BlobUploadDir, "*"))
So(err, ShouldBeNil)
So(uploadOrphans, ShouldBeEmpty)
})
Convey("success updates index.json and leaves .uploads empty", func() {
root, imgStore, cleanup := newLocalImageStoreWithDriver(t, nil)
defer cleanup()
So(imgStore.InitRepo(repo), ShouldBeNil)
var idx ispec.Index
buf, err := os.ReadFile(path.Join(root, repo, ispec.ImageIndexFile))
So(err, ShouldBeNil)
So(json.Unmarshal(buf, &idx), ShouldBeNil)
idx.SchemaVersion = 7
So(imgStore.PutIndexContent(repo, idx), ShouldBeNil)
after, err := os.ReadFile(path.Join(root, repo, ispec.ImageIndexFile))
So(err, ShouldBeNil)
var got ispec.Index
So(json.Unmarshal(after, &got), ShouldBeNil)
So(got.SchemaVersion, ShouldEqual, 7)
uploadOrphans, err := filepath.Glob(path.Join(root, repo, storageConstants.BlobUploadDir, "*"))
So(err, ShouldBeNil)
So(uploadOrphans, ShouldBeEmpty)
})
})
}