diff --git a/go.mod b/go.mod index 4faa1057..f19d94a4 100644 --- a/go.mod +++ b/go.mod @@ -80,6 +80,7 @@ require ( go.etcd.io/bbolt v1.4.3 golang.org/x/crypto v0.50.0 golang.org/x/oauth2 v0.36.0 + golang.org/x/sys v0.43.0 google.golang.org/protobuf v1.36.11 gopkg.in/resty.v1 v1.12.0 gopkg.in/yaml.v3 v3.0.1 @@ -537,7 +538,6 @@ require ( golang.org/x/mod v0.34.0 // indirect golang.org/x/net v0.53.0 // indirect golang.org/x/sync v0.20.0 // indirect - golang.org/x/sys v0.43.0 // indirect golang.org/x/term v0.42.0 // indirect golang.org/x/text v0.36.0 // indirect golang.org/x/time v0.15.0 // indirect diff --git a/pkg/storage/imagestore/imagestore.go b/pkg/storage/imagestore/imagestore.go index b2a6d158..c1f75eda 100644 --- a/pkg/storage/imagestore/imagestore.go +++ b/pkg/storage/imagestore/imagestore.go @@ -1772,8 +1772,31 @@ func (is *ImageStore) PutIndexContent(repo string, index ispec.Index) error { return err } - if _, err = is.storeDriver.WriteFile(indexPath, buf); err != nil { - is.log.Error().Err(err).Str("file", indexPath).Msg("failed to write") + // Write to a unique file under .uploads (same layout as blob uploads), then rename into place. + // Stale files are picked up by the same blob-upload GC path as ordinary uploads. + // This avoids truncating/removing index.json on failure (e.g. ENOSPC) — see local Driver.WriteFile + Cancel. + stagingUUID, err := guuid.NewV4() + if err != nil { + is.log.Error().Err(err).Str("repository", repo).Msg("failed to generate staging UUID") + + return err + } + + stagingID := stagingUUID.String() + tmpPath := is.BlobUploadPath(repo, stagingID) + + if _, err = is.storeDriver.WriteFile(tmpPath, buf); err != nil { + is.log.Error().Err(err).Str("file", tmpPath).Msg("failed to write staging index") + + _ = is.storeDriver.Delete(tmpPath) + + return err + } + + if err := is.storeDriver.Move(tmpPath, indexPath); err != nil { + is.log.Error().Err(err).Str("from", tmpPath).Str("to", indexPath).Msg("failed to replace index.json") + + _ = is.storeDriver.Delete(tmpPath) return err } diff --git a/pkg/storage/local/driver.go b/pkg/storage/local/driver.go index 0afdc1b3..d208a325 100644 --- a/pkg/storage/local/driver.go +++ b/pkg/storage/local/driver.go @@ -255,7 +255,9 @@ func (driver *Driver) Move(sourcePath string, destPath string) error { return driver.formatErr(err) } - return driver.formatErr(os.Rename(sourcePath, destPath)) + // Use renameReplace so an existing destination is replaced (POSIX rename); on Windows, + // os.Rename does not overwrite an existing file — see driver_unix.go / driver_windows.go. + return driver.formatErr(renameReplace(sourcePath, destPath, driver.commit)) } func (driver *Driver) SameFile(path1, path2 string) bool { diff --git a/pkg/storage/local/driver_test.go b/pkg/storage/local/driver_test.go index b6e19304..c1cc319c 100644 --- a/pkg/storage/local/driver_test.go +++ b/pkg/storage/local/driver_test.go @@ -197,23 +197,27 @@ func TestMove(t *testing.T) { So(storageErr.DriverName, ShouldEqual, "local") }) - Convey("Test Move() with os.Rename error to trigger formatErr", func() { + Convey("Test Move() replaces an existing destination (atomic replace)", func() { srcFile := path.Join(rootDir, "source.txt") destFile := path.Join(rootDir, "dest.txt") // Create source file - err := os.WriteFile(srcFile, []byte("test content"), 0o600) + err := os.WriteFile(srcFile, []byte("source wins"), 0o600) So(err, ShouldBeNil) // Create destination file to cause rename conflict err = os.WriteFile(destFile, []byte("existing content"), 0o600) So(err, ShouldBeNil) - // Move should return a formatted error (rename conflict) err = driver.Move(srcFile, destFile) - // Note: On some systems, os.Rename might succeed by overwriting - // So we just verify it doesn't panic and handle the result appropriately - _ = err + So(err, ShouldBeNil) + + got, err := os.ReadFile(destFile) + So(err, ShouldBeNil) + So(string(got), ShouldEqual, "source wins") + + _, err = os.Stat(srcFile) + So(err, ShouldNotBeNil) }) }) } diff --git a/pkg/storage/local/driver_unix.go b/pkg/storage/local/driver_unix.go new file mode 100644 index 00000000..ccde679b --- /dev/null +++ b/pkg/storage/local/driver_unix.go @@ -0,0 +1,11 @@ +//go:build !windows + +package local + +import "os" + +// renameReplace moves src to dst, replacing dst if it already exists (POSIX rename). +// commit is ignored on Unix; durability for committed writers is handled via fsync in fileWriter.Close. +func renameReplace(src, dst string, _ bool) error { + return os.Rename(src, dst) +} diff --git a/pkg/storage/local/driver_windows.go b/pkg/storage/local/driver_windows.go new file mode 100644 index 00000000..e0d809fd --- /dev/null +++ b/pkg/storage/local/driver_windows.go @@ -0,0 +1,30 @@ +//go:build windows + +package local + +import ( + "golang.org/x/sys/windows" +) + +// renameReplace moves src to dst, replacing dst if it already exists. +// When commit is true, MOVEFILE_WRITE_THROUGH is set so the rename is flushed to disk, aligning +// with the local driver's commit semantics on WriteFile/Close. When commit is false, only +// MOVEFILE_REPLACE_EXISTING is used so large moves are not forced fully synchronous. +func renameReplace(src, dst string, commit bool) error { + from, err := windows.UTF16PtrFromString(src) + if err != nil { + return err + } + + to, err := windows.UTF16PtrFromString(dst) + if err != nil { + return err + } + + var flags uint32 = windows.MOVEFILE_REPLACE_EXISTING + if commit { + flags |= windows.MOVEFILE_WRITE_THROUGH + } + + return windows.MoveFileEx(from, to, flags) +} diff --git a/pkg/storage/storage_test.go b/pkg/storage/storage_test.go index a8eff07d..a76005a3 100644 --- a/pkg/storage/storage_test.go +++ b/pkg/storage/storage_test.go @@ -10,9 +10,11 @@ import ( "io" "os" "path" + "path/filepath" "slices" "strings" "sync" + "syscall" "testing" "time" @@ -176,6 +178,40 @@ func newLocalImageStoreWithEventRecorder(t *testing.T, recorder events.Recorder) storeDriver, cacheDriver, nil, recorder) } +// newLocalImageStoreWithDriver builds a filesystem-backed image store for tests with a configurable +// storage driver (nil means local.New(true)). The returned cleanup stops the metrics server and must be deferred. +func newLocalImageStoreWithDriver(t *testing.T, storeDriver storageTypes.Driver) ( + string, storageTypes.ImageStore, func(), +) { + t.Helper() + + rootDir := t.TempDir() + log := zlog.NewTestLogger() + metrics := monitoring.NewMetricsServer(false, log) + cleanup := func() { metrics.Stop() } + + cacheDriver, err := storage.Create("boltdb", cache.BoltDBDriverParameters{ + RootDir: rootDir, + Name: "cache", + UseRelPaths: true, + }, log) + if err != nil { + t.Fatal(err) + } + + if storeDriver == nil { + storeDriver = local.New(true) + } + + imgStore := imagestore.NewImageStore(rootDir, rootDir, true, true, log, metrics, nil, + storeDriver, cacheDriver, nil, nil) + if imgStore == nil { + t.Fatal("NewImageStore returned nil") + } + + return rootDir, imgStore, cleanup +} + //nolint:gochecknoglobals var testCases = []struct { testCaseName string @@ -3901,3 +3937,135 @@ func DumpKeys(t *testing.T, redisURL string) { } } } + +// putIndexHookDriver wraps the local driver so PutIndexContent tests can inject WriteFile / Move failures. +type putIndexHookDriver struct { + *local.Driver + + writeFileHook func(filePath string, content []byte) (n int, err error, handled bool) + moveHook func(src, dst string) (err error, handled bool) +} + +func (h *putIndexHookDriver) WriteFile(filePath string, content []byte) (int, error) { + if h.writeFileHook != nil { + if n, err, ok := h.writeFileHook(filePath, content); ok { + return n, err + } + } + + return h.Driver.WriteFile(filePath, content) +} + +func (h *putIndexHookDriver) Move(src, dst string) error { + if h.moveHook != nil { + if err, ok := h.moveHook(src, dst); ok { + return err + } + } + + return h.Driver.Move(src, dst) +} + +func TestPutIndexContent_atomicReplace(t *testing.T) { + Convey("PutIndexContent stages under .uploads then renames (atomic replace)", t, func() { + const repo = "r1" + + Convey("staging WriteFile failure leaves index.json unchanged", func() { + hookDriver := &putIndexHookDriver{ + Driver: local.New(true), + writeFileHook: func(filePath string, content []byte) (int, error, bool) { + if filepath.Base(filepath.Dir(filePath)) == storageConstants.BlobUploadDir { + return -1, syscall.ENOSPC, true + } + + return 0, nil, false + }, + } + + root, imgStore, cleanup := newLocalImageStoreWithDriver(t, hookDriver) + defer cleanup() + + So(imgStore.InitRepo(repo), ShouldBeNil) + + before, err := os.ReadFile(path.Join(root, repo, ispec.ImageIndexFile)) + So(err, ShouldBeNil) + So(len(before), ShouldBeGreaterThan, 0) + + var idx ispec.Index + So(json.Unmarshal(before, &idx), ShouldBeNil) + idx.SchemaVersion = 999 + + So(imgStore.PutIndexContent(repo, idx), ShouldNotBeNil) + + after, err := os.ReadFile(path.Join(root, repo, ispec.ImageIndexFile)) + So(err, ShouldBeNil) + So(string(after), ShouldEqual, string(before)) + + uploadOrphans, err := filepath.Glob(path.Join(root, repo, storageConstants.BlobUploadDir, "*")) + So(err, ShouldBeNil) + So(uploadOrphans, ShouldBeEmpty) + }) + + Convey("Move into index.json failure leaves index.json unchanged", func() { + hookDriver := &putIndexHookDriver{ + Driver: local.New(true), + moveHook: func(src, dst string) (error, bool) { + if filepath.Base(dst) == ispec.ImageIndexFile { + //nolint: err113 + return errors.New("forced move failure"), true + } + + return nil, false + }, + } + + root, imgStore, cleanup := newLocalImageStoreWithDriver(t, hookDriver) + defer cleanup() + + So(imgStore.InitRepo(repo), ShouldBeNil) + + before, err := os.ReadFile(path.Join(root, repo, ispec.ImageIndexFile)) + So(err, ShouldBeNil) + + var idx ispec.Index + So(json.Unmarshal(before, &idx), ShouldBeNil) + idx.SchemaVersion = 42 + + So(imgStore.PutIndexContent(repo, idx), ShouldNotBeNil) + + after, err := os.ReadFile(path.Join(root, repo, ispec.ImageIndexFile)) + So(err, ShouldBeNil) + So(string(after), ShouldEqual, string(before)) + + uploadOrphans, err := filepath.Glob(path.Join(root, repo, storageConstants.BlobUploadDir, "*")) + So(err, ShouldBeNil) + So(uploadOrphans, ShouldBeEmpty) + }) + + Convey("success updates index.json and leaves .uploads empty", func() { + root, imgStore, cleanup := newLocalImageStoreWithDriver(t, nil) + defer cleanup() + + So(imgStore.InitRepo(repo), ShouldBeNil) + + var idx ispec.Index + buf, err := os.ReadFile(path.Join(root, repo, ispec.ImageIndexFile)) + So(err, ShouldBeNil) + So(json.Unmarshal(buf, &idx), ShouldBeNil) + + idx.SchemaVersion = 7 + So(imgStore.PutIndexContent(repo, idx), ShouldBeNil) + + after, err := os.ReadFile(path.Join(root, repo, ispec.ImageIndexFile)) + So(err, ShouldBeNil) + + var got ispec.Index + So(json.Unmarshal(after, &got), ShouldBeNil) + So(got.SchemaVersion, ShouldEqual, 7) + + uploadOrphans, err := filepath.Glob(path.Join(root, repo, storageConstants.BlobUploadDir, "*")) + So(err, ShouldBeNil) + So(uploadOrphans, ShouldBeEmpty) + }) + }) +}