fix(scheduler): fix data race (#2085)

* fix(scheduler): data race when pushing new tasks

the problem here is that scheduler can be closed in two ways:
- canceling the context given as argument to scheduler.RunScheduler()
- running scheduler.Shutdown()

because of this shutdown can trigger a data race between calling scheduler.inShutdown()
and actually pushing tasks into the pool workers

solved that by keeping a quit channel and listening on both quit channel and ctx.Done()
and closing the worker chan and scheduler afterwards.

Signed-off-by: Petu Eusebiu <peusebiu@cisco.com>

* refactor(scheduler): refactor into a single shutdown

before this we could stop scheduler either by closing the context
provided to RunScheduler(ctx) or by running Shutdown().

simplify things by getting rid of the external context in RunScheduler().
keep an internal context in the scheduler itself and pass it down to all tasks.

Signed-off-by: Petu Eusebiu <peusebiu@cisco.com>

---------

Signed-off-by: Petu Eusebiu <peusebiu@cisco.com>
This commit is contained in:
peusebiu
2023-12-11 20:00:34 +02:00
committed by GitHub
parent d71a1f494e
commit 7642e5af98
31 changed files with 494 additions and 326 deletions
+88
View File
@@ -11,6 +11,7 @@ import (
"net/http"
"net/http/httptest"
"os"
"path"
"testing"
"time"
@@ -23,9 +24,14 @@ import (
"zotregistry.io/zot/pkg/api/config"
"zotregistry.io/zot/pkg/api/constants"
extconf "zotregistry.io/zot/pkg/extensions/config"
"zotregistry.io/zot/pkg/extensions/monitoring"
"zotregistry.io/zot/pkg/log"
mTypes "zotregistry.io/zot/pkg/meta/types"
reqCtx "zotregistry.io/zot/pkg/requestcontext"
"zotregistry.io/zot/pkg/scheduler"
"zotregistry.io/zot/pkg/storage"
storageConstants "zotregistry.io/zot/pkg/storage/constants"
"zotregistry.io/zot/pkg/storage/local"
authutils "zotregistry.io/zot/pkg/test/auth"
test "zotregistry.io/zot/pkg/test/common"
"zotregistry.io/zot/pkg/test/mocks"
@@ -922,6 +928,88 @@ func TestAPIKeysGeneratorErrors(t *testing.T) {
})
}
func TestCookiestoreCleanup(t *testing.T) {
log := log.Logger{}
metrics := monitoring.NewMetricsServer(true, log)
Convey("Test cookiestore cleanup works", t, func() {
taskScheduler := scheduler.NewScheduler(config.New(), metrics, log)
taskScheduler.RateLimit = 50 * time.Millisecond
taskScheduler.RunScheduler()
rootDir := t.TempDir()
err := os.MkdirAll(path.Join(rootDir, "_sessions"), storageConstants.DefaultDirPerms)
So(err, ShouldBeNil)
sessionPath := path.Join(rootDir, "_sessions", "session_1234")
err = os.WriteFile(sessionPath, []byte("session"), storageConstants.DefaultFilePerms)
So(err, ShouldBeNil)
err = os.Chtimes(sessionPath, time.Time{}, time.Time{})
So(err, ShouldBeNil)
imgStore := local.NewImageStore(rootDir, false, false, log, metrics, nil, nil)
storeController := storage.StoreController{
DefaultStore: imgStore,
}
cookieStore, err := api.NewCookieStore(storeController)
So(err, ShouldBeNil)
cookieStore.RunSessionCleaner(taskScheduler)
time.Sleep(2 * time.Second)
taskScheduler.Shutdown()
// make sure session is removed
_, err = os.Stat(sessionPath)
So(err, ShouldNotBeNil)
})
Convey("Test cookiestore cleanup without permissions on rootDir", t, func() {
taskScheduler := scheduler.NewScheduler(config.New(), metrics, log)
taskScheduler.RateLimit = 50 * time.Millisecond
taskScheduler.RunScheduler()
rootDir := t.TempDir()
err := os.MkdirAll(path.Join(rootDir, "_sessions"), storageConstants.DefaultDirPerms)
So(err, ShouldBeNil)
sessionPath := path.Join(rootDir, "_sessions", "session_1234")
err = os.WriteFile(sessionPath, []byte("session"), storageConstants.DefaultFilePerms)
So(err, ShouldBeNil)
imgStore := local.NewImageStore(rootDir, false, false, log, metrics, nil, nil)
storeController := storage.StoreController{
DefaultStore: imgStore,
}
cookieStore, err := api.NewCookieStore(storeController)
So(err, ShouldBeNil)
err = os.Chmod(rootDir, 0o000)
So(err, ShouldBeNil)
defer func() {
err = os.Chmod(rootDir, storageConstants.DefaultDirPerms)
So(err, ShouldBeNil)
}()
cookieStore.RunSessionCleaner(taskScheduler)
time.Sleep(1 * time.Second)
taskScheduler.Shutdown()
})
}
type mockUUIDGenerator struct {
guuid.Generator
succeedAttempts int
+14 -11
View File
@@ -94,12 +94,12 @@ func (c *Controller) GetPort() int {
return c.chosenPort
}
func (c *Controller) Run(reloadCtx context.Context) error {
func (c *Controller) Run() error {
if err := c.initCookieStore(); err != nil {
return err
}
c.StartBackgroundTasks(reloadCtx)
c.StartBackgroundTasks()
// setup HTTP API router
engine := mux.NewRouter()
@@ -216,7 +216,7 @@ func (c *Controller) Run(reloadCtx context.Context) error {
return server.Serve(listener)
}
func (c *Controller) Init(reloadCtx context.Context) error {
func (c *Controller) Init() error {
// print the current configuration, but strip secrets
c.Log.Info().Interface("params", c.Config.Sanitize()).Msg("configuration settings")
@@ -237,7 +237,7 @@ func (c *Controller) Init(reloadCtx context.Context) error {
return err
}
if err := c.InitMetaDB(reloadCtx); err != nil {
if err := c.InitMetaDB(); err != nil {
return err
}
@@ -280,7 +280,7 @@ func (c *Controller) initCookieStore() error {
return nil
}
func (c *Controller) InitMetaDB(reloadCtx context.Context) error {
func (c *Controller) InitMetaDB() error {
// init metaDB if search is enabled or we need to store user profiles, api keys or signatures
if c.Config.IsSearchEnabled() || c.Config.IsBasicAuthnEnabled() || c.Config.IsImageTrustEnabled() ||
c.Config.IsRetentionEnabled() {
@@ -310,7 +310,7 @@ func (c *Controller) InitMetaDB(reloadCtx context.Context) error {
return nil
}
func (c *Controller) LoadNewConfig(reloadCtx context.Context, newConfig *config.Config) {
func (c *Controller) LoadNewConfig(newConfig *config.Config) {
// reload access control config
c.Config.HTTP.AccessControl = newConfig.HTTP.AccessControl
@@ -364,21 +364,24 @@ func (c *Controller) LoadNewConfig(reloadCtx context.Context, newConfig *config.
c.InitCVEInfo()
c.StartBackgroundTasks(reloadCtx)
c.Log.Info().Interface("reloaded params", c.Config.Sanitize()).
Msg("loaded new configuration settings")
}
func (c *Controller) Shutdown() {
c.taskScheduler.Shutdown()
c.StopBackgroundTasks()
ctx := context.Background()
_ = c.Server.Shutdown(ctx)
}
func (c *Controller) StartBackgroundTasks(reloadCtx context.Context) {
// Will stop scheduler and wait for all tasks to finish their work.
func (c *Controller) StopBackgroundTasks() {
c.taskScheduler.Shutdown()
}
func (c *Controller) StartBackgroundTasks() {
c.taskScheduler = scheduler.NewScheduler(c.Config, c.Metrics, c.Log)
c.taskScheduler.RunScheduler(reloadCtx)
c.taskScheduler.RunScheduler()
// Enable running garbage-collect periodically for DefaultStore
if c.Config.Storage.GC {
+11 -11
View File
@@ -300,10 +300,10 @@ func TestRunAlreadyRunningServer(t *testing.T) {
cm.StartAndWait(port)
defer cm.StopServer()
err := ctlr.Init(context.Background())
err := ctlr.Init()
So(err, ShouldNotBeNil)
err = ctlr.Run(context.Background())
err = ctlr.Run()
So(err, ShouldNotBeNil)
})
}
@@ -377,7 +377,7 @@ func TestObjectStorageController(t *testing.T) {
ctlr := makeController(conf, tmp)
So(ctlr, ShouldNotBeNil)
err := ctlr.Init(context.Background())
err := ctlr.Init()
So(err, ShouldNotBeNil)
})
@@ -1218,7 +1218,7 @@ func TestMultipleInstance(t *testing.T) {
}
ctlr := api.NewController(conf)
ctlr.Log.Info().Int64("seedUser", seedUser).Int64("seedPass", seedPass).Msg("random seed for username & password")
err := ctlr.Init(context.Background())
err := ctlr.Init()
So(err, ShouldEqual, errors.ErrImgStoreNotFound)
globalDir := t.TempDir()
@@ -1311,7 +1311,7 @@ func TestMultipleInstance(t *testing.T) {
ctlr.Config.Storage.SubPaths = subPathMap
err := ctlr.Init(context.Background())
err := ctlr.Init()
So(err, ShouldNotBeNil)
// subpath root directory does not exist.
@@ -1320,7 +1320,7 @@ func TestMultipleInstance(t *testing.T) {
ctlr.Config.Storage.SubPaths = subPathMap
err = ctlr.Init(context.Background())
err = ctlr.Init()
So(err, ShouldNotBeNil)
subPathMap["/a"] = config.StorageConfig{RootDirectory: subDir, Dedupe: true, GC: true}
@@ -1328,7 +1328,7 @@ func TestMultipleInstance(t *testing.T) {
ctlr.Config.Storage.SubPaths = subPathMap
err = ctlr.Init(context.Background())
err = ctlr.Init()
So(err, ShouldNotBeNil)
})
}
@@ -1826,12 +1826,12 @@ func TestTSLFailedReadingOfCACert(t *testing.T) {
defer cancel()
ctlr := makeController(conf, t.TempDir())
err = ctlr.Init(ctx)
err = ctlr.Init()
So(err, ShouldBeNil)
errChan := make(chan error, 1)
go func() {
err = ctlr.Run(ctx)
err = ctlr.Run()
errChan <- err
}()
@@ -1866,12 +1866,12 @@ func TestTSLFailedReadingOfCACert(t *testing.T) {
defer cancel()
ctlr := makeController(conf, t.TempDir())
err = ctlr.Init(ctx)
err = ctlr.Init()
So(err, ShouldBeNil)
errChan := make(chan error, 1)
go func() {
err = ctlr.Run(ctx)
err = ctlr.Run()
errChan <- err
}()
+3 -1
View File
@@ -152,7 +152,9 @@ type CleanTask struct {
func (cleanTask *CleanTask) DoWork(ctx context.Context) error {
for _, session := range cleanTask.sessions {
if err := os.Remove(session); err != nil {
return err
if !os.IsNotExist(err) {
return err
}
}
}