From 566286ae422167c2926c4713d4ce9c3aa92170fa Mon Sep 17 00:00:00 2001 From: Andrei Aaron Date: Sat, 22 Nov 2025 07:57:24 +0200 Subject: [PATCH] fix: Add HTTP client timeouts to prevent indefinite hangs in sync operations (#3574) --- pkg/cli/server/root.go | 16 +-- pkg/extensions/config/sync/config.go | 25 ++--- pkg/extensions/sync/constants/constants.go | 8 ++ pkg/extensions/sync/service.go | 29 +++++- pkg/extensions/sync/sync_internal_test.go | 112 +++++++++++++++++++++ 5 files changed, 171 insertions(+), 19 deletions(-) diff --git a/pkg/cli/server/root.go b/pkg/cli/server/root.go index 40189a7f..d3cd47ac 100644 --- a/pkg/cli/server/root.go +++ b/pkg/cli/server/root.go @@ -28,6 +28,7 @@ import ( extconf "zotregistry.dev/zot/v2/pkg/extensions/config" eventsconf "zotregistry.dev/zot/v2/pkg/extensions/config/events" "zotregistry.dev/zot/v2/pkg/extensions/monitoring" + syncConstants "zotregistry.dev/zot/v2/pkg/extensions/sync/constants" zlog "zotregistry.dev/zot/v2/pkg/log" storageConstants "zotregistry.dev/zot/v2/pkg/storage/constants" ) @@ -663,15 +664,18 @@ func applyDefaultValues(config *config.Config, viperInstance *viper.Viper, logge config.Extensions.Sync.Enable = &defaultVal } - defaultSyncTimeout := 3 * time.Hour - - for idx, regCfg := range config.Extensions.Sync.Registries { + for idx := range config.Extensions.Sync.Registries { + regCfg := &config.Extensions.Sync.Registries[idx] if regCfg.TLSVerify == nil { - config.Extensions.Sync.Registries[idx].TLSVerify = &defaultVal + regCfg.TLSVerify = &defaultVal } - if config.Extensions.Sync.Registries[idx].SyncTimeout == 0 { - config.Extensions.Sync.Registries[idx].SyncTimeout = defaultSyncTimeout + if regCfg.SyncTimeout == 0 { + regCfg.SyncTimeout = syncConstants.DefaultSyncTimeout + } + + if regCfg.ResponseHeaderTimeout == 0 { + regCfg.ResponseHeaderTimeout = syncConstants.DefaultResponseHeaderTimeout } } } diff --git a/pkg/extensions/config/sync/config.go b/pkg/extensions/config/sync/config.go index e065d138..644d3e54 100644 --- a/pkg/extensions/config/sync/config.go +++ b/pkg/extensions/config/sync/config.go @@ -23,18 +23,19 @@ type Config struct { } type RegistryConfig struct { - URLs []string - PollInterval time.Duration - Content []Content - TLSVerify *bool - OnDemand bool - CertDir string - MaxRetries *int - RetryDelay *time.Duration - OnlySigned *bool - CredentialHelper string - PreserveDigest bool // sync without converting - SyncTimeout time.Duration // timeout for on-demand sync operations; if zero or unset, defaults to 3 hours + URLs []string + PollInterval time.Duration + Content []Content + TLSVerify *bool + OnDemand bool + CertDir string + MaxRetries *int + RetryDelay *time.Duration + OnlySigned *bool + CredentialHelper string + PreserveDigest bool // sync without converting + SyncTimeout time.Duration // overall HTTP client timeout for all sync operations + ResponseHeaderTimeout time.Duration `yaml:"-"` // response header timeout; set in root.go } type Content struct { diff --git a/pkg/extensions/sync/constants/constants.go b/pkg/extensions/sync/constants/constants.go index 44ef18f5..f0c5ce81 100644 --- a/pkg/extensions/sync/constants/constants.go +++ b/pkg/extensions/sync/constants/constants.go @@ -1,5 +1,7 @@ package constants +import "time" + // references type. const ( Cosign = "CosignSignature" @@ -7,3 +9,9 @@ const ( Tag = "TagReference" SyncBlobUploadDir = ".sync" ) + +// Default timeout settings for sync operations. +const ( + DefaultSyncTimeout = 3 * time.Hour // default timeout for all sync operations (on-demand and periodic) + DefaultResponseHeaderTimeout = 30 * time.Second // default timeout for reading response headers +) diff --git a/pkg/extensions/sync/service.go b/pkg/extensions/sync/service.go index 0d109190..719d0391 100644 --- a/pkg/extensions/sync/service.go +++ b/pkg/extensions/sync/service.go @@ -7,6 +7,7 @@ import ( "context" "errors" "fmt" + "net/http" "net/url" "strconv" "strings" @@ -26,6 +27,7 @@ import ( "zotregistry.dev/zot/v2/pkg/cluster" "zotregistry.dev/zot/v2/pkg/common" syncconf "zotregistry.dev/zot/v2/pkg/extensions/config/sync" + syncConstants "zotregistry.dev/zot/v2/pkg/extensions/sync/constants" "zotregistry.dev/zot/v2/pkg/log" mTypes "zotregistry.dev/zot/v2/pkg/meta/types" "zotregistry.dev/zot/v2/pkg/storage" @@ -211,7 +213,7 @@ func (service *BaseService) CanRetryOnError() bool { func (service *BaseService) GetSyncTimeout() time.Duration { if service.config.SyncTimeout == 0 { - return 3 * time.Hour // default timeout + return syncConstants.DefaultSyncTimeout } return service.config.SyncTimeout @@ -882,6 +884,31 @@ func newClient(opts syncconf.RegistryConfig, credentials syncconf.CredentialsFil regOpts = append(regOpts, reg.WithDelay(*opts.RetryDelay, *opts.RetryDelay)) } + // Configure transport with timeouts to prevent indefinite hangs. + // See https://blog.cloudflare.com/the-complete-guide-to-golang-net-http-timeouts/ + // Clone DefaultTransport to preserve proxy/TLS settings and existing timeouts + // (DialContext: 30s, TLSHandshakeTimeout: 10s). + // regclient uses DefaultTransport internally if no custom transport is provided, so this ensures compatibility. + transport := http.DefaultTransport.(*http.Transport).Clone() //nolint: forcetypeassert + + // ResponseHeaderTimeout: prevents hanging when server connects but doesn't send headers. + // Set programmatically in root.go. This timeout applies only to waiting for response headers + // after the request is sent. It does NOT include DialContext (30s) or TLSHandshakeTimeout (10s), + // which are separate component timeouts. Doesn't cover body transfer time, which is expected + // to be slow for large images. + transport.ResponseHeaderTimeout = opts.ResponseHeaderTimeout + + // Use SyncTimeout for overall HTTP client timeout. This is the maximum time for the entire + // HTTP request, covering all stages: DialContext (connection establishment), TLSHandshakeTimeout + // (TLS handshake), ResponseHeaderTimeout (waiting for headers), and body transfer time. + // Critical for periodic sync operations (catalog listing, SyncRepo, getTags) which don't use + // on-demand timeout contexts and could otherwise hang indefinitely if upstream doesn't respond. + httpClient := &http.Client{ + Transport: transport, + Timeout: opts.SyncTimeout, + } + regOpts = append(regOpts, reg.WithHTTPClient(httpClient)) + client := regclient.New( regclient.WithDockerCerts(), regclient.WithDockerCreds(), diff --git a/pkg/extensions/sync/sync_internal_test.go b/pkg/extensions/sync/sync_internal_test.go index 68692939..05f380ea 100644 --- a/pkg/extensions/sync/sync_internal_test.go +++ b/pkg/extensions/sync/sync_internal_test.go @@ -9,7 +9,10 @@ import ( "encoding/json" "errors" "fmt" + "net/http" + "net/http/httptest" "os" + "strings" "testing" "time" @@ -23,6 +26,7 @@ import ( syncconf "zotregistry.dev/zot/v2/pkg/extensions/config/sync" "zotregistry.dev/zot/v2/pkg/extensions/lint" "zotregistry.dev/zot/v2/pkg/extensions/monitoring" + syncConstants "zotregistry.dev/zot/v2/pkg/extensions/sync/constants" "zotregistry.dev/zot/v2/pkg/log" mTypes "zotregistry.dev/zot/v2/pkg/meta/types" "zotregistry.dev/zot/v2/pkg/storage" @@ -994,3 +998,111 @@ func TestDestinationRegistry(t *testing.T) { }) }) } + +// TestNewClientTimeoutBehavior verifies that newClient creates a client that respects timeouts. +func TestNewClientTimeoutBehavior(t *testing.T) { + Convey("Test newClient timeout behavior", t, func() { + logger := log.NewTestLogger() + zeroRetries := 0 + retryDelay := 1 * time.Millisecond + + Convey("Client respects ResponseHeaderTimeout from config", func() { + // Server delay - must be longer than ResponseHeaderTimeout to trigger timeout + serverDelay := 1 * time.Second + + // Create a test server that accepts connections but delays sending headers + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Sleep longer than the timeout to simulate a server that connects + // but doesn't send headers quickly + time.Sleep(serverDelay) + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + host := strings.TrimPrefix(server.URL, "http://") + + // Create sync config with short ResponseHeaderTimeout + opts := syncconf.RegistryConfig{ + URLs: []string{server.URL}, + SyncTimeout: syncConstants.DefaultSyncTimeout, + ResponseHeaderTimeout: 10 * time.Millisecond, + MaxRetries: &zeroRetries, + RetryDelay: &retryDelay, + } + + // Create the client using the actual production function + client, _, err := newClient(opts, syncconf.CredentialsFile{}, logger) + So(err, ShouldBeNil) + + // Create a reference to the test server + r, err := ref.New(host + "/repo:tag") + So(err, ShouldBeNil) + + // Make a request - it should timeout quickly due to ResponseHeaderTimeout + start := time.Now() + // Use ManifestHead as a lightweight operation to trigger the HTTP request + _, err = client.ManifestHead(context.Background(), r) + elapsed := time.Since(start) + + // Should timeout quickly (approx 10ms * retries + overhead) + // With 1ms retry delay and 10ms timeout, retries add overhead but should still be < serverDelay + So(err, ShouldNotBeNil) + // Allow small tolerance for timing variability (5ms) + So(elapsed, ShouldBeGreaterThanOrEqualTo, opts.ResponseHeaderTimeout-5*time.Millisecond) + So(elapsed, ShouldBeLessThan, serverDelay) + }) + + Convey("Client respects SyncTimeout (overall) from config", func() { + // Server delay - must be longer than SyncTimeout to trigger timeout + serverDelay := 1 * time.Second + + // Create a test server that sends headers quickly but delays body transfer + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Send headers immediately with Content-Length to bypass ResponseHeaderTimeout + // but indicate there's a body coming + w.Header().Set("Content-Length", "100") + w.WriteHeader(http.StatusOK) + // Flush headers to ensure they're sent + if flusher, ok := w.(http.Flusher); ok { + flusher.Flush() + } + // Delay sending body data - this will trigger SyncTimeout (overall client timeout) + time.Sleep(serverDelay) + // Write body data (client should timeout before this completes) + w.Write(make([]byte, 100)) + })) + defer server.Close() + + host := strings.TrimPrefix(server.URL, "http://") + + // Create sync config with short SyncTimeout + opts := syncconf.RegistryConfig{ + URLs: []string{server.URL}, + SyncTimeout: 10 * time.Millisecond, + ResponseHeaderTimeout: syncConstants.DefaultResponseHeaderTimeout, + MaxRetries: &zeroRetries, + RetryDelay: &retryDelay, + } + + // Create the client using the actual production function + client, _, err := newClient(opts, syncconf.CredentialsFile{}, logger) + So(err, ShouldBeNil) + + r, err := ref.New(host + "/repo:tag") + So(err, ShouldBeNil) + + // Make a request - it should timeout due to overall SyncTimeout + // Use ManifestGet (not ManifestHead) to test body transfer timeout + start := time.Now() + _, err = client.ManifestGet(context.Background(), r) + elapsed := time.Since(start) + + // Should timeout within the SyncTimeout period (+ retries) + // Elapsed should be >= SyncTimeout (since that's when it times out) and < serverDelay + So(err, ShouldNotBeNil) + // Allow small tolerance for timing variability (5ms) + So(elapsed, ShouldBeGreaterThanOrEqualTo, opts.SyncTimeout-5*time.Millisecond) + So(elapsed, ShouldBeLessThan, serverDelay) + }) + }) +}