fix: Add HTTP client timeouts to prevent indefinite hangs in sync operations (#3574)

This commit is contained in:
Andrei Aaron
2025-11-22 07:57:24 +02:00
committed by GitHub
parent bd32c62eb6
commit 566286ae42
5 changed files with 171 additions and 19 deletions
+10 -6
View File
@@ -28,6 +28,7 @@ import (
extconf "zotregistry.dev/zot/v2/pkg/extensions/config"
eventsconf "zotregistry.dev/zot/v2/pkg/extensions/config/events"
"zotregistry.dev/zot/v2/pkg/extensions/monitoring"
syncConstants "zotregistry.dev/zot/v2/pkg/extensions/sync/constants"
zlog "zotregistry.dev/zot/v2/pkg/log"
storageConstants "zotregistry.dev/zot/v2/pkg/storage/constants"
)
@@ -663,15 +664,18 @@ func applyDefaultValues(config *config.Config, viperInstance *viper.Viper, logge
config.Extensions.Sync.Enable = &defaultVal
}
defaultSyncTimeout := 3 * time.Hour
for idx, regCfg := range config.Extensions.Sync.Registries {
for idx := range config.Extensions.Sync.Registries {
regCfg := &config.Extensions.Sync.Registries[idx]
if regCfg.TLSVerify == nil {
config.Extensions.Sync.Registries[idx].TLSVerify = &defaultVal
regCfg.TLSVerify = &defaultVal
}
if config.Extensions.Sync.Registries[idx].SyncTimeout == 0 {
config.Extensions.Sync.Registries[idx].SyncTimeout = defaultSyncTimeout
if regCfg.SyncTimeout == 0 {
regCfg.SyncTimeout = syncConstants.DefaultSyncTimeout
}
if regCfg.ResponseHeaderTimeout == 0 {
regCfg.ResponseHeaderTimeout = syncConstants.DefaultResponseHeaderTimeout
}
}
}
+13 -12
View File
@@ -23,18 +23,19 @@ type Config struct {
}
type RegistryConfig struct {
URLs []string
PollInterval time.Duration
Content []Content
TLSVerify *bool
OnDemand bool
CertDir string
MaxRetries *int
RetryDelay *time.Duration
OnlySigned *bool
CredentialHelper string
PreserveDigest bool // sync without converting
SyncTimeout time.Duration // timeout for on-demand sync operations; if zero or unset, defaults to 3 hours
URLs []string
PollInterval time.Duration
Content []Content
TLSVerify *bool
OnDemand bool
CertDir string
MaxRetries *int
RetryDelay *time.Duration
OnlySigned *bool
CredentialHelper string
PreserveDigest bool // sync without converting
SyncTimeout time.Duration // overall HTTP client timeout for all sync operations
ResponseHeaderTimeout time.Duration `yaml:"-"` // response header timeout; set in root.go
}
type Content struct {
@@ -1,5 +1,7 @@
package constants
import "time"
// references type.
const (
Cosign = "CosignSignature"
@@ -7,3 +9,9 @@ const (
Tag = "TagReference"
SyncBlobUploadDir = ".sync"
)
// Default timeout settings for sync operations.
const (
DefaultSyncTimeout = 3 * time.Hour // default timeout for all sync operations (on-demand and periodic)
DefaultResponseHeaderTimeout = 30 * time.Second // default timeout for reading response headers
)
+28 -1
View File
@@ -7,6 +7,7 @@ import (
"context"
"errors"
"fmt"
"net/http"
"net/url"
"strconv"
"strings"
@@ -26,6 +27,7 @@ import (
"zotregistry.dev/zot/v2/pkg/cluster"
"zotregistry.dev/zot/v2/pkg/common"
syncconf "zotregistry.dev/zot/v2/pkg/extensions/config/sync"
syncConstants "zotregistry.dev/zot/v2/pkg/extensions/sync/constants"
"zotregistry.dev/zot/v2/pkg/log"
mTypes "zotregistry.dev/zot/v2/pkg/meta/types"
"zotregistry.dev/zot/v2/pkg/storage"
@@ -211,7 +213,7 @@ func (service *BaseService) CanRetryOnError() bool {
func (service *BaseService) GetSyncTimeout() time.Duration {
if service.config.SyncTimeout == 0 {
return 3 * time.Hour // default timeout
return syncConstants.DefaultSyncTimeout
}
return service.config.SyncTimeout
@@ -882,6 +884,31 @@ func newClient(opts syncconf.RegistryConfig, credentials syncconf.CredentialsFil
regOpts = append(regOpts, reg.WithDelay(*opts.RetryDelay, *opts.RetryDelay))
}
// Configure transport with timeouts to prevent indefinite hangs.
// See https://blog.cloudflare.com/the-complete-guide-to-golang-net-http-timeouts/
// Clone DefaultTransport to preserve proxy/TLS settings and existing timeouts
// (DialContext: 30s, TLSHandshakeTimeout: 10s).
// regclient uses DefaultTransport internally if no custom transport is provided, so this ensures compatibility.
transport := http.DefaultTransport.(*http.Transport).Clone() //nolint: forcetypeassert
// ResponseHeaderTimeout: prevents hanging when server connects but doesn't send headers.
// Set programmatically in root.go. This timeout applies only to waiting for response headers
// after the request is sent. It does NOT include DialContext (30s) or TLSHandshakeTimeout (10s),
// which are separate component timeouts. Doesn't cover body transfer time, which is expected
// to be slow for large images.
transport.ResponseHeaderTimeout = opts.ResponseHeaderTimeout
// Use SyncTimeout for overall HTTP client timeout. This is the maximum time for the entire
// HTTP request, covering all stages: DialContext (connection establishment), TLSHandshakeTimeout
// (TLS handshake), ResponseHeaderTimeout (waiting for headers), and body transfer time.
// Critical for periodic sync operations (catalog listing, SyncRepo, getTags) which don't use
// on-demand timeout contexts and could otherwise hang indefinitely if upstream doesn't respond.
httpClient := &http.Client{
Transport: transport,
Timeout: opts.SyncTimeout,
}
regOpts = append(regOpts, reg.WithHTTPClient(httpClient))
client := regclient.New(
regclient.WithDockerCerts(),
regclient.WithDockerCreds(),
+112
View File
@@ -9,7 +9,10 @@ import (
"encoding/json"
"errors"
"fmt"
"net/http"
"net/http/httptest"
"os"
"strings"
"testing"
"time"
@@ -23,6 +26,7 @@ import (
syncconf "zotregistry.dev/zot/v2/pkg/extensions/config/sync"
"zotregistry.dev/zot/v2/pkg/extensions/lint"
"zotregistry.dev/zot/v2/pkg/extensions/monitoring"
syncConstants "zotregistry.dev/zot/v2/pkg/extensions/sync/constants"
"zotregistry.dev/zot/v2/pkg/log"
mTypes "zotregistry.dev/zot/v2/pkg/meta/types"
"zotregistry.dev/zot/v2/pkg/storage"
@@ -994,3 +998,111 @@ func TestDestinationRegistry(t *testing.T) {
})
})
}
// TestNewClientTimeoutBehavior verifies that newClient creates a client that respects timeouts.
func TestNewClientTimeoutBehavior(t *testing.T) {
Convey("Test newClient timeout behavior", t, func() {
logger := log.NewTestLogger()
zeroRetries := 0
retryDelay := 1 * time.Millisecond
Convey("Client respects ResponseHeaderTimeout from config", func() {
// Server delay - must be longer than ResponseHeaderTimeout to trigger timeout
serverDelay := 1 * time.Second
// Create a test server that accepts connections but delays sending headers
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Sleep longer than the timeout to simulate a server that connects
// but doesn't send headers quickly
time.Sleep(serverDelay)
w.WriteHeader(http.StatusOK)
}))
defer server.Close()
host := strings.TrimPrefix(server.URL, "http://")
// Create sync config with short ResponseHeaderTimeout
opts := syncconf.RegistryConfig{
URLs: []string{server.URL},
SyncTimeout: syncConstants.DefaultSyncTimeout,
ResponseHeaderTimeout: 10 * time.Millisecond,
MaxRetries: &zeroRetries,
RetryDelay: &retryDelay,
}
// Create the client using the actual production function
client, _, err := newClient(opts, syncconf.CredentialsFile{}, logger)
So(err, ShouldBeNil)
// Create a reference to the test server
r, err := ref.New(host + "/repo:tag")
So(err, ShouldBeNil)
// Make a request - it should timeout quickly due to ResponseHeaderTimeout
start := time.Now()
// Use ManifestHead as a lightweight operation to trigger the HTTP request
_, err = client.ManifestHead(context.Background(), r)
elapsed := time.Since(start)
// Should timeout quickly (approx 10ms * retries + overhead)
// With 1ms retry delay and 10ms timeout, retries add overhead but should still be < serverDelay
So(err, ShouldNotBeNil)
// Allow small tolerance for timing variability (5ms)
So(elapsed, ShouldBeGreaterThanOrEqualTo, opts.ResponseHeaderTimeout-5*time.Millisecond)
So(elapsed, ShouldBeLessThan, serverDelay)
})
Convey("Client respects SyncTimeout (overall) from config", func() {
// Server delay - must be longer than SyncTimeout to trigger timeout
serverDelay := 1 * time.Second
// Create a test server that sends headers quickly but delays body transfer
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Send headers immediately with Content-Length to bypass ResponseHeaderTimeout
// but indicate there's a body coming
w.Header().Set("Content-Length", "100")
w.WriteHeader(http.StatusOK)
// Flush headers to ensure they're sent
if flusher, ok := w.(http.Flusher); ok {
flusher.Flush()
}
// Delay sending body data - this will trigger SyncTimeout (overall client timeout)
time.Sleep(serverDelay)
// Write body data (client should timeout before this completes)
w.Write(make([]byte, 100))
}))
defer server.Close()
host := strings.TrimPrefix(server.URL, "http://")
// Create sync config with short SyncTimeout
opts := syncconf.RegistryConfig{
URLs: []string{server.URL},
SyncTimeout: 10 * time.Millisecond,
ResponseHeaderTimeout: syncConstants.DefaultResponseHeaderTimeout,
MaxRetries: &zeroRetries,
RetryDelay: &retryDelay,
}
// Create the client using the actual production function
client, _, err := newClient(opts, syncconf.CredentialsFile{}, logger)
So(err, ShouldBeNil)
r, err := ref.New(host + "/repo:tag")
So(err, ShouldBeNil)
// Make a request - it should timeout due to overall SyncTimeout
// Use ManifestGet (not ManifestHead) to test body transfer timeout
start := time.Now()
_, err = client.ManifestGet(context.Background(), r)
elapsed := time.Since(start)
// Should timeout within the SyncTimeout period (+ retries)
// Elapsed should be >= SyncTimeout (since that's when it times out) and < serverDelay
So(err, ShouldNotBeNil)
// Allow small tolerance for timing variability (5ms)
So(elapsed, ShouldBeGreaterThanOrEqualTo, opts.SyncTimeout-5*time.Millisecond)
So(elapsed, ShouldBeLessThan, serverDelay)
})
})
}