mirror of
https://github.com/project-zot/zot.git
synced 2026-06-15 11:37:56 +08:00
fix: Add HTTP client timeouts to prevent indefinite hangs in sync operations (#3574)
This commit is contained in:
+10
-6
@@ -28,6 +28,7 @@ import (
|
||||
extconf "zotregistry.dev/zot/v2/pkg/extensions/config"
|
||||
eventsconf "zotregistry.dev/zot/v2/pkg/extensions/config/events"
|
||||
"zotregistry.dev/zot/v2/pkg/extensions/monitoring"
|
||||
syncConstants "zotregistry.dev/zot/v2/pkg/extensions/sync/constants"
|
||||
zlog "zotregistry.dev/zot/v2/pkg/log"
|
||||
storageConstants "zotregistry.dev/zot/v2/pkg/storage/constants"
|
||||
)
|
||||
@@ -663,15 +664,18 @@ func applyDefaultValues(config *config.Config, viperInstance *viper.Viper, logge
|
||||
config.Extensions.Sync.Enable = &defaultVal
|
||||
}
|
||||
|
||||
defaultSyncTimeout := 3 * time.Hour
|
||||
|
||||
for idx, regCfg := range config.Extensions.Sync.Registries {
|
||||
for idx := range config.Extensions.Sync.Registries {
|
||||
regCfg := &config.Extensions.Sync.Registries[idx]
|
||||
if regCfg.TLSVerify == nil {
|
||||
config.Extensions.Sync.Registries[idx].TLSVerify = &defaultVal
|
||||
regCfg.TLSVerify = &defaultVal
|
||||
}
|
||||
|
||||
if config.Extensions.Sync.Registries[idx].SyncTimeout == 0 {
|
||||
config.Extensions.Sync.Registries[idx].SyncTimeout = defaultSyncTimeout
|
||||
if regCfg.SyncTimeout == 0 {
|
||||
regCfg.SyncTimeout = syncConstants.DefaultSyncTimeout
|
||||
}
|
||||
|
||||
if regCfg.ResponseHeaderTimeout == 0 {
|
||||
regCfg.ResponseHeaderTimeout = syncConstants.DefaultResponseHeaderTimeout
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,18 +23,19 @@ type Config struct {
|
||||
}
|
||||
|
||||
type RegistryConfig struct {
|
||||
URLs []string
|
||||
PollInterval time.Duration
|
||||
Content []Content
|
||||
TLSVerify *bool
|
||||
OnDemand bool
|
||||
CertDir string
|
||||
MaxRetries *int
|
||||
RetryDelay *time.Duration
|
||||
OnlySigned *bool
|
||||
CredentialHelper string
|
||||
PreserveDigest bool // sync without converting
|
||||
SyncTimeout time.Duration // timeout for on-demand sync operations; if zero or unset, defaults to 3 hours
|
||||
URLs []string
|
||||
PollInterval time.Duration
|
||||
Content []Content
|
||||
TLSVerify *bool
|
||||
OnDemand bool
|
||||
CertDir string
|
||||
MaxRetries *int
|
||||
RetryDelay *time.Duration
|
||||
OnlySigned *bool
|
||||
CredentialHelper string
|
||||
PreserveDigest bool // sync without converting
|
||||
SyncTimeout time.Duration // overall HTTP client timeout for all sync operations
|
||||
ResponseHeaderTimeout time.Duration `yaml:"-"` // response header timeout; set in root.go
|
||||
}
|
||||
|
||||
type Content struct {
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
package constants
|
||||
|
||||
import "time"
|
||||
|
||||
// references type.
|
||||
const (
|
||||
Cosign = "CosignSignature"
|
||||
@@ -7,3 +9,9 @@ const (
|
||||
Tag = "TagReference"
|
||||
SyncBlobUploadDir = ".sync"
|
||||
)
|
||||
|
||||
// Default timeout settings for sync operations.
|
||||
const (
|
||||
DefaultSyncTimeout = 3 * time.Hour // default timeout for all sync operations (on-demand and periodic)
|
||||
DefaultResponseHeaderTimeout = 30 * time.Second // default timeout for reading response headers
|
||||
)
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -26,6 +27,7 @@ import (
|
||||
"zotregistry.dev/zot/v2/pkg/cluster"
|
||||
"zotregistry.dev/zot/v2/pkg/common"
|
||||
syncconf "zotregistry.dev/zot/v2/pkg/extensions/config/sync"
|
||||
syncConstants "zotregistry.dev/zot/v2/pkg/extensions/sync/constants"
|
||||
"zotregistry.dev/zot/v2/pkg/log"
|
||||
mTypes "zotregistry.dev/zot/v2/pkg/meta/types"
|
||||
"zotregistry.dev/zot/v2/pkg/storage"
|
||||
@@ -211,7 +213,7 @@ func (service *BaseService) CanRetryOnError() bool {
|
||||
|
||||
func (service *BaseService) GetSyncTimeout() time.Duration {
|
||||
if service.config.SyncTimeout == 0 {
|
||||
return 3 * time.Hour // default timeout
|
||||
return syncConstants.DefaultSyncTimeout
|
||||
}
|
||||
|
||||
return service.config.SyncTimeout
|
||||
@@ -882,6 +884,31 @@ func newClient(opts syncconf.RegistryConfig, credentials syncconf.CredentialsFil
|
||||
regOpts = append(regOpts, reg.WithDelay(*opts.RetryDelay, *opts.RetryDelay))
|
||||
}
|
||||
|
||||
// Configure transport with timeouts to prevent indefinite hangs.
|
||||
// See https://blog.cloudflare.com/the-complete-guide-to-golang-net-http-timeouts/
|
||||
// Clone DefaultTransport to preserve proxy/TLS settings and existing timeouts
|
||||
// (DialContext: 30s, TLSHandshakeTimeout: 10s).
|
||||
// regclient uses DefaultTransport internally if no custom transport is provided, so this ensures compatibility.
|
||||
transport := http.DefaultTransport.(*http.Transport).Clone() //nolint: forcetypeassert
|
||||
|
||||
// ResponseHeaderTimeout: prevents hanging when server connects but doesn't send headers.
|
||||
// Set programmatically in root.go. This timeout applies only to waiting for response headers
|
||||
// after the request is sent. It does NOT include DialContext (30s) or TLSHandshakeTimeout (10s),
|
||||
// which are separate component timeouts. Doesn't cover body transfer time, which is expected
|
||||
// to be slow for large images.
|
||||
transport.ResponseHeaderTimeout = opts.ResponseHeaderTimeout
|
||||
|
||||
// Use SyncTimeout for overall HTTP client timeout. This is the maximum time for the entire
|
||||
// HTTP request, covering all stages: DialContext (connection establishment), TLSHandshakeTimeout
|
||||
// (TLS handshake), ResponseHeaderTimeout (waiting for headers), and body transfer time.
|
||||
// Critical for periodic sync operations (catalog listing, SyncRepo, getTags) which don't use
|
||||
// on-demand timeout contexts and could otherwise hang indefinitely if upstream doesn't respond.
|
||||
httpClient := &http.Client{
|
||||
Transport: transport,
|
||||
Timeout: opts.SyncTimeout,
|
||||
}
|
||||
regOpts = append(regOpts, reg.WithHTTPClient(httpClient))
|
||||
|
||||
client := regclient.New(
|
||||
regclient.WithDockerCerts(),
|
||||
regclient.WithDockerCreds(),
|
||||
|
||||
@@ -9,7 +9,10 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -23,6 +26,7 @@ import (
|
||||
syncconf "zotregistry.dev/zot/v2/pkg/extensions/config/sync"
|
||||
"zotregistry.dev/zot/v2/pkg/extensions/lint"
|
||||
"zotregistry.dev/zot/v2/pkg/extensions/monitoring"
|
||||
syncConstants "zotregistry.dev/zot/v2/pkg/extensions/sync/constants"
|
||||
"zotregistry.dev/zot/v2/pkg/log"
|
||||
mTypes "zotregistry.dev/zot/v2/pkg/meta/types"
|
||||
"zotregistry.dev/zot/v2/pkg/storage"
|
||||
@@ -994,3 +998,111 @@ func TestDestinationRegistry(t *testing.T) {
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// TestNewClientTimeoutBehavior verifies that newClient creates a client that respects timeouts.
|
||||
func TestNewClientTimeoutBehavior(t *testing.T) {
|
||||
Convey("Test newClient timeout behavior", t, func() {
|
||||
logger := log.NewTestLogger()
|
||||
zeroRetries := 0
|
||||
retryDelay := 1 * time.Millisecond
|
||||
|
||||
Convey("Client respects ResponseHeaderTimeout from config", func() {
|
||||
// Server delay - must be longer than ResponseHeaderTimeout to trigger timeout
|
||||
serverDelay := 1 * time.Second
|
||||
|
||||
// Create a test server that accepts connections but delays sending headers
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
// Sleep longer than the timeout to simulate a server that connects
|
||||
// but doesn't send headers quickly
|
||||
time.Sleep(serverDelay)
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
host := strings.TrimPrefix(server.URL, "http://")
|
||||
|
||||
// Create sync config with short ResponseHeaderTimeout
|
||||
opts := syncconf.RegistryConfig{
|
||||
URLs: []string{server.URL},
|
||||
SyncTimeout: syncConstants.DefaultSyncTimeout,
|
||||
ResponseHeaderTimeout: 10 * time.Millisecond,
|
||||
MaxRetries: &zeroRetries,
|
||||
RetryDelay: &retryDelay,
|
||||
}
|
||||
|
||||
// Create the client using the actual production function
|
||||
client, _, err := newClient(opts, syncconf.CredentialsFile{}, logger)
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
// Create a reference to the test server
|
||||
r, err := ref.New(host + "/repo:tag")
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
// Make a request - it should timeout quickly due to ResponseHeaderTimeout
|
||||
start := time.Now()
|
||||
// Use ManifestHead as a lightweight operation to trigger the HTTP request
|
||||
_, err = client.ManifestHead(context.Background(), r)
|
||||
elapsed := time.Since(start)
|
||||
|
||||
// Should timeout quickly (approx 10ms * retries + overhead)
|
||||
// With 1ms retry delay and 10ms timeout, retries add overhead but should still be < serverDelay
|
||||
So(err, ShouldNotBeNil)
|
||||
// Allow small tolerance for timing variability (5ms)
|
||||
So(elapsed, ShouldBeGreaterThanOrEqualTo, opts.ResponseHeaderTimeout-5*time.Millisecond)
|
||||
So(elapsed, ShouldBeLessThan, serverDelay)
|
||||
})
|
||||
|
||||
Convey("Client respects SyncTimeout (overall) from config", func() {
|
||||
// Server delay - must be longer than SyncTimeout to trigger timeout
|
||||
serverDelay := 1 * time.Second
|
||||
|
||||
// Create a test server that sends headers quickly but delays body transfer
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
// Send headers immediately with Content-Length to bypass ResponseHeaderTimeout
|
||||
// but indicate there's a body coming
|
||||
w.Header().Set("Content-Length", "100")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
// Flush headers to ensure they're sent
|
||||
if flusher, ok := w.(http.Flusher); ok {
|
||||
flusher.Flush()
|
||||
}
|
||||
// Delay sending body data - this will trigger SyncTimeout (overall client timeout)
|
||||
time.Sleep(serverDelay)
|
||||
// Write body data (client should timeout before this completes)
|
||||
w.Write(make([]byte, 100))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
host := strings.TrimPrefix(server.URL, "http://")
|
||||
|
||||
// Create sync config with short SyncTimeout
|
||||
opts := syncconf.RegistryConfig{
|
||||
URLs: []string{server.URL},
|
||||
SyncTimeout: 10 * time.Millisecond,
|
||||
ResponseHeaderTimeout: syncConstants.DefaultResponseHeaderTimeout,
|
||||
MaxRetries: &zeroRetries,
|
||||
RetryDelay: &retryDelay,
|
||||
}
|
||||
|
||||
// Create the client using the actual production function
|
||||
client, _, err := newClient(opts, syncconf.CredentialsFile{}, logger)
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
r, err := ref.New(host + "/repo:tag")
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
// Make a request - it should timeout due to overall SyncTimeout
|
||||
// Use ManifestGet (not ManifestHead) to test body transfer timeout
|
||||
start := time.Now()
|
||||
_, err = client.ManifestGet(context.Background(), r)
|
||||
elapsed := time.Since(start)
|
||||
|
||||
// Should timeout within the SyncTimeout period (+ retries)
|
||||
// Elapsed should be >= SyncTimeout (since that's when it times out) and < serverDelay
|
||||
So(err, ShouldNotBeNil)
|
||||
// Allow small tolerance for timing variability (5ms)
|
||||
So(elapsed, ShouldBeGreaterThanOrEqualTo, opts.SyncTimeout-5*time.Millisecond)
|
||||
So(elapsed, ShouldBeLessThan, serverDelay)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user