Files
zot/pkg/extensions/sync/on_demand.go
T
Vishwas Rajashekar 14cd52e993 feat(sync): move stream from global to per upstream
Signed-off-by: Vishwas Rajashekar <dev@vrajashkr.com>
2026-05-22 17:40:26 +05:30

342 lines
9.5 KiB
Go

//go:build sync
package sync
import (
"context"
"errors"
"sync"
"time"
"github.com/regclient/regclient/types/manifest"
zerr "zotregistry.dev/zot/v2/errors"
"zotregistry.dev/zot/v2/pkg/common"
"zotregistry.dev/zot/v2/pkg/log"
)
// request identifies one sync operation tracked in BaseOnDemand.requestStore.
// It is used as a sync.Map key, so it must only contain comparable fields.
type request struct {
// repo is the repository the request targets.
repo string
// reference is the reference passed to SyncImage, or the subject digest
// passed to SyncReferrers.
reference string
// used for background retries, at most one background retry per service
// (set to the index of the service in BaseOnDemand.services).
serviceID int
// isBackground is true for the marker entry of a background retry; foreground
// requests leave it at the zero value.
isBackground bool
}
/*
BaseOnDemand tracks on-demand sync requests, each of which can be for an image/signature/sbom.
It keeps track of all parallel requests: if two requests for the same image/signature/sbom arrive at the same time,
only the first one is processed. It also keeps track of all background retry routines.
*/
type BaseOnDemand struct {
// services are the upstream sync services; the sync methods iterate over them in slice order.
services []Service
// requestStore is keyed by request. The value is a `chan error` for an in-flight
// foreground request, or a struct{}{} marker for a running background retry.
requestStore *sync.Map
// log is the logger used by every method of this type.
log log.Logger
// streamManager is not set by NewOnDemand; it is installed through SetStreamManager.
streamManager StreamManager
}
// NewOnDemand builds a BaseOnDemand that logs through log and starts with an
// empty request store. No services and no stream manager are configured yet;
// register them with Add and SetStreamManager.
func NewOnDemand(log log.Logger) *BaseOnDemand {
	onDemand := new(BaseOnDemand)
	onDemand.requestStore = new(sync.Map)
	onDemand.log = log

	return onDemand
}
// Add appends service to the list of services used by the on-demand sync methods.
// Services are consulted in the order in which they were added.
// The append is not guarded by any lock in this method.
func (onDemand *BaseOnDemand) Add(service Service) {
onDemand.services = append(onDemand.services, service)
}
// SetStreamManager installs sm as the stream manager used by FetchManifestForStream,
// replacing any previously set value.
func (onDemand *BaseOnDemand) SetStreamManager(sm StreamManager) {
onDemand.streamManager = sm
}
// StreamManager returns the stream manager previously installed with SetStreamManager.
// The result is the zero value (nil) if none has been set.
func (onDemand *BaseOnDemand) StreamManager() StreamManager {
return onDemand.streamManager
}
// IsStreamingEnabledForRepo reports whether at least one registered service
// has streaming enabled for repo. It returns false when no services are registered.
func (onDemand *BaseOnDemand) IsStreamingEnabledForRepo(repo string) bool {
	for idx := range onDemand.services {
		if !onDemand.services[idx].IsStreamingForRepo(repo) {
			continue
		}

		return true
	}

	return false
}
// FetchManifestForStream returns the manifest of repo/reference for use with streaming sync.
// It is only intended for the streaming code path.
//
// If the stream manager already holds a manifest for the image, that manifest is
// returned and no further work is started. Otherwise each registered service is
// asked in order and the first fetch that returns without error is used. The
// manifest is then handed to the stream manager and a full SyncImage is started
// in the background on a context detached from the caller's cancellation.
//
// It returns zerr.ErrBlobNotFound when no service produced a manifest, or the
// error reported by StoreImageForStreaming.
func (onDemand *BaseOnDemand) FetchManifestForStream(
	ctx context.Context, repo, reference string,
) (manifest.Manifest, error) {
	// A manifest that is already streaming is served from the cache; there is
	// no need to start another background sync for it.
	if cached, found := onDemand.streamManager.StreamingImageManifest(repo, reference); found {
		onDemand.log.Debug().Str("repo", repo).Str("reference", reference).
			Msg("streaming manifest already present in cache.")

		return cached, nil
	}

	var upstreamManifest manifest.Manifest

	for _, service := range onDemand.services {
		onDemand.log.Debug().Str("repo", repo).Str("ref", reference).Msg("attempting to fetch manifest")

		fetched, fetchErr := service.FetchManifest(ctx, repo, reference)
		if fetchErr == nil {
			upstreamManifest = fetched

			break
		}

		// This service could not provide the manifest; try the next one.
		onDemand.log.Error().Err(fetchErr).Msg("failed to fetch manifest from service")
	}

	if upstreamManifest == nil {
		return nil, zerr.ErrBlobNotFound
	}

	onDemand.log.Debug().Str("repo", repo).Str("reference", reference).
		Msg("storing image for streaming")

	if storeErr := onDemand.streamManager.StoreImageForStreaming(repo, reference, upstreamManifest); storeErr != nil {
		onDemand.log.Error().Err(storeErr).Str("repo", repo).Str("reference", reference).
			Msg("failed to store manifest for streaming")

		return nil, storeErr
	}

	// Sync the complete image in the background; the request context is detached
	// so the sync is not cancelled when the caller's context is.
	go func() {
		errSync := onDemand.SyncImage(context.WithoutCancel(ctx), repo, reference)
		if errSync == nil {
			return
		}

		onDemand.log.Err(errSync).Str("repository", repo).Str("reference", reference).
			Msg("failed to sync image")
	}()

	return upstreamManifest, nil
}
// SyncImage syncs repo/reference on demand and blocks until a result is available.
//
// Concurrent calls for the same repo/reference are collapsed: the first caller
// registers a result channel in requestStore and starts the syncImage worker,
// while later callers wait on that same channel. The first caller removes the
// entry from requestStore when it returns.
//
// NOTE(review): the worker sends its error once and then closes the channel, so
// only one receiver observes the sent value; any other receiver reads nil from
// the closed channel — confirm that waiters may see nil for a failed sync.
// NOTE(review): SyncReferrers builds a key of the same shape (repo + reference),
// so the two kinds of request can join each other's in-flight entry — verify.
func (onDemand *BaseOnDemand) SyncImage(ctx context.Context, repo, reference string) error {
	key := request{repo: repo, reference: reference}
	resultCh := make(chan error)

	existing, inFlight := onDemand.requestStore.LoadOrStore(key, resultCh)
	if !inFlight {
		// This call owns the request: run the worker and clean up the entry afterwards.
		defer onDemand.requestStore.Delete(key)

		go onDemand.syncImage(ctx, repo, reference, resultCh)

		return <-resultCh
	}

	onDemand.log.Info().Str("repo", repo).Str("reference", reference).
		Msg("image already demanded, waiting on channel")

	pending, _ := existing.(chan error)

	return <-pending
}
// SyncReferrers syncs the referrers of subjectDigestStr in repo on demand,
// restricted to referenceTypes, and blocks until a result is available.
//
// Concurrent calls for the same repo/subject digest are collapsed: the first
// caller registers a result channel in requestStore and starts the
// syncReferrers worker, while later callers wait on that same channel. The
// first caller removes the entry from requestStore when it returns.
//
// NOTE(review): the worker sends its error once and then closes the channel, so
// only one receiver observes the sent value; any other receiver reads nil from
// the closed channel — confirm that waiters may see nil for a failed sync.
// NOTE(review): SyncImage builds a key of the same shape (repo + reference),
// so the two kinds of request can join each other's in-flight entry — verify.
func (onDemand *BaseOnDemand) SyncReferrers(ctx context.Context, repo string,
	subjectDigestStr string, referenceTypes []string,
) error {
	key := request{repo: repo, reference: subjectDigestStr}
	resultCh := make(chan error)

	existing, inFlight := onDemand.requestStore.LoadOrStore(key, resultCh)
	if !inFlight {
		// This call owns the request: run the worker and clean up the entry afterwards.
		defer onDemand.requestStore.Delete(key)

		go onDemand.syncReferrers(ctx, repo, subjectDigestStr, referenceTypes, resultCh)

		return <-resultCh
	}

	onDemand.log.Info().Str("repo", repo).Str("reference", subjectDigestStr).
		Msg("referrers for image already demanded, waiting on channel")

	pending, _ := existing.(chan error)

	return <-pending
}
// syncReferrers is the worker started by SyncReferrers. It asks each registered
// service in order to sync the referrers of subjectDigestStr in repo, stopping at
// the first service that succeeds.
//
// Each attempt runs on a context detached from ctx's cancellation and bounded by
// the service's own sync timeout. Errors that mean "not available from this
// upstream" (manifest/repo not found, filtered out, not signed, unauthorized)
// move on to the next service. Any other error also moves on to the next service
// and, if the service allows retries and no retry is already running for this
// repo/reference/service, starts a single background retry.
//
// The error of the last attempted service (nil on success, and nil when there are
// no services) is sent on syncResult exactly once, after which syncResult is closed.
func (onDemand *BaseOnDemand) syncReferrers(ctx context.Context, repo, subjectDigestStr string,
	referenceTypes []string, syncResult chan error,
) {
	defer close(syncResult)

	var err error

	for serviceID, service := range onDemand.services {
		timeout := service.GetSyncTimeout()

		onDemand.log.Debug().
			Str("repo", repo).
			Str("reference", subjectDigestStr).
			Int("serviceID", serviceID).
			Dur("timeout", timeout).
			Msg("starting on-demand referrer sync")

		// Create a detached context with timeout to ensure sync completes even if HTTP client disconnects.
		// This prevents Kubernetes timeout/retries from aborting in-progress referrer downloads.
		syncCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), timeout)
		err = service.SyncReferrers(syncCtx, repo, subjectDigestStr, referenceTypes)

		cancel()

		if err == nil {
			break
		}

		if errors.Is(err, zerr.ErrManifestNotFound) ||
			errors.Is(err, zerr.ErrSyncImageFilteredOut) ||
			errors.Is(err, zerr.ErrSyncImageNotSigned) ||
			errors.Is(err, zerr.ErrRepoNotFound) ||
			// some public registries may return 401 for not found.
			errors.Is(err, zerr.ErrUnauthorizedAccess) {
			continue
		}

		// Check retry eligibility before registering the background marker: the
		// marker is only removed by the retry goroutine, so storing it for a
		// service that never retries would leave it in requestStore forever.
		if !service.CanRetryOnError() {
			continue
		}

		req := request{
			repo:         repo,
			reference:    subjectDigestStr,
			serviceID:    serviceID,
			isBackground: true,
		}

		// if there is already a background routine, skip
		if _, requested := onDemand.requestStore.LoadOrStore(req, struct{}{}); requested {
			continue
		}

		retryErr := err

		// retry in background
		go func(service Service, serviceTimeout time.Duration) {
			// remove the background marker once the retry has finished
			defer func() {
				onDemand.requestStore.Delete(req)
				onDemand.log.Info().Str("repo", repo).Str("reference", subjectDigestStr).
					Msg("sync routine for image exited")
			}()

			onDemand.log.Info().Str("repo", repo).Str("reference", subjectDigestStr).Str("err", retryErr.Error()).
				Msg("sync routine: starting routine to copy image, because of error")

			// Use detached context with timeout for background retry
			retryCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), serviceTimeout)
			defer cancel()

			err := service.SyncReferrers(retryCtx, repo, subjectDigestStr, referenceTypes)
			if err != nil {
				// NOTE(review): this is logged after the retry has failed, although the
				// text describes starting a retry; the message is kept unchanged because
				// log consumers may match on it.
				onDemand.log.Error().Str("errorType", common.TypeOf(err)).Str("repo", repo).Str("reference", subjectDigestStr).
					Err(err).Msg("sync routine: starting routine to retry copy image due to error")
			}
		}(service, timeout)
	}

	syncResult <- err
}
// syncImage is the worker started by SyncImage. It asks each registered service
// in order to sync repo/reference, stopping at the first service that succeeds.
//
// Each attempt runs on a context detached from ctx's cancellation and bounded by
// the service's own sync timeout. Errors that mean "not available from this
// upstream" (manifest/repo not found, filtered out, not signed, unauthorized)
// move on to the next service. Any other error also moves on to the next service
// and, if the service allows retries and no retry is already running for this
// repo/reference/service, starts a single background retry.
//
// The error of the last attempted service (nil on success, and nil when there are
// no services) is sent on syncResult exactly once, after which syncResult is closed.
func (onDemand *BaseOnDemand) syncImage(ctx context.Context, repo, reference string, syncResult chan error) {
	defer close(syncResult)

	var err error

	for serviceID, service := range onDemand.services {
		timeout := service.GetSyncTimeout()

		onDemand.log.Debug().
			Str("repo", repo).
			Str("reference", reference).
			Int("serviceID", serviceID).
			Dur("timeout", timeout).
			Msg("starting on-demand image sync")

		// Create a detached context with timeout to ensure sync completes even if HTTP client disconnects.
		// This prevents Kubernetes timeout/retries from aborting in-progress image downloads.
		syncCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), timeout)
		err = service.SyncImage(syncCtx, repo, reference)

		cancel()

		if err == nil {
			break
		}

		if errors.Is(err, zerr.ErrManifestNotFound) ||
			errors.Is(err, zerr.ErrSyncImageFilteredOut) ||
			errors.Is(err, zerr.ErrSyncImageNotSigned) ||
			errors.Is(err, zerr.ErrRepoNotFound) ||
			// some public registries may return 401 for not found.
			errors.Is(err, zerr.ErrUnauthorizedAccess) {
			continue
		}

		// Check retry eligibility before registering the background marker: the
		// marker is only removed by the retry goroutine, so storing it for a
		// service that never retries would leave it in requestStore forever.
		if !service.CanRetryOnError() {
			continue
		}

		req := request{
			repo:         repo,
			reference:    reference,
			serviceID:    serviceID,
			isBackground: true,
		}

		// if there is already a background routine, skip
		if _, requested := onDemand.requestStore.LoadOrStore(req, struct{}{}); requested {
			continue
		}

		retryErr := err

		// retry in background
		go func(service Service, serviceTimeout time.Duration) {
			// remove the background marker once the retry has finished
			defer func() {
				onDemand.requestStore.Delete(req)
				onDemand.log.Info().Str("repo", repo).Str("reference", reference).
					Msg("sync routine for image exited")
			}()

			onDemand.log.Info().Str("repo", repo).Str("reference", reference).Str("err", retryErr.Error()).
				Msg("sync routine: starting routine to retry copy image due to error")

			// Use detached context with timeout for background retry
			retryCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), serviceTimeout)
			defer cancel()

			err := service.SyncImage(retryCtx, repo, reference)
			if err != nil {
				onDemand.log.Error().Str("errorType", common.TypeOf(err)).Str("repo", repo).Str("reference", reference).
					Err(err).Msg("sync routine: error while copying image")
			}
		}(service, timeout)
	}

	syncResult <- err
}