Files
zot/pkg/extensions/sync/chunked_blob_reader.go
T
Vishwas Rajashekar fa39761700 feat(sync): fix review comments + extras
Signed-off-by: Vishwas Rajashekar <dev@vrajashkr.com>
2026-05-22 17:40:26 +05:30

174 lines
3.9 KiB
Go

package sync
import (
"bytes"
"errors"
"io"
"os"
"sync"
"github.com/regclient/regclient/types/blob"
zerr "zotregistry.dev/zot/v2/errors"
"zotregistry.dev/zot/v2/pkg/log"
)
// ChunkedBlobReader is a helper that splits a blob into chunks based on chunkSize
// It then copies chunks to disk.
// The latest chunk number is announced to channels of subscribers.
type ChunkedBlobReader struct {
numChunksTotal int64
numChunksRead int64
chunkSizeBytes int64
onDiskPath string
onDiskFile *os.File
InFlightReader *blob.BReader
clientMu sync.Mutex
clientCond *sync.Cond
chunksMu sync.RWMutex
clients map[int]chan int64
numClientsTotal int
logger log.Logger
}
func NewChunkedBlobReader(onDiskPath string, chunkSizeBytes int64, logger log.Logger) (*ChunkedBlobReader, error) {
createdFile, err := os.OpenFile(onDiskPath, os.O_CREATE|os.O_WRONLY, 0o644)
if err != nil {
return nil, err
}
cbr := &ChunkedBlobReader{
clients: make(map[int]chan int64),
logger: logger,
onDiskPath: onDiskPath,
onDiskFile: createdFile,
chunkSizeBytes: chunkSizeBytes,
}
cbr.clientCond = sync.NewCond(&cbr.clientMu)
return cbr, nil
}
func (cbr *ChunkedBlobReader) InitReader(r *blob.BReader, numChunksTotal int64) {
if cbr.InFlightReader == nil {
cbr.numChunksTotal = numChunksTotal
cbr.InFlightReader = r
}
}
func (cbr *ChunkedBlobReader) Read(buff []byte) (int, error) {
if cbr.InFlightReader == nil {
return 0, zerr.ErrStreamReaderNotInitialized
}
cbr.chunksMu.Lock()
// Access the file that regclient is writing to avoid this extra duplication.
var internalBuffBytes []byte = make([]byte, 0, cbr.chunkSizeBytes)
internalBuff := bytes.NewBuffer(internalBuffBytes)
multiWriter := io.MultiWriter(cbr.onDiskFile, internalBuff)
numBytesRead, err := io.CopyN(multiWriter, cbr.InFlightReader, cbr.chunkSizeBytes)
if err != nil {
if !errors.Is(err, io.EOF) {
// upstream download error
cbr.logger.Error().Err(err).Msg("failed to copy from in flight reader")
cbr.chunksMu.Unlock()
// drain all clients and close their channels
for clientId := range cbr.clients {
cbr.Unsubscribe(clientId)
}
return -1, err
}
}
copy(buff, internalBuff.Bytes())
cbr.numChunksRead++
if cbr.numChunksRead == cbr.numChunksTotal {
cbr.onDiskFile.Close()
}
cbr.chunksMu.Unlock()
cbr.clientMu.Lock()
// Update all clients about the new chunk
// Clients always read the chunk from disk
var wg sync.WaitGroup
for _, c := range cbr.clients {
wg.Go(func() {
c <- cbr.numChunksRead
})
}
cbr.clientMu.Unlock()
wg.Wait()
return int(numBytesRead), err
}
// Subscribe to the reader each time a new client is interested in the current blob,
// the client would create a subscription here with a channel where latest chunk info is sent.
func (cbr *ChunkedBlobReader) Subscribe(channel chan int64) int {
cbr.clientMu.Lock()
defer func() {
cbr.clientCond.Broadcast()
cbr.clientMu.Unlock()
}()
cbr.clients[cbr.numClientsTotal] = channel
chanId := cbr.numClientsTotal
cbr.numClientsTotal++
// Announce the current number of available chunks to the new client only if the reader is initialized
if cbr.InFlightReader != nil {
cbr.chunksMu.RLock()
defer cbr.chunksMu.RUnlock()
go func() {
channel <- cbr.numChunksRead
}()
}
return chanId
}
func (cbr *ChunkedBlobReader) Unsubscribe(clientId int) {
cbr.clientMu.Lock()
defer func() {
cbr.clientCond.Broadcast()
cbr.clientMu.Unlock()
}()
channel, ok := cbr.clients[clientId]
if ok {
close(channel)
cbr.numClientsTotal--
delete(cbr.clients, clientId)
}
}
func (cbr *ChunkedBlobReader) ToBReader() *blob.BReader {
return blob.NewReader(
blob.WithHeader(cbr.InFlightReader.RawHeaders()),
blob.WithDesc(cbr.InFlightReader.GetDescriptor()),
blob.WithReader(cbr),
)
}
func (cbr *ChunkedBlobReader) WaitForClientEmpty() {
cbr.clientMu.Lock()
defer cbr.clientMu.Unlock()
for len(cbr.clients) > 0 {
cbr.clientCond.Wait()
}
}