From b87c18fc09aa3216a39a6da169885d18440e468b Mon Sep 17 00:00:00 2001 From: Vishwas Rajashekar Date: Mon, 18 May 2026 18:57:12 +0530 Subject: [PATCH] feat(sync): add bats + remove sync retry Signed-off-by: Vishwas Rajashekar --- examples/config-sync-stream.json | 5 +- test/blackbox/ci.sh | 2 +- test/blackbox/sync_streaming.bats | 359 ++++++++++++++++++++++++++++++ test/ports.json | 10 + 4 files changed, 371 insertions(+), 5 deletions(-) create mode 100644 test/blackbox/sync_streaming.bats diff --git a/examples/config-sync-stream.json b/examples/config-sync-stream.json index 55e29053..fc392972 100644 --- a/examples/config-sync-stream.json +++ b/examples/config-sync-stream.json @@ -20,10 +20,7 @@ "http://localhost:9000" ], "onDemand": true, - "tlsVerify": false, - "maxRetries": 5, - "retryDelay": "30s", - "syncTimeout": "10m" + "tlsVerify": false } ] } diff --git a/test/blackbox/ci.sh b/test/blackbox/ci.sh index 8f0d3da2..171c9484 100755 --- a/test/blackbox/ci.sh +++ b/test/blackbox/ci.sh @@ -19,7 +19,7 @@ tests=("pushpull" "pushpull_authn" "delete_images" "referrers" "metadata" "anony "annotations" "detect_manifest_collision" "cve" "sync" "sync_docker" "sync_replica_cluster" "scrub" "garbage_collect" "metrics" "metrics_minimal" "multiarch_index" "docker_compat" "redis_local" "redis_session_store" "events_nats" "events_http" "events_nats_lint_failure" "events_http_lint_failure" "events_sink_failure" "events_config_decoding" - "fips140" "fips140_authn" "openid_claim_mapping" "upgrade" "upgrade_minimal" "dynamic_tls" "quota") + "fips140" "fips140_authn" "openid_claim_mapping" "upgrade" "upgrade_minimal" "dynamic_tls" "quota" "sync_streaming") for test in ${tests[*]}; do ${BATS} ${BATS_FLAGS} ${SCRIPTPATH}/${test}.bats > ${test}.log & pids+=($!) 
diff --git a/test/blackbox/sync_streaming.bats b/test/blackbox/sync_streaming.bats new file mode 100644 index 00000000..2e3190b4 --- /dev/null +++ b/test/blackbox/sync_streaming.bats @@ -0,0 +1,359 @@ +# Note: Intended to be run as "make run-blackbox-tests" or "make run-blackbox-ci" + +# This test suite verifies the behavior of zot when streaming is enabled. + +load helpers_zot +load helpers_wait +load ../port_helper + +function verify_prerequisites() { + if ! command -v curl >/dev/null; then + echo "you need to install curl as a prerequisite to running the tests" >&3 + return 1 + fi + + if ! command -v jq >/dev/null; then + echo "you need to install jq as a prerequisite to running the tests" >&3 + return 1 + fi + + if ! command -v skopeo >/dev/null; then + echo "you need to install skopeo as a prerequisite to running the tests" >&3 + return 1 + fi + + return 0 +} + +# delete_repo_from_zot PORT REPO TAG ROOT +# +# Deletes the manifest tagged TAG of REPO from the registry listening on PORT, +# then removes REPO's blobs directory under the local storage root ROOT so that a +# subsequent on-demand sync fetch is triggered unconditionally (no stale blobs remain in cache). +function delete_repo_from_zot() { + local port="${1}" + local repo="${2}" + local tag="${3}" + local root="${4}" + + local digest + digest=$(curl -sI "http://127.0.0.1:${port}/v2/${repo}/manifests/${tag}" \ + | grep -i docker-content-digest \ + | tr -d '\r' \ + | awk '{print $2}') + + curl -s -X DELETE "http://127.0.0.1:${port}/v2/${repo}/manifests/${digest}" >/dev/null + + # delete blobs from disk; :? aborts instead of touching /blobs when an argument is empty + rm -r -- "${root:?}/${repo:?}/blobs" +} + 
+function setup_file() { + if ! verify_prerequisites; then + exit 1 + fi + + local upstream_root="${BATS_FILE_TMPDIR}/zot-upstream" + local test_root="${BATS_FILE_TMPDIR}/zot-test" + mkdir -p "${upstream_root}" "${test_root}" + + # Persist storage root path for use in individual tests + echo "${test_root}" > "${BATS_FILE_TMPDIR}/test_root" + + local upstream_port + upstream_port=$(get_free_port_for_service "zot_upstream") + echo "${upstream_port}" > "${BATS_FILE_TMPDIR}/zot.upstream.port" + + local test_port + test_port=$(get_free_port_for_service "zot_test") + echo "${test_port}" > "${BATS_FILE_TMPDIR}/zot.test.port" + + # Upstream config (bare minimum, latest release minimal binary) + local upstream_config="${BATS_FILE_TMPDIR}/zot_upstream_config.json" + cat > "${upstream_config}" < "${test_config}" <&2 + exit 1 + fi + chmod +x "${upstream_bin}" + fi + + "${upstream_bin}" serve "${upstream_config}" & + local upstream_pid=$! + echo "${upstream_pid}" > "${BATS_FILE_TMPDIR}/zot.upstream.pid" + echo -n "${upstream_pid} " >> "${BATS_FILE_TMPDIR}/zot.pid" + echo "wait for upstream zot to be reachable..." >&3 + wait_zot_reachable "${upstream_port}" + echo "upstream zot is reachable" >&3 + + # Start zot server under test + echo "starting zot server under test..." >&3 + zot_serve "${ZOT_PATH}" "${test_config}" + wait_zot_reachable "${test_port}" + echo "test zot is reachable" >&3 + + # Download the test image to the shared test-data directory. 
+ # ollama/ollama:rocm is a ~1GB image; presumably chosen so that pulls stay + # in flight long enough for the tests below to interrupt them (TODO confirm). + skopeo copy \ + "docker://docker.io/ollama/ollama:rocm" \ + "oci:${TEST_DATA_DIR}/ollama:rocm" +} + +function teardown_file() { + zot_stop_all + local test_root + test_root=$(cat "${BATS_FILE_TMPDIR}/test_root" 2>/dev/null || echo "") + + echo "=== upstream zot log ===" >&3 + cat "${BATS_FILE_TMPDIR}/zot-upstream/zot.log" >&3 || true + + echo "=== test zot log ===" >&3 + [ -n "${test_root}" ] && cat "${test_root}/zot.log" >&3 || true +} + +@test "pull non-existent image returns NAME_UNKNOWN error" { + local test_port + test_port=$(cat "${BATS_FILE_TMPDIR}/zot.test.port") + + run curl -s "http://127.0.0.1:${test_port}/v2/nonexistent/manifests/latest" + [ "$status" -eq 0 ] + [ "$(echo "${lines[-1]}" | jq -r '.errors[0].code')" = "NAME_UNKNOWN" ] +} + +@test "push image to upstream" { + local upstream_port + upstream_port=$(cat "${BATS_FILE_TMPDIR}/zot.upstream.port") + + run skopeo copy --dest-tls-verify=false \ + "oci:${TEST_DATA_DIR}/ollama:rocm" \ + "docker://127.0.0.1:${upstream_port}/ollama/ollama:rocm" + [ "$status" -eq 0 ] + + # sleep for a bit to allow commit + sleep 3 + + run curl -s "http://127.0.0.1:${upstream_port}/v2/ollama/ollama/tags/list" + [ "$status" -eq 0 ] + [ "$(echo "${lines[-1]}" | jq -r '.tags[]')" = "rocm" ] +} + 
+@test "concurrent pulls of image through streaming sync both succeed" { + local test_port + test_port=$(cat "${BATS_FILE_TMPDIR}/zot.test.port") + local pull_dir1="${BATS_FILE_TMPDIR}/pull1" + local pull_dir2="${BATS_FILE_TMPDIR}/pull2" + mkdir -p "${pull_dir1}" "${pull_dir2}" + + # Launch both pulls in parallel + skopeo copy --src-tls-verify=false \ + "docker://127.0.0.1:${test_port}/ollama/ollama:rocm" \ + "oci:${pull_dir1}/ollama:rocm" >/dev/null 2>&1 & + local pid1=$! + + sleep 1 + + skopeo copy --src-tls-verify=false \ + "docker://127.0.0.1:${test_port}/ollama/ollama:rocm" \ + "oci:${pull_dir2}/ollama:rocm" >/dev/null 2>&1 & + local pid2=$! + + # Reap both pulls before asserting; under bats' errexit a bare failing wait would abort here and leave the other pull running. + local status1=0 status2=0 + wait "${pid1}" || status1=$? + wait "${pid2}" || status2=$? + + [ "${status1}" -eq 0 ] + [ "${status2}" -eq 0 ] +} + +@test "delete image from zot after first concurrent pull" { + local test_port + test_port=$(cat "${BATS_FILE_TMPDIR}/zot.test.port") + local test_root + test_root=$(cat "${BATS_FILE_TMPDIR}/test_root") + local index_json="${test_root}/ollama/ollama/index.json" + + sleep 3 + + # Confirm the image is present on the filesystem before deleting. + # Can't use curl here — an HTTP request would re-trigger on-demand sync. + run jq '(.manifests // []) | map(select(.annotations["org.opencontainers.image.ref.name"] == "rocm")) | length' \ + "${index_json}" + [ "$status" -eq 0 ] + [ "${lines[-1]}" -gt 0 ] + + delete_repo_from_zot "${test_port}" "ollama/ollama" "rocm" "${test_root}" + + sleep 2 + + # Confirm the manifest is absent from the local OCI index after deletion. + # Again, avoid curl to prevent on-demand re-sync from the upstream. 
+ run jq '(.manifests // []) | map(select(.annotations["org.opencontainers.image.ref.name"] == "rocm")) | length' \ + "${index_json}" + [ "$status" -eq 0 ] + [ "${lines[-1]}" -eq 0 ] +} + +@test "concurrent pulls - one terminated early while the other succeeds" { + local test_port + test_port=$(cat "${BATS_FILE_TMPDIR}/zot.test.port") + local pull_dir1="${BATS_FILE_TMPDIR}/pull3" + local pull_dir2="${BATS_FILE_TMPDIR}/pull4" + mkdir -p "${pull_dir1}" "${pull_dir2}" + + # Start both pulls in parallel. + skopeo copy --src-tls-verify=false \ + "docker://127.0.0.1:${test_port}/ollama/ollama:rocm" \ + "oci:${pull_dir1}/ollama:rocm" >/dev/null 2>&1 & + local pid1=$! + + sleep 1 + + skopeo copy --src-tls-verify=false \ + "docker://127.0.0.1:${test_port}/ollama/ollama:rocm" \ + "oci:${pull_dir2}/ollama:rocm" >/dev/null 2>&1 & + local pid2=$! + + # Allow streaming to begin, then terminate the first client. + sleep 2 + + kill "${pid1}" 2>/dev/null || true + wait "${pid1}" 2>/dev/null || true + + # The second pull must complete successfully regardless. + wait "${pid2}" + local status_pid2=$? + [ "${status_pid2}" -eq 0 ] +} + +@test "delete image from zot after interrupted pull" { + local test_port + test_port=$(cat "${BATS_FILE_TMPDIR}/zot.test.port") + local test_root + test_root=$(cat "${BATS_FILE_TMPDIR}/test_root") + local index_json="${test_root}/ollama/ollama/index.json" + + sleep 3 + + # Confirm the image is present on the filesystem before deleting. + # Can't use curl here — an HTTP request would re-trigger on-demand sync. 
+ run jq '(.manifests // []) | map(select(.annotations["org.opencontainers.image.ref.name"] == "rocm")) | length' \ + "${index_json}" + [ "$status" -eq 0 ] + [ "${lines[-1]}" -gt 0 ] + + delete_repo_from_zot "${test_port}" "ollama/ollama" "rocm" "${test_root}" + + sleep 2 + + # Confirm the manifest is absent from the local OCI index after deletion. + # Again, avoid curl to prevent on-demand re-sync from the upstream. + run jq '(.manifests // []) | map(select(.annotations["org.opencontainers.image.ref.name"] == "rocm")) | length' \ + "${index_json}" + [ "$status" -eq 0 ] + [ "${lines[-1]}" -eq 0 ] +} + +@test "pull fails with error when upstream is killed during streaming" { + local test_port + test_port=$(cat "${BATS_FILE_TMPDIR}/zot.test.port") + local upstream_pid + upstream_pid=$(cat "${BATS_FILE_TMPDIR}/zot.upstream.pid") + local pull_dir="${BATS_FILE_TMPDIR}/pull5" + mkdir -p "${pull_dir}" + + # Start the pull in the background + skopeo copy --src-tls-verify=false \ + "docker://127.0.0.1:${test_port}/ollama/ollama:rocm" \ + "oci:${pull_dir}/ollama:rocm" >/dev/null 2>&1 & + local copier_pid=$! + + sleep 1 + + kill "${upstream_pid}" 2>/dev/null || true + + # Wait for copier to exit; it must fail because the upstream is gone. Wait in this shell, not via bats' run: run uses a subshell where the copier is not a child, so wait would always fail there. + local copier_status=0; wait "${copier_pid}" || copier_status=$? + [ "${copier_status}" -ne 0 ] +} + 
+@test "pull succeeds after upstream is restarted" { + local upstream_port + upstream_port=$(cat "${BATS_FILE_TMPDIR}/zot.upstream.port") + local test_port + test_port=$(cat "${BATS_FILE_TMPDIR}/zot.test.port") + + # Restart the upstream with the same binary and config used in setup_file. + local upstream_bin="${BATS_FILE_TMPDIR}/zot-${OS}-${ARCH}-minimal" + local upstream_config="${BATS_FILE_TMPDIR}/zot_upstream_config.json" + + "${upstream_bin}" serve "${upstream_config}" & + local new_upstream_pid=$! + echo -n "${new_upstream_pid} " >> "${BATS_FILE_TMPDIR}/zot.pid" + echo "${new_upstream_pid}" > "${BATS_FILE_TMPDIR}/zot.upstream.pid" + wait_zot_reachable "${upstream_port}" + + local pull_dir="${BATS_FILE_TMPDIR}/pull6" + mkdir -p "${pull_dir}" + + run skopeo copy --src-tls-verify=false \ + "docker://127.0.0.1:${test_port}/ollama/ollama:rocm" \ + "oci:${pull_dir}/ollama:rocm" + [ "$status" -eq 0 ] +} diff --git a/test/ports.json b/test/ports.json index dca09f56..dc1dcb94 100644 --- a/test/ports.json +++ b/test/ports.json @@ -460,5 +460,15 @@ "begin": 11530, "end": 11539 } + }, + "blackbox/sync_streaming.bats": { + "zot_upstream": { + "begin": 11540, + "end": 11549 + }, + "zot_test": { + "begin": 11550, + "end": 11559 + } } }