test(blackbox): harden zot restart + reachability checks (#3907)

Wait for zot PIDs (and the port) to fully shut down before restarting, and retry transient curl failures (e.g. exit 52) in wait_zot_reachable.
Send checksum verification errors to stderr for clearer failure output.

Signed-off-by: Andrei Aaron <andreifdaaron@gmail.com>
This commit is contained in:
Andrei Aaron
2026-03-30 22:41:01 +03:00
committed by GitHub
parent e188f45890
commit 2ca0392825
+80 -20
View File
@@ -43,12 +43,12 @@ function zot_rel_serve() {
curl -L -o "${checksum_file}" "${checksum_url}"
expected_sum=$(grep "zot-${OS}-${ARCH}$" "${checksum_file}" | awk '{print $1}')
if [ -z "${expected_sum}" ]; then
echo "ERROR: Could not find checksum for zot-${OS}-${ARCH} in checksums.sha256.txt"
echo "ERROR: Could not find checksum for zot-${OS}-${ARCH} in checksums.sha256.txt" >&2
exit 1
fi
actual_sum=$(sha256sum "${zot_path}" | awk '{print $1}')
if [ "${expected_sum}" != "${actual_sum}" ]; then
echo "ERROR: Checksum verification failed for zot-${OS}-${ARCH}"
echo "ERROR: Checksum verification failed for zot-${OS}-${ARCH}" >&2
exit 1
fi
chmod +x "${zot_path}"
@@ -74,12 +74,12 @@ function zot_rel_min_serve() {
curl -L -o "${checksum_file}" "${checksum_url}"
expected_sum=$(grep "zot-${OS}-${ARCH}-minimal$" "${checksum_file}" | awk '{print $1}')
if [ -z "${expected_sum}" ]; then
echo "ERROR: Could not find checksum for zot-${OS}-${ARCH}-minimal in checksums.sha256.txt"
echo "ERROR: Could not find checksum for zot-${OS}-${ARCH}-minimal in checksums.sha256.txt" >&2
exit 1
fi
actual_sum=$(sha256sum "${zot_path}" | awk '{print $1}')
if [ "${expected_sum}" != "${actual_sum}" ]; then
echo "ERROR: Checksum verification failed for zot-${OS}-${ARCH}-minimal"
echo "ERROR: Checksum verification failed for zot-${OS}-${ARCH}-minimal" >&2
exit 1
fi
chmod +x "${zot_path}"
@@ -93,8 +93,50 @@ function zot_rel_min_serve() {
# stops all zot instances started by the test
function zot_stop_all() {
if [ -f "${BATS_FILE_TMPDIR}/zot.pid" ]; then
kill $(cat ${BATS_FILE_TMPDIR}/zot.pid) 2>/dev/null || true
rm -f ${BATS_FILE_TMPDIR}/zot.pid
local pids
pids="$(cat "${BATS_FILE_TMPDIR}/zot.pid" 2>/dev/null || true)"
# Ask zot(s) to terminate.
if [ -n "$pids" ]; then
kill $pids 2>/dev/null || true
fi
# Wait for processes to exit (helps avoid stale responders on restart).
local deadline=$((SECONDS + 10))
while [ $SECONDS -lt $deadline ]; do
local any_alive=0
local p
for p in $pids; do
if kill -0 "$p" 2>/dev/null; then
any_alive=1
break
fi
done
[ "$any_alive" -eq 0 ] && break
sleep 0.1
done
# Force kill any stragglers.
local p
for p in $pids; do
kill -0 "$p" 2>/dev/null && kill -9 "$p" 2>/dev/null || true
done
# If we know the port, wait for it to stop accepting connections.
if [ -f "${BATS_FILE_TMPDIR}/zot.port" ]; then
local zot_port
zot_port="$(cat "${BATS_FILE_TMPDIR}/zot.port" 2>/dev/null || true)"
if [ -n "$zot_port" ]; then
local port_deadline=$((SECONDS + 10))
while [ $SECONDS -lt $port_deadline ]; do
# bash /dev/tcp returns success when something is accepting connections.
(echo >"/dev/tcp/127.0.0.1/${zot_port}") >/dev/null 2>&1 || break
sleep 0.1
done
fi
fi
rm -f "${BATS_FILE_TMPDIR}/zot.pid"
fi
}
@@ -120,20 +162,38 @@ function wait_zot_reachable() {
fi
fi
local response
response=$(curl -s --connect-timeout 3 \
--max-time 5 \
--retry 60 \
--retry-delay 1 \
--retry-max-time 180 \
--retry-connrefused \
-w "\n%{http_code}" \
"${zot_url}")
local curl_ret=$?
if [ $curl_ret -ne 0 ]; then
echo "ERROR: zot did not become reachable at ${zot_url}" >&2
exit 1
fi
local response curl_ret curl_err curl_err_file
local start_ts=$SECONDS
while true; do
curl_err_file="$(mktemp "${BATS_FILE_TMPDIR}/curl_err.XXXXXX")"
# Be robust if the test harness enables `set -e` (errexit): a failing `curl` inside
# command substitution can otherwise abort the function before we can retry.
local errexit_was_set=0
case "$-" in
*e*) errexit_was_set=1 ;;
esac
set +e
response="$(curl -sS --connect-timeout 3 \
--max-time 5 \
-w "\n%{http_code}" \
"${zot_url}" 2>"${curl_err_file}")"
curl_ret=$?
[ "$errexit_was_set" -eq 1 ] && set -e
curl_err="$(cat "${curl_err_file}" 2>/dev/null || true)"
rm -f "${curl_err_file}"
if [ $curl_ret -eq 0 ]; then
break
fi
# Retry transient startup/shutdown races (notably curl 52: "Empty reply from server").
if [ $((SECONDS - start_ts)) -ge 180 ]; then
echo "ERROR: zot did not become reachable at ${zot_url} (curl exit ${curl_ret})" >&2
[ -n "$curl_err" ] && echo "curl: ${curl_err}" >&2
exit 1
fi
sleep 1
done
# curl -s -w "\n%{http_code}" appends HTTP code on last line; body is everything else
local http_code