feat(cluster): Add support for request proxying for scale out (#2385)

* feat(cluster): initial commit for scale-out cluster

Signed-off-by: Ramkumar Chinchani <rchincha@cisco.com>

* feat(cluster): support shared storage scale out

This change introduces support for shared storage backed
zot cluster scale out.

New feature
Multiple stateless zot instances can run using the same shared
storage backend where each instance looks at a specific set
of repositories based on a siphash of the repository name to improve
scale as the load is distributed across multiple instances.
For a given config, there will only be one instance that can perform
dist-spec read/write on a given repository.

What's changed?
- introduced a transparent request proxy for dist-spec endpoints based on
siphash of repository name.
- new config for scale out cluster that specifies list of
cluster members.

Signed-off-by: Vishwas Rajashekar <vrajashe@cisco.com>

---------

Signed-off-by: Ramkumar Chinchani <rchincha@cisco.com>
Signed-off-by: Vishwas Rajashekar <vrajashe@cisco.com>
Co-authored-by: Ramkumar Chinchani <rchincha@cisco.com>
This commit is contained in:
Vishwas R
2024-05-20 21:35:21 +05:30
committed by GitHub
parent be5ad66797
commit 5ae7a028d9
30 changed files with 2320 additions and 24 deletions
@@ -0,0 +1,75 @@
# Note: intended to be run as "make run-cloud-scale-out-tests".
# The Makefile target installs and checks all necessary tooling.
# Extra tools that are not covered by the Makefile target need to be added to verify_prerequisites().
# number of zot instances launched by setup(); instance N listens on port 10000 + N
NUM_ZOT_INSTANCES=6
# directory that receives one log file per zot instance
ZOT_LOG_DIR=/tmp/zot-ft-logs/auth-tls
# helper libraries used by this test file
load helpers_zot
load helpers_cloud
load helpers_haproxy
# Generates the config for one zot cluster member (cloud base config,
# cluster member list, htpasswd auth and TLS), starts the instance and
# waits for it to become reachable over HTTPS.
# Arguments:
#   $1 - address the zot instance listens on
#   $2 - port the zot instance listens on
# Globals (read): ZOT_ROOT_DIR, ZOT_LOG_DIR, BATS_FILE_TMPDIR,
#   ZOT_CLUSTER_MEMBERS_PATCH_FILE, ZOT_HTPASSWD_PATH, ZOT_TLS_CERT_PATH,
#   ZOT_TLS_KEY_PATH, ZOT_TLS_CA_CERT_PATH
function launch_zot_server() {
    local zot_server_address="${1}"
    local zot_server_port="${2}"
    local zot_root_dir="${ZOT_ROOT_DIR}"
    local zot_config_file="${BATS_FILE_TMPDIR}/zot_config_${zot_server_address}_${zot_server_port}.json"
    local zot_log_file="${ZOT_LOG_DIR}/zot-${zot_server_address}-${zot_server_port}.log"

    # all expansions are quoted so that paths containing whitespace stay a single argument
    mkdir -p "${zot_root_dir}"
    mkdir -p "${ZOT_LOG_DIR}"

    create_zot_cloud_base_config_file "${zot_server_address}" "${zot_server_port}" "${zot_root_dir}" "${zot_config_file}" "${zot_log_file}"
    update_zot_cluster_member_list_in_config_file "${zot_config_file}" "${ZOT_CLUSTER_MEMBERS_PATCH_FILE}"
    update_zot_cfg_set_htpasswd_auth "${zot_config_file}" "${ZOT_HTPASSWD_PATH}"
    update_zot_cfg_set_tls "${zot_config_file}" "${ZOT_TLS_CERT_PATH}" "${ZOT_TLS_KEY_PATH}" "${ZOT_TLS_CA_CERT_PATH}"

    # fd 3 is where the test helpers write their diagnostics
    echo "launching zot server ${zot_server_address}:${zot_server_port}" >&3
    echo "config file: ${zot_config_file}" >&3
    echo "log file: ${zot_log_file}" >&3

    zot_serve "${zot_config_file}"
    wait_zot_reachable "${zot_server_port}" "https"
}
# Bats per-test setup: checks prerequisites, provisions the cloud services
# and the htpasswd file, launches NUM_ZOT_INSTANCES zot servers on
# 127.0.0.1 (ports 10000, 10001, ...) and starts haproxy in front of them.
# Globals (read): NUM_ZOT_INSTANCES, ZOT_CLUSTER_MEMBERS_PATCH_FILE, HAPROXY_CFG_FILE
function setup() {
    local i

    # verify prerequisites are available
    # (the function is called directly; wrapping it in $(...) would try to
    # execute anything it prints on stdout as a command)
    if ! verify_prerequisites; then
        exit 1
    fi

    # setup S3 bucket and DynamoDB tables
    setup_cloud_services
    # setup htpasswd for local auth
    setup_local_htpasswd

    generate_zot_cluster_member_list "${NUM_ZOT_INSTANCES}" "${ZOT_CLUSTER_MEMBERS_PATCH_FILE}"
    for ((i = 0; i < NUM_ZOT_INSTANCES; i++)); do
        launch_zot_server 127.0.0.1 $(( 10000 + i ))
    done

    # list all zot processes that were started
    ps -ef | grep ".*zot.*serve.*" | grep -v grep >&3

    generate_haproxy_config "${HAPROXY_CFG_FILE}" "https"
    haproxy_start "${HAPROXY_CFG_FILE}"

    # list haproxy processes that were started
    ps -ef | grep "haproxy" | grep -v grep >&3
}
# Bats per-test teardown: stops haproxy and zot, removes the zot root
# directory and tears down the cloud services.
# Every cleanup step runs even when an earlier one fails (for example pkill
# finding no process); the status of the last failing step is returned.
# Globals (read): ZOT_ROOT_DIR
function teardown() {
    local zot_root_dir="${ZOT_ROOT_DIR}"
    local rc=0

    haproxy_stop_all || rc=$?
    zot_stop_all || rc=$?
    # skip the removal when the path is empty
    if [[ -n "${zot_root_dir}" ]]; then
        rm -rf -- "${zot_root_dir}" || rc=$?
    fi
    teardown_cloud_services || rc=$?

    return "${rc}"
}
@test "Check for successful zb run on haproxy frontend" {
# Runs zb against the haproxy frontend over HTTPS with basic-auth credentials,
# using a concurrency of 3 and 5 requests.
# zb_run <test_name> <zot_address> <concurrency> <num_requests> <credentials (optional)>
zb_run "cloud-scale-out-basic-auth-tls-bats" "https://127.0.0.1:8000" 3 5 "${ZOT_AUTH_USER}:${ZOT_AUTH_PASS}"
}
@@ -0,0 +1,74 @@
# Note: intended to be run as "make run-cloud-scale-out-high-scale-tests".
# The Makefile target installs and checks all necessary tooling.
# Extra tools that are not covered by the Makefile target need to be added to verify_prerequisites().
# number of zot instances launched by setup(); instance N listens on port 10000 + N
NUM_ZOT_INSTANCES=6
# helper libraries used by this test file
load helpers_zot
load helpers_cloud
load helpers_haproxy
# Generates the config for one zot cluster member (cloud base config,
# cluster member list, htpasswd auth and TLS), starts the instance and
# waits for it to become reachable over HTTPS.
# Logs are written below /tmp/zot-logs.
# Arguments:
#   $1 - address the zot instance listens on
#   $2 - port the zot instance listens on
# Globals (read): ZOT_ROOT_DIR, BATS_FILE_TMPDIR,
#   ZOT_CLUSTER_MEMBERS_PATCH_FILE, ZOT_HTPASSWD_PATH, ZOT_TLS_CERT_PATH,
#   ZOT_TLS_KEY_PATH, ZOT_TLS_CA_CERT_PATH
function launch_zot_server() {
    local zot_server_address="${1}"
    local zot_server_port="${2}"
    local zot_root_dir="${ZOT_ROOT_DIR}"
    # single definition of the log directory instead of two copies of the literal
    local zot_log_dir="/tmp/zot-logs"
    local zot_config_file="${BATS_FILE_TMPDIR}/zot_config_${zot_server_address}_${zot_server_port}.json"
    local zot_log_file="${zot_log_dir}/zot-${zot_server_address}-${zot_server_port}.log"

    # all expansions are quoted so that paths containing whitespace stay a single argument
    mkdir -p "${zot_root_dir}"
    mkdir -p "${zot_log_dir}"

    create_zot_cloud_base_config_file "${zot_server_address}" "${zot_server_port}" "${zot_root_dir}" "${zot_config_file}" "${zot_log_file}"
    update_zot_cluster_member_list_in_config_file "${zot_config_file}" "${ZOT_CLUSTER_MEMBERS_PATCH_FILE}"
    update_zot_cfg_set_htpasswd_auth "${zot_config_file}" "${ZOT_HTPASSWD_PATH}"
    update_zot_cfg_set_tls "${zot_config_file}" "${ZOT_TLS_CERT_PATH}" "${ZOT_TLS_KEY_PATH}" "${ZOT_TLS_CA_CERT_PATH}"

    # fd 3 is where the test helpers write their diagnostics
    echo "launching zot server ${zot_server_address}:${zot_server_port}" >&3
    echo "config file: ${zot_config_file}" >&3
    echo "log file: ${zot_log_file}" >&3

    zot_serve "${zot_config_file}"
    wait_zot_reachable "${zot_server_port}" "https"
}
# Bats per-test setup: checks prerequisites, provisions the cloud services
# and the htpasswd file, launches NUM_ZOT_INSTANCES zot servers on
# 127.0.0.1 (ports 10000, 10001, ...) and starts haproxy in front of them.
# Globals (read): NUM_ZOT_INSTANCES, ZOT_CLUSTER_MEMBERS_PATCH_FILE, HAPROXY_CFG_FILE
function setup() {
    local i

    # verify prerequisites are available
    # (the function is called directly; wrapping it in $(...) would try to
    # execute anything it prints on stdout as a command)
    if ! verify_prerequisites; then
        exit 1
    fi

    # setup S3 bucket and DynamoDB tables
    setup_cloud_services
    # setup htpasswd for local auth
    setup_local_htpasswd

    generate_zot_cluster_member_list "${NUM_ZOT_INSTANCES}" "${ZOT_CLUSTER_MEMBERS_PATCH_FILE}"
    for ((i = 0; i < NUM_ZOT_INSTANCES; i++)); do
        launch_zot_server 127.0.0.1 $(( 10000 + i ))
    done

    # list all zot processes that were started
    ps -ef | grep ".*zot.*serve.*" | grep -v grep >&3

    generate_haproxy_config "${HAPROXY_CFG_FILE}" "https"
    haproxy_start "${HAPROXY_CFG_FILE}"

    # list haproxy processes that were started
    ps -ef | grep "haproxy" | grep -v grep >&3
}
# Bats per-test teardown: stops haproxy and zot, removes the zot root
# directory and tears down the cloud services.
# Every cleanup step runs even when an earlier one fails (for example pkill
# finding no process); the status of the last failing step is returned.
# Globals (read): ZOT_ROOT_DIR
function teardown() {
    local zot_root_dir="${ZOT_ROOT_DIR}"
    local rc=0

    haproxy_stop_all || rc=$?
    zot_stop_all || rc=$?
    # skip the removal when the path is empty
    if [[ -n "${zot_root_dir}" ]]; then
        rm -rf -- "${zot_root_dir}" || rc=$?
    fi
    teardown_cloud_services || rc=$?

    return "${rc}"
}
@test "Check for successful zb run on haproxy frontend" {
# Runs zb against the haproxy frontend over HTTPS with basic-auth credentials,
# using a concurrency of 10 and 100 requests.
# zb_run <test_name> <zot_address> <concurrency> <num_requests> <credentials (optional)>
zb_run "cloud-scale-out-high-scale-bats" "https://127.0.0.1:8000" 10 100 "${ZOT_AUTH_USER}:${ZOT_AUTH_PASS}"
}
@@ -0,0 +1,69 @@
# Note: intended to be run as "make run-cloud-scale-out-tests".
# The Makefile target installs and checks all necessary tooling.
# Extra tools that are not covered by the Makefile target need to be added to verify_prerequisites().
# number of zot instances launched by setup(); instance N listens on port 10000 + N
NUM_ZOT_INSTANCES=6
# directory that receives one log file per zot instance
ZOT_LOG_DIR=/tmp/zot-ft-logs/no-auth
# helper libraries used by this test file
load helpers_zot
load helpers_cloud
load helpers_haproxy
# Generates the config for one zot cluster member (cloud base config plus
# cluster member list, no auth and no TLS), starts the instance and waits
# for it to become reachable using the default protocol of wait_zot_reachable.
# Arguments:
#   $1 - address the zot instance listens on
#   $2 - port the zot instance listens on
# Globals (read): ZOT_ROOT_DIR, ZOT_LOG_DIR, BATS_FILE_TMPDIR,
#   ZOT_CLUSTER_MEMBERS_PATCH_FILE
function launch_zot_server() {
    local zot_server_address="${1}"
    local zot_server_port="${2}"
    local zot_root_dir="${ZOT_ROOT_DIR}"
    local zot_config_file="${BATS_FILE_TMPDIR}/zot_config_${zot_server_address}_${zot_server_port}.json"
    local zot_log_file="${ZOT_LOG_DIR}/zot-${zot_server_address}-${zot_server_port}.log"

    # all expansions are quoted so that paths containing whitespace stay a single argument
    mkdir -p "${zot_root_dir}"
    mkdir -p "${ZOT_LOG_DIR}"

    create_zot_cloud_base_config_file "${zot_server_address}" "${zot_server_port}" "${zot_root_dir}" "${zot_config_file}" "${zot_log_file}"
    update_zot_cluster_member_list_in_config_file "${zot_config_file}" "${ZOT_CLUSTER_MEMBERS_PATCH_FILE}"

    # fd 3 is where the test helpers write their diagnostics
    echo "launching zot server ${zot_server_address}:${zot_server_port}" >&3
    echo "config file: ${zot_config_file}" >&3
    echo "log file: ${zot_log_file}" >&3

    zot_serve "${zot_config_file}"
    wait_zot_reachable "${zot_server_port}"
}
# Bats per-test setup: checks prerequisites, provisions the cloud services,
# launches NUM_ZOT_INSTANCES zot servers on 127.0.0.1 (ports 10000,
# 10001, ...) and starts haproxy in front of them in plain HTTP mode.
# Globals (read): NUM_ZOT_INSTANCES, ZOT_CLUSTER_MEMBERS_PATCH_FILE, HAPROXY_CFG_FILE
function setup() {
    local i

    # verify prerequisites are available
    # (the function is called directly; wrapping it in $(...) would try to
    # execute anything it prints on stdout as a command)
    if ! verify_prerequisites; then
        exit 1
    fi

    # setup S3 bucket and DynamoDB tables
    setup_cloud_services

    generate_zot_cluster_member_list "${NUM_ZOT_INSTANCES}" "${ZOT_CLUSTER_MEMBERS_PATCH_FILE}"
    for ((i = 0; i < NUM_ZOT_INSTANCES; i++)); do
        launch_zot_server 127.0.0.1 $(( 10000 + i ))
    done

    # list all zot processes that were started
    ps -ef | grep ".*zot.*serve.*" | grep -v grep >&3

    generate_haproxy_config "${HAPROXY_CFG_FILE}" "http"
    haproxy_start "${HAPROXY_CFG_FILE}"

    # list HAproxy processes that were started
    ps -ef | grep "haproxy" | grep -v grep >&3
}
# Bats per-test teardown: stops haproxy and zot, removes the zot root
# directory and tears down the cloud services.
# Every cleanup step runs even when an earlier one fails (for example pkill
# finding no process); the status of the last failing step is returned.
# Globals (read): ZOT_ROOT_DIR
function teardown() {
    local zot_root_dir="${ZOT_ROOT_DIR}"
    local rc=0

    haproxy_stop_all || rc=$?
    zot_stop_all || rc=$?
    # skip the removal when the path is empty
    if [[ -n "${zot_root_dir}" ]]; then
        rm -rf -- "${zot_root_dir}" || rc=$?
    fi
    teardown_cloud_services || rc=$?

    return "${rc}"
}
@test "Check for successful zb run on haproxy frontend" {
# Runs zb against the haproxy frontend over plain HTTP without credentials,
# using a concurrency of 3 and 5 requests.
# zb_run <test_name> <zot_address> <concurrency> <num_requests> <credentials (optional)>
zb_run "cloud-scale-out-no-auth-bats" "http://127.0.0.1:8000" 3 5
}
+35
View File
@@ -0,0 +1,35 @@
# Provisions the cloud resources used by the tests: the S3 bucket
# followed by the DynamoDB table, both in the same region.
function setup_cloud_services() {
    local aws_region="us-east-2"
    local s3_bucket="zot-storage-test"

    setup_s3 "${aws_region}" "${s3_bucket}"
    setup_dynamodb "${aws_region}"
}
# Removes the cloud resources created by setup_cloud_services.
# Both steps always run, so a failed bucket deletion no longer prevents the
# table from being deleted; the status of the last failing step is returned.
function teardown_cloud_services() {
    local rc=0

    delete_s3_bucket "zot-storage-test" || rc=$?
    teardown_dynamodb "us-east-2" || rc=$?

    return "${rc}"
}
# Creates an S3 bucket through awslocal.
# Arguments:
#   $1 - region
#   $2 - bucket name
function setup_s3() {
    local region="${1}"
    local bucket="${2}"

    # quoted so that each value reaches awslocal as exactly one argument
    awslocal s3 --region "${region}" mb "s3://${bucket}"
}
# Removes an S3 bucket through awslocal; --force is passed so that a
# non-empty bucket is removed as well.
# Arguments:
#   $1 - bucket name
function delete_s3_bucket() {
    local bucket="${1}"

    # quoted so that the bucket URL reaches awslocal as exactly one argument
    awslocal s3 rb "s3://${bucket}" --force
}
# Creates the "BlobTable" DynamoDB table (string hash key "Digest")
# through awslocal.
# Arguments:
#   $1 - region
function setup_dynamodb() {
    local region="${1}"

    # region is quoted so that it reaches awslocal as exactly one argument
    awslocal dynamodb --region "${region}" \
        create-table \
        --table-name "BlobTable" \
        --attribute-definitions AttributeName=Digest,AttributeType=S \
        --key-schema AttributeName=Digest,KeyType=HASH \
        --provisioned-throughput ReadCapacityUnits=10,WriteCapacityUnits=5
}
# Deletes the "BlobTable" DynamoDB table through awslocal.
# Arguments:
#   $1 - region
function teardown_dynamodb() {
    local region="${1}"

    # region is quoted so that it reaches awslocal as exactly one argument
    awslocal dynamodb --region "${region}" delete-table --table-name "BlobTable"
}
+71
View File
@@ -0,0 +1,71 @@
# path of the haproxy config file used by the tests, located below BATS_FILE_TMPDIR
HAPROXY_CFG_FILE="${BATS_FILE_TMPDIR}/haproxy/haproxy-test.cfg"
# Prints one haproxy "server" line per zot instance; instance N is
# named zotN and points at 127.0.0.1 on port 10000 + N.
# Arguments:
#   $1 - number of zot instances
# Outputs: the server lines on stdout
function generate_haproxy_server_list() {
    local num_instances="${1}"
    # the counter is local so that a caller's own loop variable "i" is not clobbered
    local i
    local port

    for ((i = 0; i < num_instances; i++)); do
        port=$(( 10000 + i ))
        echo " server zot${i} 127.0.0.1:${port}"
    done
}
# terminates every process that pkill matches for the name "haproxy";
# the exit status is the one reported by pkill
function haproxy_stop_all() {
    pkill "haproxy"
}
# starts one haproxy instance with the given config file
# expects the haproxy config to specify daemon mode
# Arguments:
#   $1 - path to the haproxy config file
# Returns: non-zero when the config check or the start fails
function haproxy_start() {
    local haproxy_cfg_file="${1}"

    # Check the config file; do not attempt a start with a config that failed validation
    haproxy -f "${haproxy_cfg_file}" -c >&3 || return $?

    # Start haproxy
    haproxy -f "${haproxy_cfg_file}"
}
# generates HAproxy config for use in the test
# Arguments:
#   $1 - path of the config file to write (its directory is created)
#   $2 - protocol served by the backends, either "http" or "https";
#        "https" selects haproxy tcp mode, anything else selects http mode
# Globals (read): NUM_ZOT_INSTANCES
# Outputs: echoes the generated config to fd 3
function generate_haproxy_config() {
    local haproxy_cfg_file="${1}"
    # can be either http or https
    local protocol="${2}"
    local haproxy_mode='http'
    # declared separately so a dirname failure is not masked by "local"
    local haproxy_root_dir
    haproxy_root_dir="$(dirname -- "${haproxy_cfg_file}")"

    mkdir -p "${haproxy_root_dir}"

    if [[ "${protocol}" == 'https' ]]; then
        haproxy_mode='tcp'
    fi

    # the redirect target is quoted: an unquoted path with whitespace is an "ambiguous redirect"
    cat > "${haproxy_cfg_file}" <<EOF
global
log ${haproxy_root_dir}/log local0
log ${haproxy_root_dir}/log local1 notice
maxconn 20000
stats timeout 30s
daemon
defaults
log global
mode ${haproxy_mode}
option ${haproxy_mode}log
option dontlognull
timeout connect 5000
timeout client 50000
timeout server 50000
frontend zot
bind *:8000
default_backend zot-cluster
backend zot-cluster
balance roundrobin
EOF

    # Populate server list
    generate_haproxy_server_list "${NUM_ZOT_INSTANCES}" >> "${haproxy_cfg_file}"

    cat "${haproxy_cfg_file}" >&3
}
+273
View File
@@ -0,0 +1,273 @@
# top-level directory of the git checkout
ROOT_DIR=$(git rev-parse --show-toplevel)
# OS and architecture reported by the Go toolchain; they are part of the binary file names
OS=$(go env GOOS)
ARCH=$(go env GOARCH)
# paths of the zot, zli and minimal zot binaries
ZOT_PATH=${ROOT_DIR}/bin/zot-${OS}-${ARCH}
ZLI_PATH=${ROOT_DIR}/bin/zli-${OS}-${ARCH}
ZOT_MINIMAL_PATH=${ROOT_DIR}/bin/zot-${OS}-${ARCH}-minimal
# basic auth: test user, password and the location of the generated htpasswd file
ZOT_AUTH_USER=poweruser
ZOT_AUTH_PASS=sup*rSecr9T
ZOT_CREDS_PATH="${BATS_FILE_TMPDIR}/creds"
ZOT_HTPASSWD_PATH="${ZOT_CREDS_PATH}/htpasswd"
# zb: binary, directory that collects result files, and the ci-cd output file that zb_run moves there
ZB_PATH=${ROOT_DIR}/bin/zb-${OS}-${ARCH}
ZB_RESULTS_PATH=${ROOT_DIR}/zb-results
ZB_CI_CD_OUTPUT_FILE=${ROOT_DIR}/ci-cd.json
# zot scale out cluster: member-list patch file, zot root directory and TLS material
ZOT_CLUSTER_MEMBERS_PATCH_FILE="${BATS_FILE_TMPDIR}/members-patch.json"
ZOT_ROOT_DIR="${BATS_FILE_TMPDIR}/zot"
ZOT_TLS_CERT_PATH="${ROOT_DIR}/test/data/server.cert"
ZOT_TLS_KEY_PATH="${ROOT_DIR}/test/data/server.key"
ZOT_TLS_CA_CERT_PATH="${ROOT_DIR}/test/data/ca.crt"
# Checks that the binaries and tools needed by the tests are available.
# Globals (read): ZOT_PATH, ZB_PATH
# Outputs: a message on fd 3 naming the first missing prerequisite
# Returns: 0 when everything is present, 1 otherwise
function verify_prerequisites {
    # The paths are quoted: with an unquoted empty variable the test collapses
    # to `[ ! -f ]`, which is false, so a missing binary would go unnoticed.
    if [[ ! -f "${ZOT_PATH}" ]]; then
        echo "you need to build ${ZOT_PATH} before running the tests" >&3
        return 1
    fi

    if [[ ! -f "${ZB_PATH}" ]]; then
        echo "you need to build ${ZB_PATH} before running the tests" >&3
        return 1
    fi

    # the exit status of `command -v` is used directly instead of testing its output
    if ! command -v skopeo &>/dev/null; then
        echo "you need to install skopeo as a prerequisite to running the tests" >&3
        return 1
    fi

    if ! command -v awslocal &>/dev/null; then
        echo "you need to install aws cli as a prerequisite to running the tests" >&3
        return 1
    fi

    if ! command -v haproxy &>/dev/null; then
        echo "you need to install haproxy as a prerequisite to running the tests" >&3
        return 1
    fi

    return 0
}
# Prints a randomly chosen port in the range 10000-59151 for which
# `nc -z 127.0.0.1 <port>` does not succeed, i.e. nothing accepted the
# connection. Candidates are drawn until such a port is found.
# Note: random_port, status and free_port are set as globals.
function get_free_port(){
    while :; do
        # combine two 15-bit RANDOM values into one candidate port
        random_port=$(( ((RANDOM<<15)|RANDOM) % 49152 + 10000 ))

        # record the probe result without letting a failing nc trip errexit
        status=0
        nc -z 127.0.0.1 $random_port < /dev/null &>/dev/null || status=$?

        # a zero status means something is listening: draw another port
        if [ "${status}" == "0" ]; then
            continue
        fi

        free_port=${random_port}
        break
    done

    echo ${free_port}
}
# Starts "zot serve" with the given config file as a background job.
# Arguments:
#   $1 - path to the zot config file
# Globals (read): ZOT_PATH
function zot_serve() {
    local config_file="${1}"

    # quoted so that a binary or config path containing whitespace stays one word
    "${ZOT_PATH}" serve "${config_file}" &
}
# terminates every process that pkill matches for the name "zot";
# the exit status is the one reported by pkill
function zot_stop_all() {
    pkill "zot"
}
# waits for the zot server to be reachable
# leaving argument 2 blank or specifying "http" causes the function to use HTTP
# specifying "https" for argument 2 causes the function to use TLS
# Arguments:
#   $1 - port of the zot server on 127.0.0.1
#   $2 - protocol, "http" (default) or "https"
# Returns: the exit status of curl
function wait_zot_reachable() {
    local zot_port="${1}"
    local protocol="${2:-http}"
    local zot_url="${protocol}://127.0.0.1:${zot_port}/v2/_catalog"
    local curl_opts=()

    # since this is only a reachability check, we can disable cert verification
    if [[ "${protocol}" == "https" ]]; then
        curl_opts+=(--insecure)
    fi

    curl_opts+=(
        --connect-timeout 3
        --max-time 5
        --retry 20
        --retry-delay 1
        --retry-max-time 180
        --retry-connrefused
    )

    curl "${curl_opts[@]}" "${zot_url}"
}
# Runs zb against a zot endpoint, reports the wall-clock duration on fd 3
# and moves the ci-cd output file (when present) into the results directory.
# Arguments:
#   $1 - test name, used as prefix of the results file
#   $2 - zot address (URL)
#   $3 - number of concurrent requests
#   $4 - total number of requests
#   $5 - credentials "user:password" (optional)
# Globals (read): ZB_PATH, ZB_RESULTS_PATH, ZB_CI_CD_OUTPUT_FILE
# Returns: the exit status of zb, or of the results move if that fails
function zb_run() {
    local test_name="${1}"
    local zot_address="${2}"
    local concurrent_reqs="${3}"
    local num_requests="${4}"
    local credentials="${5}"
    local start stop runtime
    local rc=0
    local zb_args=()

    mkdir -p "${ZB_RESULTS_PATH}"

    # Credentials must be quoted: the password may contain glob characters
    # such as "*", which would otherwise be expanded against the current directory.
    if [[ -n "${credentials}" ]]; then
        zb_args+=(-A "${credentials}")
    fi

    zb_args+=(
        -c "${concurrent_reqs}"
        -n "${num_requests}"
        --src-cidr 127.0.10.0/24
        -o ci-cd
        --skip-cleanup
    )

    start=$(date +%s)
    # remember the status so the duration and results are still handled on failure
    "${ZB_PATH}" "${zb_args[@]}" "${zot_address}" || rc=$?
    stop=$(date +%s)

    runtime=$((stop-start))
    echo "Duration: ${runtime} seconds" >&3

    if [[ -f "${ZB_CI_CD_OUTPUT_FILE}" ]]; then
        mv "${ZB_CI_CD_OUTPUT_FILE}" "${ZB_RESULTS_PATH}/${test_name}-results.json" || rc=$?
    fi

    return "${rc}"
}
# Creates the htpasswd file for the test user.
# Globals (read): ZOT_CREDS_PATH, ZOT_HTPASSWD_PATH, ZOT_AUTH_USER, ZOT_AUTH_PASS
function setup_local_htpasswd() {
    # user and password are quoted: the password may contain glob characters
    # such as "*" that must reach create_htpasswd_file literally
    create_htpasswd_file "${ZOT_CREDS_PATH}" "${ZOT_HTPASSWD_PATH}" "${ZOT_AUTH_USER}" "${ZOT_AUTH_PASS}"
}
# Creates the credentials directory and writes a new htpasswd file with
# a single user (htpasswd flags: -b password from the command line,
# -c create the file, -B bcrypt).
# Arguments:
#   $1 - directory that holds the credentials
#   $2 - path of the htpasswd file to create
#   $3 - user name
#   $4 - password
function create_htpasswd_file() {
    local creds_dir_path="${1}"
    local htpasswd_file_path="${2}"
    local user="${3}"
    local password="${4}"

    mkdir -p "${creds_dir_path}"
    # user and password are quoted so that glob characters and whitespace are passed literally
    htpasswd -b -c -B "${htpasswd_file_path}" "${user}" "${password}"
}
# given the number of zot instances, computes a list of cluster members
# and saves them as a JSON to a file that can be used with jq later.
# Member N is "127.0.0.1:<10000 + N>".
# Arguments:
#   $1 - number of zot instances
#   $2 - path of the patch file to write
# Outputs: echoes the generated patch file to fd 3
function generate_zot_cluster_member_list() {
    local num_zot_instances="${1}"
    local patch_file_path="${2}"

    # A single jq invocation builds the whole list; this avoids one jq run
    # plus a temp-file swap per member and does not touch the caller's
    # loop variable "i".
    jq -n --argjson n "${num_zot_instances}" \
        '{cluster: {members: [range($n) | "127.0.0.1:\(10000 + .)"]}}' \
        > "${patch_file_path}" || return $?

    echo "cluster members patch file" >&3
    cat "${patch_file_path}" >&3
}
# patches an existing zot config file to add all the cluster members.
# The patch is merged recursively into the config (jq "*" on objects),
# so the other keys of the existing "cluster" section are kept.
# Arguments:
#   $1 - zot config file to update in place
#   $2 - members patch file
# Globals (read): BATS_FILE_TMPDIR
function update_zot_cluster_member_list_in_config_file() {
    local zot_config_file="${1}"
    local zot_members_patch_file="${2}"
    local temp_file="${BATS_FILE_TMPDIR}/jq-mem-update-dump.json"

    # write to a temp file first; the config is only replaced when jq succeeded
    jq -s '.[0] * .[1]' "${zot_config_file}" "${zot_members_patch_file}" > "${temp_file}" && \
        mv "${temp_file}" "${zot_config_file}"
}
# generates and saves a base cloud config with shared storage
# given some basic parameters about the zot instance.
# The config uses the s3 storage driver and the dynamodb cache driver, both
# pointed at localhost:4566, and starts with an empty cluster member list.
# Arguments:
#   $1 - address the zot instance listens on
#   $2 - port the zot instance listens on
#   $3 - zot root directory
#   $4 - path of the config file to write
#   $5 - path of the zot log file
function create_zot_cloud_base_config_file() {
    local zot_server_address="${1}"
    local zot_server_port="${2}"
    local zot_root_dir="${3}"
    local zot_config_file="${4}"
    local zot_log_file="${5}"

    # the redirect target is quoted: an unquoted path with whitespace is an "ambiguous redirect"
    cat > "${zot_config_file}" <<EOF
{
"distSpecVersion": "1.1.0",
"storage": {
"rootDirectory": "${zot_root_dir}",
"dedupe": false,
"remoteCache": true,
"storageDriver": {
"name": "s3",
"rootdirectory": "/zot",
"region": "us-east-2",
"regionendpoint": "localhost:4566",
"bucket": "zot-storage-test",
"secure": false,
"skipverify": false
},
"cacheDriver": {
"name": "dynamodb",
"endpoint": "http://localhost:4566",
"region": "us-east-2",
"cacheTablename": "BlobTable",
"repoMetaTablename": "RepoMetadataTable",
"imageMetaTablename": "ImageMetaTable",
"repoBlobsInfoTablename": "RepoBlobsInfoTable",
"userDataTablename": "UserDataTable",
"apiKeyTablename":"ApiKeyTable",
"versionTablename": "Version"
}
},
"http": {
"address": "${zot_server_address}",
"port": "${zot_server_port}",
"realm": "zot"
},
"cluster": {
"members": [],
"hashKey": "loremipsumdolors"
},
"log": {
"level": "debug",
"output": "${zot_log_file}"
}
}
EOF
}
# updates an existing zot config file that already has an HTTP config
# to include htpasswd auth settings.
# intended for use with create_zot_cloud_base_config_file() above.
# Arguments:
#   $1 - zot config file to update in place
#   $2 - path of the htpasswd file
# Globals (read): BATS_FILE_TMPDIR
function update_zot_cfg_set_htpasswd_auth() {
    local zot_config_file="${1}"
    local zot_htpasswd_path="${2}"
    local temp_file="${BATS_FILE_TMPDIR}/jq-auth-dump.json"

    # set zot htpasswd auth
    # (written to a temp file first; the config is only replaced when jq succeeded)
    jq --arg htpasswd_path "${zot_htpasswd_path}" \
        '(.http) += {auth: {htpasswd: {path: $htpasswd_path}}}' \
        "${zot_config_file}" > "${temp_file}" && \
        mv "${temp_file}" "${zot_config_file}"
}
# updates an existing zot config file that already has an HTTP config
# to include TLS configuration.
# intended for use with create_zot_cloud_base_config_file() above.
# The server cert and key are added under "http.tls"; the CA cert is added
# under "cluster.tls".
# Arguments:
#   $1 - zot config file to update in place
#   $2 - path of the server certificate
#   $3 - path of the server key
#   $4 - path of the CA certificate
# Globals (read): BATS_FILE_TMPDIR
function update_zot_cfg_set_tls() {
    local zot_config_file="${1}"
    local zot_cert_path="${2}"
    local zot_key_path="${3}"
    local zot_cacert_path="${4}"
    local temp_file="${BATS_FILE_TMPDIR}/jq-tls-dump.json"

    # set zot TLS config
    # (both sections are patched in one jq pass, so the config file is either
    # fully updated or left untouched)
    jq --arg zot_cert_path "${zot_cert_path}" \
        --arg zot_key_path "${zot_key_path}" \
        --arg zot_cacert_path "${zot_cacert_path}" \
        '(.http) += {tls: {cert: $zot_cert_path, key: $zot_key_path}}
         | (.cluster) += {tls: {cacert: $zot_cacert_path}}' \
        "${zot_config_file}" > "${temp_file}" && \
        mv "${temp_file}" "${zot_config_file}"
}