github.com/k8snetworkplumbingwg/sriov-network-operator@v1.2.1-0.20240408194816-2d2e5a45d453/hack/run-e2e-conformance-virtual-cluster.sh

#!/usr/bin/env bash
set -xeo pipefail

cluster_name=${CLUSTER_NAME:-virtual}
domain_name=$cluster_name.lab

api_ip=${API_IP:-192.168.124.250}
virtual_router_id=${VIRTUAL_ROUTER_ID:-250}
HOME="/root"

here="$(dirname "$(readlink --canonicalize "${BASH_SOURCE[0]}")")"
root="$(readlink --canonicalize "$here/..")"

NUM_OF_WORKERS=${NUM_OF_WORKERS:-2}
total_number_of_nodes=$((1 + NUM_OF_WORKERS))

if [ "$NUM_OF_WORKERS" -lt 2 ]; then
  echo "Minimum number of workers is 2"
  exit 1
fi

export MULTUS_NAMESPACE="kube-system"

source $here/run-e2e-conformance-common

check_requirements() {
  for cmd in kcli virsh virt-edit podman make go; do
    if ! command -v "$cmd" &> /dev/null; then
      echo "$cmd is not available"
      exit 1
    fi
  done
  return 0
}

echo "## checking requirements"
check_requirements
echo "## delete existing cluster $cluster_name"
kcli delete cluster $cluster_name -y
kcli delete network $cluster_name -y

function cleanup {
  kcli delete cluster $cluster_name -y
  kcli delete network $cluster_name -y
}

if [ -z "$SKIP_DELETE" ]; then
  trap cleanup EXIT
fi

kcli create network -c 192.168.124.0/24 k8s
kcli create network -c 192.168.${virtual_router_id}.0/24 --nodhcp -i $cluster_name

cat <<EOF > ./${cluster_name}-plan.yaml
ctlplane_memory: 4096
worker_memory: 4096
pool: default
disk_size: 50
network: k8s
api_ip: $api_ip
virtual_router_id: $virtual_router_id
domain: $domain_name
ctlplanes: 1
workers: $NUM_OF_WORKERS
ingress: false
machine: q35
engine: crio
sdn: flannel
autolabeller: false
vmrules:
  - $cluster_name-worker-.*:
      nets:
        - name: k8s
          type: igb
          vfio: true
          noconf: true
          numa: 0
        - name: $cluster_name
          type: igb
          vfio: true
          noconf: true
          numa: 1
      numcpus: 6
      numa:
        - id: 0
          vcpus: 0,2,4
          memory: 2048
        - id: 1
          vcpus: 1,3,5
          memory: 2048

EOF

kcli create cluster generic --paramfile ./${cluster_name}-plan.yaml $cluster_name

export KUBECONFIG=$HOME/.kcli/clusters/$cluster_name/auth/kubeconfig
export PATH=$PWD:$PATH

ATTEMPTS=0
MAX_ATTEMPTS=72
ready=false
sleep_time=10

until $ready || [ $ATTEMPTS -eq $MAX_ATTEMPTS ]
do
  echo "waiting for cluster to be ready"
  if [ `kubectl get node | grep Ready | wc -l` == $total_number_of_nodes ]; then
    echo "cluster is ready"
    ready=true
  else
    echo "cluster is not ready yet"
    sleep $sleep_time
  fi
  ATTEMPTS=$((ATTEMPTS+1))
done

if ! $ready; then
  echo "Timed out waiting for cluster to be ready"
  kubectl get nodes
  exit 1
fi
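# Note: an equivalent readiness gate (a sketch, not what this script uses; the
# explicit loop above is kept so progress is logged and the node count is checked)
# would be a single kubectl wait call covering the same 72 x 10s window:
#   kubectl wait node --all --for=condition=Ready --timeout=720s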
function update_worker_labels() {
  echo "## label cluster workers as sriov capable"
  for ((num=0; num<NUM_OF_WORKERS; num++))
  do
    kubectl label node $cluster_name-worker-$num.$domain_name feature.node.kubernetes.io/network-sriov.capable=true --overwrite
  done

  echo "## label cluster workers as worker"
  for ((num=0; num<NUM_OF_WORKERS; num++))
  do
    kubectl label node $cluster_name-worker-$num.$domain_name node-role.kubernetes.io/worker= --overwrite
  done
}

update_worker_labels

controller_ip=`kubectl get node -o wide | grep ctlp | awk '{print $6}'`
insecure_registry="[[registry]]
location = \"$controller_ip:5000\"
insecure = true

[aliases]
\"golang\" = \"docker.io/library/golang\"
"

cat << EOF > /etc/containers/registries.conf.d/003-${cluster_name}.conf
$insecure_registry
EOF

function update_host() {
  node_name=$1
  kcli ssh $node_name << EOF
sudo su
echo '$insecure_registry' > /etc/containers/registries.conf.d/003-internal.conf
systemctl restart crio

# keep NetworkManager from configuring the emulated igb VF interfaces (igbvf driver)
echo '[connection]
id=multi
type=ethernet
[ethernet]
[match]
driver=igbvf;
[ipv4]
method=disabled
[ipv6]
addr-gen-mode=default
method=disabled
[proxy]' > /etc/NetworkManager/system-connections/multi.nmconnection

chmod 600 /etc/NetworkManager/system-connections/multi.nmconnection

echo '[Unit]
Description=disable checksum offload to avoid vf bug
After=network.target

[Service]
Type=oneshot
ExecStart=/usr/bin/bash -c "ethtool --offload eth1 rx off tx off && ethtool -K eth1 gso off"
StandardOutput=journal+console
StandardError=journal+console

[Install]
WantedBy=default.target' > /etc/systemd/system/disable-offload.service

systemctl daemon-reload
systemctl enable --now disable-offload

systemctl restart NetworkManager

EOF

}

update_host $cluster_name-ctlplane-0
for ((num=0; num<NUM_OF_WORKERS; num++))
do
  update_host $cluster_name-worker-$num
done

# remove this patch after the multus bug is fixed
# https://github.com/k8snetworkplumbingwg/multus-cni/issues/1221
kubectl patch -n ${MULTUS_NAMESPACE} ds/kube-multus-ds --type=json -p='[{"op": "replace", "path": "/spec/template/spec/initContainers/0/command", "value":["cp", "-f","/usr/src/multus-cni/bin/multus-shim", "/host/opt/cni/bin/multus-shim"]}]'

kubectl create namespace container-registry

echo "## deploy internal registry"
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolume
metadata:
  name: registry-pv
spec:
  capacity:
    storage: 60Gi
  volumeMode: Filesystem
  accessModes:
  - ReadWriteOnce
  persistentVolumeReclaimPolicy: Delete
  storageClassName: registry-local-storage
  local:
    path: /mnt/
  nodeAffinity:
    required:
      nodeSelectorTerms:
      - matchExpressions:
        - key: kubernetes.io/hostname
          operator: In
          values:
          - ${cluster_name}-ctlplane-0.${domain_name}
EOF

cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: registry-pv-claim
  namespace: container-registry
spec:
  accessModes:
  - ReadWriteOnce
  volumeMode: Filesystem
  resources:
    requests:
      storage: 60Gi
  storageClassName: registry-local-storage
EOF

cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: registry
  namespace: container-registry
spec:
  replicas: 1
  selector:
    matchLabels:
      app: registry
  template:
    metadata:
      labels:
        app: registry
    spec:
      hostNetwork: true
      tolerations:
      - effect: NoSchedule
        key: node-role.kubernetes.io/control-plane
      containers:
      - image: quay.io/libpod/registry:2.8.2
        imagePullPolicy: Always
        name: registry
        volumeMounts:
        - name: data
          mountPath: /var/lib/registry
      volumes:
      - name: data
        persistentVolumeClaim:
          claimName: registry-pv-claim
      terminationGracePeriodSeconds: 10
EOF
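# Optional sanity check (a sketch, not part of the automated flow): before the
# image pushes below, the registry rollout and its HTTP API can be verified with
# standard commands, e.g.:
#   kubectl -n container-registry rollout status deploy/registry --timeout=120s
#   curl -s http://$controller_ip:5000/v2/_catalog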
export SRIOV_NETWORK_OPERATOR_IMAGE="$controller_ip:5000/sriov-network-operator:latest"
export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE="$controller_ip:5000/sriov-network-config-daemon:latest"
export SRIOV_NETWORK_WEBHOOK_IMAGE="$controller_ip:5000/sriov-network-operator-webhook:latest"

echo "## build operator image"
podman build -t "${SRIOV_NETWORK_OPERATOR_IMAGE}" -f "${root}/Dockerfile" "${root}"

echo "## build daemon image"
podman build -t "${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE}" -f "${root}/Dockerfile.sriov-network-config-daemon" "${root}"

echo "## build webhook image"
podman build -t "${SRIOV_NETWORK_WEBHOOK_IMAGE}" -f "${root}/Dockerfile.webhook" "${root}"

podman push --tls-verify=false "${SRIOV_NETWORK_OPERATOR_IMAGE}"
podman rmi -fi ${SRIOV_NETWORK_OPERATOR_IMAGE}
podman push --tls-verify=false "${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE}"
podman rmi -fi ${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE}
podman push --tls-verify=false "${SRIOV_NETWORK_WEBHOOK_IMAGE}"
podman rmi -fi ${SRIOV_NETWORK_WEBHOOK_IMAGE}

if [[ -v LOCAL_SRIOV_CNI_IMAGE ]]; then
  export SRIOV_CNI_IMAGE="$controller_ip:5000/sriov-cni:latest"
  podman_tag_and_push ${LOCAL_SRIOV_CNI_IMAGE} ${SRIOV_CNI_IMAGE}
fi

if [[ -v LOCAL_SRIOV_DEVICE_PLUGIN_IMAGE ]]; then
  export SRIOV_DEVICE_PLUGIN_IMAGE="$controller_ip:5000/sriov-network-device-plugin:latest"
  podman_tag_and_push ${LOCAL_SRIOV_DEVICE_PLUGIN_IMAGE} ${SRIOV_DEVICE_PLUGIN_IMAGE}
fi

if [[ -v LOCAL_NETWORK_RESOURCES_INJECTOR_IMAGE ]]; then
  export NETWORK_RESOURCES_INJECTOR_IMAGE="$controller_ip:5000/network-resources-injector:latest"
  podman_tag_and_push ${LOCAL_NETWORK_RESOURCES_INJECTOR_IMAGE} ${NETWORK_RESOURCES_INJECTOR_IMAGE}
fi

# remove the crio bridge and let flannel recreate it
kcli ssh $cluster_name-ctlplane-0 << EOF
sudo su
if [ $(ip a | grep 10.85.0 | wc -l) -eq 0 ]; then ip link del cni0; fi
EOF

kubectl -n ${MULTUS_NAMESPACE} get po | grep multus | awk '{print "kubectl -n kube-system delete po",$1}' | sh
kubectl -n kube-system get po | grep coredns | awk '{print "kubectl -n kube-system delete po",$1}' | sh

TIMEOUT=400
echo "## wait for coredns"
kubectl -n kube-system wait --for=condition=available deploy/coredns --timeout=${TIMEOUT}s
echo "## wait for multus"
kubectl -n ${MULTUS_NAMESPACE} wait --for=condition=ready -l name=multus pod --timeout=${TIMEOUT}s

echo "## deploy cert manager"
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.12.0/cert-manager.yaml

echo "## wait for cert manager to be ready"

ATTEMPTS=0
MAX_ATTEMPTS=72
ready=false
sleep_time=5

until $ready || [ $ATTEMPTS -eq $MAX_ATTEMPTS ]
do
  echo "waiting for cert manager webhook to be ready"
  if [ `kubectl -n cert-manager get po | grep webhook | grep "1/1" | wc -l` == 1 ]; then
    echo "cert manager webhook is ready"
    ready=true
  else
    echo "cert manager webhook is not ready yet"
    sleep $sleep_time
  fi
  ATTEMPTS=$((ATTEMPTS+1))
done
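# Note: an equivalent, non-polling gate (a sketch; the deployment name assumes
# the stock cert-manager v1.12.0 manifest applied above) would be:
#   kubectl -n cert-manager wait --for=condition=Available deploy/cert-manager-webhook --timeout=360s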
export ADMISSION_CONTROLLERS_ENABLED=true
export ADMISSION_CONTROLLERS_CERTIFICATES_CERT_MANAGER_ENABLED=true
export SKIP_VAR_SET=""
export NAMESPACE="sriov-network-operator"
export OPERATOR_NAMESPACE="sriov-network-operator"
export CNI_BIN_PATH=/opt/cni/bin
export OPERATOR_EXEC=kubectl
export CLUSTER_TYPE=kubernetes
export DEV_MODE=TRUE
export CLUSTER_HAS_EMULATED_PF=TRUE

echo "## deploy namespace"
envsubst < $root/deploy/namespace.yaml | ${OPERATOR_EXEC} apply -f -

echo "## create certificates for webhook"
cat <<EOF | kubectl apply -f -
---
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
  name: selfsigned-issuer
  namespace: ${NAMESPACE}
spec:
  selfSigned: {}
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: network-resources-injector-cert
  namespace: ${NAMESPACE}
spec:
  commonName: network-resources-injector-service.svc
  dnsNames:
  - network-resources-injector-service.${NAMESPACE}.svc.cluster.local
  - network-resources-injector-service.${NAMESPACE}.svc
  issuerRef:
    kind: Issuer
    name: selfsigned-issuer
  secretName: network-resources-injector-cert
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: operator-webhook-cert
  namespace: ${NAMESPACE}
spec:
  commonName: operator-webhook-service.svc
  dnsNames:
  - operator-webhook-service.${NAMESPACE}.svc.cluster.local
  - operator-webhook-service.${NAMESPACE}.svc
  issuerRef:
    kind: Issuer
    name: selfsigned-issuer
  secretName: operator-webhook-cert
EOF

echo "## apply CRDs"
kubectl apply -k $root/config/crd

echo "## deploying SRIOV Network Operator"
hack/deploy-setup.sh $NAMESPACE

echo "## wait for sriov operator to be ready"
hack/deploy-wait.sh

if [ -z "$SKIP_TEST" ]; then
  echo "## run sriov e2e conformance tests"

  if [[ -v TEST_REPORT_PATH ]]; then
    export JUNIT_OUTPUT="${root}/${TEST_REPORT_PATH}/conformance-test-report"
  fi

  # temporarily disable exit-on-error so cluster information can still be gathered when tests fail
  set +e
  SUITE=./test/conformance hack/run-e2e-conformance.sh
  TEST_EXIT_CODE=$?
  set -e

  if [[ -v TEST_REPORT_PATH ]]; then
    kubectl cluster-info dump --namespaces ${NAMESPACE},${MULTUS_NAMESPACE} --output-directory "${root}/${TEST_REPORT_PATH}/cluster-info"
  fi

  if [[ $TEST_EXIT_CODE -ne 0 ]]; then
    exit $TEST_EXIT_CODE
  fi
fi
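# Example invocation (for reference; all variables shown are optional overrides
# read by this script, and it expects to be run from the repository root):
#   CLUSTER_NAME=virtual NUM_OF_WORKERS=2 SKIP_DELETE=yes TEST_REPORT_PATH=artifacts \
#     ./hack/run-e2e-conformance-virtual-cluster.sh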