github.com/k8snetworkplumbingwg/sriov-network-operator@v1.2.1-0.20240408194816-2d2e5a45d453/hack/run-e2e-conformance-virtual-cluster.sh

#!/usr/bin/env bash
set -xeo pipefail
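# Provision a kcli/libvirt virtual cluster, build and push the operator
# images to an in-cluster registry, deploy the operator, and run the e2e
# conformance suite. The environment variables referenced below are optional
# overrides; an illustrative invocation:
#
#   CLUSTER_NAME=virtual NUM_OF_WORKERS=2 SKIP_DELETE=yes \
#     ./hack/run-e2e-conformance-virtual-cluster.sh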

cluster_name=${CLUSTER_NAME:-virtual}
domain_name=$cluster_name.lab

api_ip=${API_IP:-192.168.124.250}
virtual_router_id=${VIRTUAL_ROUTER_ID:-250}
HOME="/root"

here="$(dirname "$(readlink --canonicalize "${BASH_SOURCE[0]}")")"
root="$(readlink --canonicalize "$here/..")"

NUM_OF_WORKERS=${NUM_OF_WORKERS:-2}
total_number_of_nodes=$((1 + NUM_OF_WORKERS))

if [ "$NUM_OF_WORKERS" -lt 2 ]; then
    echo "NUM_OF_WORKERS must be at least 2"
    exit 1
fi

export MULTUS_NAMESPACE="kube-system"

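# shared helpers (e.g. podman_tag_and_push, used further down) are expected
# to be provided by this sourced file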
source "$here/run-e2e-conformance-common"

check_requirements() {
  for cmd in kcli virsh virt-edit podman make go; do
    if ! command -v "$cmd" &> /dev/null; then
      echo "$cmd is not available"
      exit 1
    fi
  done
  return 0
}

echo "## checking requirements"
check_requirements
echo "## delete existing cluster $cluster_name"
kcli delete cluster $cluster_name -y
kcli delete network $cluster_name -y

function cleanup {
  kcli delete cluster $cluster_name -y
  kcli delete network $cluster_name -y
}

if [ -z "${SKIP_DELETE:-}" ]; then
  trap cleanup EXIT
fi

kcli create network -c 192.168.124.0/24 k8s
kcli create network -c 192.168.${virtual_router_id}.0/24 --nodhcp -i $cluster_name

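# The plan below gives every worker two emulated igb NICs (one per libvirt
# network) with vfio enabled, pinned to different NUMA nodes so NUMA-aware
# VF allocation can be exercised.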
cat <<EOF > ./${cluster_name}-plan.yaml
ctlplane_memory: 4096
worker_memory: 4096
pool: default
disk_size: 50
network: k8s
api_ip: $api_ip
virtual_router_id: $virtual_router_id
domain: $domain_name
ctlplanes: 1
workers: $NUM_OF_WORKERS
ingress: false
machine: q35
engine: crio
sdn: flannel
autolabeller: false
vmrules:
  - $cluster_name-worker-.*:
      nets:
        - name: k8s
          type: igb
          vfio: true
          noconf: true
          numa: 0
        - name: $cluster_name
          type: igb
          vfio: true
          noconf: true
          numa: 1
      numcpus: 6
      numa:
        - id: 0
          vcpus: 0,2,4
          memory: 2048
        - id: 1
          vcpus: 1,3,5
          memory: 2048
EOF

kcli create cluster generic --paramfile ./${cluster_name}-plan.yaml $cluster_name

export KUBECONFIG=$HOME/.kcli/clusters/$cluster_name/auth/kubeconfig
export PATH=$PWD:$PATH

ATTEMPTS=0
MAX_ATTEMPTS=72
ready=false
sleep_time=10

until $ready || [ $ATTEMPTS -eq $MAX_ATTEMPTS ]
do
    echo "waiting for cluster to be ready"
    if [ "$(kubectl get node | grep -cw Ready)" -eq "$total_number_of_nodes" ]; then
        echo "cluster is ready"
        ready=true
    else
        echo "cluster is not ready yet"
        sleep $sleep_time
    fi
    ATTEMPTS=$((ATTEMPTS+1))
done

if ! $ready; then
    echo "Timed out waiting for cluster to be ready"
    kubectl get nodes
    exit 1
fi

function update_worker_labels() {
  echo "## label cluster workers as sriov capable"
  for ((num=0; num<NUM_OF_WORKERS; num++))
  do
    kubectl label node $cluster_name-worker-$num.$domain_name feature.node.kubernetes.io/network-sriov.capable=true --overwrite
  done

  echo "## label cluster workers with the worker role"
  for ((num=0; num<NUM_OF_WORKERS; num++))
  do
    kubectl label node $cluster_name-worker-$num.$domain_name node-role.kubernetes.io/worker= --overwrite
  done
}

update_worker_labels

controller_ip=$(kubectl get node -o wide | grep ctlp | awk '{print $6}')
insecure_registry="[[registry]]
location = \"$controller_ip:5000\"
insecure = true

[aliases]
\"golang\" = \"docker.io/library/golang\"
"

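# trust the in-cluster registry (plain HTTP on port 5000) on the hypervisor
# itself so the podman builds below can push to it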
cat << EOF > /etc/containers/registries.conf.d/003-${cluster_name}.conf
$insecure_registry
EOF

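# update_host prepares a cluster node: trust the insecure internal registry,
# tell NetworkManager to leave igbvf VF interfaces unconfigured, and install
# a oneshot service that disables checksum/GSO offload on eth1 to work
# around an emulated igb VF bug.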
function update_host() {
    node_name=$1
    kcli ssh $node_name << EOF
sudo su
echo '$insecure_registry' > /etc/containers/registries.conf.d/003-internal.conf
systemctl restart crio

echo '[connection]
id=multi
type=ethernet
[ethernet]
[match]
driver=igbvf;
[ipv4]
method=disabled
[ipv6]
addr-gen-mode=default
method=disabled
[proxy]' > /etc/NetworkManager/system-connections/multi.nmconnection

chmod 600 /etc/NetworkManager/system-connections/multi.nmconnection

echo '[Unit]
Description=Disable checksum offload to avoid VF bug
After=network.target

[Service]
Type=oneshot
ExecStart=/usr/bin/bash -c "ethtool --offload eth1 rx off tx off && ethtool -K eth1 gso off"
StandardOutput=journal+console
StandardError=journal+console

[Install]
WantedBy=default.target' > /etc/systemd/system/disable-offload.service

systemctl daemon-reload
systemctl enable --now disable-offload

systemctl restart NetworkManager
EOF
}

update_host $cluster_name-ctlplane-0
for ((num=0; num<NUM_OF_WORKERS; num++))
do
  update_host $cluster_name-worker-$num
done

# remove the patch after multus bug is fixed
# https://github.com/k8snetworkplumbingwg/multus-cni/issues/1221
kubectl patch -n ${MULTUS_NAMESPACE} ds/kube-multus-ds --type=json -p='[{"op": "replace", "path": "/spec/template/spec/initContainers/0/command", "value":["cp", "-f","/usr/src/multus-cni/bin/multus-shim", "/host/opt/cni/bin/multus-shim"]}]'

kubectl create namespace container-registry

echo "## deploy internal registry"
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolume
metadata:
  name: registry-pv
spec:
  capacity:
    storage: 60Gi
  volumeMode: Filesystem
  accessModes:
  - ReadWriteOnce
  persistentVolumeReclaimPolicy: Delete
  storageClassName: registry-local-storage
  local:
    path: /mnt/
  nodeAffinity:
    required:
      nodeSelectorTerms:
      - matchExpressions:
        - key: kubernetes.io/hostname
          operator: In
          values:
          - ${cluster_name}-ctlplane-0.${domain_name}
EOF

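# The claim below binds to the PV above through the shared storageClassName,
# backing the registry with /mnt on the control-plane node.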
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: registry-pv-claim
  namespace: container-registry
spec:
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  resources:
    requests:
      storage: 60Gi
  storageClassName: registry-local-storage
EOF

cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: registry
  namespace: container-registry
spec:
  replicas: 1
  selector:
    matchLabels:
      app: registry
  template:
    metadata:
      labels:
        app: registry
    spec:
      hostNetwork: true
      tolerations:
        - effect: NoSchedule
          key: node-role.kubernetes.io/control-plane
      containers:
      - image: quay.io/libpod/registry:2.8.2
        imagePullPolicy: Always
        name: registry
        volumeMounts:
        - name: data
          mountPath: /var/lib/registry
      volumes:
      - name: data
        persistentVolumeClaim:
          claimName: registry-pv-claim
      terminationGracePeriodSeconds: 10
EOF

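# The registry pod uses hostNetwork on the control-plane node, so it is
# reachable at $controller_ip:5000 from both the hypervisor and the nodes.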
export SRIOV_NETWORK_OPERATOR_IMAGE="$controller_ip:5000/sriov-network-operator:latest"
export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE="$controller_ip:5000/sriov-network-config-daemon:latest"
export SRIOV_NETWORK_WEBHOOK_IMAGE="$controller_ip:5000/sriov-network-operator-webhook:latest"

echo "## build operator image"
podman build -t "${SRIOV_NETWORK_OPERATOR_IMAGE}" -f "${root}/Dockerfile" "${root}"

echo "## build daemon image"
podman build -t "${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE}" -f "${root}/Dockerfile.sriov-network-config-daemon" "${root}"

echo "## build webhook image"
podman build -t "${SRIOV_NETWORK_WEBHOOK_IMAGE}" -f "${root}/Dockerfile.webhook" "${root}"

podman push --tls-verify=false "${SRIOV_NETWORK_OPERATOR_IMAGE}"
podman rmi -fi "${SRIOV_NETWORK_OPERATOR_IMAGE}"
podman push --tls-verify=false "${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE}"
podman rmi -fi "${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE}"
podman push --tls-verify=false "${SRIOV_NETWORK_WEBHOOK_IMAGE}"
podman rmi -fi "${SRIOV_NETWORK_WEBHOOK_IMAGE}"

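# Optionally mirror locally built images of the companion components into the
# internal registry, e.g. (the local tag here is hypothetical):
#   LOCAL_SRIOV_CNI_IMAGE=localhost/sriov-cni:dev ./hack/run-e2e-conformance-virtual-cluster.sh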
if [[ -v LOCAL_SRIOV_CNI_IMAGE ]]; then
  export SRIOV_CNI_IMAGE="$controller_ip:5000/sriov-cni:latest"
  podman_tag_and_push "${LOCAL_SRIOV_CNI_IMAGE}" "${SRIOV_CNI_IMAGE}"
fi

if [[ -v LOCAL_SRIOV_DEVICE_PLUGIN_IMAGE ]]; then
  export SRIOV_DEVICE_PLUGIN_IMAGE="$controller_ip:5000/sriov-network-device-plugin:latest"
  podman_tag_and_push "${LOCAL_SRIOV_DEVICE_PLUGIN_IMAGE}" "${SRIOV_DEVICE_PLUGIN_IMAGE}"
fi

if [[ -v LOCAL_NETWORK_RESOURCES_INJECTOR_IMAGE ]]; then
  export NETWORK_RESOURCES_INJECTOR_IMAGE="$controller_ip:5000/network-resources-injector:latest"
  podman_tag_and_push "${LOCAL_NETWORK_RESOURCES_INJECTOR_IMAGE}" "${NETWORK_RESOURCES_INJECTOR_IMAGE}"
fi

# remove the crio bridge and let flannel recreate it
kcli ssh $cluster_name-ctlplane-0 << EOF
sudo su
if [ $(ip a | grep 10.85.0 | wc -l) -eq 0 ]; then ip link del cni0; fi
EOF

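# recreate the multus and coredns pods so they come up on the
# flannel-managed network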
kubectl -n ${MULTUS_NAMESPACE} get po | grep multus | awk '{print $1}' | xargs -r kubectl -n ${MULTUS_NAMESPACE} delete po
kubectl -n kube-system get po | grep coredns | awk '{print $1}' | xargs -r kubectl -n kube-system delete po

TIMEOUT=400
echo "## wait for coredns"
kubectl -n kube-system wait --for=condition=available deploy/coredns --timeout=${TIMEOUT}s
echo "## wait for multus"
kubectl -n ${MULTUS_NAMESPACE} wait --for=condition=ready -l name=multus pod --timeout=${TIMEOUT}s

echo "## deploy cert manager"
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.12.0/cert-manager.yaml

echo "## wait for cert manager to be ready"

ATTEMPTS=0
MAX_ATTEMPTS=72
ready=false
sleep_time=5

until $ready || [ $ATTEMPTS -eq $MAX_ATTEMPTS ]
do
    echo "waiting for cert manager webhook to be ready"
    if [ "$(kubectl -n cert-manager get po | grep webhook | grep -c "1/1")" -eq 1 ]; then
        echo "cert manager webhook is ready"
        ready=true
    else
        echo "cert manager webhook is not ready yet"
        sleep $sleep_time
    fi
    ATTEMPTS=$((ATTEMPTS+1))
done

if ! $ready; then
    echo "Timed out waiting for cert manager webhook to be ready"
    kubectl -n cert-manager get po
    exit 1
fi

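# These variables are consumed when templating the operator manifests below:
# admission webhooks on, certificates issued by cert-manager, and emulated
# PFs accepted since the igb NICs are virtual.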
export ADMISSION_CONTROLLERS_ENABLED=true
export ADMISSION_CONTROLLERS_CERTIFICATES_CERT_MANAGER_ENABLED=true
export SKIP_VAR_SET=""
export NAMESPACE="sriov-network-operator"
export OPERATOR_NAMESPACE="sriov-network-operator"
export CNI_BIN_PATH=/opt/cni/bin
export OPERATOR_EXEC=kubectl
export CLUSTER_TYPE=kubernetes
export DEV_MODE=TRUE
export CLUSTER_HAS_EMULATED_PF=TRUE

echo "## deploy namespace"
envsubst < "$root/deploy/namespace.yaml" | ${OPERATOR_EXEC} apply -f -

echo "## create certificates for webhook"
cat <<EOF | kubectl apply -f -
---
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
  name: selfsigned-issuer
  namespace: ${NAMESPACE}
spec:
  selfSigned: {}
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: network-resources-injector-cert
  namespace: ${NAMESPACE}
spec:
  commonName: network-resources-injector-service.svc
  dnsNames:
  - network-resources-injector-service.${NAMESPACE}.svc.cluster.local
  - network-resources-injector-service.${NAMESPACE}.svc
  issuerRef:
    kind: Issuer
    name: selfsigned-issuer
  secretName: network-resources-injector-cert
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: operator-webhook-cert
  namespace: ${NAMESPACE}
spec:
  commonName: operator-webhook-service.svc
  dnsNames:
  - operator-webhook-service.${NAMESPACE}.svc.cluster.local
  - operator-webhook-service.${NAMESPACE}.svc
  issuerRef:
    kind: Issuer
    name: selfsigned-issuer
  secretName: operator-webhook-cert
EOF

echo "## apply CRDs"
kubectl apply -k $root/config/crd

echo "## deploy SRIOV Network Operator"
hack/deploy-setup.sh $NAMESPACE

echo "## wait for sriov operator to be ready"
hack/deploy-wait.sh

if [ -z "${SKIP_TEST:-}" ]; then
  echo "## run sriov e2e conformance tests"

  if [[ -v TEST_REPORT_PATH ]]; then
    export JUNIT_OUTPUT="${root}/${TEST_REPORT_PATH}/conformance-test-report"
  fi

  # Disable exit on error temporarily to gather cluster information
  set +e
  SUITE=./test/conformance hack/run-e2e-conformance.sh
  TEST_EXIT_CODE=$?
  set -e

  if [[ -v TEST_REPORT_PATH ]]; then
    kubectl cluster-info dump --namespaces ${NAMESPACE},${MULTUS_NAMESPACE} --output-directory "${root}/${TEST_REPORT_PATH}/cluster-info"
  fi

  if [[ $TEST_EXIT_CODE -ne 0 ]]; then
    exit $TEST_EXIT_CODE
  fi
fi