sigs.k8s.io/cluster-api-provider-azure@v1.14.3/scripts/ci-entrypoint.sh

#!/bin/bash

# Copyright 2020 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

###############################################################################

# To run locally, set AZURE_CLIENT_ID, AZURE_CLIENT_SECRET, AZURE_SUBSCRIPTION_ID, AZURE_TENANT_ID, and REGISTRY
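# Illustrative local invocation (all values below are placeholders, not real settings or credentials):
#   REGISTRY=myregistry.azurecr.io AZURE_CLIENT_ID=... AZURE_CLIENT_SECRET=... \
#     AZURE_SUBSCRIPTION_ID=... AZURE_TENANT_ID=... ./scripts/ci-entrypoint.sh
# Any arguments after the script name are executed against the cluster once it is up (see the end of this file).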

set -o errexit
set -o nounset
set -o pipefail

# Install kubectl, helm, kind, and kustomize
REPO_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
KUBECTL="${REPO_ROOT}/hack/tools/bin/kubectl"
HELM="${REPO_ROOT}/hack/tools/bin/helm"
KIND="${REPO_ROOT}/hack/tools/bin/kind"
KUSTOMIZE="${REPO_ROOT}/hack/tools/bin/kustomize"
make --directory="${REPO_ROOT}" "${KUBECTL##*/}" "${HELM##*/}" "${KIND##*/}" "${KUSTOMIZE##*/}"
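# Note: the make targets above build (or download) the pinned tool binaries into hack/tools/bin,
# so the script uses the repo's tool versions rather than whatever happens to be on the PATH.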
KIND_CLUSTER_NAME="${KIND_CLUSTER_NAME:-capz}"
export KIND_CLUSTER_NAME
# export the variables so they are available in the `bash -c wait_for_nodes` and `bash -c wait_for_pods` subshells below
export KUBECTL
export HELM

# shellcheck source=hack/ensure-go.sh
source "${REPO_ROOT}/hack/ensure-go.sh"
# shellcheck source=hack/ensure-tags.sh
source "${REPO_ROOT}/hack/ensure-tags.sh"
# shellcheck source=hack/parse-prow-creds.sh
source "${REPO_ROOT}/hack/parse-prow-creds.sh"
# shellcheck source=hack/util.sh
source "${REPO_ROOT}/hack/util.sh"

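# setup validates and derives the environment for the run: it enforces that only one of
# KUBERNETES_VERSION and CI_VERSION is set, requires REGISTRY, optionally sources the
# cloud-provider-azure and Kubernetes CI build scripts, selects a cluster template, and
# exports the cluster name, Azure locations, and machine counts consumed by the templates.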
setup() {
    if [[ -n "${KUBERNETES_VERSION:-}" ]] && [[ -n "${CI_VERSION:-}" ]]; then
        echo "You may not set both \$KUBERNETES_VERSION and \$CI_VERSION; use one or the other to configure the version/build of Kubernetes to use"
        exit 1
    fi
    # REGISTRY must be set so custom images can be used.
    : "${REGISTRY:?Environment variable empty or not defined.}"
    "${REPO_ROOT}/hack/ensure-acr-login.sh"
    if [[ "$(capz::util::should_build_ccm)" == "true" ]]; then
        # shellcheck source=scripts/ci-build-azure-ccm.sh
        source "${REPO_ROOT}/scripts/ci-build-azure-ccm.sh"
        echo "Will use the ${IMAGE_REGISTRY}/${CCM_IMAGE_NAME}:${IMAGE_TAG_CCM} cloud-controller-manager image for external cloud-provider-azure cluster"
        echo "Will use the ${IMAGE_REGISTRY}/${CNM_IMAGE_NAME}:${IMAGE_TAG_CNM} cloud-node-manager image for external cloud-provider-azure cluster"

        if [[ -n "${LOAD_CLOUD_CONFIG_FROM_SECRET:-}" ]]; then
            export CLOUD_CONFIG=""
            export CONFIG_SECRET_NAME="azure-cloud-provider"
            export ENABLE_DYNAMIC_RELOADING=true
            until copy_secret; do
                sleep 5
            done
        fi

        export CCM_LOG_VERBOSITY="${CCM_LOG_VERBOSITY:-4}"
        export CLOUD_PROVIDER_AZURE_LABEL="azure-ci"
    fi

    if [[ "$(capz::util::should_build_kubernetes)" == "true" ]]; then
        # shellcheck source=scripts/ci-build-kubernetes.sh
        source "${REPO_ROOT}/scripts/ci-build-kubernetes.sh"
    fi

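    # When KUBERNETES_VERSION contains "latest" (e.g. "latest" or "latest-1.30"), resolve it to a
    # concrete CI build via dl.k8s.io; the resolved value looks something like
    # v1.30.0-alpha.2.123+abcdef0123456789 (illustrative value only) and is used as CI_VERSION below.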
    if [[ "${KUBERNETES_VERSION:-}" =~ "latest" ]]; then
        CI_VERSION_URL="https://dl.k8s.io/ci/${KUBERNETES_VERSION}.txt"
        export CI_VERSION="${CI_VERSION:-$(curl --retry 3 -sSL "${CI_VERSION_URL}")}"
    fi
    if [[ -n "${CI_VERSION:-}" ]]; then
        echo "Using CI_VERSION ${CI_VERSION}"
        export KUBERNETES_VERSION="${CI_VERSION}"
    fi
    echo "Using KUBERNETES_VERSION ${KUBERNETES_VERSION:-}"

    if [[ -z "${CLUSTER_TEMPLATE:-}" ]]; then
        select_cluster_template
    fi
    echo "Using cluster template: ${CLUSTER_TEMPLATE}"

    export CLUSTER_NAME="${CLUSTER_NAME:-capz-$(
        head /dev/urandom | LC_ALL=C tr -dc a-z0-9 | head -c 6
        echo ''
    )}"
    export AZURE_RESOURCE_GROUP="${CLUSTER_NAME}"
    export AZURE_LOCATION="${AZURE_LOCATION:-$(capz::util::get_random_region)}"
    echo "Using AZURE_LOCATION: ${AZURE_LOCATION}"
    export AZURE_LOCATION_GPU="${AZURE_LOCATION_GPU:-$(capz::util::get_random_region_gpu)}"
    echo "Using AZURE_LOCATION_GPU: ${AZURE_LOCATION_GPU}"
    export AZURE_LOCATION_EDGEZONE="${AZURE_LOCATION_EDGEZONE:-$(capz::util::get_random_region_edgezone)}"
    echo "Using AZURE_LOCATION_EDGEZONE: ${AZURE_LOCATION_EDGEZONE}"
    # Need a cluster with at least 2 nodes
    export CONTROL_PLANE_MACHINE_COUNT="${CONTROL_PLANE_MACHINE_COUNT:-1}"
    export CCM_COUNT="${CCM_COUNT:-1}"
    export WORKER_MACHINE_COUNT="${WORKER_MACHINE_COUNT:-2}"
    export EXP_CLUSTER_RESOURCE_SET="true"

    # TODO figure out a better way to account for expected Windows node count
    if [[ -n "${TEST_WINDOWS:-}" ]]; then
        export WINDOWS_WORKER_MACHINE_COUNT="${WINDOWS_WORKER_MACHINE_COUNT:-2}"
    fi
}

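# select_cluster_template picks the flavor template to use: the custom-builds template when
# Kubernetes is built from source, the CI-version template when a CI build is requested, and
# the default prow template otherwise. Machine-pool variants are substituted in when
# EXP_MACHINE_POOL is "true".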
select_cluster_template() {
    if [[ "$(capz::util::should_build_kubernetes)" == "true" ]]; then
        export CLUSTER_TEMPLATE="test/dev/cluster-template-custom-builds.yaml"
    elif [[ -n "${CI_VERSION:-}" ]]; then
        # export the cluster template that contains the manifests needed to create an Azure cluster running a CI version build
        export CLUSTER_TEMPLATE="test/ci/cluster-template-prow-ci-version.yaml"
    else
        export CLUSTER_TEMPLATE="test/ci/cluster-template-prow.yaml"
    fi

    if [[ "${EXP_MACHINE_POOL:-}" == "true" ]]; then
        if [[ "${CLUSTER_TEMPLATE}" =~ "prow" ]]; then
            export CLUSTER_TEMPLATE="${CLUSTER_TEMPLATE/prow/prow-machine-pool}"
        elif [[ "${CLUSTER_TEMPLATE}" =~ "custom-builds" ]]; then
            export CLUSTER_TEMPLATE="${CLUSTER_TEMPLATE/custom-builds/custom-builds-machine-pool}"
        fi
    fi
}

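# create_cluster provisions the kind management cluster and the Azure workload cluster via
# hack/create-dev-cluster.sh, then exports the SSH bastion endpoint and user that tests can
# use to reach the nodes.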
create_cluster() {
    "${REPO_ROOT}/hack/create-dev-cluster.sh"
    if [ ! -f "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" ]; then
        echo "Unable to find kubeconfig for kind mgmt cluster ${KIND_CLUSTER_NAME}"
        exit 1
    fi

    # set the SSH bastion and user that can be used to SSH into nodes
    KUBE_SSH_BASTION="$("${KUBECTL}" get azurecluster -o json | jq -r '.items[0].spec.networkSpec.apiServerLB.frontendIPs[0].publicIP.dnsName'):22"
    export KUBE_SSH_BASTION
    KUBE_SSH_USER=capi
    export KUBE_SSH_USER
}

# copy_kubeadm_config_map copies the kubeadm configmap into the calico-system namespace.
# Any retryable operation in this function must return a non-zero exit code on failure so that we can
# retry it with an `until copy_kubeadm_config_map; do sleep 5; done` loop,
# and any statement must be idempotent so that subsequent retry attempts can make forward progress.
copy_kubeadm_config_map() {
    # Copy the kubeadm configmap to the calico-system namespace.
    # This is a workaround needed for the calico-node-windows daemonset
    # to be able to run in the calico-system namespace.
    # First, validate that the kubeadm-config configmap has been created.
    "${KUBECTL}" get configmap kubeadm-config --namespace=kube-system -o yaml || return 1
    "${KUBECTL}" create namespace calico-system --dry-run=client -o yaml | "${KUBECTL}" apply -f - || return 1
    if ! "${KUBECTL}" get configmap kubeadm-config --namespace=calico-system; then
        "${KUBECTL}" get configmap kubeadm-config --namespace=kube-system -o yaml | sed 's/namespace: kube-system/namespace: calico-system/' | "${KUBECTL}" apply -f - || return 1
    fi
}

# wait_for_nodes returns when all nodes in the workload cluster are Ready.
wait_for_nodes() {
    echo "Waiting for ${CONTROL_PLANE_MACHINE_COUNT} control plane machine(s), ${WORKER_MACHINE_COUNT} worker machine(s), and ${WINDOWS_WORKER_MACHINE_COUNT:-0} Windows machine(s) to become Ready"

    # Ensure that all nodes are registered with the API server before checking for readiness
    local total_nodes="$((CONTROL_PLANE_MACHINE_COUNT + WORKER_MACHINE_COUNT + WINDOWS_WORKER_MACHINE_COUNT))"
    while [[ $("${KUBECTL}" get nodes -ojson | jq '.items | length') -ne "${total_nodes}" ]]; do
        sleep 10
    done

    until "${KUBECTL}" wait --for=condition=Ready node --all --timeout=15m; do
        sleep 5
    done
    until "${KUBECTL}" get nodes -o wide; do
        sleep 5
    done
}

# wait_for_pods returns when every container of every pod on the workload cluster is Ready.
wait_for_pods() {
    echo "Waiting for all pod init containers scheduled in the cluster to be ready"
    while "${KUBECTL}" get pods --all-namespaces -o jsonpath="{.items[*].status.initContainerStatuses[*].ready}" | grep -q false; do
        echo "Not all pod init containers are Ready..."
        sleep 5
    done

    echo "Waiting for all pod containers scheduled in the cluster to be ready"
    while "${KUBECTL}" get pods --all-namespaces -o jsonpath="{.items[*].status.containerStatuses[*].ready}" | grep -q false; do
        echo "Not all pod containers are Ready..."
        sleep 5
    done
    until "${KUBECTL}" get pods --all-namespaces -o wide; do
        sleep 5
    done
}

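# install_addons waits for the CNI and cloud-provider-azure add-ons on the workload cluster to
# become operational: it copies the kubeadm-config ConfigMap needed by calico-node-windows and
# then blocks until every node and pod is Ready.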
install_addons() {
    # export the target cluster KUBECONFIG if not already set
    export KUBECONFIG="${KUBECONFIG:-${PWD}/kubeconfig}"
    until copy_kubeadm_config_map; do
        sleep 5
    done
    # To determine whether CNI and cloud-provider-azure rolled out successfully, we wait for
    # nodes and pods to reach a Ready state, blocking a successful return until the cluster
    # is fully operational.
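    # `export -f` makes the functions available to the `bash -c` child shells below, so that
    # `timeout` (1800 seconds, i.e. 30 minutes) can bound each wait independently.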
    export -f wait_for_nodes
    timeout --foreground 1800 bash -c wait_for_nodes
    export -f wait_for_pods
    timeout --foreground 1800 bash -c wait_for_pods
}

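# copy_secret copies the azure.json cloud config from the management cluster's
# "${CLUSTER_NAME}-control-plane-azure-json" secret into the "azure-cloud-provider" secret on
# the workload cluster so cloud-provider-azure can load its configuration from a secret.
# Each step returns a non-zero exit code on failure so the caller can retry the whole function.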
copy_secret() {
    # read the cloud config from the management cluster
    "${KUBECTL}" --kubeconfig "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" get secret "${CLUSTER_NAME}-control-plane-azure-json" -o jsonpath='{.data.control-plane-azure\.json}' | base64 --decode >azure_json || return 1

    # create the secret on the workload cluster
    "${KUBECTL}" create secret generic "${CONFIG_SECRET_NAME}" -n kube-system \
        --from-file=cloud-config=azure_json || return 1
    rm azure_json
}

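# capz::ci-entrypoint::on_exit dumps node and pod state, collects logs from the workload
# cluster, redacts sensitive values from the artifacts, and, unless SKIP_CLEANUP=true, deletes
# the workload cluster and resets the kind management cluster.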
capz::ci-entrypoint::on_exit() {
    if [[ -n ${KUBECONFIG:-} ]]; then
        "${KUBECTL}" get nodes -o wide || echo "Unable to get nodes"
        "${KUBECTL}" get pods -A -o wide || echo "Unable to get pods"
    fi
    # unset KUBECONFIG, which currently points at the workload cluster;
    # the remaining commands should target the management cluster (kind in this case)
    unset KUBECONFIG
    go run -tags e2e "${REPO_ROOT}"/test/logger.go --name "${CLUSTER_NAME}" --namespace default
    "${REPO_ROOT}/hack/log/redact.sh" || true
    # clean up all resources we used
    if [[ "${SKIP_CLEANUP:-}" != "true" ]]; then
        timeout 1800 "${KUBECTL}" --kubeconfig "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" delete cluster "${CLUSTER_NAME}" || echo "Unable to delete cluster ${CLUSTER_NAME}"
        make --directory="${REPO_ROOT}" kind-reset || true
    fi
}

# set up all required variables and images
setup

trap capz::ci-entrypoint::on_exit EXIT
export ARTIFACTS="${ARTIFACTS:-${PWD}/_artifacts}"

# create cluster
create_cluster

# install CNI and CCM
install_addons
echo "Cluster ${CLUSTER_NAME} created and fully operational"

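# If any arguments were passed to this script, run them as a command (e.g. a test suite)
# against the now-ready cluster and exit with that command's exit code; the EXIT trap above
# still handles log collection and cleanup.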
if [[ "${#}" -gt 0 ]]; then
    # disable error exit so we can run post-command cleanup
    set +o errexit
    "${@}"
    EXIT_VALUE="${?}"
    exit "${EXIT_VALUE}"
fi