sigs.k8s.io/cluster-api-provider-azure@v1.17.0/scripts/ci-entrypoint.sh

#!/bin/bash

# Copyright 2020 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

###############################################################################

# To run locally, set AZURE_CLIENT_ID, AZURE_SUBSCRIPTION_ID, AZURE_TENANT_ID
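#
# A minimal illustration of a local run (the values and the trailing command are
# placeholders, not defaults of this script); REGISTRY must also be set, and any
# arguments passed to the script are executed once the workload cluster is ready:
#
#   AZURE_CLIENT_ID=<client-id> AZURE_SUBSCRIPTION_ID=<subscription-id> AZURE_TENANT_ID=<tenant-id> \
#     REGISTRY=<registry> ./scripts/ci-entrypoint.sh kubectl get nodes -o wide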

set -o errexit
set -o nounset
set -o pipefail

# Install kubectl, helm, kind, and kustomize
REPO_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
KUBECTL="${REPO_ROOT}/hack/tools/bin/kubectl"
HELM="${REPO_ROOT}/hack/tools/bin/helm"
KIND="${REPO_ROOT}/hack/tools/bin/kind"
KUSTOMIZE="${REPO_ROOT}/hack/tools/bin/kustomize"
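# ${VAR##*/} strips the directory prefix, so make is invoked with just the tool
# names (kubectl, helm, kind, kustomize) as its build targets.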
make --directory="${REPO_ROOT}" "${KUBECTL##*/}" "${HELM##*/}" "${KIND##*/}" "${KUSTOMIZE##*/}"
KIND_CLUSTER_NAME="${KIND_CLUSTER_NAME:-capz}"
export KIND_CLUSTER_NAME
# export the variables so they are available in the `bash -c` subshells below (e.g. wait_for_nodes)
export KUBECTL
export HELM

# shellcheck source=hack/ensure-go.sh
source "${REPO_ROOT}/hack/ensure-go.sh"
# shellcheck source=hack/ensure-tags.sh
source "${REPO_ROOT}/hack/ensure-tags.sh"
# shellcheck source=hack/util.sh
source "${REPO_ROOT}/hack/util.sh"

setup() {
    if [[ -n "${KUBERNETES_VERSION:-}" ]] && [[ -n "${CI_VERSION:-}" ]]; then
        echo "You may not set both \$KUBERNETES_VERSION and \$CI_VERSION; use one or the other to configure the version/build of Kubernetes to use"
        exit 1
    fi
    # set up REGISTRY for custom images.
    : "${REGISTRY:?Environment variable empty or not defined.}"
    "${REPO_ROOT}/hack/ensure-acr-login.sh"
    if [[ "$(capz::util::should_build_ccm)" == "true" ]]; then
        # shellcheck source=scripts/ci-build-azure-ccm.sh
        source "${REPO_ROOT}/scripts/ci-build-azure-ccm.sh"
        echo "Will use the ${IMAGE_REGISTRY}/${CCM_IMAGE_NAME}:${IMAGE_TAG_CCM} cloud-controller-manager image for external cloud-provider-azure cluster"
        echo "Will use the ${IMAGE_REGISTRY}/${CNM_IMAGE_NAME}:${IMAGE_TAG_CNM} cloud-node-manager image for external cloud-provider-azure cluster"

        if [[ -n "${LOAD_CLOUD_CONFIG_FROM_SECRET:-}" ]]; then
            export CLOUD_CONFIG=""
            export CONFIG_SECRET_NAME="azure-cloud-provider"
            export ENABLE_DYNAMIC_RELOADING=true
            until copy_secret; do
                sleep 5
            done
        fi

        export CCM_LOG_VERBOSITY="${CCM_LOG_VERBOSITY:-4}"
        export CLOUD_PROVIDER_AZURE_LABEL="azure-ci"
    fi

    if [[ "$(capz::util::should_build_kubernetes)" == "true" ]]; then
        # shellcheck source=scripts/ci-build-kubernetes.sh
        source "${REPO_ROOT}/scripts/ci-build-kubernetes.sh"
    fi

    if [[ "${KUBERNETES_VERSION:-}" =~ "latest" ]]; then
        CI_VERSION_URL="https://dl.k8s.io/ci/${KUBERNETES_VERSION}.txt"
        export CI_VERSION="${CI_VERSION:-$(curl --retry 3 -sSL "${CI_VERSION_URL}")}"
    fi
    if [[ -n "${CI_VERSION:-}" ]]; then
        echo "Using CI_VERSION ${CI_VERSION}"
        export KUBERNETES_VERSION="${CI_VERSION}"
    fi
    echo "Using KUBERNETES_VERSION ${KUBERNETES_VERSION:-}"

    if [[ -z "${CLUSTER_TEMPLATE:-}" ]]; then
        select_cluster_template
    fi
    echo "Using cluster template: ${CLUSTER_TEMPLATE}"

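    # Derive a default cluster name with a random 6-character lowercase
    # alphanumeric suffix when CLUSTER_NAME is not provided by the environment.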
    export CLUSTER_NAME="${CLUSTER_NAME:-capz-$(
        head /dev/urandom | LC_ALL=C tr -dc a-z0-9 | head -c 6
        echo ''
    )}"
    export AZURE_RESOURCE_GROUP="${CLUSTER_NAME}"
    export AZURE_LOCATION="${AZURE_LOCATION:-$(capz::util::get_random_region)}"
    echo "Using AZURE_LOCATION: ${AZURE_LOCATION}"
    export AZURE_LOCATION_GPU="${AZURE_LOCATION_GPU:-$(capz::util::get_random_region_gpu)}"
    echo "Using AZURE_LOCATION_GPU: ${AZURE_LOCATION_GPU}"
    export AZURE_LOCATION_EDGEZONE="${AZURE_LOCATION_EDGEZONE:-$(capz::util::get_random_region_edgezone)}"
    echo "Using AZURE_LOCATION_EDGEZONE: ${AZURE_LOCATION_EDGEZONE}"
    # Need a cluster with at least 2 nodes
    export CONTROL_PLANE_MACHINE_COUNT="${CONTROL_PLANE_MACHINE_COUNT:-1}"
    export CCM_COUNT="${CCM_COUNT:-1}"
    export WORKER_MACHINE_COUNT="${WORKER_MACHINE_COUNT:-2}"
    export EXP_CLUSTER_RESOURCE_SET="true"

    # TODO figure out a better way to account for expected Windows node count
    if [[ -n "${TEST_WINDOWS:-}" ]]; then
        export WINDOWS_WORKER_MACHINE_COUNT="${WINDOWS_WORKER_MACHINE_COUNT:-2}"
    fi
}

select_cluster_template() {
    if [[ "$(capz::util::should_build_kubernetes)" == "true" ]]; then
        export CLUSTER_TEMPLATE="test/dev/cluster-template-custom-builds.yaml"
    elif [[ -n "${CI_VERSION:-}" ]]; then
        # export cluster template which contains the manifests needed for creating the Azure cluster to run the tests
        export CLUSTER_TEMPLATE="test/ci/cluster-template-prow-ci-version.yaml"
    else
        export CLUSTER_TEMPLATE="test/ci/cluster-template-prow.yaml"
    fi

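    # When MachinePools are enabled, swap in the machine-pool flavor of the
    # selected template via bash pattern substitution (${var/pattern/replacement}).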
    if [[ "${EXP_MACHINE_POOL:-}" == "true" ]]; then
        if [[ "${CLUSTER_TEMPLATE}" =~ "prow" ]]; then
            export CLUSTER_TEMPLATE="${CLUSTER_TEMPLATE/prow/prow-machine-pool}"
        elif [[ "${CLUSTER_TEMPLATE}" =~ "custom-builds" ]]; then
            export CLUSTER_TEMPLATE="${CLUSTER_TEMPLATE/custom-builds/custom-builds-machine-pool}"
        fi
    fi
}

create_cluster() {
    "${REPO_ROOT}/hack/create-dev-cluster.sh"
    if [ ! -f "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" ]; then
        echo "Unable to find kubeconfig for kind mgmt cluster ${KIND_CLUSTER_NAME}"
        exit 1
    fi
    "${KUBECTL}" --kubeconfig "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" get clusters -A

    # set the SSH bastion and user that can be used to SSH into nodes
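    # jq prints the API server load balancer's public DNS name quoted; tr strips
    # the quotes, and ":22" is appended outside the command substitution to form host:22.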
    KUBE_SSH_BASTION=$(${KUBECTL} get azurecluster -o json | jq '.items[0].spec.networkSpec.apiServerLB.frontendIPs[0].publicIP.dnsName' | tr -d \"):22
    export KUBE_SSH_BASTION
    KUBE_SSH_USER=capi
    export KUBE_SSH_USER
}

# copy_kubeadm_config_map copies the kubeadm ConfigMap into the calico-system namespace.
# Any retryable operation in this function must return a non-zero exit code on failure so that we can
# retry it using an `until copy_kubeadm_config_map; do sleep 5; done` pattern;
# and every statement must be idempotent so that subsequent retry attempts can make forward progress.
copy_kubeadm_config_map() {
    # Copy the kubeadm configmap to the calico-system namespace.
    # This is a workaround needed for the calico-node-windows daemonset
    # to be able to run in the calico-system namespace.
    # First, validate that the kubeadm-config configmap has been created.
    "${KUBECTL}" get configmap kubeadm-config --namespace=kube-system -o yaml || return 1
    "${KUBECTL}" create namespace calico-system --dry-run=client -o yaml | "${KUBECTL}" apply -f - || return 1
    if ! "${KUBECTL}" get configmap kubeadm-config --namespace=calico-system; then
        "${KUBECTL}" get configmap kubeadm-config --namespace=kube-system -o yaml | sed 's/namespace: kube-system/namespace: calico-system/' | "${KUBECTL}" apply -f - || return 1
    fi
}

wait_for_copy_kubeadm_config_map() {
    echo "Copying kubeadm ConfigMap into calico-system namespace"
    until copy_kubeadm_config_map; do
        sleep 5
    done
}

# wait_for_nodes returns when all nodes in the workload cluster are Ready.
wait_for_nodes() {
    echo "Waiting for ${CONTROL_PLANE_MACHINE_COUNT} control plane machine(s), ${WORKER_MACHINE_COUNT} worker machine(s), and ${WINDOWS_WORKER_MACHINE_COUNT:-0} Windows machine(s) to become Ready"

    # Ensure that all nodes are registered with the API server before checking for readiness
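    # Note: these wait_* functions run in `bash -c` child shells (see install_addons),
    # which do not inherit `set -o nounset`, so an unset WINDOWS_WORKER_MACHINE_COUNT
    # simply evaluates to 0 in the arithmetic below.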
    local total_nodes="$((CONTROL_PLANE_MACHINE_COUNT + WORKER_MACHINE_COUNT + WINDOWS_WORKER_MACHINE_COUNT))"
    while [[ $("${KUBECTL}" get nodes -ojson | jq '.items | length') -ne "${total_nodes}" ]]; do
        sleep 10
    done

    until "${KUBECTL}" wait --for=condition=Ready node --all --timeout=15m; do
        sleep 5
    done
    until "${KUBECTL}" get nodes -o wide; do
        sleep 5
    done
}

# wait_for_pods returns when all pod containers on the workload cluster are Ready.
wait_for_pods() {
    echo "Waiting for all pod init containers scheduled in the cluster to be ready"
    while "${KUBECTL}" get pods --all-namespaces -o jsonpath="{.items[*].status.initContainerStatuses[*].ready}" | grep -q false; do
        echo "Not all pod init containers are Ready..."
        sleep 5
    done

    echo "Waiting for all pod containers scheduled in the cluster to be ready"
    while "${KUBECTL}" get pods --all-namespaces -o jsonpath="{.items[*].status.containerStatuses[*].ready}" | grep -q false; do
        echo "Not all pod containers are Ready..."
        sleep 5
    done
    until "${KUBECTL}" get pods --all-namespaces -o wide; do
        sleep 5
    done
}

install_addons() {
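    # export -f makes these shell functions visible to the `bash -c` child
    # processes spawned by timeout; --foreground allows the command to receive
    # TTY signals (e.g. Ctrl-C) when the script is run interactively.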
    export -f copy_kubeadm_config_map wait_for_copy_kubeadm_config_map
    timeout --foreground 600 bash -c wait_for_copy_kubeadm_config_map
    # To judge whether CNI and cloud-provider-azure were installed successfully,
    # wait for the nodes and pods to reach a steady state, blocking a successful
    # return until the cluster is fully operational.
    export -f wait_for_nodes
    timeout --foreground 1800 bash -c wait_for_nodes
    export -f wait_for_pods
    timeout --foreground 1800 bash -c wait_for_pods
}

copy_secret() {
    # point at the management cluster
    "${KUBECTL}" --kubeconfig "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" get secret "${CLUSTER_NAME}-control-plane-azure-json" -o jsonpath='{.data.control-plane-azure\.json}' | base64 --decode >azure_json || return 1

    # create the secret on the workload cluster
    "${KUBECTL}" create secret generic "${CONFIG_SECRET_NAME}" -n kube-system \
        --from-file=cloud-config=azure_json || return 1
    rm azure_json
}

capz::ci-entrypoint::on_exit() {
    if [[ -n ${KUBECONFIG:-} ]]; then
        "${KUBECTL}" get nodes -o wide || echo "Unable to get nodes"
        "${KUBECTL}" get pods -A -o wide || echo "Unable to get pods"
    fi
    # Unset KUBECONFIG, which currently points at the workload cluster;
    # from here on we want to point at the management cluster (kind in this case).
    unset KUBECONFIG
    go run -tags e2e "${REPO_ROOT}"/test/logger.go --name "${CLUSTER_NAME}" --namespace default
    "${REPO_ROOT}/hack/log/redact.sh" || true
    # clean up all resources we use
    if [[ ! "${SKIP_CLEANUP:-}" == "true" ]]; then
        timeout 1800 "${KUBECTL}" --kubeconfig "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" delete cluster "${CLUSTER_NAME}" -n default || echo "Unable to delete cluster ${CLUSTER_NAME}"
        make --directory="${REPO_ROOT}" kind-reset || true
    fi
}
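
# For example, to keep the kind management cluster and the Azure resources around
# for debugging (illustrative invocation, not a default of this script):
#   SKIP_CLEANUP=true ./scripts/ci-entrypoint.sh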

# set up all required variables and images
setup

trap capz::ci-entrypoint::on_exit EXIT
export ARTIFACTS="${ARTIFACTS:-${PWD}/_artifacts}"

# create cluster
create_cluster

# export the target cluster KUBECONFIG if not already set
export KUBECONFIG="${KUBECONFIG:-${PWD}/kubeconfig}"

if [[ ! "${CLUSTER_TEMPLATE}" =~ "aks" ]]; then
    # install CNI and CCM
    install_addons
fi

"${KUBECTL}" --kubeconfig "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" wait -A --for=condition=Ready --timeout=10m -l "cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}" machinedeployments,machinepools

echo "Cluster ${CLUSTER_NAME} created and fully operational"

if [[ "${#}" -gt 0 ]]; then
    # disable error exit so we can run post-command cleanup
    set +o errexit
    "${@}"
    EXIT_VALUE="${?}"
    exit ${EXIT_VALUE}
fi