sigs.k8s.io/cluster-api-provider-azure@v1.17.0/scripts/ci-entrypoint.sh (about) 1 #!/bin/bash 2 3 # Copyright 2020 The Kubernetes Authors. 4 # 5 # Licensed under the Apache License, Version 2.0 (the "License"); 6 # you may not use this file except in compliance with the License. 7 # You may obtain a copy of the License at 8 # 9 # http://www.apache.org/licenses/LICENSE-2.0 10 # 11 # Unless required by applicable law or agreed to in writing, software 12 # distributed under the License is distributed on an "AS IS" BASIS, 13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 # See the License for the specific language governing permissions and 15 # limitations under the License. 16 17 ############################################################################### 18 19 # To run locally, set AZURE_CLIENT_ID, AZURE_SUBSCRIPTION_ID, AZURE_TENANT_ID 20 21 set -o errexit 22 set -o nounset 23 set -o pipefail 24 25 # Install kubectl, helm and kustomize 26 REPO_ROOT=$(dirname "${BASH_SOURCE[0]}")/.. 27 KUBECTL="${REPO_ROOT}/hack/tools/bin/kubectl" 28 HELM="${REPO_ROOT}/hack/tools/bin/helm" 29 KIND="${REPO_ROOT}/hack/tools/bin/kind" 30 KUSTOMIZE="${REPO_ROOT}/hack/tools/bin/kustomize" 31 make --directory="${REPO_ROOT}" "${KUBECTL##*/}" "${HELM##*/}" "${KIND##*/}" "${KUSTOMIZE##*/}" 32 KIND_CLUSTER_NAME="${KIND_CLUSTER_NAME:-capz}" 33 export KIND_CLUSTER_NAME 34 # export the variables so they are available in bash -c wait_for_nodes below 35 export KUBECTL 36 export HELM 37 38 # shellcheck source=hack/ensure-go.sh 39 source "${REPO_ROOT}/hack/ensure-go.sh" 40 # shellcheck source=hack/ensure-tags.sh 41 source "${REPO_ROOT}/hack/ensure-tags.sh" 42 # shellcheck source=hack/util.sh 43 source "${REPO_ROOT}/hack/util.sh" 44 45 setup() { 46 if [[ -n "${KUBERNETES_VERSION:-}" ]] && [[ -n "${CI_VERSION:-}" ]]; then 47 echo "You may not set both \$KUBERNETES_VERSION and \$CI_VERSION, use one or the other to configure the version/build of Kubernetes to use" 48 exit 1 49 fi 50 # setup REGISTRY for custom images. 51 : "${REGISTRY:?Environment variable empty or not defined.}" 52 "${REPO_ROOT}/hack/ensure-acr-login.sh" 53 if [[ "$(capz::util::should_build_ccm)" == "true" ]]; then 54 # shellcheck source=scripts/ci-build-azure-ccm.sh 55 source "${REPO_ROOT}/scripts/ci-build-azure-ccm.sh" 56 echo "Will use the ${IMAGE_REGISTRY}/${CCM_IMAGE_NAME}:${IMAGE_TAG_CCM} cloud-controller-manager image for external cloud-provider-cluster" 57 echo "Will use the ${IMAGE_REGISTRY}/${CNM_IMAGE_NAME}:${IMAGE_TAG_CNM} cloud-node-manager image for external cloud-provider-azure cluster" 58 59 if [[ -n "${LOAD_CLOUD_CONFIG_FROM_SECRET:-}" ]]; then 60 export CLOUD_CONFIG="" 61 export CONFIG_SECRET_NAME="azure-cloud-provider" 62 export ENABLE_DYNAMIC_RELOADING=true 63 until copy_secret; do 64 sleep 5 65 done 66 fi 67 68 export CCM_LOG_VERBOSITY="${CCM_LOG_VERBOSITY:-4}" 69 export CLOUD_PROVIDER_AZURE_LABEL="azure-ci" 70 fi 71 72 if [[ "$(capz::util::should_build_kubernetes)" == "true" ]]; then 73 # shellcheck source=scripts/ci-build-kubernetes.sh 74 source "${REPO_ROOT}/scripts/ci-build-kubernetes.sh" 75 fi 76 77 if [[ "${KUBERNETES_VERSION:-}" =~ "latest" ]]; then 78 CI_VERSION_URL="https://dl.k8s.io/ci/${KUBERNETES_VERSION}.txt" 79 export CI_VERSION="${CI_VERSION:-$(curl --retry 3 -sSL "${CI_VERSION_URL}")}" 80 fi 81 if [[ -n "${CI_VERSION:-}" ]]; then 82 echo "Using CI_VERSION ${CI_VERSION}" 83 export KUBERNETES_VERSION="${CI_VERSION}" 84 fi 85 echo "Using KUBERNETES_VERSION ${KUBERNETES_VERSION:-}" 86 87 if [[ -z "${CLUSTER_TEMPLATE:-}" ]]; then 88 select_cluster_template 89 fi 90 echo "Using cluster template: ${CLUSTER_TEMPLATE}" 91 92 export CLUSTER_NAME="${CLUSTER_NAME:-capz-$( 93 head /dev/urandom | LC_ALL=C tr -dc a-z0-9 | head -c 6 94 echo '' 95 )}" 96 export AZURE_RESOURCE_GROUP="${CLUSTER_NAME}" 97 export AZURE_LOCATION="${AZURE_LOCATION:-$(capz::util::get_random_region)}" 98 echo "Using AZURE_LOCATION: ${AZURE_LOCATION}" 99 export AZURE_LOCATION_GPU="${AZURE_LOCATION_GPU:-$(capz::util::get_random_region_gpu)}" 100 echo "Using AZURE_LOCATION_GPU: ${AZURE_LOCATION_GPU}" 101 export AZURE_LOCATION_EDGEZONE="${AZURE_LOCATION_EDGEZONE:-$(capz::util::get_random_region_edgezone)}" 102 echo "Using AZURE_LOCATION_EDGEZONE: ${AZURE_LOCATION_EDGEZONE}" 103 # Need a cluster with at least 2 nodes 104 export CONTROL_PLANE_MACHINE_COUNT="${CONTROL_PLANE_MACHINE_COUNT:-1}" 105 export CCM_COUNT="${CCM_COUNT:-1}" 106 export WORKER_MACHINE_COUNT="${WORKER_MACHINE_COUNT:-2}" 107 export EXP_CLUSTER_RESOURCE_SET="true" 108 109 # TODO figure out a better way to account for expected Windows node count 110 if [[ -n "${TEST_WINDOWS:-}" ]]; then 111 export WINDOWS_WORKER_MACHINE_COUNT="${WINDOWS_WORKER_MACHINE_COUNT:-2}" 112 fi 113 } 114 115 select_cluster_template() { 116 if [[ "$(capz::util::should_build_kubernetes)" == "true" ]]; then 117 export CLUSTER_TEMPLATE="test/dev/cluster-template-custom-builds.yaml" 118 elif [[ -n "${CI_VERSION:-}" ]]; then 119 # export cluster template which contains the manifests needed for creating the Azure cluster to run the tests 120 export CLUSTER_TEMPLATE="test/ci/cluster-template-prow-ci-version.yaml" 121 else 122 export CLUSTER_TEMPLATE="test/ci/cluster-template-prow.yaml" 123 fi 124 125 if [[ "${EXP_MACHINE_POOL:-}" == "true" ]]; then 126 if [[ "${CLUSTER_TEMPLATE}" =~ "prow" ]]; then 127 export CLUSTER_TEMPLATE="${CLUSTER_TEMPLATE/prow/prow-machine-pool}" 128 elif [[ "${CLUSTER_TEMPLATE}" =~ "custom-builds" ]]; then 129 export CLUSTER_TEMPLATE="${CLUSTER_TEMPLATE/custom-builds/custom-builds-machine-pool}" 130 fi 131 fi 132 } 133 134 create_cluster() { 135 "${REPO_ROOT}/hack/create-dev-cluster.sh" 136 if [ ! -f "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" ]; then 137 echo "Unable to find kubeconfig for kind mgmt cluster ${KIND_CLUSTER_NAME}" 138 exit 1 139 fi 140 "${KUBECTL}" --kubeconfig "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" get clusters -A 141 142 # set the SSH bastion and user that can be used to SSH into nodes 143 KUBE_SSH_BASTION=$(${KUBECTL} get azurecluster -o json | jq '.items[0].spec.networkSpec.apiServerLB.frontendIPs[0].publicIP.dnsName' | tr -d \"):22 144 export KUBE_SSH_BASTION 145 KUBE_SSH_USER=capi 146 export KUBE_SSH_USER 147 } 148 149 # copy_kubeadm_config_map copies the kubeadm configmap into the calico-system namespace. 150 # any retryable operation in this function must return a non-zero exit code on failure so that we can 151 # retry it using a `until copy_kubeadm_config_map; do sleep 5; done` pattern; 152 # and any statement must be idempotent so that subsequent retry attempts can make forward progress. 153 copy_kubeadm_config_map() { 154 # Copy the kubeadm configmap to the calico-system namespace. 155 # This is a workaround needed for the calico-node-windows daemonset 156 # to be able to run in the calico-system namespace. 157 # First, validate that the kubeadm-config configmap has been created. 158 "${KUBECTL}" get configmap kubeadm-config --namespace=kube-system -o yaml || return 1 159 "${KUBECTL}" create namespace calico-system --dry-run=client -o yaml | kubectl apply -f - || return 1 160 if ! "${KUBECTL}" get configmap kubeadm-config --namespace=calico-system; then 161 "${KUBECTL}" get configmap kubeadm-config --namespace=kube-system -o yaml | sed 's/namespace: kube-system/namespace: calico-system/' | "${KUBECTL}" apply -f - || return 1 162 fi 163 } 164 165 wait_for_copy_kubeadm_config_map() { 166 echo "Copying kubeadm ConfigMap into calico-system namespace" 167 until copy_kubeadm_config_map; do 168 sleep 5 169 done 170 } 171 172 # wait_for_nodes returns when all nodes in the workload cluster are Ready. 173 wait_for_nodes() { 174 echo "Waiting for ${CONTROL_PLANE_MACHINE_COUNT} control plane machine(s), ${WORKER_MACHINE_COUNT} worker machine(s), and ${WINDOWS_WORKER_MACHINE_COUNT:-0} windows machine(s) to become Ready" 175 176 # Ensure that all nodes are registered with the API server before checking for readiness 177 local total_nodes="$((CONTROL_PLANE_MACHINE_COUNT + WORKER_MACHINE_COUNT + WINDOWS_WORKER_MACHINE_COUNT))" 178 while [[ $("${KUBECTL}" get nodes -ojson | jq '.items | length') -ne "${total_nodes}" ]]; do 179 sleep 10 180 done 181 182 until "${KUBECTL}" wait --for=condition=Ready node --all --timeout=15m; do 183 sleep 5 184 done 185 until "${KUBECTL}" get nodes -o wide; do 186 sleep 5 187 done 188 } 189 190 # wait_for_pods returns when all pods on the workload cluster are Running. 191 wait_for_pods() { 192 echo "Waiting for all pod init containers scheduled in the cluster to be ready" 193 while "${KUBECTL}" get pods --all-namespaces -o jsonpath="{.items[*].status.initContainerStatuses[*].ready}" | grep -q false; do 194 echo "Not all pod init containers are Ready...." 195 sleep 5 196 done 197 198 echo "Waiting for all pod containers scheduled in the cluster to be ready" 199 while "${KUBECTL}" get pods --all-namespaces -o jsonpath="{.items[*].status.containerStatuses[*].ready}" | grep -q false; do 200 echo "Not all pod containers are Ready...." 201 sleep 5 202 done 203 until "${KUBECTL}" get pods --all-namespaces -o wide; do 204 sleep 5 205 done 206 } 207 208 install_addons() { 209 export -f copy_kubeadm_config_map wait_for_copy_kubeadm_config_map 210 timeout --foreground 600 bash -c wait_for_copy_kubeadm_config_map 211 # In order to determine the successful outcome of CNI and cloud-provider-azure, 212 # we need to wait a little bit for nodes and pods terminal state, 213 # so we block successful return upon the cluster being fully operational. 214 export -f wait_for_nodes 215 timeout --foreground 1800 bash -c wait_for_nodes 216 export -f wait_for_pods 217 timeout --foreground 1800 bash -c wait_for_pods 218 } 219 220 copy_secret() { 221 # point at the management cluster 222 "${KUBECTL}" --kubeconfig "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" get secret "${CLUSTER_NAME}-control-plane-azure-json" -o jsonpath='{.data.control-plane-azure\.json}' | base64 --decode >azure_json || return 1 223 224 # create the secret on the workload cluster 225 "${KUBECTL}" create secret generic "${CONFIG_SECRET_NAME}" -n kube-system \ 226 --from-file=cloud-config=azure_json || return 1 227 rm azure_json 228 } 229 230 capz::ci-entrypoint::on_exit() { 231 if [[ -n ${KUBECONFIG:-} ]]; then 232 "${KUBECTL}" get nodes -o wide || echo "Unable to get nodes" 233 "${KUBECTL}" get pods -A -o wide || echo "Unable to get pods" 234 fi 235 # unset kubeconfig which is currently pointing at workload cluster. 236 # we want to be pointing at the management cluster (kind in this case) 237 unset KUBECONFIG 238 go run -tags e2e "${REPO_ROOT}"/test/logger.go --name "${CLUSTER_NAME}" --namespace default 239 "${REPO_ROOT}/hack/log/redact.sh" || true 240 # cleanup all resources we use 241 if [[ ! "${SKIP_CLEANUP:-}" == "true" ]]; then 242 timeout 1800 "${KUBECTL}" --kubeconfig "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" delete cluster "${CLUSTER_NAME}" -n default || echo "Unable to delete cluster ${CLUSTER_NAME}" 243 make --directory="${REPO_ROOT}" kind-reset || true 244 fi 245 } 246 247 # setup all required variables and images 248 setup 249 250 trap capz::ci-entrypoint::on_exit EXIT 251 export ARTIFACTS="${ARTIFACTS:-${PWD}/_artifacts}" 252 253 # create cluster 254 create_cluster 255 256 # export the target cluster KUBECONFIG if not already set 257 export KUBECONFIG="${KUBECONFIG:-${PWD}/kubeconfig}" 258 259 if [[ ! "${CLUSTER_TEMPLATE}" =~ "aks" ]]; then 260 # install CNI and CCM 261 install_addons 262 fi 263 264 "${KUBECTL}" --kubeconfig "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" wait -A --for=condition=Ready --timeout=10m -l "cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}" machinedeployments,machinepools 265 266 echo "Cluster ${CLUSTER_NAME} created and fully operational" 267 268 if [[ "${#}" -gt 0 ]]; then 269 # disable error exit so we can run post-command cleanup 270 set +o errexit 271 "${@}" 272 EXIT_VALUE="${?}" 273 exit ${EXIT_VALUE} 274 fi