sigs.k8s.io/cluster-api-provider-azure@v1.14.3/scripts/ci-entrypoint.sh (about) 1 #!/bin/bash 2 3 # Copyright 2020 The Kubernetes Authors. 4 # 5 # Licensed under the Apache License, Version 2.0 (the "License"); 6 # you may not use this file except in compliance with the License. 7 # You may obtain a copy of the License at 8 # 9 # http://www.apache.org/licenses/LICENSE-2.0 10 # 11 # Unless required by applicable law or agreed to in writing, software 12 # distributed under the License is distributed on an "AS IS" BASIS, 13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 # See the License for the specific language governing permissions and 15 # limitations under the License. 16 17 ############################################################################### 18 19 # To run locally, set AZURE_CLIENT_ID, AZURE_CLIENT_SECRET, AZURE_SUBSCRIPTION_ID, AZURE_TENANT_ID 20 21 set -o errexit 22 set -o nounset 23 set -o pipefail 24 25 # Install kubectl, helm and kustomize 26 REPO_ROOT=$(dirname "${BASH_SOURCE[0]}")/.. 27 KUBECTL="${REPO_ROOT}/hack/tools/bin/kubectl" 28 HELM="${REPO_ROOT}/hack/tools/bin/helm" 29 KIND="${REPO_ROOT}/hack/tools/bin/kind" 30 KUSTOMIZE="${REPO_ROOT}/hack/tools/bin/kustomize" 31 make --directory="${REPO_ROOT}" "${KUBECTL##*/}" "${HELM##*/}" "${KIND##*/}" "${KUSTOMIZE##*/}" 32 KIND_CLUSTER_NAME="${KIND_CLUSTER_NAME:-capz}" 33 export KIND_CLUSTER_NAME 34 # export the variables so they are available in bash -c wait_for_nodes below 35 export KUBECTL 36 export HELM 37 38 # shellcheck source=hack/ensure-go.sh 39 source "${REPO_ROOT}/hack/ensure-go.sh" 40 # shellcheck source=hack/ensure-tags.sh 41 source "${REPO_ROOT}/hack/ensure-tags.sh" 42 # shellcheck source=hack/parse-prow-creds.sh 43 source "${REPO_ROOT}/hack/parse-prow-creds.sh" 44 # shellcheck source=hack/util.sh 45 source "${REPO_ROOT}/hack/util.sh" 46 47 setup() { 48 if [[ -n "${KUBERNETES_VERSION:-}" ]] && [[ -n "${CI_VERSION:-}" ]]; then 49 echo "You may not set both \$KUBERNETES_VERSION and \$CI_VERSION, use one or the other to configure the version/build of Kubernetes to use" 50 exit 1 51 fi 52 # setup REGISTRY for custom images. 53 : "${REGISTRY:?Environment variable empty or not defined.}" 54 "${REPO_ROOT}/hack/ensure-acr-login.sh" 55 if [[ "$(capz::util::should_build_ccm)" == "true" ]]; then 56 # shellcheck source=scripts/ci-build-azure-ccm.sh 57 source "${REPO_ROOT}/scripts/ci-build-azure-ccm.sh" 58 echo "Will use the ${IMAGE_REGISTRY}/${CCM_IMAGE_NAME}:${IMAGE_TAG_CCM} cloud-controller-manager image for external cloud-provider-cluster" 59 echo "Will use the ${IMAGE_REGISTRY}/${CNM_IMAGE_NAME}:${IMAGE_TAG_CNM} cloud-node-manager image for external cloud-provider-azure cluster" 60 61 if [[ -n "${LOAD_CLOUD_CONFIG_FROM_SECRET:-}" ]]; then 62 export CLOUD_CONFIG="" 63 export CONFIG_SECRET_NAME="azure-cloud-provider" 64 export ENABLE_DYNAMIC_RELOADING=true 65 until copy_secret; do 66 sleep 5 67 done 68 fi 69 70 export CCM_LOG_VERBOSITY="${CCM_LOG_VERBOSITY:-4}" 71 export CLOUD_PROVIDER_AZURE_LABEL="azure-ci" 72 fi 73 74 if [[ "$(capz::util::should_build_kubernetes)" == "true" ]]; then 75 # shellcheck source=scripts/ci-build-kubernetes.sh 76 source "${REPO_ROOT}/scripts/ci-build-kubernetes.sh" 77 fi 78 79 if [[ "${KUBERNETES_VERSION:-}" =~ "latest" ]]; then 80 CI_VERSION_URL="https://dl.k8s.io/ci/${KUBERNETES_VERSION}.txt" 81 export CI_VERSION="${CI_VERSION:-$(curl --retry 3 -sSL "${CI_VERSION_URL}")}" 82 fi 83 if [[ -n "${CI_VERSION:-}" ]]; then 84 echo "Using CI_VERSION ${CI_VERSION}" 85 export KUBERNETES_VERSION="${CI_VERSION}" 86 fi 87 echo "Using KUBERNETES_VERSION ${KUBERNETES_VERSION:-}" 88 89 if [[ -z "${CLUSTER_TEMPLATE:-}" ]]; then 90 select_cluster_template 91 fi 92 echo "Using cluster template: ${CLUSTER_TEMPLATE}" 93 94 export CLUSTER_NAME="${CLUSTER_NAME:-capz-$( 95 head /dev/urandom | LC_ALL=C tr -dc a-z0-9 | head -c 6 96 echo '' 97 )}" 98 export AZURE_RESOURCE_GROUP="${CLUSTER_NAME}" 99 export AZURE_LOCATION="${AZURE_LOCATION:-$(capz::util::get_random_region)}" 100 echo "Using AZURE_LOCATION: ${AZURE_LOCATION}" 101 export AZURE_LOCATION_GPU="${AZURE_LOCATION_GPU:-$(capz::util::get_random_region_gpu)}" 102 echo "Using AZURE_LOCATION_GPU: ${AZURE_LOCATION_GPU}" 103 export AZURE_LOCATION_EDGEZONE="${AZURE_LOCATION_EDGEZONE:-$(capz::util::get_random_region_edgezone)}" 104 echo "Using AZURE_LOCATION_EDGEZONE: ${AZURE_LOCATION_EDGEZONE}" 105 # Need a cluster with at least 2 nodes 106 export CONTROL_PLANE_MACHINE_COUNT="${CONTROL_PLANE_MACHINE_COUNT:-1}" 107 export CCM_COUNT="${CCM_COUNT:-1}" 108 export WORKER_MACHINE_COUNT="${WORKER_MACHINE_COUNT:-2}" 109 export EXP_CLUSTER_RESOURCE_SET="true" 110 111 # TODO figure out a better way to account for expected Windows node count 112 if [[ -n "${TEST_WINDOWS:-}" ]]; then 113 export WINDOWS_WORKER_MACHINE_COUNT="${WINDOWS_WORKER_MACHINE_COUNT:-2}" 114 fi 115 } 116 117 select_cluster_template() { 118 if [[ "$(capz::util::should_build_kubernetes)" == "true" ]]; then 119 export CLUSTER_TEMPLATE="test/dev/cluster-template-custom-builds.yaml" 120 elif [[ -n "${CI_VERSION:-}" ]]; then 121 # export cluster template which contains the manifests needed for creating the Azure cluster to run the tests 122 export CLUSTER_TEMPLATE="test/ci/cluster-template-prow-ci-version.yaml" 123 else 124 export CLUSTER_TEMPLATE="test/ci/cluster-template-prow.yaml" 125 fi 126 127 if [[ "${EXP_MACHINE_POOL:-}" == "true" ]]; then 128 if [[ "${CLUSTER_TEMPLATE}" =~ "prow" ]]; then 129 export CLUSTER_TEMPLATE="${CLUSTER_TEMPLATE/prow/prow-machine-pool}" 130 elif [[ "${CLUSTER_TEMPLATE}" =~ "custom-builds" ]]; then 131 export CLUSTER_TEMPLATE="${CLUSTER_TEMPLATE/custom-builds/custom-builds-machine-pool}" 132 fi 133 fi 134 } 135 136 create_cluster() { 137 "${REPO_ROOT}/hack/create-dev-cluster.sh" 138 if [ ! -f "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" ]; then 139 echo "Unable to find kubeconfig for kind mgmt cluster ${KIND_CLUSTER_NAME}" 140 exit 1 141 fi 142 143 # set the SSH bastion and user that can be used to SSH into nodes 144 KUBE_SSH_BASTION=$(${KUBECTL} get azurecluster -o json | jq '.items[0].spec.networkSpec.apiServerLB.frontendIPs[0].publicIP.dnsName' | tr -d \"):22 145 export KUBE_SSH_BASTION 146 KUBE_SSH_USER=capi 147 export KUBE_SSH_USER 148 } 149 150 # copy_kubeadm_config_map copies the kubeadm configmap into the calico-system namespace. 151 # any retryable operation in this function must return a non-zero exit code on failure so that we can 152 # retry it using a `until copy_kubeadm_config_map; do sleep 5; done` pattern; 153 # and any statement must be idempotent so that subsequent retry attempts can make forward progress. 154 copy_kubeadm_config_map() { 155 # Copy the kubeadm configmap to the calico-system namespace. 156 # This is a workaround needed for the calico-node-windows daemonset 157 # to be able to run in the calico-system namespace. 158 # First, validate that the kubeadm-config configmap has been created. 159 "${KUBECTL}" get configmap kubeadm-config --namespace=kube-system -o yaml || return 1 160 "${KUBECTL}" create namespace calico-system --dry-run=client -o yaml | kubectl apply -f - || return 1 161 if ! "${KUBECTL}" get configmap kubeadm-config --namespace=calico-system; then 162 "${KUBECTL}" get configmap kubeadm-config --namespace=kube-system -o yaml | sed 's/namespace: kube-system/namespace: calico-system/' | "${KUBECTL}" apply -f - || return 1 163 fi 164 } 165 166 # wait_for_nodes returns when all nodes in the workload cluster are Ready. 167 wait_for_nodes() { 168 echo "Waiting for ${CONTROL_PLANE_MACHINE_COUNT} control plane machine(s), ${WORKER_MACHINE_COUNT} worker machine(s), and ${WINDOWS_WORKER_MACHINE_COUNT:-0} windows machine(s) to become Ready" 169 170 # Ensure that all nodes are registered with the API server before checking for readiness 171 local total_nodes="$((CONTROL_PLANE_MACHINE_COUNT + WORKER_MACHINE_COUNT + WINDOWS_WORKER_MACHINE_COUNT))" 172 while [[ $("${KUBECTL}" get nodes -ojson | jq '.items | length') -ne "${total_nodes}" ]]; do 173 sleep 10 174 done 175 176 until "${KUBECTL}" wait --for=condition=Ready node --all --timeout=15m; do 177 sleep 5 178 done 179 until "${KUBECTL}" get nodes -o wide; do 180 sleep 5 181 done 182 } 183 184 # wait_for_pods returns when all pods on the workload cluster are Running. 185 wait_for_pods() { 186 echo "Waiting for all pod init containers scheduled in the cluster to be ready" 187 while "${KUBECTL}" get pods --all-namespaces -o jsonpath="{.items[*].status.initContainerStatuses[*].ready}" | grep -q false; do 188 echo "Not all pod init containers are Ready...." 189 sleep 5 190 done 191 192 echo "Waiting for all pod containers scheduled in the cluster to be ready" 193 while "${KUBECTL}" get pods --all-namespaces -o jsonpath="{.items[*].status.containerStatuses[*].ready}" | grep -q false; do 194 echo "Not all pod containers are Ready...." 195 sleep 5 196 done 197 until "${KUBECTL}" get pods --all-namespaces -o wide; do 198 sleep 5 199 done 200 } 201 202 install_addons() { 203 # export the target cluster KUBECONFIG if not already set 204 export KUBECONFIG="${KUBECONFIG:-${PWD}/kubeconfig}" 205 until copy_kubeadm_config_map; do 206 sleep 5 207 done 208 # In order to determine the successful outcome of CNI and cloud-provider-azure, 209 # we need to wait a little bit for nodes and pods terminal state, 210 # so we block successful return upon the cluster being fully operational. 211 export -f wait_for_nodes 212 timeout --foreground 1800 bash -c wait_for_nodes 213 export -f wait_for_pods 214 timeout --foreground 1800 bash -c wait_for_pods 215 } 216 217 copy_secret() { 218 # point at the management cluster 219 "${KUBECTL}" --kubeconfig "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" get secret "${CLUSTER_NAME}-control-plane-azure-json" -o jsonpath='{.data.control-plane-azure\.json}' | base64 --decode >azure_json || return 1 220 221 # create the secret on the workload cluster 222 "${KUBECTL}" create secret generic "${CONFIG_SECRET_NAME}" -n kube-system \ 223 --from-file=cloud-config=azure_json || return 1 224 rm azure_json 225 } 226 227 capz::ci-entrypoint::on_exit() { 228 if [[ -n ${KUBECONFIG:-} ]]; then 229 "${KUBECTL}" get nodes -o wide || echo "Unable to get nodes" 230 "${KUBECTL}" get pods -A -o wide || echo "Unable to get pods" 231 fi 232 # unset kubeconfig which is currently pointing at workload cluster. 233 # we want to be pointing at the management cluster (kind in this case) 234 unset KUBECONFIG 235 go run -tags e2e "${REPO_ROOT}"/test/logger.go --name "${CLUSTER_NAME}" --namespace default 236 "${REPO_ROOT}/hack/log/redact.sh" || true 237 # cleanup all resources we use 238 if [[ ! "${SKIP_CLEANUP:-}" == "true" ]]; then 239 timeout 1800 "${KUBECTL}" --kubeconfig "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" delete cluster "${CLUSTER_NAME}" || echo "Unable to delete cluster ${CLUSTER_NAME}" 240 make --directory="${REPO_ROOT}" kind-reset || true 241 fi 242 } 243 244 # setup all required variables and images 245 setup 246 247 trap capz::ci-entrypoint::on_exit EXIT 248 export ARTIFACTS="${ARTIFACTS:-${PWD}/_artifacts}" 249 250 # create cluster 251 create_cluster 252 253 # install CNI and CCM 254 install_addons 255 echo "Cluster ${CLUSTER_NAME} created and fully operational" 256 257 if [[ "${#}" -gt 0 ]]; then 258 # disable error exit so we can run post-command cleanup 259 set +o errexit 260 "${@}" 261 EXIT_VALUE="${?}" 262 exit ${EXIT_VALUE} 263 fi