sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/create-build-cluster.sh (about) 1 #!/usr/bin/env bash 2 # Copyright 2020 The Kubernetes Authors. 3 # 4 # Licensed under the Apache License, Version 2.0 (the "License"); 5 # you may not use this file except in compliance with the License. 6 # You may obtain a copy of the License at 7 # 8 # http://www.apache.org/licenses/LICENSE-2.0 9 # 10 # Unless required by applicable law or agreed to in writing, software 11 # distributed under the License is distributed on an "AS IS" BASIS, 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 # See the License for the specific language governing permissions and 14 # limitations under the License. 15 16 # This script is used to create a new build cluster for use with prow. The cluster will have a 17 # single pd-ssd nodepool that will have autoupgrade and autorepair enabled. 18 # 19 # Usage: populate the parameters by setting them below or specifying environment variables then run 20 # the script and follow the prompts. You'll be prompted to share some credentials and commands 21 # with the current oncall. 22 # 23 24 set -o errexit 25 set -o nounset 26 set -o pipefail 27 28 # Specific to Prow instance. Use "k8s-prow" if onboarding prow.k8s.io 29 PROW_INSTANCE_NAME="${PROW_INSTANCE_NAME:-}" 30 # Crier and deck needs to access the GCS bucket. Use 31 # "control-plane@k8s-prow.iam.gserviceaccount.com" if onboarding prow.k8s.io 32 CONTROL_PLANE_SA="${CONTROL_PLANE_SA:-}" 33 34 PROW_SERVICE_PROJECT="${PROW_SERVICE_PROJECT:-k8s-prow}" 35 PROW_SECRET_ACCESSOR_SA="${PROW_SECRET_ACCESSOR_SA:-gencred-refresher@k8s-prow.iam.gserviceaccount.com}" 36 37 PROW_DEPLOYMENT_DIR="${PROW_DEPLOYMENT_DIR:-./config/prow/cluster}" 38 # URI for cloning your fork locally, for example "git@github.com:chaodaiG/test-infra.git" 39 GITHUB_FORK_URI="${GITHUB_FORK_URI:-}" 40 GITHUB_CLONE_URI="${GITHUB_CLONE_URI:-git@github.com:kubernetes/test-infra}" 41 42 # Specific to the build cluster 43 TEAM="${TEAM:-}" 44 PROJECT="${PROJECT:-${PROW_INSTANCE_NAME}-build-${TEAM}}" 45 ZONE="${ZONE:-us-west1-b}" 46 CLUSTER="${CLUSTER:-${PROJECT}}" 47 GCS_BUCKET="${GCS_BUCKET:-gs://${PROJECT}}" 48 49 # Only needed for creating cluster 50 MACHINE="${MACHINE:-n1-standard-8}" 51 NODECOUNT="${NODECOUNT:-5}" 52 DISKSIZE="${DISKSIZE:-100GB}" 53 54 # Only needed for creating project 55 FOLDER_ID="${FOLDER_ID:-0123}" 56 BILLING_ACCOUNT_ID="${BILLING_ACCOUNT_ID:-0123}" # Find the billing account ID in the cloud console. 57 ADMIN_IAM_MEMBER="${ADMIN_IAM_MEMBER:-group:mdb.cloud-kubernetes-engprod-oncall@google.com}" 58 59 # Overriding output 60 OUT_FILE="${OUT_FILE:-build-cluster-kubeconfig.yaml}" 61 62 63 # Require bash version >= 4.4 64 if ((${BASH_VERSINFO[0]}<4)) || ( ((${BASH_VERSINFO[0]}==4)) && ((${BASH_VERSINFO[1]}<4)) ); then 65 echo "ERROR: This script requires a minimum bash version of 4.4, but got version of ${BASH_VERSINFO[0]}.${BASH_VERSINFO[1]}" 66 if [ "$(uname)" = 'Darwin' ]; then 67 echo "On macOS with homebrew 'brew install bash' is sufficient." 68 fi 69 exit 1 70 fi 71 72 # Macos specific settings 73 SED="sed" 74 if command -v gsed &>/dev/null; then 75 SED="gsed" 76 fi 77 if ! (${SED} --version 2>&1 | grep -q GNU); then 78 # darwin is great (not) 79 echo "!!! GNU sed is required. If on OS X, use 'brew install gnu-sed'." >&2 80 return 1 81 fi 82 83 # Create temp dir to work in and clone k/t-i 84 85 origdir="$( pwd -P )" 86 tempdir="$( mktemp -d )" 87 echo 88 echo "Temporary files produced are stored at: ${tempdir}" 89 echo 90 cd "${tempdir}" 91 git clone https://github.com/kubernetes/test-infra --depth=1 92 cd "${origdir}" 93 94 ROOT_DIR="${tempdir}/test-infra" 95 96 function main() { 97 parseArgs "$@" 98 ensureProject 99 ensureBucket 100 ensureCluster 101 ensureUploadSA 102 genConfig 103 gencreds 104 echo "All done!" 105 } 106 # Prep and check args. 107 function parseArgs() { 108 for var in TEAM PROJECT ZONE CLUSTER MACHINE NODECOUNT DISKSIZE FOLDER_ID BILLING_ACCOUNT_ID GITHUB_FORK_URI; do 109 if [[ -z "${!var}" ]]; then 110 echo "Must specify ${var} environment variable (or specify a default in the script)." 111 exit 2 112 fi 113 echo "${var}=${!var}" 114 done 115 if [[ "${PROW_INSTANCE_NAME}" != "k8s-prow" ]]; then 116 if [[ "${PROW_SECRET_ACCESSOR_SA}" == "gencred-refresher@k8s-prow.iam.gserviceaccount.com" ]]; then 117 echo "${PROW_SECRET_ACCESSOR_SA} is k8s-prow specific, must pass in the service account used by ${PROW_INSTANCE_NAME}" 118 exit 2 119 fi 120 if [[ "${PROW_DEPLOYMENT_DIR}" == "./config/prow/cluster" ]]; then 121 read -r -n1 -p "${PROW_DEPLOYMENT_DIR} is k8s-prow specific, are you sure this is the same for ${PROW_INSTANCE_NAME} ? [y/n] " 122 if [[ ! $REPLY =~ ^[Yy]$ ]]; then 123 exit 2 124 fi 125 fi 126 fi 127 } 128 function prompt() { 129 local msg="$1" cmd="$2" 130 echo 131 read -r -n1 -p "$msg ? [y/n] " 132 echo 133 if [[ $REPLY =~ ^[Yy]$ ]]; then 134 "$cmd" 135 else 136 echo "Skipping and continuing to next step..." 137 fi 138 } 139 function pause() { 140 read -n 1 -s -r 141 } 142 143 authed="" 144 function getClusterCreds() { 145 if [[ -z "${authed}" ]]; then 146 gcloud container clusters get-credentials --project="${PROJECT}" --zone="${ZONE}" "${CLUSTER}" 147 authed="true" 148 fi 149 } 150 function ensureProject() { 151 if gcloud projects describe ${PROJECT}; then 152 echo "GCP project '${PROJECT}' exists, skip creating." 153 return 154 fi 155 156 prompt "Failed to describe the project ${PROJECT}, press Y/y to create the project" echo 157 # Create project, configure billing, enable GKE, add IAM rule for oncall team. 158 echo "Creating project '${PROJECT}' (this may take a few minutes)..." 159 gcloud projects create "${PROJECT}" --name="${PROJECT}" --folder="${FOLDER_ID}" 160 gcloud beta billing projects link "${PROJECT}" --billing-account="${BILLING_ACCOUNT_ID}" 161 gcloud services enable "container.googleapis.com" --project="${PROJECT}" 162 gcloud projects add-iam-policy-binding "${PROJECT}" --member="${ADMIN_IAM_MEMBER}" --role="roles/owner" 163 } 164 function ensureCluster() { 165 if gcloud container clusters describe "${CLUSTER}" --project="${PROJECT}" --zone="${ZONE}" >/dev/null 2>&1; then 166 echo "Cluster '${CLUSTER}' exists in zone '${ZONE}' in project '${PROJECT}', skip creating." 167 return 168 fi 169 170 prompt "Pressing Y/y to create the cluster" echo 171 echo "Creating cluster '${CLUSTER}' (this may take a few minutes)..." 172 echo "If this fails due to insufficient project quota, request more at https://console.cloud.google.com/iam-admin/quotas?project=${PROJECT}" 173 echo 174 gcloud container clusters create "${CLUSTER}" --project="${PROJECT}" --zone="${ZONE}" --machine-type="${MACHINE}" --num-nodes="${NODECOUNT}" --disk-size="${DISKSIZE}" --disk-type="pd-ssd" --enable-autoupgrade --enable-autorepair --workload-pool="${PROJECT}.svc.id.goog" 175 getClusterCreds 176 kubectl create namespace "test-pods" 177 } 178 179 function createBucket() { 180 gsutil mb -p "${PROJECT}" -b on "${GCS_BUCKET}" 181 for i in ${CONTROL_PLANE_SA//,/ } 182 do 183 gsutil iam ch "serviceAccount:${i}:roles/storage.objectAdmin" "${GCS_BUCKET}" 184 done 185 } 186 187 function ensureBucket() { 188 if ! gsutil ls "${GCS_BUCKET}"; then 189 prompt "The specified GCS bucket '${GCS_BUCKET}' cannot be located. This is expected if this is a shared default job result bucket. Otherwise press Y/y to create it." createBucket 190 else 191 echo "Bucket '${GCS_BUCKET}' already exists, skip creation." 192 fi 193 } 194 195 function ensureUploadSA() { 196 getClusterCreds 197 local sa="prowjob-default-sa" 198 local saFull="${sa}@${PROJECT}.iam.gserviceaccount.com" 199 # Create a GCP service account for uploading to GCS 200 if ! gcloud beta iam service-accounts describe "${saFull}" --project="${PROJECT}" >/dev/null 2>&1; then 201 gcloud beta iam service-accounts create "${sa}" --project="${PROJECT}" --description="Default SA for ProwJobs to use to upload job results to GCS." --display-name="ProwJob default SA" 202 else 203 echo "Service account '${sa}' already exists, skip creation." 204 fi 205 # Ensure workload identity is enabled on the cluster 206 if ! gcloud container clusters describe ${CLUSTER} --project=${PROJECT} --zone=${ZONE} | grep "${CLUSTER}.svc.id.goog" >/dev/null 2>&1; then 207 "${ROOT_DIR}/workload-identity/enable-workload-identity.sh" "${PROJECT}" "${ZONE}" "${CLUSTER}" 208 else 209 echo "Workload identity is enabled on cluster '${CLUSTER}', skip enabling." 210 fi 211 212 # Create a k8s service account to associate with the GCP service account 213 if ! kubectl -n test-pods get ${sa}; then 214 kubectl apply -f - <<EOF 215 apiVersion: v1 216 kind: ServiceAccount 217 metadata: 218 annotations: 219 iam.gke.io/gcp-service-account: ${saFull} 220 name: ${sa} 221 namespace: test-pods 222 EOF 223 fi 224 225 echo "Binding GCP service account with k8s service account via workload identity. Propagation and validation may take a few minutes..." 226 if ! gcloud iam service-accounts get-iam-policy --project=gob-prow prowjob-default-sa@gob-prow.iam.gserviceaccount.com | grep "${CLUSTER}.svc.id.goog[test-pods/${saFull}]" >/dev/null 2>&1; then 227 "${ROOT_DIR}/workload-identity/bind-service-accounts.sh" "${PROJECT}" "${ZONE}" "${CLUSTER}" test-pods "${sa}" "${saFull}" 228 fi 229 230 # Try to authorize SA to upload to GCS_BUCKET. If this fails, the bucket if 231 # probably a shared result bucket and oncall will need to handle. 232 if ! gsutil iam get "${GCS_BUCKET}" | grep "serviceAccount:${saFull}" >/dev/null 2>&1; then 233 if ! gsutil iam ch "serviceAccount:${saFull}:roles/storage.objectAdmin" "${GCS_BUCKET}"; then 234 echo 235 echo "It doesn't look you have permission to authorize access to this bucket. This is expected for the default job result bucket." 236 echo "If this is a default job result bucket, please ask the test-infra oncall (https://go.k8s.io/oncall) to run the following:" 237 echo " gsutil iam ch \"serviceAccount:${saFull}:roles/storage.objectAdmin\" \"${GCS_BUCKET}\"" 238 echo 239 echo "Press any key to acknowledge (this doesn't need to be completed to continue this script, but it needs to be done before uploading will work)..." 240 pause 241 fi 242 fi 243 } 244 245 function genConfig() { 246 # TODO: Automatically inject this into config.yaml at the same time as kubeconfig credential setup (which auto creates a PR we can include this in). 247 echo 248 echo "The following changes should be made to the Prow instance's config.yaml file (Probably located at ${PROW_DEPLOYMENT_DIR}/../config.yaml)." 249 echo 250 echo "Append the following entry to the end of the slice at field 'plank.default_decoration_config_entries': " 251 cat <<EOF 252 - cluster: $(cluster_alias) 253 config: 254 gcs_configuration: 255 bucket: "${GCS_BUCKET#"gs://"}" 256 default_service_account_name: "prowjob-default-sa" # Use workload identity 257 gcs_credentials_secret: "" # rather than service account key secret 258 EOF 259 echo 260 echo "Press any key to acknowledge... This doesn't need to be merged to continue this script, but it needs to be done before configuring jobs for the cluster." 261 pause 262 } 263 264 # generate a JWT kubeconfig file that we can merge into k8s-prow's kubeconfig 265 # secret so that Prow can schedule pods. This operation is now handled by a prow 266 # job runs gencred pediodically. So the only action from this function is 267 # authorizing prow service account to access the build cluster. 268 function gencreds() { 269 # The secret can be stored in prow service cluster 270 gcloud projects add-iam-policy-binding --member="serviceAccount:${PROW_SECRET_ACCESSOR_SA}" --role="roles/container.admin" "${PROJECT}" --condition=None 271 272 prompt "Create CL for you" create_cl 273 274 echo "ProwJobs that intend to use this cluster should specify 'cluster: $(cluster_alias)'" # TODO: color this 275 echo 276 echo "Press any key to acknowledge (this doesn't need to be completed to continue this script, but it needs to be done before Prow can schedule jobs to your cluster)..." 277 pause 278 } 279 280 cluster_alias() { 281 echo "build-${TEAM}" 282 } 283 gsm_secret_name() { 284 echo "prow_build_cluster_kubeconfig_$(cluster_alias)" 285 } 286 287 create_cl() { 288 local cluster_alias 289 cluster_alias="$(cluster_alias)" 290 local gsm_secret_name 291 gsm_secret_name="$(gsm_secret_name)" 292 local build_cluster_kubeconfig_mount_path="/etc/${cluster_alias}" 293 local build_clster_secret_name_in_cluster="kubeconfig-build-${TEAM}" 294 cd "${ROOT_DIR}" 295 local fork 296 fork="$(echo "${GITHUB_FORK_URI}" | "$SED" -e "s;https://github.com/;;" -e "s;git@github.com:;;" -e "s;.git;;")" 297 298 cd "${tempdir}" 299 git clone "${GITHUB_CLONE_URI}" forked-test-infra 300 cd forked-test-infra 301 git fetch 302 303 git checkout -b add-build-cluster-secret-${TEAM} 304 305 cat>>"${PROW_DEPLOYMENT_DIR}/kubernetes_external_secrets.yaml" <<EOF 306 --- 307 apiVersion: kubernetes-client.io/v1 308 kind: ExternalSecret 309 metadata: 310 name: ${build_clster_secret_name_in_cluster} 311 namespace: default 312 spec: 313 backendType: gcpSecretsManager 314 projectId: ${PROW_SERVICE_PROJECT} 315 data: 316 - key: ${gsm_secret_name} 317 name: kubeconfig 318 version: latest 319 EOF 320 321 # Also register this build cluster with gencred, so that the kubeconfig 322 # secrets can be rotated. 323 local gencred_config_file="${PROW_DEPLOYMENT_DIR}/../gencred-config/gencred-config.yaml" 324 "${SED}" -i "s;clusters:;clusters:\\ 325 - gke: projects/${PROJECT}/locations/${ZONE}/clusters/${CLUSTER}\\ 326 name: ${cluster_alias}\\ 327 duration: 48h\\ 328 gsm:\\ 329 name: ${gsm_secret_name}\\ 330 project: ${PROW_SERVICE_PROJECT};" "${gencred_config_file}" 331 332 git add "${PROW_DEPLOYMENT_DIR}/kubernetes_external_secrets.yaml" 333 git add "${gencred_config_file}" 334 git commit -m "Add external secret from build cluster for ${TEAM}" 335 git push -f "${GITHUB_FORK_URI}" "HEAD:add-build-cluster-secret-${TEAM}" 336 337 git checkout -b use-build-cluster-${TEAM} master 338 339 for app_deployment_file in ${PROW_DEPLOYMENT_DIR}/*.yaml; do 340 if ! grep "/etc/kubeconfig/config" "${app_deployment_file}">/dev/null 2>&1; then 341 if ! grep "name: KUBECONFIG" "${app_deployment_file}">/dev/null 2>&1; then 342 continue 343 fi 344 fi 345 "${SED}" -i "s;volumeMounts:;volumeMounts:\\ 346 - mountPath: ${build_cluster_kubeconfig_mount_path}\\ 347 name: ${cluster_alias}\\ 348 readOnly: true;" "${app_deployment_file}" 349 350 "${SED}" -i "s;volumes:;volumes:\\ 351 - name: ${cluster_alias}\\ 352 secret:\\ 353 defaultMode: 420\\ 354 secretName: ${build_clster_secret_name_in_cluster};" "${app_deployment_file}" 355 356 # Appends to an existing value doesn't seem to be supported by kustomize, so 357 # using sed instead. `&` represents for regex matched part 358 "${SED}" -E -i "s;/etc/kubeconfig/config(-[0-9]+)?;&:${build_cluster_kubeconfig_mount_path}/kubeconfig;" "${app_deployment_file}" 359 git add "${app_deployment_file}" 360 done 361 362 git commit -m "Add build cluster kubeconfig for ${TEAM} 363 364 Please submit this change after the previous PR was submitted and postsubmit job succeeded. 365 Prow oncall: please don't submit this change until the secret is created successfully, which will be indicated by prow alerts in 2 minutes after the postsubmit job. 366 " 367 368 git push -f "${GITHUB_FORK_URI}" "HEAD:use-build-cluster-${TEAM}" 369 echo 370 echo "Please open https://github.com/${fork}/pull/new/add-build-cluster-secret-${TEAM} and https://github.com/${fork}/pull/new/use-build-cluster-${TEAM}, creating PRs from both of them and assign to test-infra oncall for approval" 371 echo 372 pause 373 } 374 375 function cleanup() { 376 returnCode="$?" 377 rm -f "sa-key.json" || true 378 rm -rf "${tempdir}" || true 379 exit "${returnCode}" 380 } 381 trap cleanup EXIT 382 main "$@" 383 cleanup