github.com/zppinho/prow@v0.0.0-20240510014325-1738badeb017/pkg/create-build-cluster.sh (about)

     1  #!/usr/bin/env bash
     2  # Copyright 2020 The Kubernetes Authors.
     3  #
     4  # Licensed under the Apache License, Version 2.0 (the "License");
     5  # you may not use this file except in compliance with the License.
     6  # You may obtain a copy of the License at
     7  #
     8  #     http://www.apache.org/licenses/LICENSE-2.0
     9  #
    10  # Unless required by applicable law or agreed to in writing, software
    11  # distributed under the License is distributed on an "AS IS" BASIS,
    12  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  # See the License for the specific language governing permissions and
    14  # limitations under the License.
    15  
    16  # This script is used to create a new build cluster for use with prow. The cluster will have a 
    17  # single pd-ssd nodepool that will have autoupgrade and autorepair enabled.
    18  #
    19  # Usage: populate the parameters by setting them below or specifying environment variables then run
    20  # the script and follow the prompts. You'll be prompted to share some credentials and commands
    21  # with the current oncall.
    22  #
    23  
    24  set -o errexit
    25  set -o nounset
    26  set -o pipefail
    27  
    28  # Specific to Prow instance. Use "k8s-prow" if onboarding prow.k8s.io
    29  PROW_INSTANCE_NAME="${PROW_INSTANCE_NAME:-}"
    30  # Crier and deck needs to access the GCS bucket. Use
    31  # "control-plane@k8s-prow.iam.gserviceaccount.com" if onboarding prow.k8s.io
    32  CONTROL_PLANE_SA="${CONTROL_PLANE_SA:-}"
    33  
    34  PROW_SERVICE_PROJECT="${PROW_SERVICE_PROJECT:-k8s-prow}"
    35  PROW_SECRET_ACCESSOR_SA="${PROW_SECRET_ACCESSOR_SA:-gencred-refresher@k8s-prow.iam.gserviceaccount.com}"
    36  
    37  PROW_DEPLOYMENT_DIR="${PROW_DEPLOYMENT_DIR:-./config/prow/cluster}"
    38  # URI for cloning your fork locally, for example "git@github.com:chaodaiG/test-infra.git"
    39  GITHUB_FORK_URI="${GITHUB_FORK_URI:-}"
    40  GITHUB_CLONE_URI="${GITHUB_CLONE_URI:-git@github.com:kubernetes/test-infra}"
    41  
    42  # Specific to the build cluster
    43  TEAM="${TEAM:-}"
    44  PROJECT="${PROJECT:-${PROW_INSTANCE_NAME}-build-${TEAM}}"
    45  ZONE="${ZONE:-us-west1-b}"
    46  CLUSTER="${CLUSTER:-${PROJECT}}"
    47  GCS_BUCKET="${GCS_BUCKET:-gs://${PROJECT}}"
    48  
    49  # Only needed for creating cluster
    50  MACHINE="${MACHINE:-n1-standard-8}"
    51  NODECOUNT="${NODECOUNT:-5}"
    52  DISKSIZE="${DISKSIZE:-100GB}"
    53  
    54  # Only needed for creating project
    55  FOLDER_ID="${FOLDER_ID:-0123}"
    56  BILLING_ACCOUNT_ID="${BILLING_ACCOUNT_ID:-0123}"  # Find the billing account ID in the cloud console.
    57  ADMIN_IAM_MEMBER="${ADMIN_IAM_MEMBER:-group:mdb.cloud-kubernetes-engprod-oncall@google.com}"
    58  
    59  # Overriding output
    60  OUT_FILE="${OUT_FILE:-build-cluster-kubeconfig.yaml}"
    61  
    62  
    63  # Require bash version >= 4.4
    64  if ((${BASH_VERSINFO[0]}<4)) || ( ((${BASH_VERSINFO[0]}==4)) && ((${BASH_VERSINFO[1]}<4)) ); then
    65    echo "ERROR: This script requires a minimum bash version of 4.4, but got version of ${BASH_VERSINFO[0]}.${BASH_VERSINFO[1]}"
    66    if [ "$(uname)" = 'Darwin' ]; then
    67      echo "On macOS with homebrew 'brew install bash' is sufficient."
    68    fi
    69    exit 1
    70  fi
    71  
    72  # Macos specific settings
    73  SED="sed"
    74  if command -v gsed &>/dev/null; then
    75    SED="gsed"
    76  fi
    77  if ! (${SED} --version 2>&1 | grep -q GNU); then
    78    # darwin is great (not)
    79    echo "!!! GNU sed is required.  If on OS X, use 'brew install gnu-sed'." >&2
    80    return 1
    81  fi
    82  
    83  # Create temp dir to work in and clone k/t-i
    84  
    85  origdir="$( pwd -P )"
    86  tempdir="$( mktemp -d )"
    87  echo
    88  echo "Temporary files produced are stored at: ${tempdir}"
    89  echo
    90  cd "${tempdir}"
    91  git clone https://github.com/kubernetes/test-infra --depth=1
    92  cd "${origdir}"
    93  
    94  ROOT_DIR="${tempdir}/test-infra"
    95  
    96  function main() {
    97    parseArgs "$@"
    98    ensureProject
    99    ensureBucket
   100    ensureCluster
   101    ensureUploadSA
   102    genConfig
   103    gencreds
   104    echo "All done!"
   105  }
   106  # Prep and check args.
   107  function parseArgs() {
   108    for var in TEAM PROJECT ZONE CLUSTER MACHINE NODECOUNT DISKSIZE FOLDER_ID BILLING_ACCOUNT_ID GITHUB_FORK_URI; do
   109      if [[ -z "${!var}" ]]; then
   110        echo "Must specify ${var} environment variable (or specify a default in the script)."
   111        exit 2
   112      fi
   113      echo "${var}=${!var}"
   114    done
   115    if [[ "${PROW_INSTANCE_NAME}" != "k8s-prow" ]]; then
   116      if [[ "${PROW_SECRET_ACCESSOR_SA}" == "gencred-refresher@k8s-prow.iam.gserviceaccount.com" ]]; then
   117        echo "${PROW_SECRET_ACCESSOR_SA} is k8s-prow specific, must pass in the service account used by ${PROW_INSTANCE_NAME}"
   118        exit 2
   119      fi
   120      if [[ "${PROW_DEPLOYMENT_DIR}" == "./config/prow/cluster" ]]; then
   121        read -r -n1 -p "${PROW_DEPLOYMENT_DIR} is k8s-prow specific, are you sure this is the same for ${PROW_INSTANCE_NAME} ? [y/n] "
   122        if [[ ! $REPLY =~ ^[Yy]$ ]]; then
   123          exit 2
   124        fi
   125      fi
   126    fi
   127  }
   128  function prompt() {
   129    local msg="$1" cmd="$2"
   130    echo
   131    read -r -n1 -p "$msg ? [y/n] "
   132    echo
   133    if [[ $REPLY =~ ^[Yy]$ ]]; then
   134      "$cmd"
   135    else
   136      echo "Skipping and continuing to next step..."
   137    fi
   138  }
   139  function pause() {
   140    read -n 1 -s -r
   141  }
   142  
   143  authed=""
   144  function getClusterCreds() {
   145    if [[ -z "${authed}" ]]; then
   146      gcloud container clusters get-credentials --project="${PROJECT}" --zone="${ZONE}" "${CLUSTER}"
   147      authed="true"
   148    fi
   149  }
   150  function ensureProject() {
   151    if gcloud projects describe ${PROJECT}; then
   152      echo "GCP project '${PROJECT}' exists, skip creating."
   153      return
   154    fi
   155  
   156    prompt "Failed to describe the project ${PROJECT}, press Y/y to create the project" echo
   157    # Create project, configure billing, enable GKE, add IAM rule for oncall team.
   158    echo "Creating project '${PROJECT}' (this may take a few minutes)..."
   159    gcloud projects create "${PROJECT}" --name="${PROJECT}" --folder="${FOLDER_ID}"
   160    gcloud beta billing projects link "${PROJECT}" --billing-account="${BILLING_ACCOUNT_ID}"
   161    gcloud services enable "container.googleapis.com" --project="${PROJECT}"
   162    gcloud projects add-iam-policy-binding "${PROJECT}" --member="${ADMIN_IAM_MEMBER}" --role="roles/owner"
   163  }
   164  function ensureCluster() {
   165    if gcloud container clusters describe "${CLUSTER}" --project="${PROJECT}" --zone="${ZONE}" >/dev/null 2>&1; then
   166      echo "Cluster '${CLUSTER}' exists in zone '${ZONE}' in project '${PROJECT}', skip creating."
   167      return
   168    fi
   169  
   170    prompt "Pressing Y/y to create the cluster" echo
   171    echo "Creating cluster '${CLUSTER}' (this may take a few minutes)..."
   172    echo "If this fails due to insufficient project quota, request more at https://console.cloud.google.com/iam-admin/quotas?project=${PROJECT}"
   173    echo
   174    gcloud container clusters create "${CLUSTER}" --project="${PROJECT}" --zone="${ZONE}" --machine-type="${MACHINE}" --num-nodes="${NODECOUNT}" --disk-size="${DISKSIZE}" --disk-type="pd-ssd" --enable-autoupgrade --enable-autorepair --workload-pool="${PROJECT}.svc.id.goog"
   175    getClusterCreds
   176    kubectl create namespace "test-pods"
   177  }
   178  
   179  function createBucket() {
   180    gsutil mb -p "${PROJECT}" -b on "${GCS_BUCKET}"
   181    for i in ${CONTROL_PLANE_SA//,/ }
   182    do
   183      gsutil iam ch "serviceAccount:${i}:roles/storage.objectAdmin" "${GCS_BUCKET}"
   184    done
   185  }
   186  
   187  function ensureBucket() {
   188    if ! gsutil ls "${GCS_BUCKET}"; then
   189      prompt "The specified GCS bucket '${GCS_BUCKET}' cannot be located. This is expected if this is a shared default job result bucket. Otherwise press Y/y to create it." createBucket
   190    else
   191      echo "Bucket '${GCS_BUCKET}' already exists, skip creation."
   192    fi
   193  }
   194  
   195  function ensureUploadSA() {
   196    getClusterCreds
   197    local sa="prowjob-default-sa"
   198    local saFull="${sa}@${PROJECT}.iam.gserviceaccount.com"
   199    # Create a GCP service account for uploading to GCS
   200    if ! gcloud beta iam service-accounts describe "${saFull}" --project="${PROJECT}" >/dev/null 2>&1; then
   201      gcloud beta iam service-accounts create "${sa}" --project="${PROJECT}" --description="Default SA for ProwJobs to use to upload job results to GCS." --display-name="ProwJob default SA"
   202    else
   203      echo "Service account '${sa}' already exists, skip creation."
   204    fi
   205    # Ensure workload identity is enabled on the cluster
   206    if ! gcloud container clusters describe ${CLUSTER} --project=${PROJECT} --zone=${ZONE} | grep "${CLUSTER}.svc.id.goog" >/dev/null 2>&1; then
   207      "${ROOT_DIR}/workload-identity/enable-workload-identity.sh" "${PROJECT}" "${ZONE}" "${CLUSTER}"
   208    else
   209      echo "Workload identity is enabled on cluster '${CLUSTER}', skip enabling."
   210    fi
   211  
   212    # Create a k8s service account to associate with the GCP service account
   213    if ! kubectl -n test-pods get ${sa}; then
   214      kubectl apply -f - <<EOF
   215  apiVersion: v1
   216  kind: ServiceAccount
   217  metadata:
   218    annotations:
   219      iam.gke.io/gcp-service-account: ${saFull}
   220    name: ${sa}
   221    namespace: test-pods
   222  EOF
   223    fi
   224  
   225    echo "Binding GCP service account with k8s service account via workload identity. Propagation and validation may take a few minutes..."
   226    if ! gcloud iam service-accounts get-iam-policy --project=gob-prow prowjob-default-sa@gob-prow.iam.gserviceaccount.com | grep "${CLUSTER}.svc.id.goog[test-pods/${saFull}]" >/dev/null 2>&1; then
   227      "${ROOT_DIR}/workload-identity/bind-service-accounts.sh" "${PROJECT}" "${ZONE}" "${CLUSTER}" test-pods "${sa}" "${saFull}"
   228    fi
   229  
   230    # Try to authorize SA to upload to GCS_BUCKET. If this fails, the bucket if
   231    # probably a shared result bucket and oncall will need to handle.
   232    if ! gsutil iam get "${GCS_BUCKET}" | grep "serviceAccount:${saFull}" >/dev/null 2>&1; then
   233      if ! gsutil iam ch "serviceAccount:${saFull}:roles/storage.objectAdmin" "${GCS_BUCKET}"; then
   234        echo
   235        echo "It doesn't look you have permission to authorize access to this bucket. This is expected for the default job result bucket."
   236        echo "If this is a default job result bucket, please ask the test-infra oncall (https://go.k8s.io/oncall) to run the following:"
   237        echo "  gsutil iam ch \"serviceAccount:${saFull}:roles/storage.objectAdmin\" \"${GCS_BUCKET}\""
   238        echo
   239        echo "Press any key to acknowledge (this doesn't need to be completed to continue this script, but it needs to be done before uploading will work)..."
   240        pause
   241      fi
   242    fi
   243  }
   244  
   245  function genConfig() {
   246    # TODO: Automatically inject this into config.yaml at the same time as kubeconfig credential setup (which auto creates a PR we can include this in).
   247    echo
   248    echo "The following changes should be made to the Prow instance's config.yaml file (Probably located at ${PROW_DEPLOYMENT_DIR}/../config.yaml)."
   249    echo
   250    echo "Append the following entry to the end of the slice at field 'plank.default_decoration_config_entries': "
   251    cat <<EOF
   252    - cluster: $(cluster_alias)
   253      config:
   254        gcs_configuration:
   255          bucket: "${GCS_BUCKET#"gs://"}"
   256        default_service_account_name: "prowjob-default-sa" # Use workload identity
   257        gcs_credentials_secret: ""                         # rather than service account key secret
   258  EOF
   259    echo
   260    echo "Press any key to acknowledge... This doesn't need to be merged to continue this script, but it needs to be done before configuring jobs for the cluster."
   261    pause
   262  }
   263  
   264  # generate a JWT kubeconfig file that we can merge into k8s-prow's kubeconfig
   265  # secret so that Prow can schedule pods. This operation is now handled by a prow
   266  # job runs gencred pediodically. So the only action from this function is
   267  # authorizing prow service account to access the build cluster.
   268  function gencreds() {
   269    # The secret can be stored in prow service cluster
   270    gcloud projects add-iam-policy-binding --member="serviceAccount:${PROW_SECRET_ACCESSOR_SA}" --role="roles/container.admin" "${PROJECT}" --condition=None
   271  
   272    prompt "Create CL for you" create_cl
   273  
   274    echo "ProwJobs that intend to use this cluster should specify 'cluster: $(cluster_alias)'" # TODO: color this
   275    echo
   276    echo "Press any key to acknowledge (this doesn't need to be completed to continue this script, but it needs to be done before Prow can schedule jobs to your cluster)..."
   277    pause
   278  }
   279  
   280  cluster_alias() {
   281    echo "build-${TEAM}"
   282  }
   283  gsm_secret_name() {
   284    echo "prow_build_cluster_kubeconfig_$(cluster_alias)"
   285  }
   286  
   287  create_cl() {
   288    local cluster_alias
   289    cluster_alias="$(cluster_alias)"
   290    local gsm_secret_name
   291    gsm_secret_name="$(gsm_secret_name)"
   292    local build_cluster_kubeconfig_mount_path="/etc/${cluster_alias}"
   293    local build_clster_secret_name_in_cluster="kubeconfig-build-${TEAM}"
   294    cd "${ROOT_DIR}"
   295    local fork
   296    fork="$(echo "${GITHUB_FORK_URI}" | "$SED" -e "s;https://github.com/;;" -e "s;git@github.com:;;" -e "s;.git;;")"
   297    
   298    cd "${tempdir}"
   299    git clone "${GITHUB_CLONE_URI}" forked-test-infra
   300    cd forked-test-infra
   301    git fetch
   302  
   303    git checkout -b add-build-cluster-secret-${TEAM}
   304  
   305    cat>>"${PROW_DEPLOYMENT_DIR}/kubernetes_external_secrets.yaml" <<EOF
   306  ---
   307  apiVersion: kubernetes-client.io/v1
   308  kind: ExternalSecret
   309  metadata:
   310    name: ${build_clster_secret_name_in_cluster}
   311    namespace: default
   312  spec:
   313    backendType: gcpSecretsManager
   314    projectId: ${PROW_SERVICE_PROJECT}
   315    data:
   316    - key: ${gsm_secret_name}
   317      name: kubeconfig
   318      version: latest
   319  EOF
   320  
   321    # Also register this build cluster with gencred, so that the kubeconfig
   322    # secrets can be rotated.
   323    local gencred_config_file="${PROW_DEPLOYMENT_DIR}/../gencred-config/gencred-config.yaml"
   324    "${SED}" -i "s;clusters:;clusters:\\
   325  - gke: projects/${PROJECT}/locations/${ZONE}/clusters/${CLUSTER}\\
   326    name: ${cluster_alias}\\
   327    duration: 48h\\
   328    gsm:\\
   329      name: ${gsm_secret_name}\\
   330      project: ${PROW_SERVICE_PROJECT};" "${gencred_config_file}"
   331  
   332    git add "${PROW_DEPLOYMENT_DIR}/kubernetes_external_secrets.yaml"
   333    git add "${gencred_config_file}"
   334    git commit -m "Add external secret from build cluster for ${TEAM}"
   335    git push -f "${GITHUB_FORK_URI}" "HEAD:add-build-cluster-secret-${TEAM}"
   336  
   337    git checkout -b use-build-cluster-${TEAM} master
   338    
   339    for app_deployment_file in ${PROW_DEPLOYMENT_DIR}/*.yaml; do
   340      if ! grep "/etc/kubeconfig/config" "${app_deployment_file}">/dev/null 2>&1; then
   341        if ! grep "name: KUBECONFIG" "${app_deployment_file}">/dev/null 2>&1; then
   342          continue
   343        fi
   344      fi
   345      "${SED}" -i "s;volumeMounts:;volumeMounts:\\
   346          - mountPath: ${build_cluster_kubeconfig_mount_path}\\
   347            name: ${cluster_alias}\\
   348            readOnly: true;" "${app_deployment_file}"
   349  
   350      "${SED}" -i "s;volumes:;volumes:\\
   351        - name: ${cluster_alias}\\
   352          secret:\\
   353            defaultMode: 420\\
   354            secretName: ${build_clster_secret_name_in_cluster};" "${app_deployment_file}"
   355  
   356      # Appends to an existing value doesn't seem to be supported by kustomize, so
   357      # using sed instead. `&` represents for regex matched part
   358      "${SED}" -E -i "s;/etc/kubeconfig/config(-[0-9]+)?;&:${build_cluster_kubeconfig_mount_path}/kubeconfig;" "${app_deployment_file}"
   359      git add "${app_deployment_file}"
   360    done
   361  
   362    git commit -m "Add build cluster kubeconfig for ${TEAM}
   363  
   364  Please submit this change after the previous PR was submitted and postsubmit job succeeded.
   365  Prow oncall: please don't submit this change until the secret is created successfully, which will be indicated by prow alerts in 2 minutes after the postsubmit job.
   366  "
   367  
   368    git push -f "${GITHUB_FORK_URI}" "HEAD:use-build-cluster-${TEAM}"
   369    echo
   370    echo "Please open https://github.com/${fork}/pull/new/add-build-cluster-secret-${TEAM} and https://github.com/${fork}/pull/new/use-build-cluster-${TEAM}, creating PRs from both of them and assign to test-infra oncall for approval"
   371    echo
   372    pause
   373  }
   374  
   375  function cleanup() {
   376    returnCode="$?"
   377    rm -f "sa-key.json" || true
   378    rm -rf "${tempdir}" || true
   379    exit "${returnCode}"
   380  }
   381  trap cleanup EXIT
   382  main "$@"
   383  cleanup