github.com/kubeflow/training-operator@v1.7.0/scripts/setup-tf-operator.sh (about)

     1  #!/bin/bash
     2  
     3  # Copyright 2018 The Kubernetes Authors.
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #     http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
# This shell script is run from our Argo workflow. It points kubectl at an
# existing EKS cluster, installs the Training Operator manifests into it and
# waits for the operator pod to become ready.
    19  
    20  set -o errexit
    21  set -o nounset
    22  set -o pipefail
    23  
    24  CLUSTER_NAME="${CLUSTER_NAME}"
    25  REGION="${AWS_REGION:-us-west-2}"
    26  REGISTRY="${ECR_REGISTRY:-public.ecr.aws/j1r0q0g6/training/training-operator}"
    27  VERSION="${PULL_BASE_SHA}"
    28  GO_DIR=${GOPATH}/src/github.com/${REPO_OWNER}/${REPO_NAME}
    29  
    30  echo "Configuring kubeconfig.."
    31  aws eks update-kubeconfig --region=${REGION} --name=${CLUSTER_NAME}
    32  
    33  echo "Update Training Operator manifest with new name $REGISTRY and tag $VERSION"
    34  cd manifests/overlays/standalone
    35  kustomize edit set image public.ecr.aws/j1r0q0g6/training/training-operator=${REGISTRY}:${VERSION}
    36  
    37  echo "Installing Training Operator manifests"
    38  kustomize build . | kubectl apply -f -
    39  
    40  TIMEOUT=30
    41  until kubectl get pods -n kubeflow | grep tf-job-operator | grep 1/1 || [[ $TIMEOUT -eq 1 ]]; do
    42    sleep 10
    43    TIMEOUT=$((TIMEOUT - 1))
    44  done
    45  kubectl describe all -n kubeflow
    46  kubectl describe pods -n kubeflow