#!/bin/bash

# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Installs the Training Operator into an existing EKS cluster:
#   1. points kubeconfig at the cluster via `aws eks update-kubeconfig`,
#   2. rewrites the operator image in manifests/overlays/standalone,
#   3. applies the rendered manifests with kubectl,
#   4. polls the kubeflow namespace until the operator pod reports 1/1,
#   5. dumps `kubectl describe` output for diagnostics.
#
# Environment:
#   CLUSTER_NAME   (required) name of the EKS cluster.
#   PULL_BASE_SHA  (required) image tag to deploy.
#   AWS_REGION     (optional) defaults to us-west-2.
#   ECR_REGISTRY   (optional) image repository; defaults to
#                  public.ecr.aws/j1r0q0g6/training/training-operator.
#   GOPATH, REPO_OWNER, REPO_NAME
#                  (required) expanded into GO_DIR below; with nounset
#                  enabled the script aborts if any of them is unset.
#
# The script must be started from a directory that contains
# manifests/overlays/standalone, because the `cd` below is relative.

set -o errexit
set -o nounset
set -o pipefail

# ${VAR:?msg} aborts with a readable message when the variable is unset OR
# empty; an empty value would otherwise produce an invalid cluster name or an
# image reference ending in ":".
CLUSTER_NAME="${CLUSTER_NAME:?CLUSTER_NAME must be set}"
REGION="${AWS_REGION:-us-west-2}"
REGISTRY="${ECR_REGISTRY:-public.ecr.aws/j1r0q0g6/training/training-operator}"
VERSION="${PULL_BASE_SHA:?PULL_BASE_SHA must be set}"
# Not referenced anywhere else in this script; kept so that the required
# GOPATH / REPO_OWNER / REPO_NAME environment is still enforced by nounset.
GO_DIR="${GOPATH}/src/github.com/${REPO_OWNER}/${REPO_NAME}"

echo "Configuring kubeconfig.."
aws eks update-kubeconfig --region="${REGION}" --name="${CLUSTER_NAME}"

echo "Update Training Operator manifest with new name $REGISTRY and tag $VERSION"
cd manifests/overlays/standalone
kustomize edit set image "public.ecr.aws/j1r0q0g6/training/training-operator=${REGISTRY}:${VERSION}"

echo "Installing Training Operator manifests"
kustomize build . | kubectl apply -f -

# TIMEOUT is a retry counter, not a number of seconds: the loop sleeps 10s
# between polls and gives up once the counter has been decremented to 1.
# NOTE(review): the pod filter is "tf-job-operator"; confirm it matches the
# pod name produced by the standalone overlay.
TIMEOUT=30
until kubectl get pods -n kubeflow | grep tf-job-operator | grep 1/1; do
  if [[ "${TIMEOUT}" -eq 1 ]]; then
    # Best-effort wait: report the timeout but still fall through to the
    # diagnostics below instead of aborting.
    echo "WARNING: timed out waiting for the tf-job-operator pod to become ready (1/1)" >&2
    break
  fi
  sleep 10
  TIMEOUT=$((TIMEOUT - 1))
done

# Always dump the namespace state so CI logs show what was deployed.
kubectl describe all -n kubeflow
kubectl describe pods -n kubeflow