github.com/shashidharatd/test-infra@v0.0.0-20171006011030-71304e1ca560/jenkins/agent-ctl.sh (about)

     1  #!/bin/bash
     2  # Copyright 2016 The Kubernetes Authors.
     3  #
     4  # Licensed under the Apache License, Version 2.0 (the "License");
     5  # you may not use this file except in compliance with the License.
     6  # You may obtain a copy of the License at
     7  #
     8  #     http://www.apache.org/licenses/LICENSE-2.0
     9  #
    10  # Unless required by applicable law or agreed to in writing, software
    11  # distributed under the License is distributed on an "AS IS" BASIS,
    12  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  # See the License for the specific language governing permissions and
    14  # limitations under the License.
    15  
    16  # Run command with no args for usage instructions.
    17  
    18  usage() {
    19    echo "Usage: $(basename "${0}") [--FLAGS] <INSTANCE> [ACTION ...]"
    20    echo 'Flags:'
    21    echo '  --base-image: use base-image instead of derived for instances'
    22    echo '  --fake: use tiny instance'
    23    echo '  --pr: talk to the pull-request instead of e2e server'
    24    echo '  --previous: use last known good CI image'
    25    echo '  --previous-pr: use last known good PR image'
    26    echo 'INSTANCE: the name of the instance to target'
    27    echo '  pr-: create a pr-builder-sized instance'
    28    echo '  light-: create an instance for light postcommit jobs (e2e)'
    29    echo '  heavy-: create an instance for heavy postcommit jobs (build)'
    30    echo 'Actions (auto by default):'
    31    echo '  attach: connect the instance to the jenkins master'
    32    echo '  auto: detach delete create attach'
    33    echo '  auto-image: delete create update copy-keys reboot update-image delete'
    34    echo '  copy-keys: copy ssh keys from master to agent'
    35    echo '  create: insert a new vm'
    36    echo '  create-image: create a new agent image'
    37    echo '  delete: delete a vm'
    38    echo '  detach: disconnect the instance from the jenkins master'
    39    echo '  reboot: reboot or hard reset the VM'
    40    echo '  update: configure prerequisite packages to run tests'
    41    echo '  update-image: update the image-family used to create new disks'
    42    echo 'Common commands:'
    43    echo '  # Refresh image'
    44    echo "  $(basename "${0}") --base-image light-agent auto-image"
    45    echo '  # Retire agent'
    46    echo "  $(basename "${0}") agent-heavy-666 detach delete"
    47    echo '  # Refresh agent'
    48    echo "  $(basename "${0}") agent-light-666"
    49    echo "  $(basename "${0}") --pr agent-pr-666"
    50    exit 1
    51  }
    52  
    53  
    54  set -o nounset
    55  set -o errexit
    56  
    57  GO_VERSION='go1.8.3.linux-amd64'
    58  TIMEZONE='America/Los_Angeles'
    59  
    60  FAKE=
    61  PR=
    62  
    63  # Defaults
    64  BASE_IMAGE='debian-9'
    65  IMAGE='jenkins-agent'
    66  IMAGE_FLAG="--image-family=${IMAGE}"
    67  IMAGE_PROJECT='kubernetes-jenkins'
    68  SCOPES='cloud-platform,compute-rw,storage-full'  # TODO(fejta): verify
    69  
    70  if [[ -z "${1:-}" ]]; then
    71    usage
    72  fi
    73  
    74  while true; do
    75    case "${1:-}" in
    76      --fake)
    77        FAKE=yes
    78        shift
    79        ;;
    80      --previous)
    81        # Currently jenkins-agent-20160926-0059
    82        IMAGE_FLAG='--image=jenkins-agent-20160613-2240'
    83        ;;
    84      --previous-pr)
    85        # Currently jenkins-agent-20160926-0000
    86        IMAGE_FLAG='--image=jenkins-agent-20160613-1431'
    87        PR=yes
    88        ;;
    89      --pr)
    90        PR=yes
    91        if [[ "${IMAGE_PROJECT}" == 'kubernetes-jenkins' ]]; then
    92          IMAGE_PROJECT='kubernetes-jenkins-pull'
    93        fi
    94        shift
    95        ;;
    96      --base-image)
    97        IMAGE_FLAG="--image-family=${BASE_IMAGE}"
    98        IMAGE_PROJECT='debian-cloud'
    99        shift
   100        ;;
   101      *)
   102        break
   103        ;;
   104    esac
   105  done
   106  
   107  INSTANCE="${1}"
   108  shift
   109  if [[ -n "${PR}" ]]; then
   110    echo 'Talking to PR jenkins'
   111    MASTER='pull-jenkins-master'
   112  else
   113    MASTER='jenkins-master'
   114  fi
   115  
   116  if [[ "${INSTANCE}" =~ light- ]]; then
   117    KIND='light'
   118  elif [[ "${INSTANCE}" =~ heavy- ]]; then
   119    KIND='heavy'
   120  elif [[ "${INSTANCE}" =~ pr- ]]; then
   121    KIND='pr'
   122  else
   123    KIND=
   124  fi
   125  
   126  case "${KIND}" in
   127    light)
   128      # Current experiment:
   129      # 14 agents
   130      # 10 executors, n1-highmem-8, 250G pd-standard
   131      # Results:
   132      # (1.0 cores, 20G active ram)
   133      # 1.46 cores, 50G ram, <250 write IOPs, <25MB/s write
   134      # Results:
   135      # 0.2 cores, 1.3G ram, low IOPs (200 IOP spikes), low write (30MB/s spikes)
   136      DISK_SIZE='250GB'
   137      DISK_TYPE='pd-standard'
   138      MACHINE_TYPE='n1-highmem-8'
   139      ;;
   140    heavy)
   141      # Current experiment:
   142      # 6 agents
   143      # 1 executor, n1-standard-8, 150G pd-standard
   144      # Results:
   145      # load 14-32, 12G ram, 150 write IOPs, <20MB/s write
   146      DISK_SIZE='150GB'
   147      DISK_TYPE='pd-standard'
   148      MACHINE_TYPE='n1-standard-8'
   149      ;;
   150    pr)
   151      # Current experiment:
   152      # 1 executor, n1-standard-8, 200G pd
   153      # Results:
   154      # load 10-30, 10G ram
   155      DISK_SIZE='200GB'
   156      DISK_TYPE='pd-standard'
   157      MACHINE_TYPE='n1-standard-8'
   158      ;;
   159    *)
   160      ;;
   161  esac
   162  
   163  if [[ -n "${FAKE}" ]]; then
   164    DISK_SIZE='200GB'
   165    DISK_TYPE='pd-standard'
   166    MACHINE_TYPE='n1-standard-1'
   167    read -p "Using ${MACHINE_TYPE} for testing. Continue [Y/n]: " ans
   168    if [[ ! "${ans}" =~ '^[yY]' ]]; then
   169      echo 'Add --real'
   170      exit 1
   171    fi
   172  fi
   173  
   174  check-kind() {
   175    if [[ -z "${KIND}" ]]; then
   176      echo "${INSTANCE} does not contain light-|heavy-|pr-"
   177      exit 1
   178    fi
   179  }
   180  
   181  auto-agent() {
   182    echo "Automatically creating ${INSTANCE}..."
   183    check-kind
   184    detach-agent
   185    delete-agent
   186    create-agent
   187    attach-agent
   188  }
   189  
   190  tunnel-to-master() {
   191    if sudo netstat -anp | grep :8080 > /dev/null 2>&1 ; then
   192      sleep 1
   193    else
   194      echo "Please run gcloud compute ssh \"${MASTER}\" --ssh-flag='-L8080:localhost:8080'"
   195      exit 1
   196    fi
   197  }
   198  
   199  
   200  master-change() {
   201    tunnel-to-master
   202    cmd="${1}"
   203    ini="${HOME}/jenkins-master-creds.ini"  # /user/<user>/configure && show api token
   204    if [[ ! -f "${ini}" ]]; then
   205      echo "Missing config: ${ini}"
   206      exit 1
   207    fi
   208    python "$(dirname "${0}")/attach_agent.py" "${cmd}" "${INSTANCE}" "${KIND}" "${ini}" "${MASTER}"
   209  }
   210  
   211  
   212  detach-agent() {
   213    echo "Detaching ${INSTANCE}..."
   214    master-change delete
   215  }
   216  
   217  
   218  attach-agent() {
   219    echo "Testing gcloud works on ${INSTANCE}..."
   220    gcloud compute ssh "${INSTANCE}" --command="gcloud compute instances list '--filter=name=${INSTANCE}'"
   221    echo "Checking presence of ssh keys on ${INSTANCE}..."
   222    gcloud compute ssh "${INSTANCE}" --command="[[ -f /var/lib/jenkins/gce_keys/google_compute_engine ]]"
   223    echo "Attaching ${INSTANCE}..."
   224    check-kind
   225    master-change create
   226  }
   227  
   228  
   229  delete-agent() {
   230    echo "Delete ${INSTANCE}..."
   231    if [[ -z "$(gcloud compute instances list --filter="name=${INSTANCE}")" ]]; then
   232      return 0
   233    fi
   234    gcloud -q compute instances delete "${INSTANCE}"
   235  }
   236  
   237  auto-image-agent() {
   238    delete-agent
   239    create-agent
   240    update-agent
   241    copy-keys-agent
   242    reboot-agent
   243    update-image-agent
   244    delete-agent
   245  }
   246  
   247  update-image-agent() {
   248    family="${IMAGE}"
   249    image="${family}-$(date +%Y%m%d-%H%M)"
   250    echo "Create ${image} for ${family} from ${INSTANCE}..."
   251    echo "  Create snapshot of ${INSTANCE}"
   252    gcloud compute disks snapshot --snapshot-names="${image}" "${INSTANCE}"
   253    echo "  Create disk from ${image} snapshot"
   254    gcloud compute disks create --source-snapshot="${image}" "${image}"
   255    echo "  Create image from ${image} image"
   256    gcloud compute images create "${image}" \
   257      --family="${family}" \
   258      --source-disk="${image}" \
   259      --description="Created by ${USER} for ${family} on $(date)"
   260    echo "  Delete ${image} disk"
   261    gcloud -q compute disks delete "${image}"
   262    echo "  Delete ${image} snapshot"
   263    gcloud -q compute snapshots delete "${image}"
   264  }
   265  
   266  create-agent() {
   267    echo "Create ${INSTANCE}..."
   268    check-kind
   269    gcloud compute instances create \
   270      "${INSTANCE}" \
   271      --description="created on $(date) by ${USER}" \
   272      --boot-disk-size="${DISK_SIZE}" \
   273      --boot-disk-type="${DISK_TYPE}" \
   274      "${IMAGE_FLAG}" \
   275      --image-project="${IMAGE_PROJECT}" \
   276      --machine-type="${MACHINE_TYPE}" \
   277      --scopes="${SCOPES}" \
   278      --tags='do-not-delete,jenkins'
   279    while ! gcloud compute ssh "${INSTANCE}" --command='uname -a' < /dev/null; do
   280      sleep 1
   281    done
   282  }
   283  
# Copy the google_compute_engine* ssh key files from MASTER to INSTANCE and
# install them for the jenkins user on INSTANCE.
#
# The files travel master -> current local directory -> INSTANCE home
# directory; this function does not remove the intermediate copies.
#
# Both heredocs use unquoted delimiters, so ${...} expansions inside them
# (${USER} in the first one) are resolved by this local shell before the text
# is sent to the remote side.
copy-keys-agent() {
echo "Copying ssh keys to ${INSTANCE}..."
# On the master: copy the keys into the login user's home directory and make
# them owned by the (locally expanded) ${USER} so they can be fetched.
gcloud compute ssh "${MASTER}" << COPY_DONE
set -o errexit
sudo cp /var/lib/jenkins/gce_keys/google_compute_engine* .
sudo chown "${USER}:${USER}" google_compute_engine*
COPY_DONE
# Fetch the keys into the current directory. The quoted glob is passed to
# gcloud as-is rather than being expanded locally.
gcloud compute copy-files "${MASTER}:google_compute_engine*" .
# Push them to the agent. Here the glob is intentionally unquoted so the local
# shell expands it to the files that were just downloaded.
gcloud compute copy-files google_compute_engine* "${INSTANCE}:."
# On the agent: install the keys in both jenkins locations, give them to the
# jenkins user, and run `gcloud compute config-ssh` as jenkins.
gcloud compute ssh "${INSTANCE}" << PLACE_DONE
set -o errexit
sudo cp google_compute_engine* /var/lib/jenkins/gce_keys/
sudo cp /var/lib/jenkins/gce_keys/google_compute_engine* /home/jenkins/.ssh/
sudo chown jenkins:jenkins {/var/lib/jenkins/gce_keys,/home/jenkins/.ssh}/google_compute_engine{,.pub}
sudo su -c 'gcloud compute config-ssh' jenkins
PLACE_DONE
}
   301  
# Provision INSTANCE by feeding a setup script to `gcloud compute ssh` on
# stdin. The script installs base packages, docker-ce, OpenJDK 8 and Go,
# creates the jenkins user if missing, configures panic-reboot sysctls,
# tmpreaper and the time zone, prepares the jenkins directories, and finishes
# with an apt upgrade.
#
# The heredoc delimiter is unquoted, so ${GO_VERSION} and ${TIMEZONE} are
# expanded by this local shell before the text is sent, while the escaped
# \$(lsb_release -cs) is left for the remote side to evaluate. Everything
# between the delimiters is sent verbatim after expansion, so it must not be
# annotated in place.
update-agent() {

echo "Instantiate ${INSTANCE}..."
gcloud compute ssh "${INSTANCE}" << INSTANTIATE_DONE
set -o verbose
set -o errexit

sudo apt-get -y update
sudo apt-get -y install \
  apt-transport-https \
  ca-certificates \
  curl \
  gnupg2 \
  software-properties-common \
  python-openssl python-pyasn1 python-ndg-httpsclient \
  build-essential \
  tmpreaper \
  jq

# Install docker
curl -fsSL https://download.docker.com/linux/debian/gpg | sudo apt-key add -
sudo apt-key fingerprint 0EBFCD88 | grep '9DC8 5822 9FC7 DD38 854A  E2D8 8D81 803C 0EBF CD88'
sudo add-apt-repository -y "deb [arch=amd64] https://download.docker.com/linux/debian \$(lsb_release -cs) stable"
sudo apt-get -y update
sudo apt-get -y install docker-ce
sudo docker run hello-world
id jenkins || sudo useradd jenkins -m
sudo usermod -aG docker jenkins

# Use java8
sudo apt-get -y update
sudo apt-get -y install \
  openjdk-8-jdk
sudo update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
java -version 2>&1 | grep 1.8

# Install go (needed for hack/e2e.go)

wget "https://storage.googleapis.com/golang/${GO_VERSION}.tar.gz"
sudo tar xzvf "${GO_VERSION}.tar.gz" -C /usr/local


# Reboot on panic
sudo touch /etc/sysctl.conf
sudo sh -c 'cat << END >> /etc/sysctl.conf
kernel.panic_on_oops = 1
kernel.panic = 10
END'
sudo sysctl -p

# Keep tmp clean
sudo sh -c "cat << END > /etc/tmpreaper.conf
TMPREAPER_PROTECT_EXTRA=''
TMPREAPER_DIRS='/tmp/. /var/tmp/.'
TMPREAPER_TIME='3'
TMPREAPER_DELAY='256'
TMPREAPER_ADDITIONALOPTIONS=''
END"

# Configure the time zone
sudo sh -c 'echo ${TIMEZONE} > /etc/timezone'
sudo dpkg-reconfigure -f noninteractive tzdata

# Prepare jenkins workspace
sudo mkdir -p /var/lib/jenkins/gce_keys /home/jenkins/.ssh
sudo chown -R jenkins:jenkins /var/lib/jenkins /home/jenkins/.ssh

# Update/upgrade
sudo apt-get -y update
sudo apt-get -y upgrade
INSTANTIATE_DONE
}
   374  
   375  reboot-agent() {
   376    echo "Rebooting ${INSTANCE}..."
   377    gcloud compute ssh "${INSTANCE}" --command='sudo reboot' || gcloud compute instances reset "${INSTANCE}"
   378    sleep 30  # TODO(fejta): still but sightly less lame
   379    while ! gcloud compute ssh "${INSTANCE}" --command='uname -a' < /dev/null; do
   380      sleep 1
   381    done
   382  }
   383  
   384  if [[ -z "${1:-}" ]]; then
   385    auto-agent
   386  fi
   387  
   388  while [[ -n "${1:-}" ]]; do
   389    "${1}-agent"
   390    shift
   391  done