#!/bin/bash
# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Controls the lifecycle of a Jenkins agent VM through gcloud: create, delete,
# attach to / detach from the Jenkins master, provision packages, copy ssh
# keys and bake a new agent image.
#
# Run command with no args for usage instructions.

# Prints the help text to stdout and exits with status 1.
# NOTE(review): the help text lists a 'create-image' action, but this file
# defines no create-image-agent function; 'update-image' is what bakes images.
usage() {
  echo "Usage: $(basename "${0}") [--FLAGS] <INSTANCE> [ACTION ...]"
  echo 'Flags:'
  echo ' --base-image: use base-image instead of derived for instances'
  echo ' --fake: use tiny instance'
  echo ' --pr: talk to the pull-request instead of e2e server'
  echo ' --previous: use last known good CI image'
  echo ' --previous-pr: use last known good PR image'
  echo 'INSTANCE: the name of the instance to target'
  echo ' pr-: create a pr-builder-sized instance'
  echo ' light-: create an instance for light postcommit jobs (e2e)'
  echo ' heavy-: create an instance for heavy postcommit jobs (build)'
  echo 'Actions (auto by default):'
  echo ' attach: connect the instance to the jenkins master'
  echo ' auto: detach delete create attach'
  echo ' auto-image: delete create update copy-keys reboot update-image delete'
  echo ' copy-keys: copy ssh keys from master to agent'
  echo ' create: insert a new vm'
  echo ' create-image: create a new agent image'
  echo ' delete: delete a vm'
  echo ' detach: disconnect the instance from the jenkins master'
  echo ' reboot: reboot or hard reset the VM'
  echo ' update: configure prerequisite packages to run tests'
  echo ' update-image: update the image-family used to create new disks'
  echo 'Common commands:'
  echo ' # Refresh image'
  echo " $(basename "${0}") --base-image light-agent auto-image"
  echo ' # Retire agent'
  echo " $(basename "${0}") agent-heavy-666 detach delete"
  echo ' # Refresh agent'
  echo " $(basename "${0}") agent-light-666"
  echo " $(basename "${0}") --pr agent-pr-666"
  exit 1
}


set -o nounset
set -o errexit

GO_VERSION='go1.8.3.linux-amd64'
TIMEZONE='America/Los_Angeles'

FAKE=
PR=

# Defaults
BASE_IMAGE='debian-9'
IMAGE='jenkins-agent'
IMAGE_FLAG="--image-family=${IMAGE}"
IMAGE_PROJECT='kubernetes-jenkins'
SCOPES='cloud-platform,compute-rw,storage-full'  # TODO(fejta): verify

if [[ -z "${1:-}" ]]; then
  usage
fi

# Consume leading --flags. Every arm must shift, otherwise the same flag is
# matched again on the next iteration and the loop never terminates.
while true; do
  case "${1:-}" in
    --fake)
      FAKE=yes
      shift
      ;;
    --previous)
      # Currently jenkins-agent-20160926-0059
      IMAGE_FLAG='--image=jenkins-agent-20160613-2240'
      shift
      ;;
    --previous-pr)
      # Currently jenkins-agent-20160926-0000
      IMAGE_FLAG='--image=jenkins-agent-20160613-1431'
      PR=yes
      shift
      ;;
    --pr)
      PR=yes
      if [[ "${IMAGE_PROJECT}" == 'kubernetes-jenkins' ]]; then
        IMAGE_PROJECT='kubernetes-jenkins-pull'
      fi
      shift
      ;;
    --base-image)
      IMAGE_FLAG="--image-family=${BASE_IMAGE}"
      IMAGE_PROJECT='debian-cloud'
      shift
      ;;
    *)
      break
      ;;
  esac
done

# Only flags were given: show usage instead of tripping nounset on ${1}.
if [[ -z "${1:-}" ]]; then
  echo 'Missing INSTANCE' >&2
  usage
fi

INSTANCE="${1}"
shift
if [[ -n "${PR}" ]]; then
  echo 'Talking to PR jenkins'
  MASTER='pull-jenkins-master'
else
  MASTER='jenkins-master'
fi

# The agent kind is inferred from a substring of the instance name.
if [[ "${INSTANCE}" =~ light- ]]; then
  KIND='light'
elif [[ "${INSTANCE}" =~ heavy- ]]; then
  KIND='heavy'
elif [[ "${INSTANCE}" =~ pr- ]]; then
  KIND='pr'
else
  KIND=
fi

case "${KIND}" in
  light)
    # Current experiment:
    #   14 agents
    #   10 executors, n1-highmem-8, 250G pd-standard
    # Results:
    #   (1.0 cores, 20G active ram)
    #   1.46 cores, 50G ram, <250 write IOPs, <25MB/s write
    # Results:
    #   0.2 cores, 1.3G ram, low IOPs (200 IOP spikes), low write (30MB/s spikes)
    DISK_SIZE='250GB'
    DISK_TYPE='pd-standard'
    MACHINE_TYPE='n1-highmem-8'
    ;;
  heavy)
    # Current experiment:
    #   6 agents
    #   1 executor, n1-standard-8, 150G pd-standard
    # Results:
    #   load 14-32, 12G ram, 150 write IOPs, <20MB/s write
    DISK_SIZE='150GB'
    DISK_TYPE='pd-standard'
    MACHINE_TYPE='n1-standard-8'
    ;;
  pr)
    # Current experiment:
    #   1 executor, n1-standard-8, 200G pd
    # Results:
    #   load 10-30, 10G ram
    DISK_SIZE='200GB'
    DISK_TYPE='pd-standard'
    MACHINE_TYPE='n1-standard-8'
    ;;
  *)
    ;;
esac

if [[ -n "${FAKE}" ]]; then
  DISK_SIZE='200GB'
  DISK_TYPE='pd-standard'
  MACHINE_TYPE='n1-standard-1'
  read -r -p "Using ${MACHINE_TYPE} for testing. Continue [Y/n]: " ans
  # The regex must be unquoted: a quoted right-hand side of =~ is matched as a
  # literal string, which no y/Y answer could ever satisfy.
  if [[ ! "${ans}" =~ ^[yY] ]]; then
    # NOTE(review): no --real flag is parsed anywhere in this script.
    echo 'Add --real'
    exit 1
  fi
fi

# Exits with status 1 unless the instance name mapped to a known KIND.
check-kind() {
  if [[ -z "${KIND}" ]]; then
    echo "${INSTANCE} does not contain light-|heavy-|pr-"
    exit 1
  fi
}

# Default action: detach, delete, create and attach the instance.
auto-agent() {
  echo "Automatically creating ${INSTANCE}..."
  check-kind
  detach-agent
  delete-agent
  create-agent
  attach-agent
}

# Checks that something is bound to port 8080 locally (netstat output contains
# ":8080"); otherwise prints the gcloud ssh tunnel command to run and exits 1.
tunnel-to-master() {
  if sudo netstat -anp | grep :8080 > /dev/null 2>&1 ; then
    sleep 1
  else
    echo "Please run gcloud compute ssh \"${MASTER}\" --ssh-flag='-L8080:localhost:8080'"
    exit 1
  fi
}


# Runs attach_agent.py (located next to this script) against the master.
# Arguments: $1 - command passed through to attach_agent.py
#                 (this script uses 'create' and 'delete').
# Exits 1 if the credentials ini file is missing.
master-change() {
  tunnel-to-master
  local cmd="${1}"
  local ini="${HOME}/jenkins-master-creds.ini"  # /user/<user>/configure && show api token
  if [[ ! -f "${ini}" ]]; then
    echo "Missing config: ${ini}"
    exit 1
  fi
  python "$(dirname "${0}")/attach_agent.py" "${cmd}" "${INSTANCE}" "${KIND}" "${ini}" "${MASTER}"
}


# Asks the master to delete its record of the instance.
detach-agent() {
  echo "Detaching ${INSTANCE}..."
  master-change delete
}


# Verifies gcloud and the ssh key exist on the instance, then registers the
# instance with the master.
attach-agent() {
  echo "Testing gcloud works on ${INSTANCE}..."
  gcloud compute ssh "${INSTANCE}" --command="gcloud compute instances list '--filter=name=${INSTANCE}'"
  echo "Checking presence of ssh keys on ${INSTANCE}..."
  gcloud compute ssh "${INSTANCE}" --command="[[ -f /var/lib/jenkins/gce_keys/google_compute_engine ]]"
  echo "Attaching ${INSTANCE}..."
  check-kind
  master-change create
}


# Deletes the VM; a no-op when the instance listing for that name is empty.
delete-agent() {
  echo "Delete ${INSTANCE}..."
  if [[ -z "$(gcloud compute instances list --filter="name=${INSTANCE}")" ]]; then
    return 0
  fi
  gcloud -q compute instances delete "${INSTANCE}"
}

# Builds a fresh image end to end: recreate the VM, provision it, copy keys,
# reboot, snapshot it into the image family, then delete the VM.
auto-image-agent() {
  delete-agent
  create-agent
  update-agent
  copy-keys-agent
  reboot-agent
  update-image-agent
  delete-agent
}

# Creates a new timestamped image in the ${IMAGE} family from the instance's
# disk, going through a temporary snapshot and disk that are deleted afterwards.
update-image-agent() {
  local family="${IMAGE}"
  local image
  image="${family}-$(date +%Y%m%d-%H%M)"
  echo "Create ${image} for ${family} from ${INSTANCE}..."
  echo " Create snapshot of ${INSTANCE}"
  gcloud compute disks snapshot --snapshot-names="${image}" "${INSTANCE}"
  echo " Create disk from ${image} snapshot"
  gcloud compute disks create --source-snapshot="${image}" "${image}"
  echo " Create image from ${image} image"
  gcloud compute images create "${image}" \
    --family="${family}" \
    --source-disk="${image}" \
    --description="Created by ${USER} for ${family} on $(date)"
  echo " Delete ${image} disk"
  gcloud -q compute disks delete "${image}"
  echo " Delete ${image} snapshot"
  gcloud -q compute snapshots delete "${image}"
}

# Creates the VM with the disk/machine settings selected for KIND, then polls
# once per second until ssh to the instance succeeds.
create-agent() {
  echo "Create ${INSTANCE}..."
  check-kind
  gcloud compute instances create \
    "${INSTANCE}" \
    --description="created on $(date) by ${USER}" \
    --boot-disk-size="${DISK_SIZE}" \
    --boot-disk-type="${DISK_TYPE}" \
    "${IMAGE_FLAG}" \
    --image-project="${IMAGE_PROJECT}" \
    --machine-type="${MACHINE_TYPE}" \
    --scopes="${SCOPES}" \
    --tags='do-not-delete,jenkins'
  while ! gcloud compute ssh "${INSTANCE}" --command='uname -a' < /dev/null; do
    sleep 1
  done
}

# Copies the google_compute_engine key pair from the master to the instance
# via the local working directory and installs it for the jenkins user.
# The here-docs are unquoted, so ${USER} is expanded locally before sending.
# NOTE(review): the key files copied into the local working directory are not
# removed afterwards.
copy-keys-agent() {
  echo "Copying ssh keys to ${INSTANCE}..."
  gcloud compute ssh "${MASTER}" << COPY_DONE
set -o errexit
sudo cp /var/lib/jenkins/gce_keys/google_compute_engine* .
sudo chown "${USER}:${USER}" google_compute_engine*
COPY_DONE
  gcloud compute copy-files "${MASTER}:google_compute_engine*" .
  gcloud compute copy-files google_compute_engine* "${INSTANCE}:."
  gcloud compute ssh "${INSTANCE}" << PLACE_DONE
set -o errexit
sudo cp google_compute_engine* /var/lib/jenkins/gce_keys/
sudo cp /var/lib/jenkins/gce_keys/google_compute_engine* /home/jenkins/.ssh/
sudo chown jenkins:jenkins {/var/lib/jenkins/gce_keys,/home/jenkins/.ssh}/google_compute_engine{,.pub}
sudo su -c 'gcloud compute config-ssh' jenkins
PLACE_DONE
}

# Provisions the instance over ssh: base packages, docker, java 8, go,
# panic-reboot sysctls, tmpreaper config, time zone and the jenkins workspace.
# The here-doc is unquoted on purpose: ${GO_VERSION} and ${TIMEZONE} are
# expanded locally, while \$(lsb_release -cs) is escaped so it runs remotely.
update-agent() {

  echo "Instantiate ${INSTANCE}..."
  gcloud compute ssh "${INSTANCE}" << INSTANTIATE_DONE
set -o verbose
set -o errexit

sudo apt-get -y update
sudo apt-get -y install \
  apt-transport-https \
  ca-certificates \
  curl \
  gnupg2 \
  software-properties-common \
  python-openssl python-pyasn1 python-ndg-httpsclient \
  build-essential \
  tmpreaper \
  jq

# Install docker
curl -fsSL https://download.docker.com/linux/debian/gpg | sudo apt-key add -
sudo apt-key fingerprint 0EBFCD88 | grep '9DC8 5822 9FC7 DD38 854A E2D8 8D81 803C 0EBF CD88'
sudo add-apt-repository -y "deb [arch=amd64] https://download.docker.com/linux/debian \$(lsb_release -cs) stable"
sudo apt-get -y update
sudo apt-get -y install docker-ce
sudo docker run hello-world
id jenkins || sudo useradd jenkins -m
sudo usermod -aG docker jenkins

# Use java8
sudo apt-get -y update
sudo apt-get -y install \
  openjdk-8-jdk
sudo update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
java -version 2>&1 | grep 1.8

# Install go (needed for hack/e2e.go)

wget "https://storage.googleapis.com/golang/${GO_VERSION}.tar.gz"
sudo tar xzvf "${GO_VERSION}.tar.gz" -C /usr/local


# Reboot on panic
sudo touch /etc/sysctl.conf
sudo sh -c 'cat << END >> /etc/sysctl.conf
kernel.panic_on_oops = 1
kernel.panic = 10
END'
sudo sysctl -p

# Keep tmp clean
sudo sh -c "cat << END > /etc/tmpreaper.conf
TMPREAPER_PROTECT_EXTRA=''
TMPREAPER_DIRS='/tmp/. /var/tmp/.'
TMPREAPER_TIME='3'
TMPREAPER_DELAY='256'
TMPREAPER_ADDITIONALOPTIONS=''
END"

# Configure the time zone
sudo sh -c 'echo ${TIMEZONE} > /etc/timezone'
sudo dpkg-reconfigure -f noninteractive tzdata

# Prepare jenkins workspace
sudo mkdir -p /var/lib/jenkins/gce_keys /home/jenkins/.ssh
sudo chown -R jenkins:jenkins /var/lib/jenkins /home/jenkins/.ssh

# Update/upgrade
sudo apt-get -y update
sudo apt-get -y upgrade
INSTANTIATE_DONE
}

# Reboots the instance over ssh, falling back to a hard reset if that command
# fails, then waits until ssh to the instance succeeds again.
reboot-agent() {
  echo "Rebooting ${INSTANCE}..."
  gcloud compute ssh "${INSTANCE}" --command='sudo reboot' || gcloud compute instances reset "${INSTANCE}"
  sleep 30  # TODO(fejta): still but slightly less lame
  while ! gcloud compute ssh "${INSTANCE}" --command='uname -a' < /dev/null; do
    sleep 1
  done
}

# No actions given: run the default 'auto' sequence.
if [[ -z "${1:-}" ]]; then
  auto-agent
fi

# Run each requested action in order; action NAME maps to function NAME-agent.
while [[ -n "${1:-}" ]]; do
  # Reject unknown actions with a clear message rather than a bare
  # "command not found" from the shell.
  if ! declare -F -- "${1}-agent" > /dev/null 2>&1; then
    echo "Unknown action: ${1}" >&2
    usage
  fi
  "${1}-agent"
  shift
done