#!/usr/bin/env python
# Source: github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/experiment/maintenance/shift_nodepool_capacity.py

# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script drains nodes from one node pool and adds nodes to another n:m at a time
#
# Use like:
# shift_nodepool_capacity.py pool-to-drain pool-to-grow shrink_increment:grow_increment num_to_add
#
# EG:
# shift_nodepool_capacity.py default-pool pool-n1-highmem-8-300gb 2:1 5
#
# for nodefs on the prow builds cluster
# USE AT YOUR OWN RISK.
# TODO(bentheelder): delete this once dynamic kubelet config is available


from __future__ import print_function

import sys
import subprocess
import json
import math

# xref prow/Makefile get-build-cluster-credentials
# TODO(bentheelder): perhaps make these configurable
CLUSTER = 'prow'
ZONE = 'us-central1-f'
PROJECT = 'k8s-prow-builds'

USAGE = (
    'usage: shift_nodepool_capacity.py pool-to-drain pool-to-grow'
    ' shrink_increment:grow_increment num_to_add'
)


def get_pool_sizes(project, zone, cluster):
    """Return a map of node pool name to node count, using the gcloud cli.

    Node pools are listed first and each of their instance groups is mapped
    back to the owning pool; the sizes of the matching instance groups in
    `zone` are then summed per pool. Pools without a matching instance group
    are reported with size 0.

    Raises subprocess.CalledProcessError if a gcloud invocation fails.
    """
    sizes = {}

    # map managed instance group names to node pools and record pool names
    node_pools = json.loads(subprocess.check_output([
        'gcloud', 'container', 'node-pools', 'list',
        '--project', project, '--cluster', cluster, '--zone', zone,
        '--format=json',
    ]))
    group_to_pool = {}
    for pool in node_pools:
        # later on we will sum up node counts from instance groups
        sizes[pool['name']] = 0
        # this is somewhat brittle, the last component of the URL is the instance group name
        # the better way to do this is probably to use the APIs directly
        for url in pool['instanceGroupUrls']:
            instance_group = url.split('/')[-1]
            group_to_pool[instance_group] = pool['name']

    # map instance groups to node counts
    groups = json.loads(subprocess.check_output([
        'gcloud', 'compute', 'instance-groups', 'list',
        '--project', project, '--filter=zone:({})'.format(zone),
        '--format=json',
    ]))
    for group in groups:
        pool_name = group_to_pool.get(group['name'])
        if pool_name is None:
            # instance group does not belong to any pool of this cluster
            continue
        sizes[pool_name] += group['size']

    return sizes


def resize_nodepool(pool, new_size, project, zone, cluster):
    """Resize the nodepool to new_size using the gcloud cli.

    The command is echoed before it is run. Returns the exit status of the
    gcloud invocation (0 on success); a non-zero status is also reported on
    stderr so a failed resize does not go unnoticed.
    """
    cmd = [
        'gcloud', 'container', 'clusters', 'resize', cluster,
        '--zone', zone, '--project', project, '--node-pool', pool,
        '--size', str(new_size), '--quiet',
    ]
    print(cmd)
    status = subprocess.call(cmd)
    if status != 0:
        print(
            'WARNING: resizing {pool} to {size} exited with status {status}'.format(
                pool=pool, size=new_size, status=status,
            ),
            file=sys.stderr,
        )
    return status


def prompt_confirmation():
    """Prompt for interactive confirmation; exit with status -1 unless input is 'yes'."""
    # raw_input only exists on Python 2; on Python 3 the equivalent is input
    try:
        read_line = raw_input  # pylint: disable=undefined-variable
    except NameError:
        read_line = input
    sys.stdout.write('Please confirm (yes/no): ')
    sys.stdout.flush()
    response = read_line()
    if response != 'yes':
        print('Cancelling.')
        sys.exit(-1)
    print('Confirmed.')


def parse_args(argv):
    """Parse the command line.

    The last four entries of argv are interpreted as
    pool-to-drain, pool-to-grow, shrink_increment:grow_increment, num_to_add.

    Returns a tuple
    (pool_to_shrink, pool_to_grow, shrink_increment, grow_increment, nodes_to_add).

    Raises ValueError if arguments are missing, not integers where integers
    are expected, or out of range (grow_increment must be at least 1 since
    it is used as a divisor).
    """
    if len(argv) < 5:
        raise ValueError('expected 4 arguments, got {}'.format(max(len(argv) - 1, 0)))
    pool_to_shrink, pool_to_grow, ratio, count = argv[-4:]

    parts = ratio.split(':')
    if len(parts) != 2:
        raise ValueError(
            'ratio must look like shrink_increment:grow_increment, got "{}"'.format(ratio))
    shrink_increment, grow_increment = int(parts[0]), int(parts[1])
    nodes_to_add = int(count)

    if shrink_increment < 0:
        raise ValueError('shrink_increment must not be negative')
    if grow_increment < 1:
        raise ValueError('grow_increment must be at least 1')
    if nodes_to_add < 0:
        raise ValueError('num_to_add must not be negative')

    return pool_to_shrink, pool_to_grow, shrink_increment, grow_increment, nodes_to_add


def compute_resize_steps(shrink_initial, grow_initial,
                         shrink_increment, grow_increment, nodes_to_add):
    """Compute the sequence of pool sizes to step through.

    Returns a list of (shrink_pool_size, grow_pool_size) tuples, one per
    iteration. Each iteration drains shrink_increment nodes (never going
    below zero) and adds grow_increment nodes, except that the total number
    of nodes added never exceeds nodes_to_add.
    """
    n_iter = int(math.ceil(float(nodes_to_add) / grow_increment))
    steps = []
    for done in range(1, n_iter + 1):
        shrink_size = max(shrink_initial - done * shrink_increment, 0)
        # cap on the number of nodes requested, not on the target pool size,
        # so the final step cannot overshoot when the pool starts non-empty
        added_so_far = min(done * grow_increment, nodes_to_add)
        steps.append((shrink_size, grow_initial + added_so_far))
    return steps


def main():
    """Parse the cli, confirm the plan with the user, then shift capacity."""
    # parse cli
    try:
        (pool_to_shrink, pool_to_grow,
         shrink_increment, grow_increment, nodes_to_add) = parse_args(sys.argv)
    except ValueError as err:
        print('error: {}'.format(err), file=sys.stderr)
        print(USAGE, file=sys.stderr)
        sys.exit(1)

    # obtain current pool sizes
    pool_sizes = get_pool_sizes(PROJECT, ZONE, CLUSTER)
    for name in (pool_to_grow, pool_to_shrink):
        if name not in pool_sizes:
            print(
                'error: unknown node pool "{}", known pools: {}'.format(
                    name, ', '.join(sorted(pool_sizes))),
                file=sys.stderr,
            )
            sys.exit(1)
    pool_to_grow_initial = pool_sizes[pool_to_grow]
    pool_to_shrink_initial = pool_sizes[pool_to_shrink]

    # compute the intermediate and final pool sizes
    steps = compute_resize_steps(
        pool_to_shrink_initial, pool_to_grow_initial,
        shrink_increment, grow_increment, nodes_to_add,
    )
    if steps:
        pool_to_shrink_target, pool_to_grow_target = steps[-1]
    else:
        pool_to_shrink_target = pool_to_shrink_initial
        pool_to_grow_target = pool_to_grow_initial

    # verify with the user
    print((
        'Shifting NodePool capacity for project = "{project}",'
        'zone = "{zone}", cluster = "{cluster}"'
        ).format(
            project=PROJECT, zone=ZONE, cluster=CLUSTER,
        ))
    print('')
    print((
        'Will add {nodes_to_add} node(s) to {pool_to_grow}'
        ' and drain {shrink_increment} node(s) from {pool_to_shrink}'
        ' for every {grow_increment} node(s) added to {pool_to_grow}'
        ).format(
            nodes_to_add=nodes_to_add, shrink_increment=shrink_increment,
            grow_increment=grow_increment, pool_to_grow=pool_to_grow,
            pool_to_shrink=pool_to_shrink,
        ))
    print('')
    print((
        'Current pool sizes are: {{{pool_to_grow}: {pool_to_grow_curr},'
        ' {pool_to_shrink}: {pool_to_shrink_curr}}}'
        ).format(
            pool_to_grow=pool_to_grow, pool_to_grow_curr=pool_to_grow_initial,
            pool_to_shrink=pool_to_shrink, pool_to_shrink_curr=pool_to_shrink_initial,
        ))
    print('')
    print((
        'Target pool sizes are: {{{pool_to_grow}: {pool_to_grow_target},'
        ' {pool_to_shrink}: {pool_to_shrink_target}}}'
        ).format(
            pool_to_grow=pool_to_grow, pool_to_grow_target=pool_to_grow_target,
            pool_to_shrink=pool_to_shrink, pool_to_shrink_target=pool_to_shrink_target,
        ))
    print('')

    prompt_confirmation()
    print('')

    # actually start resizing
    current_grow_size = pool_to_grow_initial
    for shrink_size, grow_size in steps:
        # shrink by one increment, capped at reaching zero nodes
        print('Draining {shrink_increment} node(s) from {pool_to_shrink} ...'.format(
            shrink_increment=shrink_increment, pool_to_shrink=pool_to_shrink,
        ))
        resize_nodepool(pool_to_shrink, shrink_size, PROJECT, ZONE, CLUSTER)
        print('')

        # ditto for growing, capped at the requested number of nodes
        print('Adding {num_to_add} node(s) to {pool_to_grow} ...'.format(
            num_to_add=grow_size - current_grow_size, pool_to_grow=pool_to_grow,
        ))
        resize_nodepool(pool_to_grow, grow_size, PROJECT, ZONE, CLUSTER)
        print('')
        current_grow_size = grow_size

    print('')
    print('Done')


if __name__ == '__main__':
    main()