github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/experiment/maintenance/shift_nodepool_capacity.py (about)

     1  #!/usr/bin/env python
     2  
     3  # Copyright 2018 The Kubernetes Authors.
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #     http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  # This script drains nodes from one node pool and adds nodes to another n:m at a time
    18  #
    19  # Use like:
    20  # shift_nodepool_capacity.py pool-to-drain pool-to-grow shrink_increment:grow_increment num_to_add
    21  #
    22  # EG:
    23  # shift_nodepool_capacity.py default-pool pool-n1-highmem-8-300gb 2:1 5
    24  #
    25  # for nodefs on the prow builds cluster
    26  # USE AT YOUR OWN RISK.
    27  # TODO(bentheelder): delete this once dynamic kubelet config is available
    28  
    29  
    30  from __future__ import print_function
    31  
    32  import sys
    33  import subprocess
    34  import json
    35  import math
    36  
# xref prow/Makefile get-build-cluster-credentials
# TODO(bentheelder): perhaps make these configurable
# cluster whose node pools are inspected and resized (gcloud --cluster / resize target)
CLUSTER = 'prow'
# zone handed to gcloud (--zone, and the instance group zone filter)
ZONE = 'us-central1-f'
# project handed to gcloud (--project)
PROJECT = 'k8s-prow-builds'
    42  
    43  
    44  def get_pool_sizes(project, zone, cluster):
    45      """returns a map of node pool name to size using the gcloud cli."""
    46      sizes = {}
    47  
    48      # map managed instance group names to node pools and record pool names
    49      node_pools = json.loads(subprocess.check_output([
    50          'gcloud', 'container', 'node-pools', 'list',
    51          '--project', project, '--cluster', cluster, '--zone', zone,
    52          '--format=json',
    53      ]))
    54      group_to_pool = {}
    55      for pool in node_pools:
    56          # later on we will sum up node counts from instance groups
    57          sizes[pool['name']] = 0
    58          # this is somewhat brittle, the last component of the URL is the instance group name
    59          # the better way to do this is probably to use the APIs directly
    60          for url in pool['instanceGroupUrls']:
    61              instance_group = url.split('/')[-1]
    62              group_to_pool[instance_group] = pool['name']
    63  
    64      # map instance groups to node counts
    65      groups = json.loads(subprocess.check_output([
    66          'gcloud', 'compute', 'instance-groups', 'list',
    67          '--project', project, '--filter=zone:({})'.format(zone),
    68          '--format=json',
    69      ]))
    70      for group in groups:
    71          if group['name'] not in group_to_pool:
    72              continue
    73          sizes[group_to_pool[group['name']]] += group['size']
    74  
    75      return sizes
    76  
    77  
    78  def resize_nodepool(pool, new_size, project, zone, cluster):
    79      """resize the nodepool to new_size using the gcloud cli"""
    80      cmd = [
    81          'gcloud', 'container', 'clusters', 'resize', cluster,
    82          '--zone', zone, '--project', project, '--node-pool', pool,
    83          '--size', str(new_size), '--quiet',
    84      ]
    85      print(cmd)
    86      subprocess.call(cmd)
    87  
    88  
    89  def prompt_confirmation():
    90      """prompts for interactive confirmation, exits 1 unless input is 'yes'"""
    91      sys.stdout.write('Please confirm (yes/no): ')
    92      response = raw_input()
    93      if response != 'yes':
    94          print('Cancelling.')
    95          sys.exit(-1)
    96      print('Confirmed.')
    97  
    98  
    99  def main():
   100      # parse cli
   101      nodes_to_add = int(sys.argv[-1])
   102  
   103      ratio = sys.argv[-2].split(':')
   104      shrink_increment, grow_increment = int(ratio[0]), int(ratio[1])
   105  
   106      pool_to_grow = sys.argv[-3]
   107      pool_to_shrink = sys.argv[-4]
   108  
   109      # obtain current pool sizes
   110      pool_sizes = get_pool_sizes(PROJECT, ZONE, CLUSTER)
   111      pool_to_grow_initial = pool_sizes[pool_to_grow]
   112      pool_to_shrink_initial = pool_sizes[pool_to_shrink]
   113  
   114      # compute final pool sizes
   115      pool_to_grow_target = pool_to_grow_initial + nodes_to_add
   116  
   117      n_iter = int(math.ceil(float(nodes_to_add) / grow_increment))
   118      pool_to_shrink_target = pool_to_shrink_initial - n_iter*shrink_increment
   119      if pool_to_shrink_target < 0:
   120          pool_to_shrink_target = 0
   121  
   122      # verify with the user
   123      print((
   124          'Shifting NodePool capacity for project = "{project}",'
   125          'zone = "{zone}", cluster = "{cluster}"'
   126          ).format(
   127              project=PROJECT, zone=ZONE, cluster=CLUSTER,
   128          ))
   129      print('')
   130      print((
   131          'Will add {nodes_to_add} node(s) to {pool_to_grow}'
   132          ' and drain {shrink_increment} node(s) from {pool_to_shrink}'
   133          ' for every {grow_increment} node(s) added to {pool_to_grow}'
   134          ).format(
   135              nodes_to_add=nodes_to_add, shrink_increment=shrink_increment,
   136              grow_increment=grow_increment, pool_to_grow=pool_to_grow,
   137              pool_to_shrink=pool_to_shrink,
   138          ))
   139      print('')
   140      print((
   141          'Current pool sizes are: {{{pool_to_grow}: {pool_to_grow_curr},'
   142          ' {pool_to_shrink}: {pool_to_shrink_curr}}}'
   143          ).format(
   144              pool_to_grow=pool_to_grow, pool_to_grow_curr=pool_to_grow_initial,
   145              pool_to_shrink=pool_to_shrink, pool_to_shrink_curr=pool_to_shrink_initial,
   146          ))
   147      print('')
   148      print((
   149          'Target pool sizes are: {{{pool_to_grow}: {pool_to_grow_target},'
   150          ' {pool_to_shrink}: {pool_to_shrink_target}}}'
   151          ).format(
   152              pool_to_grow=pool_to_grow, pool_to_grow_target=pool_to_grow_target,
   153              pool_to_shrink=pool_to_shrink, pool_to_shrink_target=pool_to_shrink_target,
   154          ))
   155      print('')
   156  
   157      prompt_confirmation()
   158      print('')
   159  
   160  
   161      # actually start resizing
   162      # ignore pylint, "i" is a perfectly fine variable name for a loop counter...
   163      # pylint: disable=invalid-name
   164      for i in range(n_iter):
   165          # shrink by one increment, capped at reaching zero nodes
   166          print('Draining {shrink_increment} node(s) from {pool_to_shrink} ...'.format(
   167              shrink_increment=shrink_increment, pool_to_shrink=pool_to_shrink,
   168          ))
   169          new_size = max(pool_to_shrink_initial - (i*shrink_increment + shrink_increment), 0)
   170          resize_nodepool(pool_to_shrink, new_size, PROJECT, ZONE, CLUSTER)
   171          print('')
   172  
   173          # ditto for growing, modulo the cap
   174          num_to_add = min(grow_increment, pool_to_grow_target - i*grow_increment)
   175          print('Adding {num_to_add} node(s) to {pool_to_grow} ...'.format(
   176              num_to_add=num_to_add, pool_to_grow=pool_to_grow,
   177          ))
   178          new_size = pool_to_grow_initial + (i*grow_increment + num_to_add)
   179          resize_nodepool(pool_to_grow, new_size, PROJECT, ZONE, CLUSTER)
   180          print('')
   181  
   182      print('')
   183      print('Done')
   184  
# only run the capacity shift when executed as a script, not when imported
if __name__ == '__main__':
    main()