github.com/abayer/test-infra@v0.0.5/experiment/maintenance/shift_nodepool_capacity.py (about)

     1  #!/usr/bin/env python
     2  
     3  # Copyright 2018 The Kubernetes Authors.
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #     http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  # This script drains nodes from one node pool and adds nodes to another n:m at a time
    18  #
    19  # Use like:
    20  # shift_nodepool_capacity.py pool-to-drain pool-to-grow shrink_increment:grow_increment num_to_add
    21  #
    22  # EG:
    23  # shift_nodepool_capacity.py default-pool pool-n1-highmem-8-300gb 2:1 5
    24  #
    25  # for nodefs on the prow builds cluster
    26  # USE AT YOUR OWN RISK.
    27  # TODO(bentheelder): delete this once dynamic kubelet config is available
    28  
    29  
    30  from __future__ import print_function
    31  
    32  import sys
    33  import subprocess
    34  import json
    35  import math
    36  
# Cluster, zone and project that main() hands to the gcloud-based helpers below.
# xref prow/Makefile get-build-cluster-credentials
# TODO(bentheelder): perhaps make these configurable
CLUSTER = 'prow'
ZONE = 'us-central1-f'
PROJECT = 'k8s-prow-builds'
    42  
    43  
    44  def get_pool_sizes(project, zone, cluster):
    45      """returns a map of node pool name to size using the gcloud cli."""
    46      sizes = {}
    47  
    48      # map managed instance group names to node pools and record pool names
    49      node_pools = json.loads(subprocess.check_output([
    50          'gcloud', 'container', 'node-pools', 'list',
    51          '--project', project, '--cluster', cluster, '--zone', zone,
    52          '--format=json',
    53      ]))
    54      group_to_pool = {}
    55      for pool in node_pools:
    56          # later on we will sum up node counts from instance groups
    57          sizes[pool['name']] = 0
    58          # this is somewhat brittle, the last component of the URL is the instance group name
    59          # the better way to do this is probably to use the APIs directly
    60          for url in pool['instanceGroupUrls']:
    61              instance_group = url.split('/')[-1]
    62              group_to_pool[instance_group] = pool['name']
    63  
    64      # map instance groups to node counts
    65      groups = json.loads(subprocess.check_output([
    66          'gcloud', 'compute', 'instance-groups', 'list',
    67          '--project', project, '--filter=zone:({})'.format(zone),
    68          '--format=json',
    69      ]))
    70      for group in groups:
    71          sizes[group_to_pool[group['name']]] += group['size']
    72  
    73      return sizes
    74  
    75  
    76  def resize_nodepool(pool, new_size, project, zone, cluster):
    77      """resize the nodepool to new_size using the gcloud cli"""
    78      cmd = [
    79          'gcloud', 'container', 'clusters', 'resize', cluster,
    80          '--zone', zone, '--project', project, '--node-pool', pool,
    81          '--size', str(new_size), '--quiet',
    82      ]
    83      print(cmd)
    84      subprocess.call(cmd)
    85  
    86  
    87  def prompt_confirmation():
    88      """prompts for interactive confirmation, exits 1 unless input is 'yes'"""
    89      sys.stdout.write('Please confirm (yes/no): ')
    90      response = raw_input()
    91      if response != 'yes':
    92          print('Cancelling.')
    93          sys.exit(-1)
    94      print('Confirmed.')
    95  
    96  
    97  def main():
    98      # parse cli
    99      nodes_to_add = int(sys.argv[-1])
   100  
   101      ratio = sys.argv[-2].split(':')
   102      shrink_increment, grow_increment = int(ratio[0]), int(ratio[1])
   103  
   104      pool_to_grow = sys.argv[-3]
   105      pool_to_shrink = sys.argv[-4]
   106  
   107      # obtain current pool sizes
   108      pool_sizes = get_pool_sizes(PROJECT, ZONE, CLUSTER)
   109      pool_to_grow_initial = pool_sizes[pool_to_grow]
   110      pool_to_shrink_initial = pool_sizes[pool_to_shrink]
   111  
   112      # compute final pool sizes
   113      pool_to_grow_target = pool_to_grow_initial + nodes_to_add
   114  
   115      n_iter = int(math.ceil(float(nodes_to_add) / grow_increment))
   116      pool_to_shrink_target = pool_to_shrink_initial - n_iter*shrink_increment
   117      if pool_to_shrink_target < 0:
   118          pool_to_shrink_target = 0
   119  
   120      # verify with the user
   121      print((
   122          'Shifting NodePool capacity for project = "{project}",'
   123          'zone = "{zone}", cluster = "{cluster}"'
   124          ).format(
   125              project=PROJECT, zone=ZONE, cluster=CLUSTER,
   126          ))
   127      print('')
   128      print((
   129          'Will add {nodes_to_add} node(s) to {pool_to_grow}'
   130          ' and drain {shrink_increment} node(s) from {pool_to_shrink}'
   131          ' for every {grow_increment} node(s) added to {pool_to_grow}'
   132          ).format(
   133              nodes_to_add=nodes_to_add, shrink_increment=shrink_increment,
   134              grow_increment=grow_increment, pool_to_grow=pool_to_grow,
   135              pool_to_shrink=pool_to_shrink,
   136          ))
   137      print('')
   138      print((
   139          'Current pool sizes are: {{{pool_to_grow}: {pool_to_grow_curr},'
   140          ' {pool_to_shrink}: {pool_to_shrink_curr}}}'
   141          ).format(
   142              pool_to_grow=pool_to_grow, pool_to_grow_curr=pool_to_grow_initial,
   143              pool_to_shrink=pool_to_shrink, pool_to_shrink_curr=pool_to_shrink_initial,
   144          ))
   145      print('')
   146      print((
   147          'Target pool sizes are: {{{pool_to_grow}: {pool_to_grow_target},'
   148          ' {pool_to_shrink}: {pool_to_shrink_target}}}'
   149          ).format(
   150              pool_to_grow=pool_to_grow, pool_to_grow_target=pool_to_grow_target,
   151              pool_to_shrink=pool_to_shrink, pool_to_shrink_target=pool_to_shrink_target,
   152          ))
   153      print('')
   154  
   155      prompt_confirmation()
   156      print('')
   157  
   158  
   159      # actually start resizing
   160      # ignore pylint, "i" is a perfectly fine variable name for a loop counter...
   161      # pylint: disable=invalid-name
   162      for i in range(n_iter):
   163          # shrink by one increment, capped at reaching zero nodes
   164          print('Draining {shrink_increment} node(s) from {pool_to_shrink} ...'.format(
   165              shrink_increment=shrink_increment, pool_to_shrink=pool_to_shrink,
   166          ))
   167          new_size = max(pool_to_shrink_initial - (i*shrink_increment + shrink_increment), 0)
   168          resize_nodepool(pool_to_shrink, new_size, PROJECT, ZONE, CLUSTER)
   169          print('')
   170  
   171          # ditto for growing, modulo the cap
   172          num_to_add = min(grow_increment, pool_to_grow_target - i*grow_increment)
   173          print('Adding {num_to_add} node(s) to {pool_to_grow} ...'.format(
   174              num_to_add=num_to_add, pool_to_grow=pool_to_grow,
   175          ))
   176          new_size = pool_to_grow_initial + (i*grow_increment + num_to_add)
   177          resize_nodepool(pool_to_grow, new_size, PROJECT, ZONE, CLUSTER)
   178          print('')
   179  
   180      print('')
   181      print('Done')
   182  
# only run the resize workflow when executed as a script, not when imported
if __name__ == '__main__':
    main()