github.com/SUSE/skuba@v1.4.17/ci/infra/testrunner/checks/checks.py (about)

     1  import time
     2  
     3  import platforms
     4  from kubectl import Kubectl
     5  from utils.utils import Utils
     6  
     7  
     8  class Check():
     9      def __init__(self, name, description, func, scope, roles=[], stages=[]):
    10          self.name = name
    11          self.description = description
    12          self.func = func
    13          self.scope = scope
    14          self.roles = roles
    15          self.stages = stages
    16  
# Every Check created by the @check decorator, in registration order.
_checks = []
# The same Check objects, indexed by the name of the decorated function.
_checks_by_name = {}
    19  
    20  def check(description=None, scope=None, roles=[], stages=[], check_timeout=300, check_backoff=20):
    21      """Decorator for waiting a check to become true.
    22         Can receve the following arguments when invoking the check function
    23         description: used for reporting. if not defined, check
    24                      function name is used
    25         scope: either "cluster" or "node"
    26         roles: list of node roles this check applies
    27         stages: list of deployment stages this check applies to (e.g provisioned, joined)
    28         check_timeout: the timeout for the check
    29         check_backoff: the backoff between retries
    30  
    31        The check_timout and check_backoff parameters can be overidden when
    32        calling the check_node function
    33      """
    34      def checker(check):
    35          def wait_condition(*args, **kwargs):
    36              _name = check.__name__
    37              _description = description
    38              if not _description:
    39                  _description = _name
    40  
    41              timeout = kwargs.pop('check_timeout', check_timeout)
    42              backoff = kwargs.pop('check_backoff', check_backoff)
    43              deadline = int(time.time()) + timeout
    44              while True:
    45                  last_error = None
    46                  try:
    47                      if check(*args, **kwargs):
    48                          return True
    49                  except Exception as ex:
    50                      last_error = ex
    51  
    52                  if int(time.time()) >= deadline:
    53                      msg = (f'condition "{_description}" not satisfied after {timeout} seconds'
    54                             f'{". Last error:"+str(last_error) if last_error else ""}')
    55                      raise AssertionError(msg)
    56  
    57                  time.sleep(backoff)
    58  
    59          if scope is None:
    60              raise ValueError("scope must be defined: 'cluster' or 'node'")
    61  
    62          _check = Check(check.__name__,
    63                      description,
    64                      wait_condition,
    65                      scope,
    66                      roles=roles,
    67                      stages=stages)
    68          _checks.append(_check)
    69          _checks_by_name[_check.name] = _check
    70  
    71          return wait_condition
    72  
    73      return checker
    74  
    75  
    76  class Checker:
    77  
    78      def __init__(self, conf, platform):
    79          self.conf = conf
    80          self.utils = Utils(self.conf)
    81          self.utils.setup_ssh()
    82          self.platform = platform
    83  
    84  
    85      def _filter_checks(self, checks, scope=None, stage=None):
    86          _filtered = checks
    87          if scope:
    88              _filtered= [c for c in _filtered if scope == c.scope]
    89          if stage:
    90              _filtered= [c for c in _filtered if stage in c.stages]
    91          return _filtered
    92  
    93      def _filter_by_name(self, names):
    94          checks = []
    95          for name in names:
    96              _check = _checks_by_name.get(name, None)
    97              if _check is None:
    98                  raise ValueError("Check {name} not found")
    99              checks.append(_check)
   100  
   101          return checks
   102  
   103      def check_node(self, role, node, checks=None, stage=None, timeout=180, backoff=20):
   104  
   105          #Prevent defaults to be accidentally overridden by callers with None
   106          if timeout is None:
   107              timeout = 180
   108          if backoff is None:
   109              backoff = 20
   110  
   111          if checks:
   112              checks = self._filter_by_name(checks)
   113              for check in checks:
   114                  if check.scope != "node":
   115                      raise Exception(f'check {check.name} is not a node check')
   116          else:
   117              if not stage:
   118                  raise ValueError("stage must be specified")
   119              checks = self._filter_checks(_checks, stage=stage, scope="node")
   120  
   121          start   = int(time.time())
   122          for check in checks:
   123              remaining = timeout-(int(time.time())-start)
   124              check.func(self.conf, self.platform, role, node, check_timeout=remaining, check_backoff=backoff)
   125  
   126      def check_cluster(self, checks=None, stage=None, timeout=180, backoff=20):
   127          if checks:
   128              checks = self._filter_by_name(checks)
   129              for check in checks:
   130                  if check.scope != "cluster":
   131                      raise Exception(f'check {check.name} is not a cluster check')
   132          else:
   133              if not stage:
   134                  raise ValueError("stage must be specified")
   135              checks = self._filter_checks(_checks, stage=stage, scope="cluster")
   136  
   137          start   = int(time.time())
   138          for check in checks:
   139              remaining = timeout-(int(time.time())-start)
   140              check.func(self.conf, self.platform, check_timeout=remaining, check_backoff=backoff)
   141  
   142  
   143  @check(description="apiserver healthz check", scope="node", roles=['master'])
   144  def check_apiserver_healthz(conf, platform, role, node):
   145       platform = platforms.get_platform(conf, platform)
   146       cmd =   'curl -Ls --insecure https://localhost:6443/healthz'
   147       output = platform.ssh_run(role, node, cmd)
   148       return output.find("ok") > -1
   149  
   150  @check(description="etcd health check", scope="node", roles=['master'])
   151  def check_etcd_health(conf, platform, role, node):
   152      platform = platforms.get_platform(conf, platform)
   153      cmd = ('sudo curl -Ls --cacert /etc/kubernetes/pki/etcd/ca.crt '
   154             '--key /etc/kubernetes/pki/etcd/server.key '
   155             '--cert /etc/kubernetes/pki/etcd/server.crt '
   156             'https://localhost:2379/health')
   157      output = platform.ssh_run(role, node, cmd)
   158      return output.find("true") > -1
   159  
   160  @check(description="check node is ready", scope="node", roles=["master", "worker"], stages=["joined"])
   161  def check_node_ready(conf, platform, role, node):
   162      platform = platforms.get_platform(conf, platform)
   163      node_name = platform.get_nodes_names(role)[node]
   164      cmd = ("get nodes {} -o jsonpath='{{range @.status.conditions[*]}}"
   165             "{{@.type}}={{@.status}};{{end}}'").format(node_name)
   166      kubectl = Kubectl(conf)
   167      return kubectl.run_kubectl(cmd).find("Ready=True") != -1
   168  
   169  
   170  @check(description="check system pods ready", scope="cluster", stages=["joined"])
   171  def check_system_pods_ready(conf, platform):
   172      kubectl = Kubectl(conf)
   173      return check_pods_ready(kubectl, namespace="kube-system")
   174  
   175  
   176  def check_pods_ready(kubectl, namespace=None, pods=[], node=None, statuses=['Running', 'Succeeded']):
   177      ns = f'{"--namespace="+namespace if namespace else ""}'
   178      node_selector = f'{"--field-selector spec.nodeName="+node if node else ""}'
   179      cmd = (f'get pods {" ".join(pods)} {ns} {node_selector} '
   180             f'-o jsonpath="{{ range .items[*]}}{{@.metadata.name}}:'
   181             f'{{@.status.phase}};"')
   182  
   183      result = kubectl.run_kubectl(cmd)
   184      # get pods can return a list of items or a single pod
   185      pod_list = result.split(";")
   186      for name,status in [ pod.split(":") for pod in pod_list if pod is not ""]:
   187          if status not in statuses:
   188              return False
   189  
   190      return True