# Source: github.com/SUSE/skuba@v1.4.17/ci/infra/testrunner/checks/checks.py
"""Registry and runner for deployment health checks.

Check functions are registered with the ``@check`` decorator, which wraps
them in a retry loop and records them in a module-level registry. The
``Checker`` class selects registered checks (by name, or by stage and scope)
and runs them against a node or against the whole cluster.
"""

import functools
import time

import platforms
from kubectl import Kubectl
from utils.utils import Utils


# Defaults applied by Checker when the caller passes None for these values.
_DEFAULT_TIMEOUT = 180
_DEFAULT_BACKOFF = 20

# Pod phases accepted by check_pods_ready when the caller does not pass any.
_DEFAULT_POD_STATUSES = ('Running', 'Succeeded')


class Check():
    """Metadata record for one registered check.

    Attributes:
        name: name of the decorated check function.
        description: description given to the decorator (may be None).
        func: the retrying wrapper around the check function.
        scope: the scope given to the decorator ("cluster" or "node").
        roles: list of node roles the check applies to.
        stages: list of deployment stages the check applies to.
    """

    def __init__(self, name, description, func, scope, roles=None, stages=None):
        self.name = name
        self.description = description
        self.func = func
        self.scope = scope
        # None instead of a mutable default so instances never share a list.
        # NOTE(review): roles is recorded here but nothing in this file
        # filters on it; confirm whether other modules consume it.
        self.roles = [] if roles is None else roles
        self.stages = [] if stages is None else stages


# Registry of every check declared through the @check decorator.
_checks = []
_checks_by_name = {}


def check(description=None, scope=None, roles=None, stages=None,
          check_timeout=300, check_backoff=20):
    """Decorator for waiting for a check to become true.

    Can receive the following arguments when decorating the check function:
        description: used for reporting. If not defined, the check
                     function name is used
        scope: either "cluster" or "node"
        roles: list of node roles this check applies to
        stages: list of deployment stages this check applies to
                (e.g. provisioned, joined)
        check_timeout: the timeout for the check, in seconds
        check_backoff: the backoff between retries, in seconds

    The check_timeout and check_backoff parameters can be overridden per
    call by passing them as keyword arguments to the decorated function
    (this is what Checker.check_node and Checker.check_cluster do).

    The decorated function is retried until it returns a truthy value.
    Exceptions raised by the function are treated as a failed attempt.
    Once the deadline has passed, AssertionError is raised and includes
    the error from the most recent attempt, if there was one.

    Raises:
        ValueError: at decoration time, if scope is not given.
    """
    def checker(func):
        @functools.wraps(func)
        def wait_condition(*args, **kwargs):
            label = description or func.__name__

            # Per-call overrides are removed from kwargs so they are not
            # forwarded to the wrapped check function.
            timeout = kwargs.pop('check_timeout', check_timeout)
            backoff = kwargs.pop('check_backoff', check_backoff)
            deadline = int(time.time()) + timeout
            while True:
                last_error = None
                try:
                    if func(*args, **kwargs):
                        return True
                except Exception as ex:
                    # A failing attempt is retried; the error is kept only
                    # for the final report.
                    last_error = ex

                if int(time.time()) >= deadline:
                    msg = f'condition "{label}" not satisfied after {timeout} seconds'
                    if last_error is not None:
                        msg += ". Last error:" + str(last_error)
                    raise AssertionError(msg)

                time.sleep(backoff)

        if scope is None:
            raise ValueError("scope must be defined: 'cluster' or 'node'")

        _check = Check(func.__name__,
                       description,
                       wait_condition,
                       scope,
                       roles=roles,
                       stages=stages)
        _checks.append(_check)
        _checks_by_name[_check.name] = _check

        return wait_condition

    return checker


class Checker:
    """Selects registered checks and runs them for a node or the cluster."""

    def __init__(self, conf, platform):
        self.conf = conf
        self.utils = Utils(self.conf)
        self.utils.setup_ssh()
        self.platform = platform

    def _filter_checks(self, checks, scope=None, stage=None):
        """Return the checks matching the given scope and/or stage.

        A falsy scope or stage disables that filter.
        """
        _filtered = checks
        if scope:
            _filtered = [c for c in _filtered if scope == c.scope]
        if stage:
            _filtered = [c for c in _filtered if stage in c.stages]
        return _filtered

    def _filter_by_name(self, names):
        """Return the registered checks with the given names, in order.

        Raises:
            ValueError: if a name is not in the registry.
        """
        checks = []
        for name in names:
            _check = _checks_by_name.get(name)
            if _check is None:
                raise ValueError(f"Check {name} not found")
            checks.append(_check)

        return checks

    def _select_checks(self, checks, stage, scope):
        """Resolve the checks to run for one scope.

        If check names are given they are looked up and must all have the
        requested scope; otherwise every registered check of that scope
        for the given stage is selected.

        Raises:
            ValueError: if a name is unknown, or if neither names nor a
                        stage are given.
            Exception: if a named check has a different scope.
        """
        if checks:
            selected = self._filter_by_name(checks)
            for _check in selected:
                if _check.scope != scope:
                    raise Exception(f'check {_check.name} is not a {scope} check')
            return selected

        if not stage:
            raise ValueError("stage must be specified")
        return self._filter_checks(_checks, stage=stage, scope=scope)

    def check_node(self, role, node, checks=None, stage=None, timeout=180, backoff=20):
        """Run node-scoped checks against one node.

        Args:
            role: role of the node, passed through to each check.
            node: node identifier, passed through to each check.
            checks: optional list of check names; if empty or None, the
                    checks are selected by stage.
            stage: deployment stage used to select checks when no names
                   are given.
            timeout: total time budget in seconds, shared by all checks.
            backoff: seconds between retries of a check.

        Raises:
            ValueError, Exception: see _select_checks.
            AssertionError: if a check is not satisfied in time.
        """
        # Prevent defaults from being accidentally overridden by callers
        # passing None
        if timeout is None:
            timeout = _DEFAULT_TIMEOUT
        if backoff is None:
            backoff = _DEFAULT_BACKOFF

        selected = self._select_checks(checks, stage, "node")

        start = int(time.time())
        for _check in selected:
            # Every check only gets what is left of the shared budget.
            remaining = timeout - (int(time.time()) - start)
            _check.func(self.conf, self.platform, role, node,
                        check_timeout=remaining, check_backoff=backoff)

    def check_cluster(self, checks=None, stage=None, timeout=180, backoff=20):
        """Run cluster-scoped checks.

        Args:
            checks: optional list of check names; if empty or None, the
                    checks are selected by stage.
            stage: deployment stage used to select checks when no names
                   are given.
            timeout: total time budget in seconds, shared by all checks.
            backoff: seconds between retries of a check.

        Raises:
            ValueError, Exception: see _select_checks.
            AssertionError: if a check is not satisfied in time.
        """
        # Same None handling as check_node, so both entry points agree.
        if timeout is None:
            timeout = _DEFAULT_TIMEOUT
        if backoff is None:
            backoff = _DEFAULT_BACKOFF

        selected = self._select_checks(checks, stage, "cluster")

        start = int(time.time())
        for _check in selected:
            # Every check only gets what is left of the shared budget.
            remaining = timeout - (int(time.time()) - start)
            _check.func(self.conf, self.platform,
                        check_timeout=remaining, check_backoff=backoff)


@check(description="apiserver healthz check", scope="node", roles=['master'])
def check_apiserver_healthz(conf, platform, role, node):
    """Return True if the node's local apiserver /healthz output contains "ok"."""
    platform = platforms.get_platform(conf, platform)
    cmd = 'curl -Ls --insecure https://localhost:6443/healthz'
    output = platform.ssh_run(role, node, cmd)
    return output.find("ok") > -1


@check(description="etcd health check", scope="node", roles=['master'])
def check_etcd_health(conf, platform, role, node):
    """Return True if the node's local etcd /health output contains "true"."""
    platform = platforms.get_platform(conf, platform)
    cmd = ('sudo curl -Ls --cacert /etc/kubernetes/pki/etcd/ca.crt '
           '--key /etc/kubernetes/pki/etcd/server.key '
           '--cert /etc/kubernetes/pki/etcd/server.crt '
           'https://localhost:2379/health')
    output = platform.ssh_run(role, node, cmd)
    return output.find("true") > -1


@check(description="check node is ready", scope="node", roles=["master", "worker"], stages=["joined"])
def check_node_ready(conf, platform, role, node):
    """Return True if kubectl reports the condition Ready=True for the node."""
    platform = platforms.get_platform(conf, platform)
    node_name = platform.get_nodes_names(role)[node]
    # Emits the node conditions as "Type=Status;" pairs.
    cmd = ("get nodes {} -o jsonpath='{{range @.status.conditions[*]}}"
           "{{@.type}}={{@.status}};{{end}}'").format(node_name)
    kubectl = Kubectl(conf)
    return kubectl.run_kubectl(cmd).find("Ready=True") != -1


@check(description="check system pods ready", scope="cluster", stages=["joined"])
def check_system_pods_ready(conf, platform):
    """Return True if every pod in the kube-system namespace is ready."""
    kubectl = Kubectl(conf)
    return check_pods_ready(kubectl, namespace="kube-system")


def check_pods_ready(kubectl, namespace=None, pods=None, node=None, statuses=None):
    """Return True if every selected pod is in one of the accepted phases.

    Args:
        kubectl: object whose run_kubectl(cmd) returns the command output.
        namespace: optional namespace to restrict the query to.
        pods: optional list of pod names; all pods if empty or None.
        node: optional node name used as a spec.nodeName field selector.
        statuses: accepted pod phases; defaults to Running and Succeeded.

    Returns:
        False as soon as a pod with another phase is found, True otherwise
        (including when no pods are reported).
    """
    if pods is None:
        pods = []
    if statuses is None:
        statuses = _DEFAULT_POD_STATUSES

    ns = f'--namespace={namespace}' if namespace else ''
    node_selector = f'--field-selector spec.nodeName={node}' if node else ''
    cmd = (f'get pods {" ".join(pods)} {ns} {node_selector} '
           f'-o jsonpath="{{ range .items[*]}}{{@.metadata.name}}:'
           f'{{@.status.phase}};"')

    result = kubectl.run_kubectl(cmd)
    # get pods can return a list of items or a single pod
    for entry in result.split(";"):
        # The trailing ";" leaves an empty last entry; skip blanks.
        if not entry.strip():
            continue
        _name, status = entry.split(":")
        if status not in statuses:
            return False

    return True