#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Migrate eru-core metadata in etcd between the old monolithic node/workload
resource format and the new per-plugin (cpumem / volume) format."""

import argparse
import functools
import json
import os

import etcd3

# Global switches/paths; configured by main() before the transfer starts.
dry_run = False
record_prefix = "upgrade_"
origin_data_record_path = 'origin_data_record.data'
transferred_node_record_path = 'transferred_node_record.data'
transferred_workload_record_path = 'transferred_workload_record.data'

# Opened by init_recorders(); written as the migration progresses so an
# interrupted run can be resumed without re-transferring finished objects.
origin_data_recorder = None
transferred_node_recorder = None
transferred_workload_recorder = None

# Names already migrated in a previous (partial) run.
transferred_workloads = set()
transferred_nodes = set()


def init_recorders():
    """Open the three record files and reload progress from earlier runs.

    Prefixes each record path with ``record_prefix`` (so upgrade and
    downgrade runs keep separate records) and, if record files from an
    earlier run exist, reloads the already-transferred node/workload
    names so they are skipped on resume.

    NOTE(review): calling this twice in one process would prefix the
    paths twice; main() calls it exactly once.
    """
    global origin_data_recorder, origin_data_record_path
    global transferred_node_recorder, transferred_node_record_path, transferred_nodes
    global transferred_workload_recorder, transferred_workload_record_path, transferred_workloads

    origin_data_record_path = record_prefix + origin_data_record_path
    transferred_node_record_path = record_prefix + transferred_node_record_path
    transferred_workload_record_path = record_prefix + transferred_workload_record_path

    if os.path.exists(transferred_node_record_path):
        with open(transferred_node_record_path, 'r') as f:
            transferred_nodes = set(f.read().strip('\n').splitlines())

    if os.path.exists(transferred_workload_record_path):
        with open(transferred_workload_record_path, 'r') as f:
            transferred_workloads = set(f.read().strip('\n').splitlines())

    # Append mode: records from interrupted runs are preserved.
    origin_data_recorder = open(origin_data_record_path, 'a')
    transferred_node_recorder = open(transferred_node_record_path, 'a')
    transferred_workload_recorder = open(transferred_workload_record_path, 'a')


def close_recorders():
    """Flush and close the record files opened by init_recorders()."""
    transferred_node_recorder.close()
    transferred_workload_recorder.close()
    origin_data_recorder.close()


def add_record(recorder, record):
    """Append one record line to the given open record file."""
    recorder.write('%s\n' % record)


def remove_prefix(s, prefix):
    """Return ``s`` with ``prefix`` (and any '/' right after it) removed;
    return ``s`` unchanged when it does not start with ``prefix``."""
    return s[len(prefix):].lstrip('/') if s.startswith(prefix) else s


def dict_sub(d1, d2):
    """Element-wise subtraction ``d1 - d2`` over the keys of ``d1``.

    Keys missing from ``d2`` count as 0.  Returns None when ``d1`` is
    None, and ``d1`` unchanged when ``d2`` is None.
    """
    if d1 is None:
        return None
    if d2 is None:
        return d1
    return {k: v - d2.get(k, 0) for k, v in d1.items()}


class ETCD:
    """Thin wrapper around an etcd3 client that applies a key prefix,
    records values it overwrites, and honours the global dry_run flag."""

    def __init__(self, client, prefix):
        """Create an instance of ETCD."""
        self.etcd = client
        self.prefix = prefix

    def get(self, key):
        """Return the decoded value at ``key`` (prefix added), or None."""
        if not key.startswith(self.prefix):
            key = self.prefix + key
        res = self.etcd.get(key)[0]
        if res is None:
            return None
        return res.decode('utf-8')

    def put(self, key, value):
        """Write ``value`` at ``key`` (prefix added).

        In dry-run mode only prints the intended write.  Otherwise the
        previous value, if any, is appended to the origin-data record
        first so the migration can be audited or manually rolled back.
        """
        if not key.startswith(self.prefix):
            key = self.prefix + key
        if dry_run:
            print('put {}\n{}'.format(key, value))
            return

        origin_value = self.get(key)
        if origin_value:
            add_record(origin_data_recorder, key)
            add_record(origin_data_recorder, origin_value)

        self.etcd.put(key, value)

    def range_prefix(self, obj_prefix, fn):
        """Iterate every key under ``obj_prefix`` in pages of 1000 and
        call ``fn(name_without_prefix, value)`` for each key/value pair."""
        prefix = self.prefix + obj_prefix
        range_start = prefix
        range_end = etcd3.utils.increment_last_byte(
            etcd3.utils.to_bytes(range_start)
        )

        while True:
            range_request = etcd3.etcdrpc.RangeRequest()
            range_request.key = etcd3.utils.to_bytes(range_start)
            range_request.keys_only = False
            range_request.range_end = etcd3.utils.to_bytes(range_end)
            range_request.sort_order = etcd3.etcdrpc.RangeRequest.ASCEND
            range_request.sort_target = etcd3.etcdrpc.RangeRequest.KEY
            range_request.serializable = True
            range_request.limit = 1000

            range_response = self.etcd.kvstub.Range(
                range_request,
                self.etcd.timeout,
                credentials=self.etcd.call_credentials,
                metadata=self.etcd.metadata,
            )

            # Guard against an empty page: without it, `kv` below could be
            # referenced before assignment when computing the next start key.
            if not range_response.kvs:
                break

            for kv in range_response.kvs:
                orig_key = kv.key.decode('utf-8')
                objname = remove_prefix(orig_key, prefix)
                fn(objname, kv.value.decode('utf-8'))

            if not range_response.more:
                break

            # Resume the next page just past the last key seen.
            range_start = etcd3.utils.increment_last_byte(kv.key)
# Module-level ETCD wrapper, assigned in main().  The annotation is quoted
# so it is not eagerly evaluated at import time.
etcd: "ETCD" = None


class Node:
    """Transfers one node's metadata between formats.

    Upgrade: split the old single node meta into /resource/cpumem/<name>
    and /resource/volume/<name>.  Downgrade: fold those two keys back
    into the old node meta fields and rewrite the old node keys.
    """

    def __init__(self, name, pod_name, meta):
        """Initializes a node transfer."""
        self.name = name
        self.pod_name = pod_name
        self.meta = json.loads(meta)

    def upgrade(self):
        """Write the new per-plugin resource keys derived from the old meta."""
        cpumem_meta = self._gen_cpumem_meta()
        volume_meta = self._gen_volume_meta()
        cpumem_key = '/resource/cpumem/%s' % self.name
        volume_key = '/resource/volume/%s' % self.name
        etcd.put(cpumem_key, cpumem_meta)
        etcd.put(volume_key, volume_meta)

    def downgrade(self):
        """Rebuild the old-format meta and write both old node keys."""
        self._load_resources_meta()
        keys = ['/node/%s' % self.name, '/node/%s:pod/%s' % (self.pod_name, self.name)]
        for key in keys:
            etcd.put(key, json.dumps(self.meta))

    def _load_cpumem_meta(self, meta):
        # Old format stores *remaining* amounts, hence remaining = capacity - usage.
        cpumem_meta = json.loads(meta)
        self.meta['init_cpu'] = cpumem_meta['capacity']['cpu_map']
        self.meta['cpu'] = dict_sub(cpumem_meta['capacity']['cpu_map'], cpumem_meta['usage']['cpu_map'])
        self.meta['init_memcap'] = cpumem_meta['capacity']['memory']
        self.meta['memcap'] = cpumem_meta['capacity']['memory'] - cpumem_meta['usage']['memory']
        self.meta['cpuused'] = cpumem_meta['usage']['cpu']
        self.meta['numa'] = cpumem_meta['capacity']['numa']
        self.meta['init_numa_memory'] = cpumem_meta['capacity']['numa_memory']
        self.meta['numa_memory'] = dict_sub(cpumem_meta['capacity']['numa_memory'], cpumem_meta['usage']['numa_memory'])

    def _load_resources_meta(self):
        """Best-effort load of the new cpumem/volume keys into self.meta.

        A missing key is reported and skipped; previously its None value
        was passed straight into json.loads and crashed the downgrade.
        """
        # load cpumem resources
        cpumem_key = '/resource/cpumem/%s' % self.name
        cpumem_meta = etcd.get(cpumem_key)
        if not cpumem_meta:
            print("%s not found" % cpumem_key)
        else:
            self._load_cpumem_meta(cpumem_meta)

        # load volume resources
        volume_key = '/resource/volume/%s' % self.name
        volume_meta = etcd.get(volume_key)
        if not volume_meta:
            print("%s not found" % volume_key)
        else:
            self._load_volume_meta(volume_meta)

    def _load_volume_meta(self, meta):
        volume_meta = json.loads(meta)
        self.meta['init_volume'] = volume_meta['capacity']['volumes']
        # Old format stores *remaining* volume/storage (cf. _gen_volume_meta,
        # which derives usage = capacity - remaining), so subtract usage from
        # capacity here instead of copying usage verbatim — mirrors
        # _load_cpumem_meta and makes downgrade the inverse of upgrade.
        self.meta['volume'] = dict_sub(volume_meta['capacity']['volumes'], volume_meta['usage']['volumes'])
        self.meta['init_storage_cap'] = volume_meta['capacity']['storage']
        self.meta['storage_cap'] = volume_meta['capacity']['storage'] - volume_meta['usage']['storage']
        # volumeused is the total *used* size, which is the usage itself.
        self.meta['volumeused'] = sum(volume_meta['usage']['volumes'].values())

    def _gen_cpumem_meta(self):
        """Return the new-format cpumem meta (JSON string) for this node."""
        cpumem_meta = {"capacity": {}, "usage": {}}
        cpumem_meta['capacity']['cpu_map'] = self.meta['init_cpu']
        cpumem_meta['usage']['cpu_map'] = dict_sub(self.meta['init_cpu'], self.meta['cpu'])
        cpumem_meta['capacity']['memory'] = self.meta['init_memcap']
        cpumem_meta['usage']['memory'] = self.meta['init_memcap'] - self.meta['memcap']
        cpumem_meta['capacity']['cpu'] = len(self.meta['init_cpu'])
        cpumem_meta['usage']['cpu'] = self.meta['cpuused']
        cpumem_meta['capacity']['numa'] = self.meta['numa']
        cpumem_meta['capacity']['numa_memory'] = self.meta['init_numa_memory']
        cpumem_meta['usage']['numa_memory'] = dict_sub(self.meta['init_numa_memory'], self.meta['numa_memory'])
        return json.dumps(cpumem_meta)

    def _gen_volume_meta(self):
        """Return the new-format volume meta (JSON string) for this node."""
        volume_meta = {"capacity": {}, "usage": {}}
        volume_meta['capacity']['volumes'] = self.meta['init_volume']
        volume_meta['usage']['volumes'] = dict_sub(self.meta['init_volume'], self.meta['volume'])
        volume_meta['capacity']['storage'] = self.meta['init_storage_cap']
        volume_meta['usage']['storage'] = self.meta['init_storage_cap'] - self.meta['storage_cap']
        return json.dumps(volume_meta)


class Workload:
    """Transfers one workload's metadata between formats.

    Upgrade adds resource_args/engine_args derived from the flat fields;
    downgrade restores the flat fields from resource_args.  Either way
    the meta is rewritten at all three keys the workload lives under.
    """

    def __init__(self, workload_id, app_name, entry_name, node_name, meta):
        """Initializes a workload transfer."""
        self.workload_id = workload_id
        self.app_name = app_name
        self.entry_name = entry_name
        self.node_name = node_name
        self.meta = json.loads(meta)
        self.keys = ['/workloads/%s' % self.workload_id,
                     '/deploy/%s/%s/%s/%s' % (self.app_name, self.entry_name, self.node_name, self.workload_id),
                     '/node/%s:workloads/%s' % (self.node_name, self.workload_id)]

    def save(self):
        """Write the (possibly updated) meta to every key it lives under."""
        for key in self.keys:
            etcd.put(key, json.dumps(self.meta))

    def upgrade(self):
        if self.workload_id in transferred_workloads:
            return
        self._gen_resource_meta()
        self.save()

    def downgrade(self):
        if self.workload_id in transferred_workloads:
            return
        self._load_resource_meta()
        self.save()

    def _gen_resource_meta(self):
        """Derive new-format resource_args and engine_args from flat fields."""
        self.meta['resource_args'] = {}
        self.meta['resource_args']['cpumem'] = {
            'cpu_request': self.meta['cpu_quota_request'],
            'cpu_limit': self.meta['cpu_quota_limit'],
            'cpu_map': self.meta['cpu'],
            'memory_request': self.meta['memory_request'],
            'memory_limit': self.meta['memory_limit'],
            "numa_node": self.meta['numa_node'],
        }
        self.meta['resource_args']['volume'] = {
            'volumes_request': self.meta['volume_request'],
            'volumes_limit': self.meta['volume_limit'],
            'volume_plan_request': self.meta['volume_plan_request'],
            'volume_plan_limit': self.meta['volume_plan_limit'],
            'storage_request': self.meta['storage_request'],
            'storage_limit': self.meta['storage_limit'],
        }
        self.meta['engine_args'] = {
            'cpu': self.meta['cpu_quota_limit'],
            'memory': self.meta['memory_limit'],
            'numa_node': self.meta['numa_node'],
            'cpu_map': self.meta['cpu'],
            'storage': self.meta['storage_limit'],
            'volume': [],
        }
        # Non-AUTO bindings pass through verbatim; AUTO ones are resolved
        # from the volume plan below.
        for binding in self.meta['volume_limit']:
            if not binding.startswith('AUTO'):
                self.meta['engine_args']['volume'].append(binding)

        for binding in self.meta['volume_plan_limit']:
            groups = binding.split(':')
            if len(groups) < 3:
                print("volume plan limit of %s is invalid: %s" % (self.workload_id, binding))
                # Skip malformed bindings; previously execution fell through
                # and crashed on groups[1]/groups[2].
                continue

            dst = groups[1]
            flags = groups[2]
            device = list(self.meta['volume_plan_limit'][binding].keys())[0]
            size = self.meta['volume_plan_limit'][binding][device]
            flags = flags.replace('m', '')
            if 'o' in flags:
                flags = flags.replace('o', '').replace('r', 'ro').replace('w', 'wo')

            self.meta['engine_args']['volume'].append('%s:%s:%s:%s' % (device, dst, flags, size))

    def _load_resource_meta(self):
        """Restore the old flat resource fields from resource_args."""
        self.meta['cpu_quota_request'] = self.meta['resource_args']['cpumem']['cpu_request']
        self.meta['cpu_quota_limit'] = self.meta['resource_args']['cpumem']['cpu_limit']
        self.meta['cpu'] = self.meta['resource_args']['cpumem']['cpu_map']
        self.meta['memory_request'] = self.meta['resource_args']['cpumem']['memory_request']
        self.meta['memory_limit'] = self.meta['resource_args']['cpumem']['memory_limit']
        self.meta['numa_node'] = self.meta['resource_args']['cpumem']['numa_node']
        self.meta['volume_request'] = self.meta['resource_args']['volume']['volumes_request']
        self.meta['volume_limit'] = self.meta['resource_args']['volume']['volumes_limit']
        self.meta['volume_plan_request'] = self.meta['resource_args']['volume']['volume_plan_request']
        self.meta['volume_plan_limit'] = self.meta['resource_args']['volume']['volume_plan_limit']
        self.meta['storage_request'] = self.meta['resource_args']['volume']['storage_request']
        self.meta['storage_limit'] = self.meta['resource_args']['volume']['storage_limit']


def connect_etcd(host, port):
    """Return a raw etcd3 client connected to host:port."""
    return etcd3.client(host=host, port=port)


def transfer_node(key, value, upgrade=True):
    """range_prefix callback: transfer one node.

    Only processes pod-scoped node keys (the ones containing ':pod');
    other /node/... keys such as ...:workloads/... are ignored.
    """
    if ':pod' not in key:
        return
    node_name = key.split('/')[-1]
    pod_name = key.split(':')[0].strip('/')
    if node_name in transferred_nodes:
        return

    print('transferring node %s' % node_name)
    node = Node(node_name, pod_name, value)
    if upgrade:
        node.upgrade()
    else:
        node.downgrade()
    # Don't mark progress during a dry run, or a later real run would
    # silently skip these nodes.
    if not dry_run:
        add_record(transferred_node_recorder, node_name)


def transfer_workload(key, value, upgrade=True):
    """range_prefix callback: transfer one workload found under /deploy."""
    app_name, entry_name, node_name, workload_id = key.strip('/').split('/')
    if workload_id in transferred_workloads:
        return

    print('transferring workload %s' % workload_id)
    workload = Workload(workload_id, app_name, entry_name, node_name, value)
    if upgrade:
        workload.upgrade()
    else:
        workload.downgrade()
    # Same dry-run guard as transfer_node.
    if not dry_run:
        add_record(transferred_workload_recorder, workload_id)


def transfer(upgrade=True):
    """Walk all nodes, then all workloads, applying the chosen direction."""
    etcd.range_prefix('/node', functools.partial(transfer_node, upgrade=upgrade))
    etcd.range_prefix('/deploy', functools.partial(transfer_workload, upgrade=upgrade))


def get_args():
    """Parse and return command-line arguments."""
    ap = argparse.ArgumentParser()
    ap.add_argument('--upgrade', action='store_true', help='upgrade to new eru-core')
    ap.add_argument('--downgrade', action='store_true', help='downgrade to old eru-core')
    ap.add_argument('--etcd-prefix', help='etcd prefix', default='/eru')
    ap.add_argument('--etcd-host', default='127.0.0.1')
    ap.add_argument('--etcd-port', type=int, default=2379)
    ap.add_argument('--dry-run', dest='dry_run', action='store_true', help='dry run, will not actually migrate')
    return ap.parse_args()


def main():
    args = get_args()
    if not args.upgrade and not args.downgrade:
        print('please specify --upgrade or --downgrade')
        # Previously execution fell through here and ran an unintended
        # downgrade (upgrade defaults to False).
        return

    global etcd, dry_run, record_prefix
    etcd = ETCD(connect_etcd(args.etcd_host, args.etcd_port), args.etcd_prefix)
    dry_run = args.dry_run
    upgrade = args.upgrade
    if not upgrade:
        # Was 'downgrade' (no separator), producing e.g.
        # 'downgradeorigin_data_record.data'; keep it symmetric with 'upgrade_'.
        record_prefix = 'downgrade_'

    init_recorders()
    try:
        transfer(upgrade)
    finally:
        # Previously never closed; ensure progress records are flushed.
        close_recorders()


if __name__ == '__main__':
    main()