github.com/projecteru2/core@v0.0.0-20240321043226-06bcc1c23f58/scripts/meta_transfer_resource_plugin.py (about)

     1  #!/usr/bin/env python3
     2  # -*- coding: utf-8 -*-
     3  
     4  import argparse
     5  import functools
     6  import json
     7  import os
     8  
     9  import etcd3
    10  
# When True, ETCD.put only prints the intended write (set from --dry-run).
dry_run = False
# Prepended to the record file names below; switched in main() when downgrading.
record_prefix = "upgrade_"
# Journal of original etcd values overwritten by ETCD.put, for manual recovery.
origin_data_record_path = 'origin_data_record.data'
# One node name per line for every node already migrated (makes reruns resumable).
transferred_node_record_path = 'transferred_node_record.data'
# One workload id per line for every workload already migrated.
transferred_workload_record_path = 'transferred_workload_record.data'

# Open append-mode file handles for the paths above; created in init_recorders().
origin_data_recorder = None
transferred_node_recorder = None
transferred_workload_recorder = None

# Ids/names reloaded from the record files so finished items are skipped.
transferred_workloads = set()
transferred_nodes = set()
    23  
    24  
    25  def init_recorders():
    26      global origin_data_recorder, origin_data_record_path
    27      global transferred_node_recorder, transferred_node_record_path, transferred_nodes
    28      global transferred_workload_recorder, transferred_workload_record_path, transferred_workloads
    29  
    30      origin_data_record_path = record_prefix + origin_data_record_path
    31      transferred_node_record_path = record_prefix + transferred_node_record_path
    32      transferred_workload_record_path = record_prefix + transferred_workload_record_path
    33  
    34      if os.path.exists(transferred_node_record_path):
    35          with open(transferred_node_record_path, 'r') as f:
    36              transferred_nodes = set(f.read().strip('\n').splitlines())
    37  
    38      if os.path.exists(transferred_workload_record_path):
    39          with open(transferred_workload_record_path, 'r') as f:
    40              transferred_workloads = set(f.read().strip('\n').splitlines())
    41  
    42      origin_data_recorder = open(origin_data_record_path, 'a')
    43      transferred_node_recorder = open(transferred_node_record_path, 'a')
    44      transferred_workload_recorder = open(transferred_workload_record_path, 'a')
    45  
    46  
    47  def close_recorders():
    48      transferred_node_recorder.close()
    49      transferred_workload_recorder.close()
    50      origin_data_recorder.close()
    51  
    52  
    53  def add_record(recorder, record):
    54      recorder.write('%s\n' % record)
    55  
    56  
    57  def remove_prefix(s, prefix):
    58      return s[len(prefix):].lstrip('/') if s.startswith(prefix) else s
    59  
    60  
    61  def dict_sub(d1, d2):
    62      if d1 is None:
    63          return None
    64      if d2 is None:
    65          return d1
    66      get = lambda d, k: d[k] if k in d else 0
    67      return {k: d1[k] - get(d2, k) for k in d1}
    68  
    69  
class ETCD:
    """Minimal etcd v3 accessor that namespaces all keys under a prefix."""

    def __init__(self, client, prefix):
        """Create an instance of ETCD."""
        self.etcd = client
        self.prefix = prefix

    def get(self, key):
        # Return the decoded value stored at key (prefix added when absent),
        # or None when the key does not exist.
        if not key.startswith(self.prefix):
            key = self.prefix + key
        res = self.etcd.get(key)[0]
        if res is None:
            return None
        return res.decode('utf-8')

    def put(self, key, value):
        # Write value at key.  In dry-run mode nothing is written; the
        # intended change is printed instead.  Before a real overwrite, the
        # existing value (if any) is journaled to origin_data_recorder so the
        # migration can be reverted by hand.
        if not key.startswith(self.prefix):
            key = self.prefix + key
        if dry_run:
            print('put {}\n{}'.format(key, value))
            return

        origin_value = self.get(key)
        if origin_value:
            add_record(origin_data_recorder, key)
            add_record(origin_data_recorder, origin_value)

        self.etcd.put(key, value)

    def range_prefix(self, obj_prefix, fn):
        # Iterate every key under self.prefix + obj_prefix in pages of 1000,
        # calling fn(name_without_prefix, value) for each pair.  Uses a raw
        # gRPC RangeRequest via self.etcd.kvstub so the scan can be paginated.
        prefix = self.prefix + obj_prefix
        range_start = prefix
        # Half-open range [prefix, increment_last_byte(prefix)) covers exactly
        # the keys sharing this prefix.
        range_end = etcd3.utils.increment_last_byte(
            etcd3.utils.to_bytes(range_start)
        )

        while True:
            range_request = etcd3.etcdrpc.RangeRequest()
            range_request.key = etcd3.utils.to_bytes(range_start)
            range_request.keys_only = False
            range_request.range_end = etcd3.utils.to_bytes(range_end)
            range_request.sort_order = etcd3.etcdrpc.RangeRequest.ASCEND
            range_request.sort_target = etcd3.etcdrpc.RangeRequest.KEY
            # serializable read: served locally, may be slightly stale.
            range_request.serializable = True
            range_request.limit = 1000

            range_response = self.etcd.kvstub.Range(
                range_request,
                self.etcd.timeout,
                credentials=self.etcd.call_credentials,
                metadata=self.etcd.metadata,
            )

            for kv in range_response.kvs:
                orig_key = kv.key.decode('utf-8')
                objname = remove_prefix(orig_key, prefix)
                fn(objname, kv.value.decode('utf-8'))

            if not range_response.more:
                break

            # Resume the next page just after the last key returned.
            # NOTE(review): assumes a response with more=True has at least one
            # kv, otherwise `kv` would be unbound here — confirm with etcd.
            range_start = etcd3.utils.increment_last_byte(kv.key)
   131  
   132  
# Global ETCD accessor shared by all transfer helpers; assigned in main().
etcd: "ETCD | None" = None
   134  
   135  
   136  class Node:
   137      def __init__(self, name, pod_name, meta):
   138          """Initializes a node transfer."""
   139          self.name = name
   140          self.pod_name = pod_name
   141          self.meta = json.loads(meta)
   142  
   143      def upgrade(self):
   144          cpumem_meta = self._gen_cpumem_meta()
   145          volume_meta = self._gen_volume_meta()
   146          cpumem_key = '/resource/cpumem/%s' % self.name
   147          volume_key = '/resource/volume/%s' % self.name
   148          etcd.put(cpumem_key, cpumem_meta)
   149          etcd.put(volume_key, volume_meta)
   150  
   151      def downgrade(self):
   152          self._load_resources_meta()
   153          keys = ['/node/%s' % self.name, '/node/%s:pod/%s' % (self.pod_name, self.name)]
   154          for key in keys:
   155              etcd.put(key, json.dumps(self.meta))
   156  
   157      def _load_cpumem_meta(self, meta):
   158          cpumem_meta = json.loads(meta)
   159          self.meta['init_cpu'] = cpumem_meta['capacity']['cpu_map']
   160          self.meta['cpu'] = dict_sub(cpumem_meta['capacity']['cpu_map'], cpumem_meta['usage']['cpu_map'])
   161          self.meta['init_memcap'] = cpumem_meta['capacity']['memory']
   162          self.meta['memcap'] = cpumem_meta['capacity']['memory'] - cpumem_meta['usage']['memory']
   163          self.meta['cpuused'] = cpumem_meta['usage']['cpu']
   164          self.meta['numa'] = cpumem_meta['capacity']['numa']
   165          self.meta['init_numa_memory'] = cpumem_meta['capacity']['numa_memory']
   166          self.meta['numa_memory'] = dict_sub(cpumem_meta['capacity']['numa_memory'], cpumem_meta['usage']['numa_memory'])
   167  
   168      def _load_resources_meta(self):
   169          # load cpumem resources
   170          cpumem_key = '/resource/cpumem/%s' % self.name
   171          cpumem_meta = etcd.get(cpumem_key)
   172          if not cpumem_meta:
   173              print("%s not found" % cpumem_key)
   174          self._load_cpumem_meta(cpumem_meta)
   175  
   176          # load volume resources
   177          volume_key = '/resource/volume/%s' % self.name
   178          volume_meta = etcd.get(volume_key)
   179          if not volume_meta:
   180              print("%s not found" % volume_key)
   181          self._load_volume_meta(volume_meta)
   182  
   183      def _load_volume_meta(self, meta):
   184          volume_meta = json.loads(meta)
   185          self.meta['init_volume'] = volume_meta['capacity']['volumes']
   186          self.meta['volume'] = volume_meta['usage']['volumes']
   187          self.meta['init_storage_cap'] = volume_meta['capacity']['storage']
   188          self.meta['storage_cap'] = volume_meta['usage']['storage']
   189          self.meta['volumeused'] = sum(volume_meta['usage']['volumes'].values())
   190  
   191      def _gen_cpumem_meta(self):
   192          cpumem_meta = {"capacity": {}, "usage": {}}
   193          cpumem_meta['capacity']['cpu_map'] = self.meta['init_cpu']
   194          cpumem_meta['usage']['cpu_map'] = dict_sub(self.meta['init_cpu'], self.meta['cpu'])
   195          cpumem_meta['capacity']['memory'] = self.meta['init_memcap']
   196          cpumem_meta['usage']['memory'] = self.meta['init_memcap'] - self.meta['memcap']
   197          cpumem_meta['capacity']['cpu'] = len(self.meta['init_cpu'])
   198          cpumem_meta['usage']['cpu'] = self.meta['cpuused']
   199          cpumem_meta['capacity']['numa'] = self.meta['numa']
   200          cpumem_meta['capacity']['numa_memory'] = self.meta['init_numa_memory']
   201          cpumem_meta['usage']['numa_memory'] = dict_sub(self.meta['init_numa_memory'], self.meta['numa_memory'])
   202          return json.dumps(cpumem_meta)
   203  
   204      def _gen_volume_meta(self):
   205          volume_meta = {"capacity": {}, "usage": {}}
   206          volume_meta['capacity']['volumes'] = self.meta['init_volume']
   207          volume_meta['usage']['volumes'] = dict_sub(self.meta['init_volume'], self.meta['volume'])
   208          volume_meta['capacity']['storage'] = self.meta['init_storage_cap']
   209          volume_meta['usage']['storage'] = self.meta['init_storage_cap'] - self.meta['storage_cap']
   210          return json.dumps(volume_meta)
   211  
   212  
   213  class Workload:
   214      def __init__(self, workload_id, app_name, entry_name, node_name, meta):
   215          """Initializes a workload transfer."""
   216          self.workload_id = workload_id
   217          self.app_name = app_name
   218          self.entry_name = entry_name
   219          self.node_name = node_name
   220          self.meta = json.loads(meta)
   221          self.keys = ['/workloads/%s' % self.workload_id,
   222                       '/deploy/%s/%s/%s/%s' % (self.app_name, self.entry_name, self.node_name, self.workload_id),
   223                       '/node/%s:workloads/%s' % (self.node_name, self.workload_id)]
   224  
   225      def save(self):
   226          for key in self.keys:
   227              etcd.put(key, json.dumps(self.meta))
   228  
   229      def upgrade(self):
   230          if self.workload_id in transferred_workloads:
   231              return
   232          self._gen_resource_meta()
   233          self.save()
   234  
   235      def downgrade(self):
   236          if self.workload_id in transferred_workloads:
   237              return
   238          self._load_resource_meta()
   239          self.save()
   240  
   241      def _gen_resource_meta(self):
   242          self.meta['resource_args'] = {}
   243          self.meta['resource_args']['cpumem'] = {
   244              'cpu_request': self.meta['cpu_quota_request'],
   245              'cpu_limit': self.meta['cpu_quota_limit'],
   246              'cpu_map': self.meta['cpu'],
   247              'memory_request': self.meta['memory_request'],
   248              'memory_limit': self.meta['memory_limit'],
   249              "numa_node": self.meta['numa_node'],
   250          }
   251          self.meta['resource_args']['volume'] = {
   252              'volumes_request': self.meta['volume_request'],
   253              'volumes_limit': self.meta['volume_limit'],
   254              'volume_plan_request': self.meta['volume_plan_request'],
   255              'volume_plan_limit': self.meta['volume_plan_limit'],
   256              'storage_request': self.meta['storage_request'],
   257              'storage_limit': self.meta['storage_limit'],
   258          }
   259          self.meta['engine_args'] = {
   260              'cpu': self.meta['cpu_quota_limit'],
   261              'memory': self.meta['memory_limit'],
   262              'numa_node': self.meta['numa_node'],
   263              'cpu_map': self.meta['cpu'],
   264              'storage': self.meta['storage_limit'],
   265              'volume': [],
   266          }
   267          for binding in self.meta['volume_limit']:
   268              if not binding.startswith('AUTO'):
   269                  self.meta['engine_args']['volume'].append(binding)
   270  
   271          for binding in self.meta['volume_plan_limit']:
   272              groups = binding.split(':')
   273              if len(groups) < 3:
   274                  print("volume plan limit of %s is invalid: %s" % (self.workload_id, binding))
   275  
   276              dst = groups[1]
   277              flags = groups[2]
   278              device = list(self.meta['volume_plan_limit'][binding].keys())[0]
   279              size = self.meta['volume_plan_limit'][binding][device]
   280              flags = flags.replace('m', '')
   281              if 'o' in flags:
   282                  flags = flags.replace('o', '').replace('r', 'ro').replace('w', 'wo')
   283  
   284              self.meta['engine_args']['volume'].append('%s:%s:%s:%s' % (device, dst, flags, size))
   285  
   286      def _load_resource_meta(self):
   287          self.meta['cpu_quota_request'] = self.meta['resource_args']['cpumem']['cpu_request']
   288          self.meta['cpu_quota_limit'] = self.meta['resource_args']['cpumem']['cpu_limit']
   289          self.meta['cpu'] = self.meta['resource_args']['cpumem']['cpu_map']
   290          self.meta['memory_request'] = self.meta['resource_args']['cpumem']['memory_request']
   291          self.meta['memory_limit'] = self.meta['resource_args']['cpumem']['memory_limit']
   292          self.meta['numa_node'] = self.meta['resource_args']['cpumem']['numa_node']
   293          self.meta['volume_request'] = self.meta['resource_args']['volume']['volumes_request']
   294          self.meta['volume_limit'] = self.meta['resource_args']['volume']['volumes_limit']
   295          self.meta['volume_plan_request'] = self.meta['resource_args']['volume']['volume_plan_request']
   296          self.meta['volume_plan_limit'] = self.meta['resource_args']['volume']['volume_plan_limit']
   297          self.meta['storage_request'] = self.meta['resource_args']['volume']['storage_request']
   298          self.meta['storage_limit'] = self.meta['resource_args']['volume']['storage_limit']
   299  
   300  
   301  def connect_etcd(host, port):
   302      return etcd3.client(host=host, port=port)
   303  
   304  
   305  def transfer_node(key, value, upgrade=True):
   306      if ':pod' not in key:
   307          return
   308      node_name = key.split('/')[-1]
   309      pod_name = key.split(':')[0].strip('/')
   310      if node_name in transferred_nodes:
   311          return
   312  
   313      print('transferring node %s' % node_name)
   314      node = Node(node_name, pod_name, value)
   315      if upgrade:
   316          node.upgrade()
   317      else:
   318          node.downgrade()
   319      add_record(transferred_node_recorder, node_name)
   320  
   321  
   322  def transfer_workload(key, value, upgrade=True):
   323      app_name, entry_name, node_name, workload_id = key.strip('/').split('/')
   324      if workload_id in transferred_workloads:
   325          return
   326  
   327      print('transferring workload %s' % workload_id)
   328      workload = Workload(workload_id, app_name, entry_name, node_name, value)
   329      if upgrade:
   330          workload.upgrade()
   331      else:
   332          workload.downgrade()
   333      add_record(transferred_workload_recorder, workload_id)
   334  
   335  
   336  def transfer(upgrade=True):
   337      etcd.range_prefix('/node', functools.partial(transfer_node, upgrade=upgrade))
   338      etcd.range_prefix('/deploy', functools.partial(transfer_workload, upgrade=upgrade))
   339  
   340  
   341  def get_args():
   342      ap = argparse.ArgumentParser()
   343      ap.add_argument('--upgrade', action='store_true', help='upgrade to new eru-core')
   344      ap.add_argument('--downgrade', action='store_true', help='downgrade to old eru-core')
   345      ap.add_argument('--etcd-prefix', help='etcd prefix', default='/eru')
   346      ap.add_argument('--etcd-host', default='127.0.0.1')
   347      ap.add_argument('--etcd-port', type=int, default=2379)
   348      ap.add_argument('--dry-run', dest='dry_run', action='store_true', help='dry run, will not actually migrate')
   349      return ap.parse_args()
   350  
   351  
   352  def main():
   353      args = get_args()
   354      if not args.upgrade and not args.downgrade:
   355          print('please specify --upgrade or --downgrade')
   356  
   357      global etcd, dry_run, record_prefix
   358      etcd = ETCD(connect_etcd(args.etcd_host, args.etcd_port), args.etcd_prefix)
   359      dry_run = args.dry_run
   360      upgrade = args.upgrade
   361      if not upgrade:
   362          record_prefix = 'downgrade'
   363  
   364      init_recorders()
   365      transfer(upgrade)
   366  
   367  
# Run only when executed as a script, so the module stays importable.
if __name__ == '__main__':
    main()