github.com/nsqio/nsq@v1.3.0/bench/bench.py (about)

     1  #!/usr/bin/env python3
     2  
     3  #
     4  # This script bootstraps an NSQ cluster in EC2 and runs benchmarks.
     5  #
     6  # Requires python3 and the following packages:
     7  #   - boto3
     8  #   - paramiko
     9  #   - tornado
    10  #
    11  # AWS authentication is delegated entirely to the boto3 environment, see:
    12  #
    13  # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html
    14  #
    15  # EC2 instances are launched into EC2 Classic, expecting a 'default' security group
    16  # that allows SSH (port 22) from 0.0.0.0/0 and an EC2 key pair created
    17  # (named 'default', but configurable via --ssh-key-name).
    18  #
    19  
    20  import sys
    21  import logging
    22  import time
    23  import datetime
    24  import socket
    25  import warnings
    26  import hashlib
    27  
    28  import boto3
    29  import paramiko.client
    30  import paramiko.ssh_exception
    31  import tornado.options
    32  
    33  
    34  def ssh_connect_with_retries(host, retries=3, timeout=30):
    35      for i in range(retries):
    36          try:
    37              ssh_client = paramiko.client.SSHClient()
    38              ssh_client.set_missing_host_key_policy(paramiko.client.WarningPolicy())
    39              ssh_client.connect(host, username='ubuntu', timeout=timeout)
    40              return ssh_client
    41          except (socket.error, paramiko.ssh_exception.SSHException):
    42              if i == retries - 1:
    43                  raise
    44          logging.warning('... re-trying to connect to %s:%d in 15s', host, 22)
    45          time.sleep(15)
    46  
    47  
    48  def ssh_cmd_async(ssh_client, cmd):
    49      transport = ssh_client.get_transport()
    50      chan = transport.open_session()
    51      chan.exec_command(cmd)
    52      return chan
    53  
    54  
    55  def ssh_cmd(ssh_client, cmd, timeout=2):
    56      transport = ssh_client.get_transport()
    57      chan = transport.open_session()
    58      chan.settimeout(timeout)
    59      chan.exec_command(cmd)
    60  
    61      stdout = b''
    62      stderr = b''
    63      while True:
    64          if chan.recv_ready():
    65              stdout += chan.recv(4096)
    66              continue
    67          if chan.recv_stderr_ready():
    68              stderr += chan.recv_stderr(4096)
    69              continue
    70          if chan.exit_status_ready():
    71              exit_status = chan.recv_exit_status()
    72              break
    73          time.sleep(0.1)
    74  
    75      if exit_status != 0:
    76          raise Exception('%r' % stderr)
    77  
    78      return stdout, stderr
    79  
    80  
    81  def get_session():
    82      return boto3.session.Session(region_name=tornado.options.options.region)
    83  
    84  
    85  def _bootstrap(addr):
    86      commit = tornado.options.options.commit
    87      golang_version = tornado.options.options.golang_version
    88      ssh_client = ssh_connect_with_retries(addr)
    89      for cmd in [
    90              'wget https://storage.googleapis.com/golang/go%s.linux-amd64.tar.gz' % golang_version,
    91              'sudo -S tar -C /usr/local -xzf go%s.linux-amd64.tar.gz' % golang_version,
    92              'sudo -S apt-get update',
    93              'sudo -S apt-get -y install git mercurial',
    94              'mkdir -p go/src/github.com/nsqio',
    95              'cd go/src/github.com/nsqio && git clone https://github.com/nsqio/nsq',
    96              'cd go/src/github.com/nsqio/nsq && git checkout %s' % commit,
    97              'cd go/src/github.com/nsqio/nsq/apps/nsqd && GO111MODULE=on /usr/local/go/bin/go build',
    98              'cd go/src/github.com/nsqio/nsq/bench/bench_writer && GO111MODULE=on /usr/local/go/bin/go build',
    99              'cd go/src/github.com/nsqio/nsq/bench/bench_reader && GO111MODULE=on /usr/local/go/bin/go build',
   100              'sudo -S mkdir -p /mnt/nsq',
   101              'sudo -S chmod 777 /mnt/nsq']:
   102          ssh_cmd(ssh_client, cmd, timeout=10)
   103  
   104  
   105  def bootstrap():
   106      session = get_session()
   107  
   108      ec2 = session.resource('ec2')
   109  
   110      total_count = tornado.options.options.nsqd_count + tornado.options.options.worker_count
   111      logging.info('launching %d instances', total_count)
   112      instances = ec2.create_instances(
   113          ImageId=tornado.options.options.ami,
   114          MinCount=total_count,
   115          MaxCount=total_count,
   116          KeyName=tornado.options.options.ssh_key_name,
   117          InstanceType=tornado.options.options.instance_type,
   118          SecurityGroups=['default'])
   119  
   120      logging.info('waiting for instances to launch...')
   121  
   122      while any(i.state['Name'] != 'running' for i in instances):
   123          waiting_for = [i.id for i in instances if i.state['Name'] != 'running']
   124          logging.info('... sleeping for 5s (waiting for %s)', ', '.join(waiting_for))
   125          time.sleep(5)
   126          for instance in instances:
   127              instance.load()
   128  
   129      for instance in instances:
   130          if not instance.tags:
   131              instance.create_tags(Tags=[{'Key': 'nsq_bench', 'Value': '1'}])
   132  
   133      try:
   134          c = 0
   135          for i in instances:
   136              c += 1
   137              logging.info('(%d) bootstrapping %s (%s)', c, i.public_dns_name, i.id)
   138              _bootstrap(i.public_dns_name)
   139      except Exception:
   140          logging.exception('bootstrap failed')
   141          decomm()
   142  
   143  
   144  def run():
   145      instances = _find_instances()
   146  
   147      logging.info('launching nsqd on %d host(s)', tornado.options.options.nsqd_count)
   148  
   149      nsqd_chans = []
   150      nsqd_hosts = instances[:tornado.options.options.nsqd_count]
   151      for instance in nsqd_hosts:
   152          try:
   153              ssh_client = ssh_connect_with_retries(instance.public_dns_name)
   154              for cmd in [
   155                      'sudo -S pkill -f nsqd',
   156                      'sudo -S rm -f /mnt/nsq/*.dat',
   157                      'GOMAXPROCS=32 ./go/src/github.com/nsqio/nsq/apps/nsqd/nsqd \
   158                          --data-path=/mnt/nsq \
   159                          --mem-queue-size=10000000 \
   160                          --max-rdy-count=%s' % (tornado.options.options.rdy)]:
   161                  nsqd_chans.append((ssh_client, ssh_cmd_async(ssh_client, cmd)))
   162          except Exception:
   163              logging.exception('failed')
   164  
   165      nsqd_tcp_addrs = [i.public_dns_name for i in nsqd_hosts]
   166  
   167      dt = datetime.datetime.utcnow()
   168      deadline = dt + datetime.timedelta(seconds=30)
   169  
   170      logging.info('launching %d producer(s) on %d host(s)',
   171                   tornado.options.options.nsqd_count * tornado.options.options.worker_count,
   172                   tornado.options.options.worker_count)
   173  
   174      worker_chans = []
   175  
   176      producer_hosts = instances[tornado.options.options.nsqd_count:]
   177      for instance in producer_hosts:
   178          for nsqd_tcp_addr in nsqd_tcp_addrs:
   179              topic = hashlib.md5(instance.public_dns_name.encode('utf-8')).hexdigest()
   180              try:
   181                  ssh_client = ssh_connect_with_retries(instance.public_dns_name)
   182                  for cmd in [
   183                          'GOMAXPROCS=2 \
   184                              ./go/src/github.com/nsqio/nsq/bench/bench_writer/bench_writer \
   185                              --topic=%s --nsqd-tcp-address=%s:4150 --deadline=\'%s\' --size=%d' % (
   186                              topic, nsqd_tcp_addr, deadline.strftime('%Y-%m-%d %H:%M:%S'),
   187                              tornado.options.options.msg_size)]:
   188                      worker_chans.append((ssh_client, ssh_cmd_async(ssh_client, cmd)))
   189              except Exception:
   190                  logging.exception('failed')
   191  
   192      if tornado.options.options.mode == 'pubsub':
   193          logging.info('launching %d consumer(s) on %d host(s)',
   194                       tornado.options.options.nsqd_count * tornado.options.options.worker_count,
   195                       tornado.options.options.worker_count)
   196  
   197          consumer_hosts = instances[tornado.options.options.nsqd_count:]
   198          for instance in consumer_hosts:
   199              for nsqd_tcp_addr in nsqd_tcp_addrs:
   200                  topic = hashlib.md5(instance.public_dns_name.encode('utf-8')).hexdigest()
   201                  try:
   202                      ssh_client = ssh_connect_with_retries(instance.public_dns_name)
   203                      for cmd in [
   204                              'GOMAXPROCS=8 \
   205                                  ./go/src/github.com/nsqio/nsq/bench/bench_reader/bench_reader \
   206                                  --topic=%s --nsqd-tcp-address=%s:4150 --deadline=\'%s\' --size=%d \
   207                                  --rdy=%d' % (
   208                                  topic, nsqd_tcp_addr, deadline.strftime('%Y-%m-%d %H:%M:%S'),
   209                                  tornado.options.options.msg_size, tornado.options.options.rdy)]:
   210                          worker_chans.append((ssh_client, ssh_cmd_async(ssh_client, cmd)))
   211                  except Exception:
   212                      logging.exception('failed')
   213  
   214      stats = {
   215          'bench_reader': {
   216              'durations': [],
   217              'mbytes': [],
   218              'ops': []
   219          },
   220          'bench_writer': {
   221              'durations': [],
   222              'mbytes': [],
   223              'ops': []
   224          }
   225      }
   226      while worker_chans:
   227          for ssh_client, chan in worker_chans[:]:
   228              if chan.recv_ready():
   229                  sys.stdout.write(chan.recv(4096))
   230                  sys.stdout.flush()
   231                  continue
   232              if chan.recv_stderr_ready():
   233                  line = chan.recv_stderr(4096).decode('utf-8')
   234                  if 'duration:' in line:
   235                      kind = line.split(' ')[0][1:-1]
   236                      parts = line.rsplit('duration:')[1].split('-')
   237                      stats[kind]['durations'].append(float(parts[0].strip()[:-1]))
   238                      stats[kind]['mbytes'].append(float(parts[1].strip()[:-4]))
   239                      stats[kind]['ops'].append(float(parts[2].strip()[:-5]))
   240                  sys.stdout.write(line)
   241                  sys.stdout.flush()
   242                  continue
   243              if chan.exit_status_ready():
   244                  worker_chans.remove((ssh_client, chan))
   245          time.sleep(0.1)
   246  
   247      for kind, data in stats.items():
   248          if not data['durations']:
   249              continue
   250  
   251          max_duration = max(data['durations'])
   252          total_mb = sum(data['mbytes'])
   253          total_ops = sum(data['ops'])
   254  
   255          logging.info('[%s] %fs - %fmb/s - %fops/s - %fus/op',
   256                       kind, max_duration, total_mb, total_ops,
   257                       max_duration / total_ops * 1000 * 1000)
   258  
   259      for ssh_client, chan in nsqd_chans:
   260          chan.close()
   261  
   262  
   263  def _find_instances():
   264      session = get_session()
   265      ec2 = session.resource('ec2')
   266      return [i for i in ec2.instances.all() if
   267              i.state['Name'] == 'running' and any(t['Key'] == 'nsq_bench' for t in i.tags)]
   268  
   269  
   270  def decomm():
   271      instances = _find_instances()
   272      logging.info('terminating instances %s' % ','.join(i.id for i in instances))
   273      for instance in instances:
   274          instance.terminate()
   275  
   276  
   277  if __name__ == '__main__':
   278      tornado.options.define('region', type=str, default='us-east-1',
   279                             help='EC2 region to launch instances')
   280      tornado.options.define('nsqd_count', type=int, default=3,
   281                             help='how many nsqd instances to launch')
   282      tornado.options.define('worker_count', type=int, default=3,
   283                             help='how many worker instances to launch')
   284      # ubuntu 18.04 HVM instance store us-east-1
   285      tornado.options.define('ami', type=str, default='ami-0938f2289b3fa3f5b',
   286                             help='AMI ID')
   287      tornado.options.define('ssh_key_name', type=str, default='default',
   288                             help='SSH key name')
   289      tornado.options.define('instance_type', type=str, default='c3.2xlarge',
   290                             help='EC2 instance type')
   291      tornado.options.define('msg_size', type=int, default=200,
   292                             help='size of message')
   293      tornado.options.define('rdy', type=int, default=10000,
   294                             help='RDY count to use for bench_reader')
   295      tornado.options.define('mode', type=str, default='pubsub',
   296                             help='the benchmark mode (pub, pubsub)')
   297      tornado.options.define('commit', type=str, default='master',
   298                             help='the git commit')
   299      tornado.options.define('golang_version', type=str, default='1.14.3',
   300                             help='the go version')
   301      tornado.options.parse_command_line()
   302  
   303      logging.getLogger('paramiko').setLevel(logging.WARNING)
   304      warnings.simplefilter('ignore')
   305  
   306      cmd_name = sys.argv[-1]
   307      cmd_map = {
   308          'bootstrap': bootstrap,
   309          'run': run,
   310          'decomm': decomm
   311      }
   312      cmd = cmd_map.get(cmd_name, bootstrap)
   313  
   314      sys.exit(cmd())