#!/usr/bin/env python3

#
# This script bootstraps an NSQ cluster in EC2 and runs benchmarks.
#
# Origin: github.com/nsqio/nsq@v1.3.0/bench/bench.py
#
# Requires python3 and the following packages:
#   - boto3
#   - paramiko
#   - tornado
#
# AWS authentication is delegated entirely to the boto3 environment, see:
#
# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html
#
# EC2 instances are launched into EC2 Classic, expecting a 'default' security group
# that allows SSH (port 22) from 0.0.0.0/0 and an EC2 key pair created
# (named 'default', but configurable via --ssh-key-name).
#
# The command to execute is taken from the last command line argument and is
# one of: bootstrap, run, decomm.
#

import sys
import logging
import time
import datetime
import socket
import warnings
import hashlib

import boto3
import paramiko.client
import paramiko.ssh_exception
import tornado.options


def ssh_connect_with_retries(host, retries=3, timeout=30):
    """Open an SSH connection to ``host`` as user ``ubuntu``.

    Connection attempts that fail with a socket error or an SSHException are
    retried after a 15 second pause, up to ``retries`` attempts in total; the
    error from the last attempt is re-raised.

    Returns the connected ``paramiko.client.SSHClient``.
    """
    for attempt in range(retries):
        ssh_client = paramiko.client.SSHClient()
        try:
            ssh_client.set_missing_host_key_policy(paramiko.client.WarningPolicy())
            ssh_client.connect(host, username='ubuntu', timeout=timeout)
            return ssh_client
        except (socket.error, paramiko.ssh_exception.SSHException):
            # Release the half-open client before retrying or giving up.
            ssh_client.close()
            if attempt == retries - 1:
                raise
            logging.warning('... re-trying to connect to %s:%d in 15s', host, 22)
            time.sleep(15)


def ssh_cmd_async(ssh_client, cmd):
    """Start ``cmd`` on a new session channel and return the channel immediately.

    The caller is responsible for reading output from, and closing, the channel.
    """
    transport = ssh_client.get_transport()
    chan = transport.open_session()
    chan.exec_command(cmd)
    return chan


def ssh_cmd(ssh_client, cmd, timeout=2):
    """Run ``cmd`` over ``ssh_client`` and wait for it to exit.

    ``timeout`` is applied to the channel via ``settimeout``; the polling loop
    itself has no overall deadline.

    Returns a ``(stdout, stderr)`` tuple of bytes.
    Raises ``Exception`` (carrying the repr of stderr) on a non-zero exit status.
    """
    transport = ssh_client.get_transport()
    chan = transport.open_session()
    try:
        chan.settimeout(timeout)
        chan.exec_command(cmd)

        stdout = b''
        stderr = b''
        while True:
            if chan.recv_ready():
                stdout += chan.recv(4096)
                continue
            if chan.recv_stderr_ready():
                stderr += chan.recv_stderr(4096)
                continue
            if chan.exit_status_ready():
                exit_status = chan.recv_exit_status()
                break
            time.sleep(0.1)

        # Output may have been buffered between the readiness checks above and
        # the exit status check; collect it so nothing is dropped.
        while chan.recv_ready():
            stdout += chan.recv(4096)
        while chan.recv_stderr_ready():
            stderr += chan.recv_stderr(4096)
    finally:
        chan.close()

    if exit_status != 0:
        raise Exception('%r' % stderr)

    return stdout, stderr


def get_session():
    """Return a boto3 session bound to the region given by ``--region``."""
    return boto3.session.Session(region_name=tornado.options.options.region)


def _bootstrap(addr):
    """Install Go and build nsqd, bench_writer and bench_reader on ``addr``.

    Commands run sequentially; the first one that exits non-zero raises
    (see ``ssh_cmd``). The SSH connection is always closed afterwards.
    """
    commit = tornado.options.options.commit
    golang_version = tornado.options.options.golang_version
    commands = [
        'wget https://storage.googleapis.com/golang/go%s.linux-amd64.tar.gz' % golang_version,
        'sudo -S tar -C /usr/local -xzf go%s.linux-amd64.tar.gz' % golang_version,
        'sudo -S apt-get update',
        'sudo -S apt-get -y install git mercurial',
        'mkdir -p go/src/github.com/nsqio',
        'cd go/src/github.com/nsqio && git clone https://github.com/nsqio/nsq',
        'cd go/src/github.com/nsqio/nsq && git checkout %s' % commit,
        'cd go/src/github.com/nsqio/nsq/apps/nsqd && GO111MODULE=on /usr/local/go/bin/go build',
        'cd go/src/github.com/nsqio/nsq/bench/bench_writer '
        '&& GO111MODULE=on /usr/local/go/bin/go build',
        'cd go/src/github.com/nsqio/nsq/bench/bench_reader '
        '&& GO111MODULE=on /usr/local/go/bin/go build',
        'sudo -S mkdir -p /mnt/nsq',
        'sudo -S chmod 777 /mnt/nsq',
    ]

    ssh_client = ssh_connect_with_retries(addr)
    try:
        for cmd in commands:
            ssh_cmd(ssh_client, cmd, timeout=10)
    finally:
        ssh_client.close()


def bootstrap():
    """Launch the EC2 instances, tag them and bootstrap each one.

    Launches ``nsqd_count + worker_count`` instances, waits until all report
    the ``running`` state, tags untagged ones with ``nsq_bench`` and then runs
    ``_bootstrap`` on each in turn.

    Returns 0 on success. If bootstrapping any host fails the error is logged,
    all benchmark instances are terminated via ``decomm`` and 1 is returned so
    the process exits with a failure status.
    """
    opts = tornado.options.options
    session = get_session()
    ec2 = session.resource('ec2')

    total_count = opts.nsqd_count + opts.worker_count
    logging.info('launching %d instances', total_count)
    instances = ec2.create_instances(
        ImageId=opts.ami,
        MinCount=total_count,
        MaxCount=total_count,
        KeyName=opts.ssh_key_name,
        InstanceType=opts.instance_type,
        SecurityGroups=['default'])

    logging.info('waiting for instances to launch...')

    while any(i.state['Name'] != 'running' for i in instances):
        waiting_for = [i.id for i in instances if i.state['Name'] != 'running']
        logging.info('... sleeping for 5s (waiting for %s)', ', '.join(waiting_for))
        time.sleep(5)
        # Refresh the cached attributes (including state) of every instance.
        for instance in instances:
            instance.load()

    for instance in instances:
        if not instance.tags:
            # The tag is how _find_instances() locates benchmark hosts later.
            instance.create_tags(Tags=[{'Key': 'nsq_bench', 'Value': '1'}])

    try:
        for c, i in enumerate(instances, start=1):
            logging.info('(%d) bootstrapping %s (%s)', c, i.public_dns_name, i.id)
            _bootstrap(i.public_dns_name)
    except Exception:
        logging.exception('bootstrap failed')
        decomm()
        return 1

    return 0


def _record_stats(stats, text):
    """Fold any ``duration:`` result lines found in ``text`` into ``stats``.

    Each matching line is expected to start with a bracketed kind (e.g.
    ``[bench_writer]``) and to carry ``duration: <n>s - <n>mb/s - <n>ops/s``.
    Lines that do not parse, or whose kind is not a key of ``stats``, are
    logged and skipped rather than aborting the run.
    """
    for line in text.splitlines():
        if 'duration:' not in line:
            continue
        try:
            kind = line.split(' ')[0][1:-1]
            parts = line.split('duration:')[1].split('-')
            duration = float(parts[0].strip()[:-1])  # strip trailing 's'
            mbytes = float(parts[1].strip()[:-4])    # strip trailing 'mb/s'
            ops = float(parts[2].strip()[:-5])       # strip trailing 'ops/s'
            bucket = stats[kind]
        except (KeyError, IndexError, ValueError):
            logging.warning('could not parse stats line: %r', line)
            continue
        bucket['durations'].append(duration)
        bucket['mbytes'].append(mbytes)
        bucket['ops'].append(ops)


def _pump_worker_output(chan, stats):
    """Relay at most one pending chunk of stdout and of stderr from ``chan``.

    Output is echoed to our stdout; stderr text is additionally scanned for
    benchmark results. Returns True if anything was read.
    """
    read_any = False
    if chan.recv_ready():
        # recv() returns bytes; sys.stdout only accepts str.
        sys.stdout.write(chan.recv(4096).decode('utf-8', errors='replace'))
        read_any = True
    if chan.recv_stderr_ready():
        text = chan.recv_stderr(4096).decode('utf-8', errors='replace')
        _record_stats(stats, text)
        sys.stdout.write(text)
        read_any = True
    if read_any:
        sys.stdout.flush()
    return read_any


def _start_workers(hosts, nsqd_tcp_addrs, build_cmd):
    """Start one worker process per (host, nsqd address) pair.

    ``build_cmd(topic, nsqd_tcp_addr)`` returns the shell command to run; the
    topic is the MD5 hex digest of the worker host's public DNS name. Failures
    are logged and skipped (best effort).

    Returns a list of ``(ssh_client, channel)`` tuples for the started workers.
    """
    chans = []
    for instance in hosts:
        topic = hashlib.md5(instance.public_dns_name.encode('utf-8')).hexdigest()
        for nsqd_tcp_addr in nsqd_tcp_addrs:
            ssh_client = None
            try:
                ssh_client = ssh_connect_with_retries(instance.public_dns_name)
                cmd = build_cmd(topic, nsqd_tcp_addr)
                chans.append((ssh_client, ssh_cmd_async(ssh_client, cmd)))
            except Exception:
                logging.exception('failed')
                if ssh_client is not None:
                    ssh_client.close()
    return chans


def run():
    """Run the benchmark against the already bootstrapped instances.

    The first ``nsqd_count`` tagged instances run nsqd, the remainder run
    bench_writer (and bench_reader when ``--mode=pubsub``) against every nsqd.
    Worker output is relayed to stdout and the reported results are aggregated
    and logged per worker kind once all workers have exited.
    """
    opts = tornado.options.options
    instances = _find_instances()

    logging.info('launching nsqd on %d host(s)', opts.nsqd_count)

    nsqd_chans = []
    nsqd_hosts = instances[:opts.nsqd_count]
    for instance in nsqd_hosts:
        try:
            ssh_client = ssh_connect_with_retries(instance.public_dns_name)
            # NOTE(review): all three commands are started asynchronously on
            # separate channels, so nothing here orders the pkill/rm before the
            # nsqd start -- confirm this is acceptable.
            for cmd in [
                    'sudo -S pkill -f nsqd',
                    'sudo -S rm -f /mnt/nsq/*.dat',
                    'GOMAXPROCS=32 ./go/src/github.com/nsqio/nsq/apps/nsqd/nsqd '
                    '--data-path=/mnt/nsq '
                    '--mem-queue-size=10000000 '
                    '--max-rdy-count=%s' % (opts.rdy)]:
                nsqd_chans.append((ssh_client, ssh_cmd_async(ssh_client, cmd)))
        except Exception:
            logging.exception('failed')

    nsqd_tcp_addrs = [i.public_dns_name for i in nsqd_hosts]

    # Workers stop at a shared wall-clock deadline 30 seconds from now (UTC).
    deadline = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(seconds=30)
    deadline_str = deadline.strftime('%Y-%m-%d %H:%M:%S')

    def writer_cmd(topic, nsqd_tcp_addr):
        return (
            'GOMAXPROCS=2 '
            './go/src/github.com/nsqio/nsq/bench/bench_writer/bench_writer '
            "--topic=%s --nsqd-tcp-address=%s:4150 --deadline='%s' --size=%d" % (
                topic, nsqd_tcp_addr, deadline_str, opts.msg_size))

    def reader_cmd(topic, nsqd_tcp_addr):
        return (
            'GOMAXPROCS=8 '
            './go/src/github.com/nsqio/nsq/bench/bench_reader/bench_reader '
            "--topic=%s --nsqd-tcp-address=%s:4150 --deadline='%s' --size=%d "
            '--rdy=%d' % (
                topic, nsqd_tcp_addr, deadline_str, opts.msg_size, opts.rdy))

    worker_hosts = instances[opts.nsqd_count:]

    logging.info('launching %d producer(s) on %d host(s)',
                 opts.nsqd_count * opts.worker_count,
                 opts.worker_count)
    worker_chans = _start_workers(worker_hosts, nsqd_tcp_addrs, writer_cmd)

    if opts.mode == 'pubsub':
        logging.info('launching %d consumer(s) on %d host(s)',
                     opts.nsqd_count * opts.worker_count,
                     opts.worker_count)
        worker_chans.extend(_start_workers(worker_hosts, nsqd_tcp_addrs, reader_cmd))

    stats = {
        'bench_reader': {
            'durations': [],
            'mbytes': [],
            'ops': []
        },
        'bench_writer': {
            'durations': [],
            'mbytes': [],
            'ops': []
        }
    }
    while worker_chans:
        for ssh_client, chan in worker_chans[:]:
            # Sample the exit state BEFORE reading: if the process had already
            # exited at this point, whatever is buffered now is its complete
            # remaining output, so draining it cannot miss the result line.
            if chan.exit_status_ready():
                while _pump_worker_output(chan, stats):
                    pass
                worker_chans.remove((ssh_client, chan))
                ssh_client.close()
            else:
                _pump_worker_output(chan, stats)
        time.sleep(0.1)

    for kind, data in stats.items():
        if not data['durations']:
            continue

        max_duration = max(data['durations'])
        total_mb = sum(data['mbytes'])
        total_ops = sum(data['ops'])
        # NOTE(review): this scales by the longest duration rather than being
        # a plain reciprocal of the summed ops/s -- confirm that is intended.
        if total_ops:
            us_per_op = max_duration / total_ops * 1000 * 1000
        else:
            us_per_op = float('nan')

        logging.info('[%s] %fs - %fmb/s - %fops/s - %fus/op',
                     kind, max_duration, total_mb, total_ops,
                     us_per_op)

    # Close every nsqd channel first, then the (shared) connections.
    for _, chan in nsqd_chans:
        chan.close()
    for ssh_client, _ in nsqd_chans:
        ssh_client.close()


def _find_instances():
    """Return all running EC2 instances that carry the ``nsq_bench`` tag."""
    session = get_session()
    ec2 = session.resource('ec2')
    # ``tags`` is None (not an empty list) for instances without any tags.
    return [i for i in ec2.instances.all() if
            i.state['Name'] == 'running' and
            any(t['Key'] == 'nsq_bench' for t in (i.tags or []))]


def decomm():
    """Terminate every running instance tagged ``nsq_bench``."""
    instances = _find_instances()
    logging.info('terminating instances %s', ','.join(i.id for i in instances))
    for instance in instances:
        instance.terminate()


if __name__ == '__main__':
    tornado.options.define('region', type=str, default='us-east-1',
                           help='EC2 region to launch instances')
    tornado.options.define('nsqd_count', type=int, default=3,
                           help='how many nsqd instances to launch')
    tornado.options.define('worker_count', type=int, default=3,
                           help='how many worker instances to launch')
    # ubuntu 18.04 HVM instance store us-east-1
    tornado.options.define('ami', type=str, default='ami-0938f2289b3fa3f5b',
                           help='AMI ID')
    tornado.options.define('ssh_key_name', type=str, default='default',
                           help='SSH key name')
    tornado.options.define('instance_type', type=str, default='c3.2xlarge',
                           help='EC2 instance type')
    tornado.options.define('msg_size', type=int, default=200,
                           help='size of message')
    tornado.options.define('rdy', type=int, default=10000,
                           help='RDY count to use for bench_reader')
    tornado.options.define('mode', type=str, default='pubsub',
                           help='the benchmark mode (pub, pubsub)')
    tornado.options.define('commit', type=str, default='master',
                           help='the git commit')
    tornado.options.define('golang_version', type=str, default='1.14.3',
                           help='the go version')
    tornado.options.parse_command_line()

    logging.getLogger('paramiko').setLevel(logging.WARNING)
    warnings.simplefilter('ignore')

    # The command is the last argument.
    # NOTE(review): anything unrecognised (including a mistyped command) falls
    # back to 'bootstrap', which launches instances -- confirm this default.
    cmd_name = sys.argv[-1]
    cmd_map = {
        'bootstrap': bootstrap,
        'run': run,
        'decomm': decomm
    }
    cmd = cmd_map.get(cmd_name, bootstrap)

    sys.exit(cmd())