github.com/technosophos/deis@v1.7.1-0.20150915173815-f9005256004b/contrib/azure/azure-coreos-cluster (about)

     1  #!/usr/bin/env python
     2  
# NOTE: the wildcard SDK imports stay first so that the explicit
# standard-library imports below win if the SDK exposes same-named modules.
from azure import *
from azure.servicemanagement import *

import argparse
import base64
import os
import subprocess
import sys
import time
import urllib2
    11  
    12  parser = argparse.ArgumentParser(description='Create a CoreOS cluster on Microsoft Azure.')
    13  parser.add_argument('--version', action='version', version='azure-coreos-cluster 0.1')
    14  parser.add_argument('cloud_service_name',
    15                     help='cloud service name')
    16  parser.add_argument('--ssh-cert',
    17                     help='certificate file with public key for ssh, in .cer format')
    18  parser.add_argument('--ssh-thumb',
    19                     help='thumbprint of ssh cert')
    20  parser.add_argument('--subscription', required=True,
    21                     help='required Azure subscription id')
    22  parser.add_argument('--azure-cert', required=True,
    23                     help='required path to Azure cert pem file')
    24  parser.add_argument('--blob-container-url', required=True,
    25                     help='required url to blob container where vm disk images will be created, including /, ex: https://patcoreos.blob.core.windows.net/vhds/')
    26  parser.add_argument('--vm-size', default='Small',
    27                     help='optional, VM size [Small]')
    28  parser.add_argument('--vm-name-prefix', default='coreos',
    29                     help='optional, VM name prefix [coreos]')
    30  parser.add_argument('--availability-set', default='coreos-as',
    31                     help='optional, name of availability set for cluster [coreos-as]')
    32  parser.add_argument('--location', default='West US',
    33                     help='optional - overriden by affinity-group, [West US]')
    34  parser.add_argument('--affinity-group', default='',
    35                     help='optional, overrides location if specified')
    36  parser.add_argument('--ssh', default=22001, type=int,
    37                     help='optional, starts with 22001 and +1 for each machine in cluster')
    38  parser.add_argument('--coreos-image', default='2b171e93f07c4903bcad35bda10acf22__CoreOS-Stable-766.3.0',
    39                     help='optional, [2b171e93f07c4903bcad35bda10acf22__CoreOS-Stable-766.3.0]')
    40  parser.add_argument('--num-nodes', default=3, type=int,
    41                     help='optional, number of nodes to create (or add), defaults to 3')
    42  parser.add_argument('--virtual-network-name',
    43                     help='optional, name of an existing virtual network to which we will add the VMs')
    44  parser.add_argument('--subnet-names',
    45                     help='optional, subnet name to which the VMs will belong')
    46  parser.add_argument('--custom-data',
    47                     help='optional, path to your own cloud-init file')
    48  parser.add_argument('--discovery-service-url',
    49                     help='optional, url for an existing cluster discovery service. Else we will generate one.')
    50  parser.add_argument('--pip', action='store_true',
    51                     help='optional, assigns public instance ip addresses to each VM')
    52  parser.add_argument('--deis', action='store_true',
    53                     help='optional, automatically opens http and controller endpoints')
    54  parser.add_argument('--data-disk', action='store_true',
    55                     help='optional, attaches a data disk to each VM')
    56  parser.add_argument('--nohttps', action='store_true',
    57                     help='optional, disables the creation of the https load balanced endpoint')
    58  parser.add_argument('--no-discovery-url', action='store_true',
    59                     help='optional, disables the creation of a new coreos discovery url')
    60  
    61  cloud_init_template = """#cloud-config
    62  
    63  coreos:
    64    etcd2:
    65      # generate a new token for each unique cluster from https://discovery.etcd.io/new
    66      discovery: {0}
    67      # multi-region and multi-cloud deployments need to use $public_ipv4
    68      advertise-client-urls: http://$private_ipv4:2379
    69      initial-advertise-peer-urls: http://$private_ipv4:2380
    70      # listen on both the official ports and the legacy ports
    71      # legacy ports can be omitted if your application doesn't depend on them
    72      listen-client-urls: http://0.0.0.0:2379,http://0.0.0.0:4001
    73      listen-peer-urls: http://$private_ipv4:2380,http://$private_ipv4:7001
    74      data-dir: /var/lib/etcd2
    75    units:
    76      - name: etcd2.service
    77        command: start
    78      - name: fleet.service
    79        command: start
    80  """
    81  
    82  args = parser.parse_args()
    83  
    84  # Create SSH cert if it's not given
    85  if not args.ssh_cert and not args.ssh_thumb:
    86    print 'SSH arguments not given, generating certificate'
    87    with open(os.devnull, 'w') as shutup:
    88        subprocess.call('openssl req -x509 -nodes -days 365 -newkey rsa:2048 -config cert.conf -keyout ssh-cert.key -out ssh-cert.pem', shell=True, stdout=shutup, stderr=shutup)
    89        subprocess.call('chmod 600 ssh-cert.key', shell=True, stdout=shutup, stderr=shutup)
    90        subprocess.call('openssl  x509 -outform der -in ssh-cert.pem -out ssh-cert.cer', shell=True, stdout=shutup, stderr=shutup)
    91        thumbprint = subprocess.check_output('openssl x509 -in ssh-cert.pem -sha1 -noout -fingerprint | sed s/://g', shell=True)
    92        args.ssh_thumb = thumbprint.split('=')[1].replace('\n', '')
    93        args.ssh_cert = './ssh-cert.cer'
    94    print 'Generated SSH certificate with thumbprint ' + args.ssh_thumb
    95  
    96  # generate coreos discovery url
    97  if not args.no_discovery_url:
    98    print 'Generating new CoreOS discovery URL for the cluster'
    99    subprocess.call(['bash', '-c', 'python ./create-azure-user-data $(curl -s https://discovery.etcd.io/new)'])
   100  
   101  # Setup custom data
   102  if args.custom_data:
   103      with open(args.custom_data, 'r') as f:
   104        if not os.path.exists(args.custom_data):
   105          print "Couldn't find the user-data file. Did you remember to run `create-azure-user-data`?"
   106          sys.exit(1)
   107        cloud_init = f.read()
   108      f.closed
   109  else:
   110      if args.discovery_service_url:
   111          cloud_init = cloud_init_template.format(args.discovery_service_url)
   112      else:
   113          response = urllib2.urlopen('https://discovery.etcd.io/new')
   114          discovery_url = response.read()
   115          cloud_init = cloud_init_template.format(discovery_url)
   116  
   117  SERVICE_CERT_FORMAT = 'pfx'
   118  
   119  with open(args.ssh_cert) as f:
   120      service_cert_file_data = base64.b64encode(f.read())
   121  f.closed
   122  
   123  def wait_for_async(request_id, timeout):
   124      count = 0
   125      result = sms.get_operation_status(request_id)
   126      while result.status == 'InProgress':
   127          count = count + 1
   128          if count > timeout:
   129              print('Timed out waiting for async operation to complete.')
   130              return
   131          time.sleep(5)
   132          print('.'),
   133          sys.stdout.flush()
   134          result = sms.get_operation_status(request_id)
   135          if result.error:
   136              print(result.error.code)
   137              print(vars(result.error))
   138      print result.status + ' in ' + str(count*5) + 's'
   139  
   140  def linux_config(hostname, args):
   141      pk = PublicKey(args.ssh_thumb,
   142                     u'/home/core/.ssh/authorized_keys')
   143      system = LinuxConfigurationSet(hostname, 'core', None, True,
   144                custom_data=cloud_init)
   145      system.ssh.public_keys.public_keys.append(pk)
   146      system.disable_ssh_password_authentication = True
   147      return system
   148  
   149  def lb_endpoint_config(name, port, probe=False, idle_timeout_minutes=4):
   150      endpoint = ConfigurationSetInputEndpoint(name, 'tcp', port, port, name, False, idle_timeout_minutes)
   151      if probe:
   152        endpoint.load_balancer_probe = probe
   153      return endpoint
   154  
   155  def load_balancer_probe(path, port, protocol):
   156      load_balancer_probe = LoadBalancerProbe()
   157      load_balancer_probe.path = path
   158      load_balancer_probe.port = port
   159      load_balancer_probe.protocol = protocol
   160      return load_balancer_probe
   161  
   162  def network_config(subnet_name=None, port='59913', public_ip_name=None):
   163      network = ConfigurationSet()
   164      network.configuration_set_type = 'NetworkConfiguration'
   165      network.input_endpoints.input_endpoints.append(
   166          ConfigurationSetInputEndpoint('ssh', 'tcp', port, '22'))
   167      if subnet_name:
   168          network.subnet_names.append(subnet_name)
   169      if public_ip_name:
   170          ip = PublicIP(name=public_ip_name)
   171          ip.idle_timeout_in_minutes = 20
   172          network.public_ips.public_ips.append(ip)
   173      if args.deis:
   174          # create web endpoint with probe checking /health-check
   175          network.input_endpoints.input_endpoints.append(lb_endpoint_config('web', '80', load_balancer_probe('/health-check', '80', 'http')))
   176          if not args.nohttps:
   177            network.input_endpoints.input_endpoints.append(lb_endpoint_config('https', '443', load_balancer_probe(None, '443', 'tcp')))
   178          # create builder endpoint TCP probe check and extended timeout
   179          network.input_endpoints.input_endpoints.append(lb_endpoint_config('builder', '2222', load_balancer_probe(None, '2222', 'tcp',), 20))
   180      return network
   181  
   182  def data_hd(target_container_url, target_blob_name, target_lun, target_disk_size_in_gb):
   183      media_link = target_container_url + target_blob_name
   184      data_hd = DataVirtualHardDisk()
   185      data_hd.disk_label = target_blob_name
   186      data_hd.logical_disk_size_in_gb = target_disk_size_in_gb
   187      data_hd.lun = target_lun
   188      data_hd.media_link = media_link
   189      return data_hd
   190  
   191  sms = ServiceManagementService(args.subscription, args.azure_cert)
   192  
   193  #Create the cloud service
   194  try:
   195    print 'Creating the hosted service...',
   196    sys.stdout.flush()
   197    if args.affinity_group:
   198      sms.create_hosted_service(
   199          args.cloud_service_name, label=args.cloud_service_name, affinity_group=args.affinity_group)
   200    else:
   201      sms.create_hosted_service(
   202          args.cloud_service_name, label=args.cloud_service_name, location=args.location)
   203    print('Successfully created hosted service ' + args.cloud_service_name)
   204    sys.stdout.flush()
   205    time.sleep(2)
   206  except WindowsAzureConflictError:
   207    print "Hosted service {} already exists. Delete it or try again with a different name.".format(args.cloud_service_name)
   208    sys.exit(1)
   209  
   210  #upload ssh cert to cloud-service
   211  print 'Uploading SSH certificate...',
   212  sys.stdout.flush()
   213  result = sms.add_service_certificate(args.cloud_service_name,
   214                                       service_cert_file_data, SERVICE_CERT_FORMAT, '')
   215  wait_for_async(result.request_id, 15)
   216  
   217  def get_vm_name(args, i):
   218      return args.cloud_service_name + '-' + args.vm_name_prefix + '-' + str(i)
   219  
   220  vms =[]
   221  
   222  #Create the VMs
   223  for i in range(args.num_nodes):
   224      ssh_port = args.ssh +i
   225      vm_name = get_vm_name(args, i)
   226      if args.pip:
   227          pip_name = vm_name
   228      else:
   229          pip_name = None
   230      media_link = args.blob_container_url + vm_name
   231      os_hd = OSVirtualHardDisk(media_link=media_link,
   232                              source_image_name=args.coreos_image)
   233      system = linux_config(vm_name, args)
   234      network = network_config(subnet_name=args.subnet_names, port=ssh_port, public_ip_name=pip_name)
   235      #specifiy the data disk, important to start at lun = 0
   236      if args.data_disk:
   237          data_disk = data_hd(args.blob_container_url, vm_name + '-data.vhd', 0, 100)
   238          data_disks = DataVirtualHardDisks()
   239          data_disks.data_virtual_hard_disks.append(data_disk)
   240      else:
   241          data_disks = None
   242  
   243      try:
   244        if i == 0:
   245            result = sms.create_virtual_machine_deployment(
   246                        args.cloud_service_name, deployment_name=args.cloud_service_name,
   247                        deployment_slot='production', label=vm_name,
   248                        role_name=vm_name, system_config=system, os_virtual_hard_disk=os_hd, virtual_network_name=args.virtual_network_name,
   249                        role_size=args.vm_size, network_config=network, data_virtual_hard_disks=data_disks)
   250        else:
   251            result = sms.add_role(
   252                        args.cloud_service_name, deployment_name=args.cloud_service_name,
   253                        role_name=vm_name,
   254                        system_config=system, os_virtual_hard_disk=os_hd,
   255                        role_size=args.vm_size, network_config=network, data_virtual_hard_disks=data_disks)
   256      except WindowsAzureError as e:
   257        if "Forbidden" in str(e):
   258          print "Unable to use this CoreOS image. This usually means a newer image has been published."
   259          print "See https://coreos.com/docs/running-coreos/cloud-providers/azure/ for the latest stable image,"
   260          print "and supply it to this script with --coreos-image. If it works, please open a pull request to update this script."
   261          sys.exit(1)
   262        else:
   263          pass
   264  
   265      print 'Creating VM ' + vm_name + '...',
   266      sys.stdout.flush()
   267      wait_for_async(result.request_id, 30)
   268      vms.append({'name':vm_name,
   269                  'host':args.cloud_service_name + '.cloudapp.net',
   270                  'port':ssh_port,
   271                  'user':'core',
   272                  'identity':args.ssh_cert.replace('.cer','.key')})
   273  
   274  #get the ip addresses
   275  def get_ips(service_name, deployment_name):
   276    try:
   277      result = sms.get_deployment_by_name(service_name, deployment_name)
   278      for instance in result.role_instance_list:
   279          ips.append(instance.public_ips[0].address)
   280      return ips
   281    except WindowsAzureMissingResourceError:
   282      # some helpful user error info
   283      print 'Could not find cloud service ip address. This is likely due to the fact that the'
   284      print 'cloud service failed to start. Check that the storage account for the'
   285      print '--blob-container-url argument exists, ends with a \'/\' and that there is a container named \'vhds\' '
   286      print 'within it. You may need to delete the cloud service \'' + service_name + '\' if you try'
   287      print 'this script again'
   288      sys.exit(1)
   289  #print dns config
   290  if args.pip:
   291      ips = []
   292      ips = get_ips(args.cloud_service_name, args.cloud_service_name)
   293      print ''
   294      print '-------'
   295      print "You'll need to configure DNS records for a domain you wish to use with your Deis cluster."
   296      print 'For convenience, the public IP addresses are printed below, along with sane DNS timeouts.'
   297      print ''
   298      for ip in ips:
   299          print '@ 10800 IN A ' + ip
   300      print '* 10800 IN CNAME @'
   301      print '-------'
   302      print 'For more information, see: http://docs.deis.io/en/latest/managing_deis/configure-dns/'
   303      print ''
   304  
   305  #print ~/.ssh/config
   306  print ''
   307  print '-------'
   308  print "Instances on Azure don't use typical SSH ports. It is recommended to configure ~/.ssh/config"
   309  print 'so the instances can easily be referenced when logging in via SSH. For convenience, the config'
   310  print 'directives for your instances are below:'
   311  print ''
   312  for vm in vms:
   313      print 'Host ' + vm['name']
   314      print '    HostName ' + vm['host']
   315      print '    Port ' + str(vm['port'])
   316      print '    User ' + vm['user']
   317      print '    IdentityFile ' + vm['identity']
   318  print '-------'
   319  print ''