github.com/spg/deis@v1.7.3/contrib/azure/azure-coreos-cluster (about) 1 #!/usr/bin/env python 2 3 from azure import * 4 from azure.servicemanagement import * 5 import argparse 6 import urllib2 7 import time 8 import base64 9 import os 10 import subprocess 11 12 parser = argparse.ArgumentParser(description='Create a CoreOS cluster on Microsoft Azure.') 13 parser.add_argument('--version', action='version', version='azure-coreos-cluster 0.1') 14 parser.add_argument('cloud_service_name', 15 help='cloud service name') 16 parser.add_argument('--ssh-cert', 17 help='certificate file with public key for ssh, in .cer format') 18 parser.add_argument('--ssh-thumb', 19 help='thumbprint of ssh cert') 20 parser.add_argument('--subscription', required=True, 21 help='required Azure subscription id') 22 parser.add_argument('--azure-cert', required=True, 23 help='required path to Azure cert pem file') 24 parser.add_argument('--blob-container-url', required=True, 25 help='required url to blob container where vm disk images will be created, including /, ex: https://patcoreos.blob.core.windows.net/vhds/') 26 parser.add_argument('--vm-size', default='Small', 27 help='optional, VM size [Small]') 28 parser.add_argument('--vm-name-prefix', default='coreos', 29 help='optional, VM name prefix [coreos]') 30 parser.add_argument('--availability-set', default='coreos-as', 31 help='optional, name of availability set for cluster [coreos-as]') 32 parser.add_argument('--location', default='West US', 33 help='optional - overriden by affinity-group, [West US]') 34 parser.add_argument('--affinity-group', default='', 35 help='optional, overrides location if specified') 36 parser.add_argument('--ssh', default=22001, type=int, 37 help='optional, starts with 22001 and +1 for each machine in cluster') 38 parser.add_argument('--coreos-image', default='2b171e93f07c4903bcad35bda10acf22__CoreOS-Stable-647.2.0', 39 help='optional, [2b171e93f07c4903bcad35bda10acf22__CoreOS-Stable-647.2.0]') 40 parser.add_argument('--num-nodes', default=3, type=int, 41 help='optional, number of nodes to create (or add), defaults to 3') 42 parser.add_argument('--virtual-network-name', 43 help='optional, name of an existing virtual network to which we will add the VMs') 44 parser.add_argument('--subnet-names', 45 help='optional, subnet name to which the VMs will belong') 46 parser.add_argument('--custom-data', 47 help='optional, path to your own cloud-init file') 48 parser.add_argument('--discovery-service-url', 49 help='optional, url for an existing cluster discovery service. Else we will generate one.') 50 parser.add_argument('--pip', action='store_true', 51 help='optional, assigns public instance ip addresses to each VM') 52 parser.add_argument('--deis', action='store_true', 53 help='optional, automatically opens http and controller endpoints') 54 parser.add_argument('--data-disk', action='store_true', 55 help='optional, attaches a data disk to each VM') 56 parser.add_argument('--nohttps', action='store_true', 57 help='optional, disables the creation of the https load balanced endpoint') 58 parser.add_argument('--no-discovery-url', action='store_true', 59 help='optional, disables the creation of a new coreos discovery url') 60 61 cloud_init_template = """#cloud-config 62 63 coreos: 64 etcd: 65 # generate a new token for each unique cluster from https://discovery.etcd.io/new 66 discovery: {0} 67 # deployments across multiple cloud services will need to use $public_ipv4 68 addr: $private_ipv4:4001 69 peer-addr: $private_ipv4:7001 70 units: 71 - name: etcd.service 72 command: start 73 - name: fleet.service 74 command: start 75 """ 76 77 args = parser.parse_args() 78 79 # Create SSH cert if it's not given 80 if not args.ssh_cert and not args.ssh_thumb: 81 print 'SSH arguments not given, generating certificate' 82 with open(os.devnull, 'w') as shutup: 83 subprocess.call('openssl req -x509 -nodes -days 365 -newkey rsa:2048 -config cert.conf -keyout ssh-cert.key -out ssh-cert.pem', shell=True, stdout=shutup, stderr=shutup) 84 subprocess.call('chmod 600 ssh-cert.key', shell=True, stdout=shutup, stderr=shutup) 85 subprocess.call('openssl x509 -outform der -in ssh-cert.pem -out ssh-cert.cer', shell=True, stdout=shutup, stderr=shutup) 86 thumbprint = subprocess.check_output('openssl x509 -in ssh-cert.pem -sha1 -noout -fingerprint | sed s/://g', shell=True) 87 args.ssh_thumb = thumbprint.split('=')[1].replace('\n', '') 88 args.ssh_cert = './ssh-cert.cer' 89 print 'Generated SSH certificate with thumbprint ' + args.ssh_thumb 90 91 # generate coreos discovery url 92 if not args.no_discovery_url: 93 print 'Generating new CoreOS discovery URL for the cluster' 94 subprocess.call(['bash', '-c', 'python ./create-azure-user-data $(curl -s https://discovery.etcd.io/new)']) 95 96 # Setup custom data 97 if args.custom_data: 98 with open(args.custom_data, 'r') as f: 99 if not os.path.exists(args.custom_data): 100 print "Couldn't find the user-data file. Did you remember to run `create-azure-user-data`?" 101 sys.exit(1) 102 cloud_init = f.read() 103 f.closed 104 else: 105 if args.discovery_service_url: 106 cloud_init = cloud_init_template.format(args.discovery_service_url) 107 else: 108 response = urllib2.urlopen('https://discovery.etcd.io/new') 109 discovery_url = response.read() 110 cloud_init = cloud_init_template.format(discovery_url) 111 112 SERVICE_CERT_FORMAT = 'pfx' 113 114 with open(args.ssh_cert) as f: 115 service_cert_file_data = base64.b64encode(f.read()) 116 f.closed 117 118 def wait_for_async(request_id, timeout): 119 count = 0 120 result = sms.get_operation_status(request_id) 121 while result.status == 'InProgress': 122 count = count + 1 123 if count > timeout: 124 print('Timed out waiting for async operation to complete.') 125 return 126 time.sleep(5) 127 print('.'), 128 sys.stdout.flush() 129 result = sms.get_operation_status(request_id) 130 if result.error: 131 print(result.error.code) 132 print(vars(result.error)) 133 print result.status + ' in ' + str(count*5) + 's' 134 135 def linux_config(hostname, args): 136 pk = PublicKey(args.ssh_thumb, 137 u'/home/core/.ssh/authorized_keys') 138 system = LinuxConfigurationSet(hostname, 'core', None, True, 139 custom_data=cloud_init) 140 system.ssh.public_keys.public_keys.append(pk) 141 system.disable_ssh_password_authentication = True 142 return system 143 144 def lb_endpoint_config(name, port, probe=False, idle_timeout_minutes=4): 145 endpoint = ConfigurationSetInputEndpoint(name, 'tcp', port, port, name, False, idle_timeout_minutes) 146 if probe: 147 endpoint.load_balancer_probe = probe 148 return endpoint 149 150 def load_balancer_probe(path, port, protocol): 151 load_balancer_probe = LoadBalancerProbe() 152 load_balancer_probe.path = path 153 load_balancer_probe.port = port 154 load_balancer_probe.protocol = protocol 155 return load_balancer_probe 156 157 def network_config(subnet_name=None, port='59913', public_ip_name=None): 158 network = ConfigurationSet() 159 network.configuration_set_type = 'NetworkConfiguration' 160 network.input_endpoints.input_endpoints.append( 161 ConfigurationSetInputEndpoint('ssh', 'tcp', port, '22')) 162 if subnet_name: 163 network.subnet_names.append(subnet_name) 164 if public_ip_name: 165 ip = PublicIP(name=public_ip_name) 166 ip.idle_timeout_in_minutes = 20 167 network.public_ips.public_ips.append(ip) 168 if args.deis: 169 # create web endpoint with probe checking /health-check 170 network.input_endpoints.input_endpoints.append(lb_endpoint_config('web', '80', load_balancer_probe('/health-check', '80', 'http'))) 171 if not args.nohttps: 172 network.input_endpoints.input_endpoints.append(lb_endpoint_config('https', '443', load_balancer_probe(None, '443', 'tcp'))) 173 # create builder endpoint TCP probe check and extended timeout 174 network.input_endpoints.input_endpoints.append(lb_endpoint_config('builder', '2222', load_balancer_probe(None, '2222', 'tcp',), 20)) 175 return network 176 177 def data_hd(target_container_url, target_blob_name, target_lun, target_disk_size_in_gb): 178 media_link = target_container_url + target_blob_name 179 data_hd = DataVirtualHardDisk() 180 data_hd.disk_label = target_blob_name 181 data_hd.logical_disk_size_in_gb = target_disk_size_in_gb 182 data_hd.lun = target_lun 183 data_hd.media_link = media_link 184 return data_hd 185 186 sms = ServiceManagementService(args.subscription, args.azure_cert) 187 188 #Create the cloud service 189 try: 190 print 'Creating the hosted service...', 191 sys.stdout.flush() 192 if args.affinity_group: 193 sms.create_hosted_service( 194 args.cloud_service_name, label=args.cloud_service_name, affinity_group=args.affinity_group) 195 else: 196 sms.create_hosted_service( 197 args.cloud_service_name, label=args.cloud_service_name, location=args.location) 198 print('Successfully created hosted service ' + args.cloud_service_name) 199 sys.stdout.flush() 200 time.sleep(2) 201 except WindowsAzureConflictError: 202 print "Hosted service {} already exists. Delete it or try again with a different name.".format(args.cloud_service_name) 203 sys.exit(1) 204 205 #upload ssh cert to cloud-service 206 print 'Uploading SSH certificate...', 207 sys.stdout.flush() 208 result = sms.add_service_certificate(args.cloud_service_name, 209 service_cert_file_data, SERVICE_CERT_FORMAT, '') 210 wait_for_async(result.request_id, 15) 211 212 def get_vm_name(args, i): 213 return args.cloud_service_name + '-' + args.vm_name_prefix + '-' + str(i) 214 215 vms =[] 216 217 #Create the VMs 218 for i in range(args.num_nodes): 219 ssh_port = args.ssh +i 220 vm_name = get_vm_name(args, i) 221 if args.pip: 222 pip_name = vm_name 223 else: 224 pip_name = None 225 media_link = args.blob_container_url + vm_name 226 os_hd = OSVirtualHardDisk(media_link=media_link, 227 source_image_name=args.coreos_image) 228 system = linux_config(vm_name, args) 229 network = network_config(subnet_name=args.subnet_names, port=ssh_port, public_ip_name=pip_name) 230 #specifiy the data disk, important to start at lun = 0 231 if args.data_disk: 232 data_disk = data_hd(args.blob_container_url, vm_name + '-data.vhd', 0, 100) 233 data_disks = DataVirtualHardDisks() 234 data_disks.data_virtual_hard_disks.append(data_disk) 235 else: 236 data_disks = None 237 238 try: 239 if i == 0: 240 result = sms.create_virtual_machine_deployment( 241 args.cloud_service_name, deployment_name=args.cloud_service_name, 242 deployment_slot='production', label=vm_name, 243 role_name=vm_name, system_config=system, os_virtual_hard_disk=os_hd, virtual_network_name=args.virtual_network_name, 244 role_size=args.vm_size, network_config=network, data_virtual_hard_disks=data_disks) 245 else: 246 result = sms.add_role( 247 args.cloud_service_name, deployment_name=args.cloud_service_name, 248 role_name=vm_name, 249 system_config=system, os_virtual_hard_disk=os_hd, 250 role_size=args.vm_size, network_config=network, data_virtual_hard_disks=data_disks) 251 except WindowsAzureError as e: 252 if "Forbidden" in str(e): 253 print "Unable to use this CoreOS image. This usually means a newer image has been published." 254 print "See https://coreos.com/docs/running-coreos/cloud-providers/azure/ for the latest stable image," 255 print "and supply it to this script with --coreos-image. If it works, please open a pull request to update this script." 256 sys.exit(1) 257 else: 258 pass 259 260 print 'Creating VM ' + vm_name + '...', 261 sys.stdout.flush() 262 wait_for_async(result.request_id, 30) 263 vms.append({'name':vm_name, 264 'host':args.cloud_service_name + '.cloudapp.net', 265 'port':ssh_port, 266 'user':'core', 267 'identity':args.ssh_cert.replace('.cer','.key')}) 268 269 #get the ip addresses 270 def get_ips(service_name, deployment_name): 271 try: 272 result = sms.get_deployment_by_name(service_name, deployment_name) 273 for instance in result.role_instance_list: 274 ips.append(instance.public_ips[0].address) 275 return ips 276 except WindowsAzureMissingResourceError: 277 # some helpful user error info 278 print 'Could not find cloud service ip address. This is likely due to the fact that the' 279 print 'cloud service failed to start. Check that the storage account for the' 280 print '--blob-container-url argument exists, ends with a \'/\' and that there is a container named \'vhds\' ' 281 print 'within it. You may need to delete the cloud service \'' + service_name + '\' if you try' 282 print 'this script again' 283 sys.exit(1) 284 #print dns config 285 if args.pip: 286 ips = [] 287 ips = get_ips(args.cloud_service_name, args.cloud_service_name) 288 print '' 289 print '-------' 290 print "You'll need to configure DNS records for a domain you wish to use with your Deis cluster." 291 print 'For convenience, the public IP addresses are printed below, along with sane DNS timeouts.' 292 print '' 293 for ip in ips: 294 print '@ 10800 IN A ' + ip 295 print '* 10800 IN CNAME @' 296 print '-------' 297 print 'For more information, see: http://docs.deis.io/en/latest/managing_deis/configure-dns/' 298 print '' 299 300 #print ~/.ssh/config 301 print '' 302 print '-------' 303 print "Instances on Azure don't use typical SSH ports. It is recommended to configure ~/.ssh/config" 304 print 'so the instances can easily be referenced when logging in via SSH. For convenience, the config' 305 print 'directives for your instances are below:' 306 print '' 307 for vm in vms: 308 print 'Host ' + vm['name'] 309 print ' HostName ' + vm['host'] 310 print ' Port ' + str(vm['port']) 311 print ' User ' + vm['user'] 312 print ' IdentityFile ' + vm['identity'] 313 print '-------' 314 print ''