github.com/technosophos/deis@v1.7.1-0.20150915173815-f9005256004b/contrib/azure/azure-coreos-cluster (about) 1 #!/usr/bin/env python 2 3 from azure import * 4 from azure.servicemanagement import * 5 import argparse 6 import urllib2 7 import time 8 import base64 9 import os 10 import subprocess 11 12 parser = argparse.ArgumentParser(description='Create a CoreOS cluster on Microsoft Azure.') 13 parser.add_argument('--version', action='version', version='azure-coreos-cluster 0.1') 14 parser.add_argument('cloud_service_name', 15 help='cloud service name') 16 parser.add_argument('--ssh-cert', 17 help='certificate file with public key for ssh, in .cer format') 18 parser.add_argument('--ssh-thumb', 19 help='thumbprint of ssh cert') 20 parser.add_argument('--subscription', required=True, 21 help='required Azure subscription id') 22 parser.add_argument('--azure-cert', required=True, 23 help='required path to Azure cert pem file') 24 parser.add_argument('--blob-container-url', required=True, 25 help='required url to blob container where vm disk images will be created, including /, ex: https://patcoreos.blob.core.windows.net/vhds/') 26 parser.add_argument('--vm-size', default='Small', 27 help='optional, VM size [Small]') 28 parser.add_argument('--vm-name-prefix', default='coreos', 29 help='optional, VM name prefix [coreos]') 30 parser.add_argument('--availability-set', default='coreos-as', 31 help='optional, name of availability set for cluster [coreos-as]') 32 parser.add_argument('--location', default='West US', 33 help='optional - overriden by affinity-group, [West US]') 34 parser.add_argument('--affinity-group', default='', 35 help='optional, overrides location if specified') 36 parser.add_argument('--ssh', default=22001, type=int, 37 help='optional, starts with 22001 and +1 for each machine in cluster') 38 parser.add_argument('--coreos-image', default='2b171e93f07c4903bcad35bda10acf22__CoreOS-Stable-766.3.0', 39 help='optional, [2b171e93f07c4903bcad35bda10acf22__CoreOS-Stable-766.3.0]') 40 parser.add_argument('--num-nodes', default=3, type=int, 41 help='optional, number of nodes to create (or add), defaults to 3') 42 parser.add_argument('--virtual-network-name', 43 help='optional, name of an existing virtual network to which we will add the VMs') 44 parser.add_argument('--subnet-names', 45 help='optional, subnet name to which the VMs will belong') 46 parser.add_argument('--custom-data', 47 help='optional, path to your own cloud-init file') 48 parser.add_argument('--discovery-service-url', 49 help='optional, url for an existing cluster discovery service. Else we will generate one.') 50 parser.add_argument('--pip', action='store_true', 51 help='optional, assigns public instance ip addresses to each VM') 52 parser.add_argument('--deis', action='store_true', 53 help='optional, automatically opens http and controller endpoints') 54 parser.add_argument('--data-disk', action='store_true', 55 help='optional, attaches a data disk to each VM') 56 parser.add_argument('--nohttps', action='store_true', 57 help='optional, disables the creation of the https load balanced endpoint') 58 parser.add_argument('--no-discovery-url', action='store_true', 59 help='optional, disables the creation of a new coreos discovery url') 60 61 cloud_init_template = """#cloud-config 62 63 coreos: 64 etcd2: 65 # generate a new token for each unique cluster from https://discovery.etcd.io/new 66 discovery: {0} 67 # multi-region and multi-cloud deployments need to use $public_ipv4 68 advertise-client-urls: http://$private_ipv4:2379 69 initial-advertise-peer-urls: http://$private_ipv4:2380 70 # listen on both the official ports and the legacy ports 71 # legacy ports can be omitted if your application doesn't depend on them 72 listen-client-urls: http://0.0.0.0:2379,http://0.0.0.0:4001 73 listen-peer-urls: http://$private_ipv4:2380,http://$private_ipv4:7001 74 data-dir: /var/lib/etcd2 75 units: 76 - name: etcd2.service 77 command: start 78 - name: fleet.service 79 command: start 80 """ 81 82 args = parser.parse_args() 83 84 # Create SSH cert if it's not given 85 if not args.ssh_cert and not args.ssh_thumb: 86 print 'SSH arguments not given, generating certificate' 87 with open(os.devnull, 'w') as shutup: 88 subprocess.call('openssl req -x509 -nodes -days 365 -newkey rsa:2048 -config cert.conf -keyout ssh-cert.key -out ssh-cert.pem', shell=True, stdout=shutup, stderr=shutup) 89 subprocess.call('chmod 600 ssh-cert.key', shell=True, stdout=shutup, stderr=shutup) 90 subprocess.call('openssl x509 -outform der -in ssh-cert.pem -out ssh-cert.cer', shell=True, stdout=shutup, stderr=shutup) 91 thumbprint = subprocess.check_output('openssl x509 -in ssh-cert.pem -sha1 -noout -fingerprint | sed s/://g', shell=True) 92 args.ssh_thumb = thumbprint.split('=')[1].replace('\n', '') 93 args.ssh_cert = './ssh-cert.cer' 94 print 'Generated SSH certificate with thumbprint ' + args.ssh_thumb 95 96 # generate coreos discovery url 97 if not args.no_discovery_url: 98 print 'Generating new CoreOS discovery URL for the cluster' 99 subprocess.call(['bash', '-c', 'python ./create-azure-user-data $(curl -s https://discovery.etcd.io/new)']) 100 101 # Setup custom data 102 if args.custom_data: 103 with open(args.custom_data, 'r') as f: 104 if not os.path.exists(args.custom_data): 105 print "Couldn't find the user-data file. Did you remember to run `create-azure-user-data`?" 106 sys.exit(1) 107 cloud_init = f.read() 108 f.closed 109 else: 110 if args.discovery_service_url: 111 cloud_init = cloud_init_template.format(args.discovery_service_url) 112 else: 113 response = urllib2.urlopen('https://discovery.etcd.io/new') 114 discovery_url = response.read() 115 cloud_init = cloud_init_template.format(discovery_url) 116 117 SERVICE_CERT_FORMAT = 'pfx' 118 119 with open(args.ssh_cert) as f: 120 service_cert_file_data = base64.b64encode(f.read()) 121 f.closed 122 123 def wait_for_async(request_id, timeout): 124 count = 0 125 result = sms.get_operation_status(request_id) 126 while result.status == 'InProgress': 127 count = count + 1 128 if count > timeout: 129 print('Timed out waiting for async operation to complete.') 130 return 131 time.sleep(5) 132 print('.'), 133 sys.stdout.flush() 134 result = sms.get_operation_status(request_id) 135 if result.error: 136 print(result.error.code) 137 print(vars(result.error)) 138 print result.status + ' in ' + str(count*5) + 's' 139 140 def linux_config(hostname, args): 141 pk = PublicKey(args.ssh_thumb, 142 u'/home/core/.ssh/authorized_keys') 143 system = LinuxConfigurationSet(hostname, 'core', None, True, 144 custom_data=cloud_init) 145 system.ssh.public_keys.public_keys.append(pk) 146 system.disable_ssh_password_authentication = True 147 return system 148 149 def lb_endpoint_config(name, port, probe=False, idle_timeout_minutes=4): 150 endpoint = ConfigurationSetInputEndpoint(name, 'tcp', port, port, name, False, idle_timeout_minutes) 151 if probe: 152 endpoint.load_balancer_probe = probe 153 return endpoint 154 155 def load_balancer_probe(path, port, protocol): 156 load_balancer_probe = LoadBalancerProbe() 157 load_balancer_probe.path = path 158 load_balancer_probe.port = port 159 load_balancer_probe.protocol = protocol 160 return load_balancer_probe 161 162 def network_config(subnet_name=None, port='59913', public_ip_name=None): 163 network = ConfigurationSet() 164 network.configuration_set_type = 'NetworkConfiguration' 165 network.input_endpoints.input_endpoints.append( 166 ConfigurationSetInputEndpoint('ssh', 'tcp', port, '22')) 167 if subnet_name: 168 network.subnet_names.append(subnet_name) 169 if public_ip_name: 170 ip = PublicIP(name=public_ip_name) 171 ip.idle_timeout_in_minutes = 20 172 network.public_ips.public_ips.append(ip) 173 if args.deis: 174 # create web endpoint with probe checking /health-check 175 network.input_endpoints.input_endpoints.append(lb_endpoint_config('web', '80', load_balancer_probe('/health-check', '80', 'http'))) 176 if not args.nohttps: 177 network.input_endpoints.input_endpoints.append(lb_endpoint_config('https', '443', load_balancer_probe(None, '443', 'tcp'))) 178 # create builder endpoint TCP probe check and extended timeout 179 network.input_endpoints.input_endpoints.append(lb_endpoint_config('builder', '2222', load_balancer_probe(None, '2222', 'tcp',), 20)) 180 return network 181 182 def data_hd(target_container_url, target_blob_name, target_lun, target_disk_size_in_gb): 183 media_link = target_container_url + target_blob_name 184 data_hd = DataVirtualHardDisk() 185 data_hd.disk_label = target_blob_name 186 data_hd.logical_disk_size_in_gb = target_disk_size_in_gb 187 data_hd.lun = target_lun 188 data_hd.media_link = media_link 189 return data_hd 190 191 sms = ServiceManagementService(args.subscription, args.azure_cert) 192 193 #Create the cloud service 194 try: 195 print 'Creating the hosted service...', 196 sys.stdout.flush() 197 if args.affinity_group: 198 sms.create_hosted_service( 199 args.cloud_service_name, label=args.cloud_service_name, affinity_group=args.affinity_group) 200 else: 201 sms.create_hosted_service( 202 args.cloud_service_name, label=args.cloud_service_name, location=args.location) 203 print('Successfully created hosted service ' + args.cloud_service_name) 204 sys.stdout.flush() 205 time.sleep(2) 206 except WindowsAzureConflictError: 207 print "Hosted service {} already exists. Delete it or try again with a different name.".format(args.cloud_service_name) 208 sys.exit(1) 209 210 #upload ssh cert to cloud-service 211 print 'Uploading SSH certificate...', 212 sys.stdout.flush() 213 result = sms.add_service_certificate(args.cloud_service_name, 214 service_cert_file_data, SERVICE_CERT_FORMAT, '') 215 wait_for_async(result.request_id, 15) 216 217 def get_vm_name(args, i): 218 return args.cloud_service_name + '-' + args.vm_name_prefix + '-' + str(i) 219 220 vms =[] 221 222 #Create the VMs 223 for i in range(args.num_nodes): 224 ssh_port = args.ssh +i 225 vm_name = get_vm_name(args, i) 226 if args.pip: 227 pip_name = vm_name 228 else: 229 pip_name = None 230 media_link = args.blob_container_url + vm_name 231 os_hd = OSVirtualHardDisk(media_link=media_link, 232 source_image_name=args.coreos_image) 233 system = linux_config(vm_name, args) 234 network = network_config(subnet_name=args.subnet_names, port=ssh_port, public_ip_name=pip_name) 235 #specifiy the data disk, important to start at lun = 0 236 if args.data_disk: 237 data_disk = data_hd(args.blob_container_url, vm_name + '-data.vhd', 0, 100) 238 data_disks = DataVirtualHardDisks() 239 data_disks.data_virtual_hard_disks.append(data_disk) 240 else: 241 data_disks = None 242 243 try: 244 if i == 0: 245 result = sms.create_virtual_machine_deployment( 246 args.cloud_service_name, deployment_name=args.cloud_service_name, 247 deployment_slot='production', label=vm_name, 248 role_name=vm_name, system_config=system, os_virtual_hard_disk=os_hd, virtual_network_name=args.virtual_network_name, 249 role_size=args.vm_size, network_config=network, data_virtual_hard_disks=data_disks) 250 else: 251 result = sms.add_role( 252 args.cloud_service_name, deployment_name=args.cloud_service_name, 253 role_name=vm_name, 254 system_config=system, os_virtual_hard_disk=os_hd, 255 role_size=args.vm_size, network_config=network, data_virtual_hard_disks=data_disks) 256 except WindowsAzureError as e: 257 if "Forbidden" in str(e): 258 print "Unable to use this CoreOS image. This usually means a newer image has been published." 259 print "See https://coreos.com/docs/running-coreos/cloud-providers/azure/ for the latest stable image," 260 print "and supply it to this script with --coreos-image. If it works, please open a pull request to update this script." 261 sys.exit(1) 262 else: 263 pass 264 265 print 'Creating VM ' + vm_name + '...', 266 sys.stdout.flush() 267 wait_for_async(result.request_id, 30) 268 vms.append({'name':vm_name, 269 'host':args.cloud_service_name + '.cloudapp.net', 270 'port':ssh_port, 271 'user':'core', 272 'identity':args.ssh_cert.replace('.cer','.key')}) 273 274 #get the ip addresses 275 def get_ips(service_name, deployment_name): 276 try: 277 result = sms.get_deployment_by_name(service_name, deployment_name) 278 for instance in result.role_instance_list: 279 ips.append(instance.public_ips[0].address) 280 return ips 281 except WindowsAzureMissingResourceError: 282 # some helpful user error info 283 print 'Could not find cloud service ip address. This is likely due to the fact that the' 284 print 'cloud service failed to start. Check that the storage account for the' 285 print '--blob-container-url argument exists, ends with a \'/\' and that there is a container named \'vhds\' ' 286 print 'within it. You may need to delete the cloud service \'' + service_name + '\' if you try' 287 print 'this script again' 288 sys.exit(1) 289 #print dns config 290 if args.pip: 291 ips = [] 292 ips = get_ips(args.cloud_service_name, args.cloud_service_name) 293 print '' 294 print '-------' 295 print "You'll need to configure DNS records for a domain you wish to use with your Deis cluster." 296 print 'For convenience, the public IP addresses are printed below, along with sane DNS timeouts.' 297 print '' 298 for ip in ips: 299 print '@ 10800 IN A ' + ip 300 print '* 10800 IN CNAME @' 301 print '-------' 302 print 'For more information, see: http://docs.deis.io/en/latest/managing_deis/configure-dns/' 303 print '' 304 305 #print ~/.ssh/config 306 print '' 307 print '-------' 308 print "Instances on Azure don't use typical SSH ports. It is recommended to configure ~/.ssh/config" 309 print 'so the instances can easily be referenced when logging in via SSH. For convenience, the config' 310 print 'directives for your instances are below:' 311 print '' 312 for vm in vms: 313 print 'Host ' + vm['name'] 314 print ' HostName ' + vm['host'] 315 print ' Port ' + str(vm['port']) 316 print ' User ' + vm['user'] 317 print ' IdentityFile ' + vm['identity'] 318 print '-------' 319 print ''