github.com/billybanfield/evergreen@v0.0.0-20170525200750-eeee692790f7/cloud/providers/ec2/ec2.go (about)

     1  package ec2
     2  
     3  import (
     4  	"strings"
     5  	"time"
     6  
     7  	"github.com/evergreen-ci/evergreen"
     8  	"github.com/evergreen-ci/evergreen/cloud"
     9  	"github.com/evergreen-ci/evergreen/hostutil"
    10  	"github.com/evergreen-ci/evergreen/model/distro"
    11  	"github.com/evergreen-ci/evergreen/model/host"
    12  	"github.com/evergreen-ci/evergreen/util"
    13  	"github.com/goamz/goamz/aws"
    14  	"github.com/goamz/goamz/ec2"
    15  	"github.com/mitchellh/mapstructure"
    16  	"github.com/mongodb/grip"
    17  	"github.com/pkg/errors"
    18  )
    19  
// EC2Manager implements the CloudManager interface for Amazon EC2.
// Its credentials are populated by Configure; the methods in this file
// dereference them, so Configure must have succeeded before they are called.
type EC2Manager struct {
	// awsCredentials holds the AWS access/secret key pair set by Configure.
	awsCredentials *aws.Auth
}
    24  
// Valid values for EC2 instance states:
// pending | running | shutting-down | terminated | stopping | stopped
// see http://goo.gl/3OrCGn
//
// Note that no constant is declared in this block for the "stopping" state.
const (
	EC2StatusPending      = "pending"
	EC2StatusRunning      = "running"
	EC2StatusShuttingdown = "shutting-down"
	EC2StatusTerminated   = "terminated"
	EC2StatusStopped      = "stopped"
)
    35  
// EC2ProviderSettings holds the EC2-specific settings of a distro. It is
// decoded from a distro's provider settings with mapstructure and checked
// with Validate before an instance is spawned.
type EC2ProviderSettings struct {
	// AMI is the image to launch; required by Validate.
	AMI string `mapstructure:"ami" json:"ami,omitempty" bson:"ami,omitempty"`
	// InstanceType is the EC2 instance type to launch; required by Validate.
	InstanceType string `mapstructure:"instance_type" json:"instance_type,omitempty" bson:"instance_type,omitempty"`
	// KeyName is the name of the key pair to launch with; required by Validate.
	KeyName string `mapstructure:"key_name" json:"key_name,omitempty" bson:"key_name,omitempty"`
	// MountPoints are converted to block device mappings by makeBlockDeviceMappings.
	MountPoints []MountPoint `mapstructure:"mount_points" json:"mount_points,omitempty" bson:"mount_points,omitempty"`

	// SecurityGroup is the security group name in EC2 classic and the
	// security group ID in VPC (e.g. sg-xxxx); required by Validate.
	SecurityGroup string `mapstructure:"security_group" json:"security_group,omitempty" bson:"security_group,omitempty"`
	// SubnetId is only set in VPC (e.g. subnet-xxxx).
	SubnetId string `mapstructure:"subnet_id" json:"subnet_id,omitempty" bson:"subnet_id,omitempty"`
	// IsVpc is set to true if the security group is part of a VPC.
	IsVpc bool `mapstructure:"is_vpc" json:"is_vpc,omitempty" bson:"is_vpc,omitempty"`
}
    49  
    50  func (self *EC2ProviderSettings) Validate() error {
    51  	if self.AMI == "" {
    52  		return errors.New("AMI must not be blank")
    53  	}
    54  
    55  	if self.InstanceType == "" {
    56  		return errors.New("Instance size must not be blank")
    57  	}
    58  
    59  	if self.SecurityGroup == "" {
    60  		return errors.New("Security group must not be blank")
    61  	}
    62  
    63  	if self.KeyName == "" {
    64  		return errors.New("Key name must not be blank")
    65  	}
    66  
    67  	_, err := makeBlockDeviceMappings(self.MountPoints)
    68  
    69  	return errors.WithStack(err)
    70  }
    71  
    72  //Configure loads necessary credentials or other settings from the global config
    73  //object.
    74  func (cloudManager *EC2Manager) Configure(settings *evergreen.Settings) error {
    75  	if settings.Providers.AWS.Id == "" || settings.Providers.AWS.Secret == "" {
    76  		return errors.New("AWS ID/Secret must not be blank")
    77  	}
    78  
    79  	cloudManager.awsCredentials = &aws.Auth{
    80  		AccessKey: settings.Providers.AWS.Id,
    81  		SecretKey: settings.Providers.AWS.Secret,
    82  	}
    83  	return nil
    84  }
    85  
    86  func (cloudManager *EC2Manager) GetSSHOptions(h *host.Host, keyPath string) ([]string, error) {
    87  	return getEC2KeyOptions(h, keyPath)
    88  }
    89  
    90  func (cloudManager *EC2Manager) IsSSHReachable(host *host.Host, keyPath string) (bool, error) {
    91  	sshOpts, err := cloudManager.GetSSHOptions(host, keyPath)
    92  	if err != nil {
    93  		return false, err
    94  	}
    95  	return hostutil.CheckSSHResponse(host, sshOpts)
    96  }
    97  
    98  func (cloudManager *EC2Manager) GetInstanceStatus(host *host.Host) (cloud.CloudStatus, error) {
    99  	ec2Handle := getUSEast(*cloudManager.awsCredentials)
   100  	instanceInfo, err := getInstanceInfo(ec2Handle, host.Id)
   101  	if err != nil {
   102  		return cloud.StatusUnknown, err
   103  	}
   104  	return ec2StatusToEvergreenStatus(instanceInfo.State.Name), nil
   105  }
   106  
   107  func (cloudManager *EC2Manager) CanSpawn() (bool, error) {
   108  	return true, nil
   109  }
   110  
   111  func (*EC2Manager) GetSettings() cloud.ProviderSettings {
   112  	return &EC2ProviderSettings{}
   113  }
   114  
   115  func (cloudManager *EC2Manager) SpawnInstance(d *distro.Distro, hostOpts cloud.HostOptions) (*host.Host, error) {
   116  	if d.Provider != OnDemandProviderName {
   117  		return nil, errors.Errorf("Can't spawn instance of %v for distro %v: provider is %v", OnDemandProviderName, d.Id, d.Provider)
   118  	}
   119  	ec2Handle := getUSEast(*cloudManager.awsCredentials)
   120  
   121  	//Decode and validate the ProviderSettings into the ec2-specific ones.
   122  	ec2Settings := &EC2ProviderSettings{}
   123  	if err := mapstructure.Decode(d.ProviderSettings, ec2Settings); err != nil {
   124  		return nil, errors.Wrapf(err, "Error decoding params for distro %v: %v", d.Id)
   125  	}
   126  
   127  	if err := ec2Settings.Validate(); err != nil {
   128  		return nil, errors.Wrapf(err, "Invalid EC2 settings in distro %#v: and %#v", d, ec2Settings)
   129  	}
   130  
   131  	blockDevices, err := makeBlockDeviceMappings(ec2Settings.MountPoints)
   132  	if err != nil {
   133  		return nil, errors.WithStack(err)
   134  	}
   135  
   136  	instanceName := generateName(d.Id)
   137  
   138  	// proactively write all possible information pertaining
   139  	// to the host we want to create. this way, if we are unable
   140  	// to start it or record its instance id, we have a way of knowing
   141  	// something went wrong - and what
   142  	intentHost := cloud.NewIntent(*d, instanceName, OnDemandProviderName, hostOpts)
   143  	intentHost.InstanceType = ec2Settings.InstanceType
   144  
   145  	// record this 'intent host'
   146  	if err := intentHost.Insert(); err != nil {
   147  		err = errors.Wrapf(err, "could not insert intent host '%s'", intentHost.Id)
   148  		grip.Error(err)
   149  		return nil, err
   150  	}
   151  
   152  	grip.Debugf("Inserted intent host '%v' for distro '%v' to signal instance spawn intent",
   153  		instanceName, d.Id)
   154  
   155  	options := ec2.RunInstancesOptions{
   156  		MinCount:       1,
   157  		MaxCount:       1,
   158  		ImageId:        ec2Settings.AMI,
   159  		KeyName:        ec2Settings.KeyName,
   160  		InstanceType:   ec2Settings.InstanceType,
   161  		SecurityGroups: ec2.SecurityGroupNames(ec2Settings.SecurityGroup),
   162  		BlockDevices:   blockDevices,
   163  	}
   164  
   165  	// if it's a Vpc override the options to be the correct VPC settings.
   166  	if ec2Settings.IsVpc {
   167  		options.SecurityGroups = ec2.SecurityGroupIds(ec2Settings.SecurityGroup)
   168  		options.AssociatePublicIpAddress = true
   169  		options.SubnetId = ec2Settings.SubnetId
   170  	}
   171  
   172  	// start the instance - starting an instance does not mean you can connect
   173  	// to it immediately you have to use GetInstanceStatus to ensure that
   174  	// it's actually running
   175  	newHost, resp, err := startEC2Instance(ec2Handle, &options, intentHost)
   176  	grip.Debugf("id=%s, intentHost=%s, starResp=%+v, newHost=%+v",
   177  		instanceName, intentHost.Id, resp, newHost)
   178  
   179  	if err != nil {
   180  		err = errors.Wrapf(err, "could not start new instance for distro '%v.'"+
   181  			"Accompanying host record is '%v'", d.Id, intentHost.Id)
   182  		grip.Error(err)
   183  		return nil, err
   184  	}
   185  
   186  	instance := resp.Instances[0]
   187  	grip.Debugf("new instance: instance=%s, object=%s", instanceName, instance)
   188  
   189  	// create some tags based on user, hostname, owner, time, etc.
   190  	tags := makeTags(intentHost)
   191  
   192  	// attach the tags to this instance
   193  	err = errors.Wrapf(attachTags(ec2Handle, tags, instance.InstanceId),
   194  		"unable to attach tags for $s", instance.InstanceId)
   195  
   196  	grip.Error(err)
   197  	grip.DebugWhenf(err == nil, "attached tag name '%s' for '%s'",
   198  		instanceName, instance.InstanceId)
   199  
   200  	return newHost, nil
   201  }
   202  
   203  func (cloudManager *EC2Manager) IsUp(host *host.Host) (bool, error) {
   204  	ec2Handle := getUSEast(*cloudManager.awsCredentials)
   205  	instanceInfo, err := getInstanceInfo(ec2Handle, host.Id)
   206  	if err != nil {
   207  		return false, errors.WithStack(err)
   208  	}
   209  	if instanceInfo.State.Name == EC2StatusRunning {
   210  		return true, nil
   211  	}
   212  	return false, nil
   213  }
   214  
   215  func (cloudManager *EC2Manager) OnUp(host *host.Host) error {
   216  	//Not currently needed since we can set the tags immediately
   217  	return nil
   218  }
   219  
   220  func (cloudManager *EC2Manager) GetDNSName(host *host.Host) (string, error) {
   221  	ec2Handle := getUSEast(*cloudManager.awsCredentials)
   222  	instanceInfo, err := getInstanceInfo(ec2Handle, host.Id)
   223  	if err != nil {
   224  		return "", err
   225  	}
   226  	return instanceInfo.DNSName, nil
   227  }
   228  
   229  func (cloudManager *EC2Manager) TerminateInstance(host *host.Host) error {
   230  	// terminate the instance
   231  	if host.Status == evergreen.HostTerminated {
   232  		err := errors.Errorf("Can not terminate %v - already marked as "+
   233  			"terminated!", host.Id)
   234  		grip.Error(err)
   235  		return err
   236  	}
   237  
   238  	ec2Handle := getUSEast(*cloudManager.awsCredentials)
   239  	resp, err := ec2Handle.TerminateInstances([]string{host.Id})
   240  
   241  	if err != nil {
   242  		return err
   243  	}
   244  
   245  	for _, stateChange := range resp.StateChanges {
   246  		grip.Infoln("Terminated", stateChange.InstanceId)
   247  	}
   248  
   249  	// set the host status as terminated and update its termination time
   250  	return host.Terminate()
   251  }
   252  
   253  // determine how long until a payment is due for the host
   254  func (cloudManager *EC2Manager) TimeTilNextPayment(host *host.Host) time.Duration {
   255  	return timeTilNextEC2Payment(host)
   256  }
   257  
   258  func startEC2Instance(ec2Handle *ec2.EC2, options *ec2.RunInstancesOptions,
   259  	intentHost *host.Host) (*host.Host, *ec2.RunInstancesResp, error) {
   260  	// start the instance
   261  	resp, err := ec2Handle.RunInstances(options)
   262  
   263  	if err != nil {
   264  		// remove the intent host document
   265  		rmErr := intentHost.Remove()
   266  		if rmErr != nil {
   267  			grip.Errorf("Could not remove intent host '%s': %+v", intentHost.Id, rmErr)
   268  		}
   269  
   270  		err = errors.Wrap(err, "EC2 RunInstances API call returned error")
   271  		grip.Error(err)
   272  		return nil, nil, err
   273  
   274  	}
   275  
   276  	grip.Debugf("Spawned %d instance", len(resp.Instances))
   277  
   278  	// the instance should have been successfully spawned
   279  	instance := resp.Instances[0]
   280  	grip.Debugln("Started", instance.InstanceId)
   281  	grip.Debugln("Key name:", options.KeyName)
   282  
   283  	// find old intent host
   284  	host, err := host.FindOne(host.ById(intentHost.Id))
   285  	if host == nil {
   286  		err = errors.Errorf("can't locate record inserted for intended host '%s'",
   287  			intentHost.Id)
   288  		grip.Error(err)
   289  		return nil, nil, err
   290  	}
   291  	if err != nil {
   292  		err = errors.Wrapf(err, "Can't locate record inserted for intended host '%v' "+
   293  			"due to error", intentHost.Id)
   294  
   295  		grip.Error(err)
   296  		return nil, nil, err
   297  	}
   298  
   299  	// we found the old document now we can insert the new one
   300  	host.Id = instance.InstanceId
   301  	err = host.Insert()
   302  	if err != nil {
   303  		err = errors.Wrapf(err, "Could not insert updated host information for '%v' with '%v'",
   304  			intentHost.Id, host.Id)
   305  		grip.Error(err)
   306  		return nil, nil, err
   307  	}
   308  
   309  	// remove the intent host document
   310  	err = intentHost.Remove()
   311  	if err != nil {
   312  		err = errors.Wrapf(err, "Could not remove insert host '%v' (replaced by '%v')",
   313  			intentHost.Id, host.Id)
   314  		grip.Error(err)
   315  		return nil, nil, err
   316  	}
   317  
   318  	var infoResp *ec2.DescribeInstancesResp
   319  	instanceInfoRetryCount := 0
   320  	instanceInfoMaxRetries := 5
   321  	for {
   322  		infoResp, err = ec2Handle.DescribeInstances([]string{instance.InstanceId}, nil)
   323  		if err != nil {
   324  			instanceInfoRetryCount++
   325  			if instanceInfoRetryCount == instanceInfoMaxRetries {
   326  				grip.Errorln("There was an error querying for the instance's ",
   327  					"information and retries are exhausted. The instance may be up.")
   328  				return nil, resp, errors.WithStack(err)
   329  			}
   330  			grip.Debugf("There was an error querying for the instance's information. "+
   331  				"Retrying in 30 seconds. Error: %v", err)
   332  			time.Sleep(30 * time.Second)
   333  			continue
   334  		}
   335  		break
   336  	}
   337  
   338  	reservations := infoResp.Reservations
   339  	if len(reservations) < 1 {
   340  		return nil, resp, errors.New("Reservation was returned as nil, you " +
   341  			"may have to check manually")
   342  	}
   343  
   344  	instancesInfo := reservations[0].Instances
   345  	if len(instancesInfo) < 1 {
   346  		return nil, resp, errors.New("Reservation appears to have no " +
   347  			"associated instances")
   348  	}
   349  	return host, resp, nil
   350  }
   351  
   352  // CostForDuration returns the cost of running a host between the given start and end times
   353  func (cloudManager *EC2Manager) CostForDuration(h *host.Host, start, end time.Time) (float64, error) {
   354  	// sanity check
   355  	if end.Before(start) || util.IsZeroTime(start) || util.IsZeroTime(end) {
   356  		return 0, errors.New("task timing data is malformed")
   357  	}
   358  	// grab instance details from EC2
   359  	ec2Handle := getUSEast(*cloudManager.awsCredentials)
   360  	instance, err := getInstanceInfo(ec2Handle, h.Id)
   361  	if err != nil {
   362  		return 0, errors.WithStack(err)
   363  	}
   364  	os := osLinux
   365  	if strings.Contains(h.Distro.Arch, "windows") {
   366  		os = osWindows
   367  	}
   368  	dur := end.Sub(start)
   369  	region := azToRegion(instance.AvailabilityZone)
   370  	iType := instance.InstanceType
   371  
   372  	ebsCost, err := blockDeviceCosts(ec2Handle, instance.BlockDevices, dur)
   373  	if err != nil {
   374  		return 0, errors.Wrap(err, "calculating block device costs")
   375  	}
   376  	hostCost, err := onDemandCost(&pkgOnDemandPriceFetcher, os, iType, region, dur)
   377  	if err != nil {
   378  		return 0, errors.WithStack(err)
   379  	}
   380  	return hostCost + ebsCost, nil
   381  }