github.com/mackerelio/mackerel-agent-plugins@v0.89.3/mackerel-plugin-aws-ec2-ebs/lib/aws-ec2-ebs.go (about)

     1  package mpawsec2ebs
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"flag"
     7  	"fmt"
     8  	"io"
     9  	"log"
    10  	"os"
    11  	"os/signal"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/aws/aws-sdk-go-v2/aws"
    16  	"github.com/aws/aws-sdk-go-v2/config"
    17  	"github.com/aws/aws-sdk-go-v2/credentials"
    18  	"github.com/aws/aws-sdk-go-v2/feature/ec2/imds"
    19  	"github.com/aws/aws-sdk-go-v2/service/cloudwatch"
    20  	cloudwatchTypes "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"
    21  	"github.com/aws/aws-sdk-go-v2/service/ec2"
    22  	"github.com/aws/aws-sdk-go-v2/service/ec2/types"
    23  	mp "github.com/mackerelio/go-mackerel-plugin-helper"
    24  )
    25  
const (
	// metricPeriodDefault is the metric period, in seconds, assumed for volume
	// types that have no entry in metricPeriodByVolumeType. getLastPoint looks
	// back three such periods when querying CloudWatch.
	metricPeriodDefault = 300
	// aggregationPeriod is the Period, in seconds, requested from CloudWatch
	// for every datapoint. It is also the divisor used to turn a per-period
	// sum into a per-second rate.
	aggregationPeriod   = 60
)
    30  
// metricPeriodByVolumeType overrides metricPeriodDefault (in seconds) for
// specific volume types. Only io1 volumes have an override here.
var metricPeriodByVolumeType = map[types.VolumeType]int{
	types.VolumeTypeIo1: 60,
}
    34  
    35  var baseGraphs = []string{
    36  	"ec2.ebs.bandwidth.#",
    37  	"ec2.ebs.throughput.#",
    38  	"ec2.ebs.size_per_op.#",
    39  	"ec2.ebs.latency.#",
    40  	"ec2.ebs.queue_length.#",
    41  	"ec2.ebs.idle_time.#",
    42  }
    43  
    44  var defaultGraphs = append([]string{
    45  	"ec2.ebs.burst_balance.#",
    46  }, baseGraphs...)
    47  
    48  var io1Graphs = append([]string{
    49  	"ec2.ebs.throughput_delivered.#",
    50  	"ec2.ebs.consumed_ops.#",
    51  }, baseGraphs...)
    52  
// additionalCloudWatchSetting describes a second CloudWatch metric that is
// combined with the primary metric of a cloudWatchSetting.
type additionalCloudWatchSetting struct {
	// MetricName is the CloudWatch metric name of the second metric.
	MetricName string
	// Statistics is the statistic requested for the second metric.
	Statistics cloudwatchTypes.Statistic
	// CalcFunc receives the primary metric's value first and this metric's
	// value second, and returns the value to report.
	CalcFunc   func(float64, float64) float64
}
    58  
// cloudWatchSetting describes how one plugin metric is derived from
// CloudWatch.
type cloudWatchSetting struct {
	// MetricName is the CloudWatch metric name to query.
	MetricName string
	// Statistics is the statistic requested for MetricName.
	Statistics cloudwatchTypes.Statistic
	// CalcFunc converts the fetched value into the reported value. It is only
	// used when Additional is nil.
	CalcFunc   func(float64) float64
	// Additional, when non-nil, names a second metric; its CalcFunc is then
	// used instead of the CalcFunc above.
	Additional *additionalCloudWatchSetting
}
    65  
    66  func value(val float64) float64 {
    67  	return val
    68  }
    69  
    70  func valuePerSec(val float64) float64 {
    71  	return val / aggregationPeriod
    72  }
    73  
    74  func sec2msec(val float64) float64 {
    75  	return val * 1000
    76  }
    77  
    78  func valPerOps(val, ops float64) float64 {
    79  	return val / ops
    80  }
    81  
    82  // http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/monitoring-volume-status.html
    83  var cloudwatchdefs = map[string](cloudWatchSetting){
    84  	"ec2.ebs.bandwidth.#.read": cloudWatchSetting{
    85  		MetricName: "VolumeReadBytes", Statistics: cloudwatchTypes.StatisticSum,
    86  		CalcFunc: valuePerSec,
    87  	},
    88  	"ec2.ebs.bandwidth.#.write": cloudWatchSetting{
    89  		MetricName: "VolumeWriteBytes", Statistics: cloudwatchTypes.StatisticSum,
    90  		CalcFunc: valuePerSec,
    91  	},
    92  	"ec2.ebs.throughput.#.read": cloudWatchSetting{
    93  		MetricName: "VolumeReadOps", Statistics: cloudwatchTypes.StatisticSum,
    94  		CalcFunc: valuePerSec,
    95  	},
    96  	"ec2.ebs.throughput.#.write": cloudWatchSetting{
    97  		MetricName: "VolumeWriteOps", Statistics: cloudwatchTypes.StatisticSum,
    98  		CalcFunc: valuePerSec,
    99  	},
   100  	"ec2.ebs.size_per_op.#.read": cloudWatchSetting{
   101  		MetricName: "VolumeReadBytes", Statistics: cloudwatchTypes.StatisticAverage,
   102  		CalcFunc: value,
   103  	},
   104  	"ec2.ebs.size_per_op.#.write": cloudWatchSetting{
   105  		MetricName: "VolumeWriteBytes", Statistics: cloudwatchTypes.StatisticAverage,
   106  		CalcFunc: value,
   107  	},
   108  	"ec2.ebs.latency.#.read": cloudWatchSetting{
   109  		MetricName: "VolumeTotalReadTime", Statistics: cloudwatchTypes.StatisticAverage,
   110  		CalcFunc: sec2msec,
   111  	},
   112  	"ec2.ebs.latency.#.write": cloudWatchSetting{
   113  		MetricName: "VolumeTotalWriteTime", Statistics: cloudwatchTypes.StatisticAverage,
   114  		CalcFunc: sec2msec,
   115  	},
   116  	"ec2.ebs.queue_length.#.queue_length": cloudWatchSetting{
   117  		MetricName: "VolumeQueueLength", Statistics: cloudwatchTypes.StatisticAverage,
   118  		CalcFunc: value,
   119  	},
   120  	"ec2.ebs.idle_time.#.idle_time": cloudWatchSetting{
   121  		MetricName: "VolumeIdleTime", Statistics: cloudwatchTypes.StatisticSum,
   122  		CalcFunc: func(val float64) float64 { return val / aggregationPeriod * 100.0 },
   123  	},
   124  	"ec2.ebs.throughput_delivered.#.throughput_delivered": cloudWatchSetting{
   125  		MetricName: "VolumeThroughputPercentage", Statistics: cloudwatchTypes.StatisticAverage,
   126  		CalcFunc: value,
   127  	},
   128  	"ec2.ebs.consumed_ops.#.consumed_ops": cloudWatchSetting{
   129  		MetricName: "VolumeConsumedReadWriteOps", Statistics: cloudwatchTypes.StatisticSum,
   130  		CalcFunc: value,
   131  	},
   132  	"ec2.ebs.burst_balance.#.burst_balance": cloudWatchSetting{
   133  		MetricName: "BurstBalance", Statistics: cloudwatchTypes.StatisticAverage,
   134  		CalcFunc: value,
   135  	},
   136  }
   137  
   138  var cloudwatchdefsNitro = map[string](cloudWatchSetting){
   139  	"ec2.ebs.size_per_op.#.read": cloudWatchSetting{
   140  		MetricName: "VolumeReadBytes", Statistics: cloudwatchTypes.StatisticSum,
   141  		Additional: &additionalCloudWatchSetting{
   142  			MetricName: "VolumeReadOps", Statistics: cloudwatchTypes.StatisticSum,
   143  			CalcFunc: valPerOps,
   144  		},
   145  	},
   146  	"ec2.ebs.size_per_op.#.write": cloudWatchSetting{
   147  		MetricName: "VolumeWriteBytes", Statistics: cloudwatchTypes.StatisticSum,
   148  		Additional: &additionalCloudWatchSetting{
   149  			MetricName: "VolumeWriteOps", Statistics: cloudwatchTypes.StatisticSum,
   150  			CalcFunc: valPerOps,
   151  		},
   152  	},
   153  	"ec2.ebs.latency.#.read": cloudWatchSetting{
   154  		MetricName: "VolumeTotalReadTime", Statistics: cloudwatchTypes.StatisticSum,
   155  		Additional: &additionalCloudWatchSetting{
   156  			MetricName: "VolumeReadOps", Statistics: cloudwatchTypes.StatisticSum,
   157  			CalcFunc: valPerOps,
   158  		},
   159  	},
   160  	"ec2.ebs.latency.#.write": cloudWatchSetting{
   161  		MetricName: "VolumeTotalWriteTime", Statistics: cloudwatchTypes.StatisticSum,
   162  		Additional: &additionalCloudWatchSetting{
   163  			MetricName: "VolumeWriteOps", Statistics: cloudwatchTypes.StatisticSum,
   164  			CalcFunc: valPerOps,
   165  		},
   166  	},
   167  }
   168  
   169  var graphdef = map[string]mp.Graphs{
   170  	"ec2.ebs.bandwidth.#": {
   171  		Label: "EBS Bandwidth",
   172  		Unit:  "bytes/sec",
   173  		Metrics: []mp.Metrics{
   174  			{Name: "read", Label: "Read", Diff: false},
   175  			{Name: "write", Label: "Write", Diff: false},
   176  		},
   177  	},
   178  	"ec2.ebs.throughput.#": {
   179  		Label: "EBS Throughput (op/s)",
   180  		Unit:  "iops",
   181  		Metrics: []mp.Metrics{
   182  			{Name: "read", Label: "Read", Diff: false},
   183  			{Name: "write", Label: "Write", Diff: false},
   184  		},
   185  	},
   186  	"ec2.ebs.size_per_op.#": {
   187  		Label: "EBS Avg Op Size (Bytes/op)",
   188  		Unit:  "bytes",
   189  		Metrics: []mp.Metrics{
   190  			{Name: "read", Label: "Read", Diff: false},
   191  			{Name: "write", Label: "Write", Diff: false},
   192  		},
   193  	},
   194  	"ec2.ebs.latency.#": {
   195  		Label: "EBS Avg Latency (ms/op)",
   196  		Unit:  "float",
   197  		Metrics: []mp.Metrics{
   198  			{Name: "read", Label: "Read", Diff: false},
   199  			{Name: "write", Label: "Write", Diff: false},
   200  		},
   201  	},
   202  	"ec2.ebs.queue_length.#": {
   203  		Label: "EBS Avg Queue Length (ops)",
   204  		Unit:  "float",
   205  		Metrics: []mp.Metrics{
   206  			{Name: "queue_length", Label: "Queue Length", Diff: false},
   207  		},
   208  	},
   209  	"ec2.ebs.idle_time.#": {
   210  		Label: "EBS Time Spent Idle",
   211  		Unit:  "percentage",
   212  		Metrics: []mp.Metrics{
   213  			{Name: "idle_time", Label: "Idle Time", Diff: false},
   214  		},
   215  	},
   216  	"ec2.ebs.throughput_delivered.#": {
   217  		Label: "EBS Throughput of Provisioned IOPS",
   218  		Unit:  "percentage",
   219  		Metrics: []mp.Metrics{
   220  			{Name: "throughput_delivered", Label: "Throughput", Diff: false},
   221  		},
   222  	},
   223  	"ec2.ebs.consumed_ops.#": {
   224  		Label: "EBS Consumed Ops of Provisioned IOPS",
   225  		Unit:  "float",
   226  		Metrics: []mp.Metrics{
   227  			{Name: "consumed_ops", Label: "Consumed Ops", Diff: false},
   228  		},
   229  	},
   230  	"ec2.ebs.burst_balance.#": {
   231  		Label: "EBS Burst Balance",
   232  		Unit:  "percentage",
   233  		Metrics: []mp.Metrics{
   234  			{Name: "burst_balance", Label: "Burst Balance", Diff: false},
   235  		},
   236  	},
   237  }
   238  
// EBSPlugin is the Mackerel plugin that reports CloudWatch metrics for the
// EBS volumes attached to one EC2 instance.
type EBSPlugin struct {
	// command line options
	// (Do falls back to instance metadata when Region or InstanceID is not
	// given on the command line.)
	Region          string
	AccessKeyID     string
	SecretAccessKey string
	InstanceID      string

	// internal states, populated by prepare
	EC2        *ec2.Client
	CloudWatch *cloudwatch.Client
	// Volumes are the volumes attached to InstanceID.
	Volumes    []types.Volume
	// Hypervisor selects whether the Nitro-specific metric settings are used.
	Hypervisor types.InstanceTypeHypervisor
}
   253  
   254  func (p *EBSPlugin) prepare(ctx context.Context) error {
   255  	var opts []func(*config.LoadOptions) error
   256  	if p.AccessKeyID != "" && p.SecretAccessKey != "" {
   257  		opts = append(opts, config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(p.AccessKeyID, p.SecretAccessKey, "")))
   258  	}
   259  	if p.Region != "" {
   260  		opts = append(opts, config.WithRegion(p.Region))
   261  	}
   262  
   263  	cfg, err := config.LoadDefaultConfig(ctx, opts...)
   264  	if err != nil {
   265  		return err
   266  	}
   267  
   268  	p.EC2 = ec2.NewFromConfig(cfg)
   269  
   270  	var instanceType types.InstanceType
   271  	instance, err := p.EC2.DescribeInstances(ctx, &ec2.DescribeInstancesInput{
   272  		InstanceIds: []string{p.InstanceID},
   273  	})
   274  	if err != nil {
   275  		return err
   276  	}
   277  	if instance.NextToken != nil {
   278  		return errors.New("DescribeInstances response has NextToken")
   279  	}
   280  	for i := range instance.Reservations {
   281  		for j := range instance.Reservations[i].Instances {
   282  			instanceType = instance.Reservations[i].Instances[j].InstanceType
   283  		}
   284  	}
   285  
   286  	instanceDetail, err := p.EC2.DescribeInstanceTypes(ctx, &ec2.DescribeInstanceTypesInput{
   287  		InstanceTypes: []types.InstanceType{instanceType},
   288  	})
   289  	if err != nil {
   290  		return err
   291  	}
   292  	if instanceDetail.NextToken != nil {
   293  		return errors.New("DescribeInstanceTypes response has NextToken")
   294  	}
   295  	for i := range instanceDetail.InstanceTypes {
   296  		p.Hypervisor = instanceDetail.InstanceTypes[i].Hypervisor
   297  	}
   298  
   299  	resp, err := p.EC2.DescribeVolumes(ctx, &ec2.DescribeVolumesInput{
   300  		Filters: []types.Filter{
   301  			{
   302  				Name:   aws.String("attachment.instance-id"),
   303  				Values: []string{p.InstanceID},
   304  			},
   305  		},
   306  	})
   307  	if err != nil {
   308  		return err
   309  	}
   310  	if resp.NextToken != nil {
   311  		return errors.New("DescribeVolumes response has NextToken")
   312  	}
   313  
   314  	p.Volumes = resp.Volumes
   315  	if len(p.Volumes) == 0 {
   316  		return errors.New("DescribeVolumes response has no volumes")
   317  	}
   318  
   319  	p.CloudWatch = cloudwatch.NewFromConfig(cfg)
   320  
   321  	return nil
   322  }
   323  
// errNoDataPoint is returned by getLastPoint when CloudWatch has no datapoint
// in the queried window. FetchMetrics treats it as "skip this metric" rather
// than as a failure.
var errNoDataPoint = errors.New("fetched no datapoints")
   325  
   326  func (p EBSPlugin) getLastPoint(ctx context.Context, vol types.Volume, metricName string, statType cloudwatchTypes.Statistic) (float64, error) {
   327  	now := time.Now()
   328  
   329  	period := metricPeriodDefault
   330  	if tmp, ok := metricPeriodByVolumeType[vol.VolumeType]; ok {
   331  		period = tmp
   332  	}
   333  	start := now.Add(time.Duration(period) * 3 * time.Second * -1)
   334  
   335  	resp, err := p.CloudWatch.GetMetricStatistics(ctx, &cloudwatch.GetMetricStatisticsInput{
   336  		Dimensions: []cloudwatchTypes.Dimension{
   337  			{
   338  				Name:  aws.String("VolumeId"),
   339  				Value: vol.VolumeId,
   340  			},
   341  		},
   342  		StartTime:  &start,
   343  		EndTime:    &now,
   344  		MetricName: &metricName,
   345  		Period:     aws.Int32(aggregationPeriod),
   346  		Statistics: []cloudwatchTypes.Statistic{statType},
   347  		Namespace:  aws.String("AWS/EBS"),
   348  	})
   349  	if err != nil {
   350  		return 0, err
   351  	}
   352  
   353  	datapoints := resp.Datapoints
   354  	if len(datapoints) == 0 {
   355  		return 0, errNoDataPoint
   356  	}
   357  
   358  	latest := time.Unix(0, 0)
   359  	var latestVal float64
   360  	for _, dp := range datapoints {
   361  		if dp.Timestamp.Before(latest) {
   362  			continue
   363  		}
   364  
   365  		latest = *dp.Timestamp
   366  		switch statType {
   367  		case "Average":
   368  			latestVal = *dp.Average
   369  		case "Sum":
   370  			latestVal = *dp.Sum
   371  		}
   372  	}
   373  
   374  	return latestVal, nil
   375  }
   376  
   377  func (p EBSPlugin) fetch(ctx context.Context, volume types.Volume, setting cloudWatchSetting) (float64, error) {
   378  	val, err := p.getLastPoint(ctx, volume, setting.MetricName, setting.Statistics)
   379  	if err != nil {
   380  		return 0, fmt.Errorf("%s %w : %s", *volume.VolumeId, err, setting.MetricName)
   381  	}
   382  
   383  	if setting.Additional == nil {
   384  		return setting.CalcFunc(val), nil
   385  	}
   386  
   387  	val2, err := p.getLastPoint(ctx, volume, setting.Additional.MetricName, setting.Additional.Statistics)
   388  	if err != nil {
   389  		return 0, fmt.Errorf("%s %w : %s", *volume.VolumeId, err, setting.Additional.MetricName)
   390  	}
   391  	return setting.Additional.CalcFunc(val, val2), nil
   392  }
   393  
   394  // FetchMetrics fetch the metrics
   395  func (p EBSPlugin) FetchMetrics() (map[string]interface{}, error) {
   396  	stat := make(map[string]interface{})
   397  
   398  	// Override when Nitro instance.
   399  	if p.Hypervisor == types.InstanceTypeHypervisorNitro {
   400  		for i := range cloudwatchdefsNitro {
   401  			cloudwatchdefs[i] = cloudwatchdefsNitro[i]
   402  		}
   403  	}
   404  
   405  	for _, vol := range p.Volumes {
   406  		volumeID := normalizeVolumeID(*vol.VolumeId)
   407  		var graphs []string
   408  		if vol.VolumeType == types.VolumeTypeIo1 {
   409  			graphs = io1Graphs
   410  		} else {
   411  			graphs = defaultGraphs
   412  		}
   413  		for _, graphName := range graphs {
   414  			for _, metric := range graphdef[graphName].Metrics {
   415  				metricKey := graphName + "." + metric.Name
   416  				cloudwatchdef := cloudwatchdefs[metricKey]
   417  				val, err := p.fetch(context.TODO(), vol, cloudwatchdef)
   418  				if err != nil {
   419  					if errors.Is(err, errNoDataPoint) {
   420  						// nop
   421  					} else {
   422  						return nil, err
   423  					}
   424  				} else {
   425  					stat[strings.ReplaceAll(metricKey, "#", volumeID)] = val
   426  				}
   427  			}
   428  		}
   429  	}
   430  	return stat, nil
   431  }
   432  
   433  // GraphDefinition for plugin
   434  func (p EBSPlugin) GraphDefinition() map[string]mp.Graphs {
   435  	return graphdef
   436  }
   437  
   438  func normalizeVolumeID(volumeID string) string {
   439  	return strings.ReplaceAll(volumeID, ".", "_")
   440  }
   441  
// defaultSignal is the signal that cancels the context created in Do.
// It is overwritten with syscall.SIGTERM on unix environments
// (see aws-ec2-ebs_unix.go).
var defaultSignal = os.Interrupt
   444  
   445  // Do the plugin
   446  func Do() {
   447  	optRegion := flag.String("region", "", "AWS Region")
   448  	optInstanceID := flag.String("instance-id", "", "Instance ID")
   449  	optAccessKeyID := flag.String("access-key-id", "", "AWS Access Key ID")
   450  	optSecretAccessKey := flag.String("secret-access-key", "", "AWS Secret Access Key")
   451  	optTempfile := flag.String("tempfile", "", "Temp file name")
   452  	flag.Parse()
   453  
   454  	ctx, stop := signal.NotifyContext(context.Background(), defaultSignal)
   455  	defer stop()
   456  
   457  	var ebs EBSPlugin
   458  
   459  	ebs.Region = *optRegion
   460  	ebs.InstanceID = *optInstanceID
   461  
   462  	cfg, err := config.LoadDefaultConfig(ctx)
   463  	if err != nil {
   464  		log.Fatalln(err)
   465  	}
   466  
   467  	// get metadata in ec2 instance
   468  	imdsClient := imds.NewFromConfig(cfg)
   469  	if *optRegion == "" {
   470  		out, err := imdsClient.GetRegion(ctx, nil)
   471  		if err != nil {
   472  			log.Fatalln(err)
   473  		}
   474  		ebs.Region = out.Region
   475  	}
   476  	if *optInstanceID == "" {
   477  		metadata, err := imdsClient.GetMetadata(ctx, &imds.GetMetadataInput{
   478  			Path: "instance-id",
   479  		})
   480  		if err != nil {
   481  			log.Fatalln(err)
   482  		}
   483  		content, _ := io.ReadAll(metadata.Content)
   484  		ebs.InstanceID = string(content)
   485  	}
   486  
   487  	ebs.AccessKeyID = *optAccessKeyID
   488  	ebs.SecretAccessKey = *optSecretAccessKey
   489  
   490  	if err := ebs.prepare(ctx); err != nil {
   491  		log.Fatalln(err)
   492  	}
   493  
   494  	helper := mp.NewMackerelPlugin(ebs)
   495  	helper.Tempfile = *optTempfile
   496  
   497  	helper.Run()
   498  }