github.com/mackerelio/mackerel-agent-plugins@v0.89.3/mackerel-plugin-aws-ec2-ebs/lib/aws-ec2-ebs.go (about) 1 package mpawsec2ebs 2 3 import ( 4 "context" 5 "errors" 6 "flag" 7 "fmt" 8 "io" 9 "log" 10 "os" 11 "os/signal" 12 "strings" 13 "time" 14 15 "github.com/aws/aws-sdk-go-v2/aws" 16 "github.com/aws/aws-sdk-go-v2/config" 17 "github.com/aws/aws-sdk-go-v2/credentials" 18 "github.com/aws/aws-sdk-go-v2/feature/ec2/imds" 19 "github.com/aws/aws-sdk-go-v2/service/cloudwatch" 20 cloudwatchTypes "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" 21 "github.com/aws/aws-sdk-go-v2/service/ec2" 22 "github.com/aws/aws-sdk-go-v2/service/ec2/types" 23 mp "github.com/mackerelio/go-mackerel-plugin-helper" 24 ) 25 26 const ( 27 metricPeriodDefault = 300 28 aggregationPeriod = 60 29 ) 30 31 var metricPeriodByVolumeType = map[types.VolumeType]int{ 32 types.VolumeTypeIo1: 60, 33 } 34 35 var baseGraphs = []string{ 36 "ec2.ebs.bandwidth.#", 37 "ec2.ebs.throughput.#", 38 "ec2.ebs.size_per_op.#", 39 "ec2.ebs.latency.#", 40 "ec2.ebs.queue_length.#", 41 "ec2.ebs.idle_time.#", 42 } 43 44 var defaultGraphs = append([]string{ 45 "ec2.ebs.burst_balance.#", 46 }, baseGraphs...) 47 48 var io1Graphs = append([]string{ 49 "ec2.ebs.throughput_delivered.#", 50 "ec2.ebs.consumed_ops.#", 51 }, baseGraphs...) 52 53 type additionalCloudWatchSetting struct { 54 MetricName string 55 Statistics cloudwatchTypes.Statistic 56 CalcFunc func(float64, float64) float64 57 } 58 59 type cloudWatchSetting struct { 60 MetricName string 61 Statistics cloudwatchTypes.Statistic 62 CalcFunc func(float64) float64 63 Additional *additionalCloudWatchSetting 64 } 65 66 func value(val float64) float64 { 67 return val 68 } 69 70 func valuePerSec(val float64) float64 { 71 return val / aggregationPeriod 72 } 73 74 func sec2msec(val float64) float64 { 75 return val * 1000 76 } 77 78 func valPerOps(val, ops float64) float64 { 79 return val / ops 80 } 81 82 // http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/monitoring-volume-status.html 83 var cloudwatchdefs = map[string](cloudWatchSetting){ 84 "ec2.ebs.bandwidth.#.read": cloudWatchSetting{ 85 MetricName: "VolumeReadBytes", Statistics: cloudwatchTypes.StatisticSum, 86 CalcFunc: valuePerSec, 87 }, 88 "ec2.ebs.bandwidth.#.write": cloudWatchSetting{ 89 MetricName: "VolumeWriteBytes", Statistics: cloudwatchTypes.StatisticSum, 90 CalcFunc: valuePerSec, 91 }, 92 "ec2.ebs.throughput.#.read": cloudWatchSetting{ 93 MetricName: "VolumeReadOps", Statistics: cloudwatchTypes.StatisticSum, 94 CalcFunc: valuePerSec, 95 }, 96 "ec2.ebs.throughput.#.write": cloudWatchSetting{ 97 MetricName: "VolumeWriteOps", Statistics: cloudwatchTypes.StatisticSum, 98 CalcFunc: valuePerSec, 99 }, 100 "ec2.ebs.size_per_op.#.read": cloudWatchSetting{ 101 MetricName: "VolumeReadBytes", Statistics: cloudwatchTypes.StatisticAverage, 102 CalcFunc: value, 103 }, 104 "ec2.ebs.size_per_op.#.write": cloudWatchSetting{ 105 MetricName: "VolumeWriteBytes", Statistics: cloudwatchTypes.StatisticAverage, 106 CalcFunc: value, 107 }, 108 "ec2.ebs.latency.#.read": cloudWatchSetting{ 109 MetricName: "VolumeTotalReadTime", Statistics: cloudwatchTypes.StatisticAverage, 110 CalcFunc: sec2msec, 111 }, 112 "ec2.ebs.latency.#.write": cloudWatchSetting{ 113 MetricName: "VolumeTotalWriteTime", Statistics: cloudwatchTypes.StatisticAverage, 114 CalcFunc: sec2msec, 115 }, 116 "ec2.ebs.queue_length.#.queue_length": cloudWatchSetting{ 117 MetricName: "VolumeQueueLength", Statistics: cloudwatchTypes.StatisticAverage, 118 CalcFunc: value, 119 }, 120 "ec2.ebs.idle_time.#.idle_time": cloudWatchSetting{ 121 MetricName: "VolumeIdleTime", Statistics: cloudwatchTypes.StatisticSum, 122 CalcFunc: func(val float64) float64 { return val / aggregationPeriod * 100.0 }, 123 }, 124 "ec2.ebs.throughput_delivered.#.throughput_delivered": cloudWatchSetting{ 125 MetricName: "VolumeThroughputPercentage", Statistics: cloudwatchTypes.StatisticAverage, 126 CalcFunc: value, 127 }, 128 "ec2.ebs.consumed_ops.#.consumed_ops": cloudWatchSetting{ 129 MetricName: "VolumeConsumedReadWriteOps", Statistics: cloudwatchTypes.StatisticSum, 130 CalcFunc: value, 131 }, 132 "ec2.ebs.burst_balance.#.burst_balance": cloudWatchSetting{ 133 MetricName: "BurstBalance", Statistics: cloudwatchTypes.StatisticAverage, 134 CalcFunc: value, 135 }, 136 } 137 138 var cloudwatchdefsNitro = map[string](cloudWatchSetting){ 139 "ec2.ebs.size_per_op.#.read": cloudWatchSetting{ 140 MetricName: "VolumeReadBytes", Statistics: cloudwatchTypes.StatisticSum, 141 Additional: &additionalCloudWatchSetting{ 142 MetricName: "VolumeReadOps", Statistics: cloudwatchTypes.StatisticSum, 143 CalcFunc: valPerOps, 144 }, 145 }, 146 "ec2.ebs.size_per_op.#.write": cloudWatchSetting{ 147 MetricName: "VolumeWriteBytes", Statistics: cloudwatchTypes.StatisticSum, 148 Additional: &additionalCloudWatchSetting{ 149 MetricName: "VolumeWriteOps", Statistics: cloudwatchTypes.StatisticSum, 150 CalcFunc: valPerOps, 151 }, 152 }, 153 "ec2.ebs.latency.#.read": cloudWatchSetting{ 154 MetricName: "VolumeTotalReadTime", Statistics: cloudwatchTypes.StatisticSum, 155 Additional: &additionalCloudWatchSetting{ 156 MetricName: "VolumeReadOps", Statistics: cloudwatchTypes.StatisticSum, 157 CalcFunc: valPerOps, 158 }, 159 }, 160 "ec2.ebs.latency.#.write": cloudWatchSetting{ 161 MetricName: "VolumeTotalWriteTime", Statistics: cloudwatchTypes.StatisticSum, 162 Additional: &additionalCloudWatchSetting{ 163 MetricName: "VolumeWriteOps", Statistics: cloudwatchTypes.StatisticSum, 164 CalcFunc: valPerOps, 165 }, 166 }, 167 } 168 169 var graphdef = map[string]mp.Graphs{ 170 "ec2.ebs.bandwidth.#": { 171 Label: "EBS Bandwidth", 172 Unit: "bytes/sec", 173 Metrics: []mp.Metrics{ 174 {Name: "read", Label: "Read", Diff: false}, 175 {Name: "write", Label: "Write", Diff: false}, 176 }, 177 }, 178 "ec2.ebs.throughput.#": { 179 Label: "EBS Throughput (op/s)", 180 Unit: "iops", 181 Metrics: []mp.Metrics{ 182 {Name: "read", Label: "Read", Diff: false}, 183 {Name: "write", Label: "Write", Diff: false}, 184 }, 185 }, 186 "ec2.ebs.size_per_op.#": { 187 Label: "EBS Avg Op Size (Bytes/op)", 188 Unit: "bytes", 189 Metrics: []mp.Metrics{ 190 {Name: "read", Label: "Read", Diff: false}, 191 {Name: "write", Label: "Write", Diff: false}, 192 }, 193 }, 194 "ec2.ebs.latency.#": { 195 Label: "EBS Avg Latency (ms/op)", 196 Unit: "float", 197 Metrics: []mp.Metrics{ 198 {Name: "read", Label: "Read", Diff: false}, 199 {Name: "write", Label: "Write", Diff: false}, 200 }, 201 }, 202 "ec2.ebs.queue_length.#": { 203 Label: "EBS Avg Queue Length (ops)", 204 Unit: "float", 205 Metrics: []mp.Metrics{ 206 {Name: "queue_length", Label: "Queue Length", Diff: false}, 207 }, 208 }, 209 "ec2.ebs.idle_time.#": { 210 Label: "EBS Time Spent Idle", 211 Unit: "percentage", 212 Metrics: []mp.Metrics{ 213 {Name: "idle_time", Label: "Idle Time", Diff: false}, 214 }, 215 }, 216 "ec2.ebs.throughput_delivered.#": { 217 Label: "EBS Throughput of Provisioned IOPS", 218 Unit: "percentage", 219 Metrics: []mp.Metrics{ 220 {Name: "throughput_delivered", Label: "Throughput", Diff: false}, 221 }, 222 }, 223 "ec2.ebs.consumed_ops.#": { 224 Label: "EBS Consumed Ops of Provisioned IOPS", 225 Unit: "float", 226 Metrics: []mp.Metrics{ 227 {Name: "consumed_ops", Label: "Consumed Ops", Diff: false}, 228 }, 229 }, 230 "ec2.ebs.burst_balance.#": { 231 Label: "EBS Burst Balance", 232 Unit: "percentage", 233 Metrics: []mp.Metrics{ 234 {Name: "burst_balance", Label: "Burst Balance", Diff: false}, 235 }, 236 }, 237 } 238 239 // EBSPlugin mackerel plugin for ebs 240 type EBSPlugin struct { 241 // command line options 242 Region string 243 AccessKeyID string 244 SecretAccessKey string 245 InstanceID string 246 247 // internal states 248 EC2 *ec2.Client 249 CloudWatch *cloudwatch.Client 250 Volumes []types.Volume 251 Hypervisor types.InstanceTypeHypervisor 252 } 253 254 func (p *EBSPlugin) prepare(ctx context.Context) error { 255 var opts []func(*config.LoadOptions) error 256 if p.AccessKeyID != "" && p.SecretAccessKey != "" { 257 opts = append(opts, config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(p.AccessKeyID, p.SecretAccessKey, ""))) 258 } 259 if p.Region != "" { 260 opts = append(opts, config.WithRegion(p.Region)) 261 } 262 263 cfg, err := config.LoadDefaultConfig(ctx, opts...) 264 if err != nil { 265 return err 266 } 267 268 p.EC2 = ec2.NewFromConfig(cfg) 269 270 var instanceType types.InstanceType 271 instance, err := p.EC2.DescribeInstances(ctx, &ec2.DescribeInstancesInput{ 272 InstanceIds: []string{p.InstanceID}, 273 }) 274 if err != nil { 275 return err 276 } 277 if instance.NextToken != nil { 278 return errors.New("DescribeInstances response has NextToken") 279 } 280 for i := range instance.Reservations { 281 for j := range instance.Reservations[i].Instances { 282 instanceType = instance.Reservations[i].Instances[j].InstanceType 283 } 284 } 285 286 instanceDetail, err := p.EC2.DescribeInstanceTypes(ctx, &ec2.DescribeInstanceTypesInput{ 287 InstanceTypes: []types.InstanceType{instanceType}, 288 }) 289 if err != nil { 290 return err 291 } 292 if instanceDetail.NextToken != nil { 293 return errors.New("DescribeInstanceTypes response has NextToken") 294 } 295 for i := range instanceDetail.InstanceTypes { 296 p.Hypervisor = instanceDetail.InstanceTypes[i].Hypervisor 297 } 298 299 resp, err := p.EC2.DescribeVolumes(ctx, &ec2.DescribeVolumesInput{ 300 Filters: []types.Filter{ 301 { 302 Name: aws.String("attachment.instance-id"), 303 Values: []string{p.InstanceID}, 304 }, 305 }, 306 }) 307 if err != nil { 308 return err 309 } 310 if resp.NextToken != nil { 311 return errors.New("DescribeVolumes response has NextToken") 312 } 313 314 p.Volumes = resp.Volumes 315 if len(p.Volumes) == 0 { 316 return errors.New("DescribeVolumes response has no volumes") 317 } 318 319 p.CloudWatch = cloudwatch.NewFromConfig(cfg) 320 321 return nil 322 } 323 324 var errNoDataPoint = errors.New("fetched no datapoints") 325 326 func (p EBSPlugin) getLastPoint(ctx context.Context, vol types.Volume, metricName string, statType cloudwatchTypes.Statistic) (float64, error) { 327 now := time.Now() 328 329 period := metricPeriodDefault 330 if tmp, ok := metricPeriodByVolumeType[vol.VolumeType]; ok { 331 period = tmp 332 } 333 start := now.Add(time.Duration(period) * 3 * time.Second * -1) 334 335 resp, err := p.CloudWatch.GetMetricStatistics(ctx, &cloudwatch.GetMetricStatisticsInput{ 336 Dimensions: []cloudwatchTypes.Dimension{ 337 { 338 Name: aws.String("VolumeId"), 339 Value: vol.VolumeId, 340 }, 341 }, 342 StartTime: &start, 343 EndTime: &now, 344 MetricName: &metricName, 345 Period: aws.Int32(aggregationPeriod), 346 Statistics: []cloudwatchTypes.Statistic{statType}, 347 Namespace: aws.String("AWS/EBS"), 348 }) 349 if err != nil { 350 return 0, err 351 } 352 353 datapoints := resp.Datapoints 354 if len(datapoints) == 0 { 355 return 0, errNoDataPoint 356 } 357 358 latest := time.Unix(0, 0) 359 var latestVal float64 360 for _, dp := range datapoints { 361 if dp.Timestamp.Before(latest) { 362 continue 363 } 364 365 latest = *dp.Timestamp 366 switch statType { 367 case "Average": 368 latestVal = *dp.Average 369 case "Sum": 370 latestVal = *dp.Sum 371 } 372 } 373 374 return latestVal, nil 375 } 376 377 func (p EBSPlugin) fetch(ctx context.Context, volume types.Volume, setting cloudWatchSetting) (float64, error) { 378 val, err := p.getLastPoint(ctx, volume, setting.MetricName, setting.Statistics) 379 if err != nil { 380 return 0, fmt.Errorf("%s %w : %s", *volume.VolumeId, err, setting.MetricName) 381 } 382 383 if setting.Additional == nil { 384 return setting.CalcFunc(val), nil 385 } 386 387 val2, err := p.getLastPoint(ctx, volume, setting.Additional.MetricName, setting.Additional.Statistics) 388 if err != nil { 389 return 0, fmt.Errorf("%s %w : %s", *volume.VolumeId, err, setting.Additional.MetricName) 390 } 391 return setting.Additional.CalcFunc(val, val2), nil 392 } 393 394 // FetchMetrics fetch the metrics 395 func (p EBSPlugin) FetchMetrics() (map[string]interface{}, error) { 396 stat := make(map[string]interface{}) 397 398 // Override when Nitro instance. 399 if p.Hypervisor == types.InstanceTypeHypervisorNitro { 400 for i := range cloudwatchdefsNitro { 401 cloudwatchdefs[i] = cloudwatchdefsNitro[i] 402 } 403 } 404 405 for _, vol := range p.Volumes { 406 volumeID := normalizeVolumeID(*vol.VolumeId) 407 var graphs []string 408 if vol.VolumeType == types.VolumeTypeIo1 { 409 graphs = io1Graphs 410 } else { 411 graphs = defaultGraphs 412 } 413 for _, graphName := range graphs { 414 for _, metric := range graphdef[graphName].Metrics { 415 metricKey := graphName + "." + metric.Name 416 cloudwatchdef := cloudwatchdefs[metricKey] 417 val, err := p.fetch(context.TODO(), vol, cloudwatchdef) 418 if err != nil { 419 if errors.Is(err, errNoDataPoint) { 420 // nop 421 } else { 422 return nil, err 423 } 424 } else { 425 stat[strings.ReplaceAll(metricKey, "#", volumeID)] = val 426 } 427 } 428 } 429 } 430 return stat, nil 431 } 432 433 // GraphDefinition for plugin 434 func (p EBSPlugin) GraphDefinition() map[string]mp.Graphs { 435 return graphdef 436 } 437 438 func normalizeVolumeID(volumeID string) string { 439 return strings.ReplaceAll(volumeID, ".", "_") 440 } 441 442 // overwritten with syscall.SIGTERM on unix environment (see aws-ec2-ebs_unix.go) 443 var defaultSignal = os.Interrupt 444 445 // Do the plugin 446 func Do() { 447 optRegion := flag.String("region", "", "AWS Region") 448 optInstanceID := flag.String("instance-id", "", "Instance ID") 449 optAccessKeyID := flag.String("access-key-id", "", "AWS Access Key ID") 450 optSecretAccessKey := flag.String("secret-access-key", "", "AWS Secret Access Key") 451 optTempfile := flag.String("tempfile", "", "Temp file name") 452 flag.Parse() 453 454 ctx, stop := signal.NotifyContext(context.Background(), defaultSignal) 455 defer stop() 456 457 var ebs EBSPlugin 458 459 ebs.Region = *optRegion 460 ebs.InstanceID = *optInstanceID 461 462 cfg, err := config.LoadDefaultConfig(ctx) 463 if err != nil { 464 log.Fatalln(err) 465 } 466 467 // get metadata in ec2 instance 468 imdsClient := imds.NewFromConfig(cfg) 469 if *optRegion == "" { 470 out, err := imdsClient.GetRegion(ctx, nil) 471 if err != nil { 472 log.Fatalln(err) 473 } 474 ebs.Region = out.Region 475 } 476 if *optInstanceID == "" { 477 metadata, err := imdsClient.GetMetadata(ctx, &imds.GetMetadataInput{ 478 Path: "instance-id", 479 }) 480 if err != nil { 481 log.Fatalln(err) 482 } 483 content, _ := io.ReadAll(metadata.Content) 484 ebs.InstanceID = string(content) 485 } 486 487 ebs.AccessKeyID = *optAccessKeyID 488 ebs.SecretAccessKey = *optSecretAccessKey 489 490 if err := ebs.prepare(ctx); err != nil { 491 log.Fatalln(err) 492 } 493 494 helper := mp.NewMackerelPlugin(ebs) 495 helper.Tempfile = *optTempfile 496 497 helper.Run() 498 }