bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/scollector/collectors/awsBilling.go (about) 1 package collectors 2 3 import ( 4 "compress/gzip" 5 "encoding/csv" 6 "fmt" 7 "io/ioutil" 8 "os" 9 "path/filepath" 10 "reflect" 11 "regexp" 12 "sort" 13 "strconv" 14 "strings" 15 "time" 16 17 "bosun.org/metadata" 18 "bosun.org/opentsdb" 19 "bosun.org/slog" 20 "github.com/aws/aws-sdk-go/aws" 21 "github.com/aws/aws-sdk-go/aws/credentials" 22 "github.com/aws/aws-sdk-go/aws/session" 23 "github.com/aws/aws-sdk-go/service/route53" 24 "github.com/aws/aws-sdk-go/service/s3" 25 "github.com/aws/aws-sdk-go/service/s3/s3manager" 26 ) 27 28 var ( 29 awsBillingR53zones = map[string]route53.GetHostedZoneOutput{} 30 ) 31 32 const ( 33 awsBillingReportDateFormat = "20060102" 34 awsDescCostsFmt = "Usage costs for Amazon %v. Datapoints represent costs for a full hour. Data typically lags by 24 hours." 35 awsDescUsageFmt = "Usage volume for Amazon %v. Datapoints represent a full hour of usage. Data typically lags by 24 hours." 36 awsDescUsageResFmt = "Usage volume for Amazon %v, denormalized with only a resource tag. Datapoints represent a full hour of usage. Data typically lags by 24 hours." 37 awsDescUsageOpsFmt = "Usage volume for Amazon %v, denormalized with only an operation tag. Datapoints represent a full hour of usage. Data typically lags by 24 hours." 38 awsDescCostsByProduct = "Usage costs for Amazon services, denormalized with only a product tag. Datapoints represent a full hour of usage. Data typically lags by 24 hours." 
39 ) 40 41 func c_awsBilling(accessKey, secretKey, region, productCodes, bucketName, bucketPath string, purgeDays int) (opentsdb.MultiDataPoint, error) { 42 creds := credentials.NewStaticCredentials(accessKey, secretKey, "") 43 conf := &aws.Config{ 44 Credentials: creds, 45 Region: ®ion, 46 } 47 awsBilling := awsBillingConfig{ 48 bucketName: bucketName, 49 bucketPath: bucketPath, 50 } 51 regCompiled, err := regexp.Compile(productCodes) 52 if err != nil { 53 return nil, err 54 } 55 awsBilling.prodCodesReg = regCompiled 56 awsBilling.s3svc = s3.New(session.New(conf)) //Connect to S3 57 if awsBilling.s3svc == nil { 58 return nil, fmt.Errorf("unable to connect to S3") 59 } 60 awsBilling.r53svc = route53.New(session.New(conf)) //Connect to R53 61 if awsBilling.r53svc == nil { 62 return nil, fmt.Errorf("unable to connect to Route 53") 63 } 64 awsBilling.downloader = s3manager.NewDownloader(session.New(conf)) //Gimmie a downloader 65 if awsBilling.downloader == nil { 66 return nil, fmt.Errorf("unable to create S3 downloader") 67 } 68 if purgeDays == 0 { 69 slog.Infof("S3 purging of objects is disabled") 70 awsBilling.purgeOlderThan = time.Date(2999, 12, 31, 23, 59, 59, 0, time.UTC) 71 } else { 72 purgeHours := time.Duration(-1 * 24 * purgeDays) 73 awsBilling.purgeOlderThan = time.Now().Add(purgeHours * time.Hour) 74 } 75 return awsBilling.Check() 76 } 77 78 type awsBillingConfig struct { 79 s3svc *s3.S3 80 r53svc *route53.Route53 81 downloader *s3manager.Downloader 82 purgeOlderThan time.Time 83 bucketName string 84 bucketPath string 85 prodCodesReg *regexp.Regexp 86 } 87 88 func (awsBilling *awsBillingConfig) Check() (opentsdb.MultiDataPoint, error) { 89 md := opentsdb.MultiDataPoint{} 90 purgeObjects := []*s3.Object{} 91 //Declare the objects we want to fetch here. For completeness sake, we're going to fetch 92 //the entire contents of the bucket, as we are going to be cleaning it out as we go. 
93 getBucketObjects := &s3.ListObjectsInput{ 94 Bucket: aws.String(awsBilling.bucketName), 95 } 96 //Get the objects from the bucket 97 bucketObjects, err := awsBilling.s3svc.ListObjects(getBucketObjects) 98 if err != nil { 99 return nil, err 100 } 101 102 //Sort the files found by last modified, newest first. 103 sort.Sort(sort.Reverse(ByLastModified(bucketObjects.Contents))) 104 105 //Go through the contents of the bucket and parse the different kinds of report files 106 //that that we're going to have. At the end of it we'll have a bucketContents that contains 107 //the origina S3 file, along with hopefully enough other detail. 108 var thisBucketContents []bucketContents 109 for _, bucketObject := range bucketObjects.Contents { 110 thisReport := &billingKeyStructure{} 111 thisReport.parseFromObjectKey(bucketObject.Key, awsBilling) 112 thisBucketContents = append(thisBucketContents, bucketContents{ 113 origS3Item: bucketObject, 114 awsBillingItem: thisReport, 115 }) 116 //Mark files past the purge date to be purged 117 if bucketObject.LastModified.Before(awsBilling.purgeOlderThan) { 118 purgeObjects = append(purgeObjects, bucketObject) 119 } 120 } 121 122 //So, billing comes with a bunch of different manifets and whatnot. We don't really need all of that 123 //for this basic billing integration, as the first line of the CSVs we're interested in also has 124 //the breakdown of how it's all structured. We only want to process the last file that is present 125 //for each month, as each month we can get 30+ files. The most recent file for any given month should 126 //contain everything we need. 
127 var monthsProcessed []string //The months that have been processed so far 128 var allBills []*billHeader 129 for _, billingObject := range thisBucketContents { 130 isGZ := filepath.Ext(billingObject.awsBillingItem.fileName) == ".gz" //If the filename ends in a .gz then it's probably what we want 131 isBillingPath := strings.Contains(billingObject.awsBillingItem.fileName, billingObject.awsBillingItem.reportName) //If it ends in .gz and has the report name in it, then it's definately what we want 132 notProcessedMonth := !stringInSlice(billingObject.origS3Item.LastModified.Format("200601"), monthsProcessed) //Check if we have processed this month's file already 133 if isGZ && isBillingPath && notProcessedMonth { 134 //Download the file and un-gzip it in one step 135 billingFile, err := billingObject.downloadGzippedItem(awsBilling.downloader, awsBilling) 136 if err != nil { 137 return nil, err 138 } 139 allBills = append(allBills, awsBilling.ReadBillingFile(billingFile)) 140 monthsProcessed = append(monthsProcessed, billingObject.origS3Item.LastModified.Format("200601")) //Log that we've processed this month 141 } 142 } 143 //Dump into scollector format and clean up 144 for _, thisBill := range allBills { 145 thisBill.toTSDB(&md) 146 } 147 //Clean up after ourselves 148 for _, purge := range purgeObjects { 149 _, err := awsBilling.s3svc.DeleteObject(&s3.DeleteObjectInput{ 150 Bucket: aws.String(awsBilling.bucketName), 151 Key: purge.Key, 152 }) 153 if err != nil { 154 slog.Warning("Error deleting object:", err) 155 } 156 } 157 return md, nil 158 } 159 160 //ByLastModified implements sorter for S3 data by last modified date 161 type ByLastModified []*s3.Object 162 163 func (a ByLastModified) Len() int { return len(a) } 164 func (a ByLastModified) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 165 func (a ByLastModified) Less(i, j int) bool { return a[i].LastModified.Before(*a[j].LastModified) } 166 167 type billHeader struct { 168 InvoiceID string 169 BillingEntity 
string 170 BillType string 171 PayerAccountID string 172 BillingPeriodStartDate time.Time 173 BillingPeriodEndDate time.Time 174 LineItems []billLineItem 175 } 176 177 func (b billHeader) toTSDB(md *opentsdb.MultiDataPoint) { 178 productCodes := []string{} 179 denormalisedMetrics := map[denormalisedMetric]float32{} 180 for _, line := range b.LineItems { 181 tags := make(opentsdb.TagSet) 182 //Keep track of the unique product codes we've seen so we can 183 //send metadata for them later 184 if !stringInSlice(line.ProductCode, productCodes) { 185 productCodes = append(productCodes, line.ProductCode) 186 } 187 188 //Suppressing AccountID for now as we only have one account 189 //tags["accountid"] = line.UsageAccountID 190 tags["operation"] = line.Operation 191 metricCode := strings.ToLower(line.ProductCode) 192 metricCode = strings.Replace(metricCode, "amazon", "", 1) 193 //Usage Type is meaningless for R53, they're all 'DNS-Queries' 194 if line.ProductCode != "AmazonRoute53" { 195 tags["usagetype"] = line.UsageType 196 } 197 //Use the zone name if present for Route 53 data 198 if line.ProductCode == "AmazonRoute53" && line.Route53Zone != nil && *line.Route53Zone.Name != "" { 199 tags["resourceid"] = strings.ToLower(*line.Route53Zone.Name) 200 } else if line.ResourceID != "" { 201 //Replace slash and colons as hypens to make them easier to read 202 tags["resourceid"] = strings.ToLower(strings.Replace(strings.Replace(line.ResourceID, "/", "-", -1), ":", "-", -1)) 203 } 204 tags.Clean() //Remove invalid characters 205 descCosts := fmt.Sprintf(awsDescCostsFmt, metricCode) 206 descUsage := fmt.Sprintf(awsDescUsageFmt, metricCode) 207 descUsageRes := fmt.Sprintf(awsDescUsageResFmt, metricCode) 208 descUsageOps := fmt.Sprintf(awsDescUsageOpsFmt, metricCode) 209 //Glacier bills by day, not by hour, for storage, so we need to fill in some fake datapoints 210 if line.ProductCode == "AmazonGlacier" && line.Operation == "Storage" { 211 //We need to create an entry for each hour, 
so loop from 00:00 to 23:00 212 for i := 0; i < 24; i++ { 213 thisTS := line.UsageEndDate.Add(time.Duration(-1*i) * time.Hour).Unix() 214 //Add the normal, tagged metrics straight away 215 AddTS(md, fmt.Sprintf("aws.billing.%v.cost", metricCode), thisTS, line.UnblendedCost/24, tags, metadata.Gauge, metadata.Count, descCosts) //Cost we want to split over the 24 hour period 216 AddTS(md, fmt.Sprintf("aws.billing.%v.usage", metricCode), thisTS, line.UsageAmount, tags, metadata.Gauge, metadata.Count, descUsage) //But usage we can keep the same value for each hour 217 //For the denormalised metrics we need to keep track of them ourselves and add them later 218 denormalisedMetrics[denormalisedMetric{ 219 Metric: "aws.billing.cost_by_product", 220 Time: thisTS, 221 Tag: "product", 222 TagValue: metricCode, 223 Rate: metadata.Gauge, 224 Unit: metadata.Count, 225 Desc: awsDescCostsByProduct, 226 }] += line.UnblendedCost / 24 227 denormalisedMetrics[denormalisedMetric{ 228 Metric: fmt.Sprintf("aws.billing.%v.usage_by_operation", metricCode), 229 Time: thisTS, 230 Tag: "operation", 231 TagValue: tags["operation"], 232 Rate: metadata.Gauge, 233 Unit: metadata.Count, 234 Desc: descUsageOps, 235 }] += line.UsageAmount 236 //Only add data to by_resouce if we have a resource ID 237 if _, present := tags["resourceid"]; present { 238 denormalisedMetrics[denormalisedMetric{ 239 Metric: fmt.Sprintf("aws.billing.%v.usage_by_resource", metricCode), 240 Time: thisTS, 241 Tag: "resourceid", 242 TagValue: tags["resourceid"], 243 Rate: metadata.Gauge, 244 Unit: metadata.Count, 245 Desc: descUsageRes, 246 }] += line.UsageAmount 247 } 248 } 249 } else { 250 //Add the normal, tagged metrics straight away 251 AddTS(md, fmt.Sprintf("aws.billing.%v.cost", metricCode), line.UsageEndDate.Unix(), line.UnblendedCost, tags, metadata.Gauge, metadata.Count, descCosts) 252 AddTS(md, fmt.Sprintf("aws.billing.%v.usage", metricCode), line.UsageEndDate.Unix(), line.UsageAmount, tags, metadata.Gauge, 
metadata.Count, descUsage) 253 //For the denormalised metrics we need to keep track of them ourselves and add them later 254 denormalisedMetrics[denormalisedMetric{ 255 Metric: "aws.billing.cost_by_product", 256 Time: line.UsageEndDate.Unix(), 257 Tag: "product", 258 TagValue: metricCode, 259 Rate: metadata.Gauge, 260 Unit: metadata.Count, 261 Desc: awsDescCostsByProduct, 262 }] += line.UnblendedCost 263 denormalisedMetrics[denormalisedMetric{ 264 Metric: fmt.Sprintf("aws.billing.%v.usage_by_operation", metricCode), 265 Time: line.UsageEndDate.Unix(), 266 Tag: "operation", 267 TagValue: tags["operation"], 268 Rate: metadata.Gauge, 269 Unit: metadata.Count, 270 Desc: descUsageOps, 271 }] += line.UsageAmount 272 //Only add data to by_resouce if we have a resource ID 273 if _, present := tags["resourceid"]; present { 274 denormalisedMetrics[denormalisedMetric{ 275 Metric: fmt.Sprintf("aws.billing.%v.usage_by_resource", metricCode), 276 Time: line.UsageEndDate.Unix(), 277 Tag: "resourceid", 278 TagValue: tags["resourceid"], 279 Rate: metadata.Gauge, 280 Unit: metadata.Count, 281 Desc: descUsageRes, 282 }] += line.UsageAmount 283 } 284 } 285 } 286 //Add our denormalised metrics 287 for metric, value := range denormalisedMetrics { 288 AddTS(md, metric.Metric, metric.Time, value, opentsdb.TagSet{metric.Tag: metric.TagValue}, metric.Rate, metric.Unit, metric.Desc) 289 } 290 } 291 292 type denormalisedMetric struct { 293 Metric string 294 Time int64 295 Tag string 296 TagValue string 297 Rate metadata.RateType 298 Unit metadata.Unit 299 Desc string 300 } 301 302 type billLineItem struct { 303 IdentityLineItemID string `csv:"identity/LineItemId"` 304 UsageAccountID string `csv:"lineItem/UsageAccountId"` 305 LineItemType string `csv:"lineItem/LineItemType"` 306 UsageStartDateRaw string `csv:"lineItem/UsageStartDate"` 307 UsageEndDateRaw string `csv:"lineItem/UsageEndDate"` 308 UsageStartDate time.Time 309 UsageEndDate time.Time 310 ProductCode string 
`csv:"lineItem/ProductCode"` 311 UsageType string `csv:"lineItem/UsageType"` 312 Operation string `csv:"lineItem/Operation"` 313 AvailabilityZone string `csv:"lineItem/AvailabilityZone"` 314 ResourceID string `csv:"lineItem/ResourceId"` 315 UsageAmount float32 `csv:"lineItem/UsageAmount"` 316 CurrencyCode string `csv:"lineItem/CurrencyCode"` 317 UnblendedRate float32 `csv:"lineItem/UnblendedRate"` 318 UnblendedCost float32 `csv:"lineItem/UnblendedCost"` 319 BlendedRate float32 `csv:"lineItem/BlendedRate"` 320 BlendedCost float32 `csv:"lineItem/BlendedCost"` 321 LineItemDescription string `csv:"lineItem/LineItemDescription"` 322 TaxType string `csv:"lineItem/TaxType"` 323 Route53Zone *route53.HostedZone 324 } 325 326 //Using reflection over the billLineItem struct, construct a row based on the 327 //data from the CSV 328 func (b *billLineItem) parseFromBill(row []string, cols map[string]int, awsBilling *awsBillingConfig) { 329 val := reflect.ValueOf(b).Elem() //Reflect the elements of the billLineItem struct 330 for i := 0; i < val.NumField(); i++ { //Run through the number of fields on the struct 331 valueField := val.Field(i) //Get the reflect value for this item on the struct 332 typeField := val.Type().Field(i) //Get the type of this value 333 csvField := strings.ToUpper(typeField.Tag.Get("csv")) //Get the corresponding CSV tag from the type 334 if valueField.CanSet() { //If this is a settable value 335 switch valueField.Kind() { //We only deal with strings and float32's here, as the invoice is pretty simple 336 case reflect.String: //If we have a string 337 valueField.SetString(row[cols[csvField]]) //Set the string to the value from the column labelled by the CSV tag 338 case reflect.Float32: //If we have a float32 339 float, err := strconv.ParseFloat(row[cols[csvField]], 32) //Convert to a 32-bit float 340 if err == nil { 341 valueField.SetFloat(float) //Set the string to the value from the column labelled by the CSV tag 342 } 343 } 344 } 345 } 346 //Fill in 
the date/time fields from their raw counterparts 347 b.UsageStartDate, _ = time.Parse(time.RFC3339, b.UsageStartDateRaw) 348 b.UsageEndDate, _ = time.Parse(time.RFC3339, b.UsageEndDateRaw) 349 //Fetch the Route53 data for this row 350 b.fetchR53(awsBilling) 351 } 352 353 //If there is Route53 data for this row, then populate the Route 53 item 354 func (b *billLineItem) fetchR53(awsBilling *awsBillingConfig) { 355 if b.ProductCode == "AmazonRoute53" && b.ResourceID != "" { //Don't do anything if we don't have any R53 info to get 356 //The billing ID has a huge resource ID, we only need the last part of it 357 zone := strings.Split(b.ResourceID, "/") 358 if len(zone) != 2 { 359 return 360 } 361 zoneID := zone[1] 362 cachedR53Zone, ok := awsBillingR53zones[zoneID] //Check if we have a copy of this zone in our local cache 363 if ok { //If we have a copy of the zone, then use that 364 b.Route53Zone = cachedR53Zone.HostedZone 365 return 366 } 367 //Otherwise we need to fetch it from Route 53 368 thisR53, fetchErr := awsBilling.r53svc.GetHostedZone(&route53.GetHostedZoneInput{ 369 Id: aws.String(zoneID), 370 }) 371 if fetchErr != nil { 372 slog.Infoln("Cannot fetch Route53 hosted zone", b.ResourceID, fetchErr) 373 } 374 awsBillingR53zones[zoneID] = *thisR53 //Store the fetched zone in the cache 375 b.Route53Zone = thisR53.HostedZone //And assign 376 } 377 } 378 379 type bucketContents struct { 380 origS3Item *s3.Object 381 awsBillingItem *billingKeyStructure 382 } 383 384 //Download a GZipped item from S3 and return the non-gzipped version of the item 385 func (billingObject *bucketContents) downloadGzippedItem(downloader *s3manager.Downloader, awsBilling *awsBillingConfig) (*[]byte, error) { 386 //Get a temporary file to dump this into 387 tempFile, err := ioutil.TempFile("", "scollector-aws-billing-") 388 if err != nil { 389 return nil, err 390 } 391 defer tempFile.Close() 392 if _, err = downloader.Download(tempFile, 393 &s3.GetObjectInput{ 394 Bucket: 
aws.String(awsBilling.bucketName), 395 Key: billingObject.origS3Item.Key, 396 }); err != nil { 397 return nil, err 398 } 399 unzippedFile, err := readGzFile(tempFile) 400 if err != nil { 401 return nil, err 402 } 403 tempFile.Close() 404 err = os.Remove(tempFile.Name()) 405 if err != nil { 406 slog.Warningf("Could not remove temporary file: %v", tempFile.Name()) 407 } 408 return &unzippedFile, err 409 } 410 411 //http://stackoverflow.com/a/23635064/69683 412 func readGzFile(fi *os.File) ([]byte, error) { 413 fz, err := gzip.NewReader(fi) 414 if err != nil { 415 return nil, err 416 } 417 defer fz.Close() 418 419 s, err := ioutil.ReadAll(fz) 420 if err != nil { 421 return nil, err 422 } 423 return s, nil 424 } 425 426 type billingKeyStructure struct { 427 reportName string 428 reportStart time.Time 429 reportEnd time.Time 430 reportID string 431 fileName string 432 filePath string 433 } 434 435 //This takes the S3 key for the items (which is the file path) and uses that to populate 436 //a billing key structure, which contains the dates, times, type of report, etc 437 func (b *billingKeyStructure) parseFromObjectKey(key *string, awsBilling *awsBillingConfig) { 438 keyDir, keyFile := filepath.Split(*key) 439 if keyDir == "" || keyFile == "" { 440 return 441 } 442 dirParts := strings.Split(keyDir, "/") 443 if len(dirParts) < 4 || dirParts[0] != awsBilling.bucketPath { 444 return 445 } 446 dateParts := strings.Split(dirParts[2], "-") 447 b.reportName = dirParts[1] 448 b.reportStart, _ = time.Parse(awsBillingReportDateFormat, dateParts[0]) 449 b.reportEnd, _ = time.Parse(awsBillingReportDateFormat, dateParts[1]) 450 if len(dirParts) == 5 { 451 b.reportID = dirParts[3] 452 } 453 b.fileName = keyFile 454 b.filePath = *key 455 } 456 457 func stringInSlice(a string, list []string) bool { 458 for _, b := range list { 459 if b == a { 460 return true 461 } 462 } 463 return false 464 } 465 466 //Process the billing file and spit out a bill that contains the bill header and 467 
//line items 468 func (awsBilling *awsBillingConfig) ReadBillingFile(contents *[]byte) *billHeader { 469 //Parse the bill's CSV format 470 r := csv.NewReader(strings.NewReader(string(*contents))) 471 records, err := r.ReadAll() 472 if err != nil { 473 slog.Error(err) 474 } 475 //This is going to contain our mapping of fields to columns. First line of the 476 //CSV is the field names. If they change in the future (added/deleted/re-org'd) 477 //then we don't need to screw around with static mappings 478 cols := make(map[string]int) 479 //Get the columns from the first row, which are the field names, and throw 480 //them into our struct with their correct position. 481 for colNo, headerVal := range records[0] { 482 cols[strings.ToUpper(headerVal)] = colNo 483 } 484 //Somewhere to store our bill as we parse it 485 var thisBill billHeader 486 //Work through the invoice rows from 1 till the end 487 for rowNo, row := range records[1:] { 488 if rowNo == 1 { //This general invoice stuff is the same on every row, so we only need to parse it once 489 thisBill.InvoiceID = val(row, cols, "bill/InvoiceId") 490 thisBill.BillingEntity = val(row, cols, "bill/BillingEntity") 491 thisBill.BillType = val(row, cols, "bill/BillType") 492 thisBill.PayerAccountID = val(row, cols, "bill/PayerAccountId") 493 thisBill.BillingPeriodStartDate, _ = time.Parse(time.RFC3339, val(row, cols, "bill/BillingPeriodStartDate")) 494 thisBill.BillingPeriodEndDate, _ = time.Parse(time.RFC3339, val(row, cols, "bill/BillingPeriodEndDate")) 495 } 496 //Only process lines that have a product code we care about 497 if awsBilling.prodCodesReg.MatchString(val(row, cols, "lineItem/ProductCode")) { 498 //Parse the contents of the row 499 thisBillLine := &billLineItem{} //Somewhere to store our row 500 thisBillLine.parseFromBill(row, cols, awsBilling) //Parse it 501 thisBill.LineItems = append(thisBill.LineItems, *thisBillLine) //Add it to the bill 502 } 503 } 504 return &thisBill 505 } 506 507 //Return the a given 
//column's value from a row, given a field. cols maps upper-cased column names
//to their index in row, and field is matched case-insensitively. The empty
//string is returned when field is not a known column or when row is too short
//to contain it.
func val(row []string, cols map[string]int, field string) string {
	col, known := cols[strings.ToUpper(field)]
	//An unknown field must not fall back to index 0, which would hand back the
	//first column's value.
	if !known || col < 0 || col >= len(row) {
		return ""
	}
	return row[col]
}