github.com/grailbio/base@v0.0.11/cloud/spotfeed/parser.go (about)

     1  package spotfeed
     2  
     3  import (
     4  	"bufio"
     5  	"fmt"
     6  	"io"
     7  	"regexp"
     8  	"strconv"
     9  	"strings"
    10  	"time"
    11  
    12  	"github.com/grailbio/base/errors"
    13  )
    14  
    15  const (
    16  	feedFileTimestampFormat = "2006-01-02-15"
    17  )
    18  
    19  var (
    20  	feedFileNamePattern = regexp.MustCompile(`^[0-9]{12}\.[0-9]{4}(\-[0-9]{2}){3}\.[0-9]{3}.[a-z0-9]{8}(\.gz)?$`)
    21  )
    22  
// fileMeta holds the metadata that parseFeedFileName extracts from the name of
// a spot data feed file.
type fileMeta struct {
	// NOTE(review): filterable is declared outside this file; presumably it is
	// the interface served by the accountId/timestamp/version accessors — confirm.
	filterable

	Name      string    // complete file name, exactly as given to parseFeedFileName
	AccountId string    // first dot-separated component of the file name
	Timestamp time.Time // second component, parsed with feedFileTimestampFormat
	Version   int64     // third component, parsed as a base-10 integer
	IsGzip    bool      // true when the file name ends with a ".gz" component
}
    32  
// accountId returns the AccountId field of f.
func (f *fileMeta) accountId() string {
	return f.AccountId
}
    36  
// timestamp returns the Timestamp field of f.
func (f *fileMeta) timestamp() time.Time {
	return f.Timestamp
}
    40  
// version returns the Version field of f.
func (f *fileMeta) version() int64 {
	return f.Version
}
    44  
    45  func parseFeedFileName(name string) (*fileMeta, error) {
    46  	if !feedFileNamePattern.MatchString(name) {
    47  		return nil, fmt.Errorf("%s does not match feed fileMeta pattern, skipping", name)
    48  	}
    49  
    50  	fields := strings.Split(name, ".")
    51  	var isGzip bool
    52  	switch len(fields) {
    53  	case 4:
    54  		isGzip = false
    55  	case 5:
    56  		if fields[4] == "gz" {
    57  			isGzip = true
    58  		} else {
    59  			return nil, fmt.Errorf("failed to parse fileMeta name in data feed directory: %s", name)
    60  		}
    61  	default:
    62  		return nil, fmt.Errorf("failed to parse fileMeta name in data feed directory: %s", name)
    63  	}
    64  
    65  	timestamp, err := time.Parse(feedFileTimestampFormat, fields[1])
    66  	if err != nil {
    67  		return nil, errors.E(err, fmt.Sprintf("failed to parse timestamp for name %s", name))
    68  	}
    69  
    70  	version, err := strconv.ParseInt(fields[2], 10, 64)
    71  	if err != nil {
    72  		return nil, errors.E(err, fmt.Sprintf("failed to parse version for name %s", name))
    73  	}
    74  
    75  	return &fileMeta{
    76  		Name:      name,
    77  		AccountId: fields[0],
    78  		Timestamp: timestamp,
    79  		Version:   version,
    80  		IsGzip:    isGzip,
    81  	}, nil
    82  }
    83  
// Entry corresponds to a single line in a Spot Instance data feed file. The
// Spot Instance data feed files are tab-delimited. Each line in the data file
// corresponds to one instance hour and contains the fields listed below. The
// AccountId field is not specified for each individual entry but is given as
// a prefix in the name of the spot data feed file; parseFeedLine therefore
// takes it as a separate argument.
type Entry struct {
	// NOTE(review): filterable is declared outside this file; presumably it is
	// the interface served by the accountId/timestamp/version accessors — confirm.
	filterable

	// AccountId is a 12-digit account number (ID) that specifies the AWS account
	// billed for this spot instance-hour. It is not read from the feed line; it
	// is the accountId argument passed to parseFeedLine.
	AccountId string

	// Timestamp is used to determine the price charged for this instance usage.
	// It is not at the hour boundary but within the hour specified by the title of
	// the data feed file that contains this Entry. It is parsed from the first
	// column of the line using feedLineTimestampFormat.
	Timestamp time.Time

	// UsageType is the type of usage and instance type being charged for. For
	// m1.small Spot Instances, this field is set to SpotUsage. For all other
	// instance types, this field is set to SpotUsage:{instance-type}. For
	// example, SpotUsage:c1.medium.
	UsageType string

	// Instance is the instance type being charged for. It is derived from
	// UsageType by parseUsageType rather than read from a column of its own.
	Instance string

	// Operation is the product being charged for. For Linux Spot Instances,
	// this field is set to RunInstances. For Windows Spot Instances, this
	// field is set to RunInstances:0002. Spot usage is grouped according
	// to Availability Zone.
	Operation string

	// InstanceID is the ID of the Spot Instance that generated this instance
	// usage.
	InstanceID string

	// MyBidID is the ID for the Spot Instance request that generated this instance usage.
	MyBidID string

	// MyMaxPriceUSD is the maximum price specified for this Spot Instance request,
	// parsed from a value of the form "<amount> USD".
	MyMaxPriceUSD float64

	// MarketPriceUSD is the Spot price at the time specified in the Timestamp field,
	// parsed from a value of the form "<amount> USD".
	MarketPriceUSD float64

	// ChargeUSD is the price charged for this instance usage, parsed from a
	// value of the form "<amount> USD".
	ChargeUSD float64

	// Version is the version for this record, described as the version included
	// in the data feed file name. parseFeedLine reads it from the ninth (last)
	// tab-separated column of the line as a base-10 integer.
	Version int64
}
   136  
// accountId returns the AccountId field of e.
func (e *Entry) accountId() string {
	return e.AccountId
}
   140  
// timestamp returns the Timestamp field of e.
func (e *Entry) timestamp() time.Time {
	return e.Timestamp
}
   144  
// version returns the Version field of e.
func (e *Entry) version() int64 {
	return e.Version
}
   148  
   149  // parsePriceUSD parses a price in USD formatted like "6.669 USD".
   150  func parsePriceUSD(priceField string) (float64, error) {
   151  	trimCurrency := strings.TrimSuffix(priceField, " USD")
   152  	if len(trimCurrency) != (len(priceField) - 4) {
   153  		return 0, fmt.Errorf("failed to trim currency from %s", priceField)
   154  	}
   155  	return strconv.ParseFloat(trimCurrency, 64)
   156  }
   157  
   158  // parseUsageType parses the EC2 instance type from the spot data feed column UsageType, as per the AWS documentation.
   159  // For m1.small Spot Instances, this field is set to SpotUsage. For all other instance types, this field is set to
   160  // SpotUsage:{instance-type}. For example, SpotUsage:c1.medium.
   161  func parseUsageType(usageType string) (string, error) {
   162  	fields := strings.Split(usageType, ":")
   163  	if len(fields) == 1 {
   164  		return "m1.small", nil
   165  	}
   166  	if len(fields) == 2 {
   167  		return fields[1], nil
   168  	}
   169  	return "", fmt.Errorf("failed to parse instance from UsageType %s", usageType)
   170  }
   171  
   172  const (
   173  	feedLineTimestampFormat = "2006-01-02 15:04:05 MST"
   174  )
   175  
   176  // parseFeedLine parses an *Entry from a line in a spot data feed file. The content and ordering of the columns
   177  // in this file are documented at https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-data-feeds.html
   178  func parseFeedLine(line string, accountId string) (*Entry, error) {
   179  	fields := strings.Split(line, "\t")
   180  	if len(fields) != 9 {
   181  		return nil, fmt.Errorf("failed to parse line in data feed: %s", line)
   182  	}
   183  
   184  	timestamp, err := time.Parse(feedLineTimestampFormat, fields[0])
   185  	if err != nil {
   186  		return nil, errors.E(err, fmt.Sprintf("failed to parse timestamp for line %s", line))
   187  	}
   188  
   189  	instance, err := parseUsageType(fields[1])
   190  	if err != nil {
   191  		return nil, errors.E(err, fmt.Sprintf("failed to parse usage type for line %s", line))
   192  	}
   193  
   194  	myMaxPriceUSD, err := parsePriceUSD(fields[5])
   195  	if err != nil {
   196  		return nil, errors.E(err, fmt.Sprintf("failed to parse my max price for line %s", line))
   197  	}
   198  
   199  	marketPriceUSD, err := parsePriceUSD(fields[6])
   200  	if err != nil {
   201  		return nil, errors.E(err, fmt.Sprintf("failed to parse market price for line %s", line))
   202  	}
   203  
   204  	chargeUSD, err := parsePriceUSD(fields[7])
   205  	if err != nil {
   206  		return nil, errors.E(err, fmt.Sprintf("failed to parse charge for line %s", line))
   207  	}
   208  
   209  	version, err := strconv.ParseInt(fields[8], 10, 64)
   210  	if err != nil {
   211  		return nil, errors.E(err, fmt.Sprintf("failed to parse version for line %s", line))
   212  	}
   213  
   214  	return &Entry{
   215  		AccountId:      accountId,
   216  		Timestamp:      timestamp,
   217  		UsageType:      fields[1],
   218  		Instance:       instance,
   219  		Operation:      fields[2],
   220  		InstanceID:     fields[3],
   221  		MyBidID:        fields[4],
   222  		MyMaxPriceUSD:  myMaxPriceUSD,
   223  		MarketPriceUSD: marketPriceUSD,
   224  		ChargeUSD:      chargeUSD,
   225  		Version:        version,
   226  	}, nil
   227  }
   228  
   229  func ParseFeedFile(feed io.Reader, accountId string) ([]*Entry, error) {
   230  	scn := bufio.NewScanner(feed)
   231  
   232  	entries := make([]*Entry, 0)
   233  	for scn.Scan() {
   234  		line := scn.Text()
   235  		if strings.HasPrefix(line, "#") {
   236  			continue
   237  		}
   238  
   239  		entry, err := parseFeedLine(scn.Text(), accountId)
   240  		if err != nil {
   241  			return nil, errors.E(err, "")
   242  		}
   243  
   244  		entries = append(entries, entry)
   245  	}
   246  
   247  	return entries, nil
   248  }