github.com/grailbio/base@v0.0.11/cloud/spotfeed/parser.go (about) 1 package spotfeed 2 3 import ( 4 "bufio" 5 "fmt" 6 "io" 7 "regexp" 8 "strconv" 9 "strings" 10 "time" 11 12 "github.com/grailbio/base/errors" 13 ) 14 15 const ( 16 feedFileTimestampFormat = "2006-01-02-15" 17 ) 18 19 var ( 20 feedFileNamePattern = regexp.MustCompile(`^[0-9]{12}\.[0-9]{4}(\-[0-9]{2}){3}\.[0-9]{3}.[a-z0-9]{8}(\.gz)?$`) 21 ) 22 23 type fileMeta struct { 24 filterable 25 26 Name string 27 AccountId string 28 Timestamp time.Time 29 Version int64 30 IsGzip bool 31 } 32 33 func (f *fileMeta) accountId() string { 34 return f.AccountId 35 } 36 37 func (f *fileMeta) timestamp() time.Time { 38 return f.Timestamp 39 } 40 41 func (f *fileMeta) version() int64 { 42 return f.Version 43 } 44 45 func parseFeedFileName(name string) (*fileMeta, error) { 46 if !feedFileNamePattern.MatchString(name) { 47 return nil, fmt.Errorf("%s does not match feed fileMeta pattern, skipping", name) 48 } 49 50 fields := strings.Split(name, ".") 51 var isGzip bool 52 switch len(fields) { 53 case 4: 54 isGzip = false 55 case 5: 56 if fields[4] == "gz" { 57 isGzip = true 58 } else { 59 return nil, fmt.Errorf("failed to parse fileMeta name in data feed directory: %s", name) 60 } 61 default: 62 return nil, fmt.Errorf("failed to parse fileMeta name in data feed directory: %s", name) 63 } 64 65 timestamp, err := time.Parse(feedFileTimestampFormat, fields[1]) 66 if err != nil { 67 return nil, errors.E(err, fmt.Sprintf("failed to parse timestamp for name %s", name)) 68 } 69 70 version, err := strconv.ParseInt(fields[2], 10, 64) 71 if err != nil { 72 return nil, errors.E(err, fmt.Sprintf("failed to parse version for name %s", name)) 73 } 74 75 return &fileMeta{ 76 Name: name, 77 AccountId: fields[0], 78 Timestamp: timestamp, 79 Version: version, 80 IsGzip: isGzip, 81 }, nil 82 } 83 84 // Entry corresponds to a single line in a Spot Instance data feed file. The 85 // Spot Instance data feed files are tab-delimited. Each line in the data file 86 // corresponds to one instance hour and contains the fields listed in the 87 // following table. The AccountId field is not specified for each individual entry 88 // but is given as a prefix in the name of the spot data feed file. 89 type Entry struct { 90 filterable 91 92 // AccountId is a 12-digit account number (ID) that specifies the AWS account 93 // billed for this spot instance-hour. 94 AccountId string 95 96 // Timestamp is used to determine the price charged for this instance usage. 97 // It is not at the hour boundary but within the hour specified by the title of 98 // the data feed file that contains this Entry. 99 Timestamp time.Time 100 101 // UsageType is the type of usage and instance type being charged for. For 102 // m1.small Spot Instances, this field is set to SpotUsage. For all other 103 // instance types, this field is set to SpotUsage:{instance-type}. For 104 // example, SpotUsage:c1.medium. 105 UsageType string 106 107 // Instance is the instance type being charged for and is a member of the 108 // set of information provided by UsageType. 109 Instance string 110 111 // Operation is the product being charged for. For Linux Spot Instances, 112 // this field is set to RunInstances. For Windows Spot Instances, this 113 // field is set to RunInstances:0002. Spot usage is grouped according 114 // to Availability Zone. 115 Operation string 116 117 // InstanceID is the ID of the Spot Instance that generated this instance 118 // usage. 119 InstanceID string 120 121 // MyBidID is the ID for the Spot Instance request that generated this instance usage. 122 MyBidID string 123 124 // MyMaxPriceUSD is the maximum price specified for this Spot Instance request. 125 MyMaxPriceUSD float64 126 127 // MarketPriceUSD is the Spot price at the time specified in the Timestamp field. 128 MarketPriceUSD float64 129 130 // ChargeUSD is the price charged for this instance usage. 131 ChargeUSD float64 132 133 // Version is the version included in the data feed file name for this record. 134 Version int64 135 } 136 137 func (e *Entry) accountId() string { 138 return e.AccountId 139 } 140 141 func (e *Entry) timestamp() time.Time { 142 return e.Timestamp 143 } 144 145 func (e *Entry) version() int64 { 146 return e.Version 147 } 148 149 // parsePriceUSD parses a price in USD formatted like "6.669 USD". 150 func parsePriceUSD(priceField string) (float64, error) { 151 trimCurrency := strings.TrimSuffix(priceField, " USD") 152 if len(trimCurrency) != (len(priceField) - 4) { 153 return 0, fmt.Errorf("failed to trim currency from %s", priceField) 154 } 155 return strconv.ParseFloat(trimCurrency, 64) 156 } 157 158 // parseUsageType parses the EC2 instance type from the spot data feed column UsageType, as per the AWS documentation. 159 // For m1.small Spot Instances, this field is set to SpotUsage. For all other instance types, this field is set to 160 // SpotUsage:{instance-type}. For example, SpotUsage:c1.medium. 161 func parseUsageType(usageType string) (string, error) { 162 fields := strings.Split(usageType, ":") 163 if len(fields) == 1 { 164 return "m1.small", nil 165 } 166 if len(fields) == 2 { 167 return fields[1], nil 168 } 169 return "", fmt.Errorf("failed to parse instance from UsageType %s", usageType) 170 } 171 172 const ( 173 feedLineTimestampFormat = "2006-01-02 15:04:05 MST" 174 ) 175 176 // parseFeedLine parses an *Entry from a line in a spot data feed file. The content and ordering of the columns 177 // in this file are documented at https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-data-feeds.html 178 func parseFeedLine(line string, accountId string) (*Entry, error) { 179 fields := strings.Split(line, "\t") 180 if len(fields) != 9 { 181 return nil, fmt.Errorf("failed to parse line in data feed: %s", line) 182 } 183 184 timestamp, err := time.Parse(feedLineTimestampFormat, fields[0]) 185 if err != nil { 186 return nil, errors.E(err, fmt.Sprintf("failed to parse timestamp for line %s", line)) 187 } 188 189 instance, err := parseUsageType(fields[1]) 190 if err != nil { 191 return nil, errors.E(err, fmt.Sprintf("failed to parse usage type for line %s", line)) 192 } 193 194 myMaxPriceUSD, err := parsePriceUSD(fields[5]) 195 if err != nil { 196 return nil, errors.E(err, fmt.Sprintf("failed to parse my max price for line %s", line)) 197 } 198 199 marketPriceUSD, err := parsePriceUSD(fields[6]) 200 if err != nil { 201 return nil, errors.E(err, fmt.Sprintf("failed to parse market price for line %s", line)) 202 } 203 204 chargeUSD, err := parsePriceUSD(fields[7]) 205 if err != nil { 206 return nil, errors.E(err, fmt.Sprintf("failed to parse charge for line %s", line)) 207 } 208 209 version, err := strconv.ParseInt(fields[8], 10, 64) 210 if err != nil { 211 return nil, errors.E(err, fmt.Sprintf("failed to parse version for line %s", line)) 212 } 213 214 return &Entry{ 215 AccountId: accountId, 216 Timestamp: timestamp, 217 UsageType: fields[1], 218 Instance: instance, 219 Operation: fields[2], 220 InstanceID: fields[3], 221 MyBidID: fields[4], 222 MyMaxPriceUSD: myMaxPriceUSD, 223 MarketPriceUSD: marketPriceUSD, 224 ChargeUSD: chargeUSD, 225 Version: version, 226 }, nil 227 } 228 229 func ParseFeedFile(feed io.Reader, accountId string) ([]*Entry, error) { 230 scn := bufio.NewScanner(feed) 231 232 entries := make([]*Entry, 0) 233 for scn.Scan() { 234 line := scn.Text() 235 if strings.HasPrefix(line, "#") { 236 continue 237 } 238 239 entry, err := parseFeedLine(scn.Text(), accountId) 240 if err != nil { 241 return nil, errors.E(err, "") 242 } 243 244 entries = append(entries, entry) 245 } 246 247 return entries, nil 248 }