github.com/windmilleng/wat@v0.0.2-0.20180626175338-9349b638e250/cli/wat/decide.go (about) 1 package wat 2 3 import ( 4 "context" 5 "fmt" 6 "sort" 7 "time" 8 9 "github.com/windmilleng/wat/os/ospath" 10 ) 11 12 // The maximum number of commands that decide should return. 13 // In the future, this might be specified by a flag. 14 const nDecideCommands = 3 15 16 // The extra weight of new duration data, to ensure new data 17 // isn't drowned out by old data. 18 // Should be a float64 between 0.0 and 0.5, not inclusive. 19 // We guarantee that a new piece of data will never have less than this weight. 20 const newCostExtraWeight = 0.2 21 22 // The extra weight to add if successCount or failCount is zero 23 const failProbabilityZeroCase = 0.1 24 25 func Decide(ctx context.Context, ws WatWorkspace, n int) ([]WatCommand, error) { 26 t := time.Now() 27 cmdList, err := List(ctx, ws, listTTL) 28 if err != nil { 29 return nil, fmt.Errorf("List: %v", err) 30 } 31 32 files, err := ws.WalkRoot() 33 if err != nil { 34 return nil, fmt.Errorf("ws.WalkRoot: %v", err) 35 } 36 37 cmds := cmdList.Commands 38 logGroups, err := Train(ctx, ws, cmds, trainTTL) 39 if err != nil { 40 return nil, fmt.Errorf("Train: %v", err) 41 } 42 43 sort.Sort(sort.Reverse(fileInfos(files))) 44 res := decideWith(cmds, logGroups, files, n) 45 ws.a.Timer(timerDecide, time.Since(t), nil) 46 return res, nil 47 } 48 49 // Choose the top N commands to run. 50 // 51 // Delegates out to an appropriage algorithm. 52 // 53 // cmds: The list of commands to decide from 54 // logGroups: The history of runs 55 // files: The list of files in this workspace, in sorted order from most 56 // recently modified 57 func decideWith(cmds []WatCommand, logGroups []CommandLogGroup, files []fileInfo, n int) []WatCommand { 58 ds := newDecisionStore() 59 ds.AddCommandLogGroups(logGroups) 60 61 // pick the most likely to fail given recent edits. 62 return gainDecideWith(cmds, ds, files, n) 63 } 64 65 // Choose the top N commands with the highest gain. 66 func gainDecideWith(cmds []WatCommand, ds DecisionStore, files []fileInfo, n int) (result []WatCommand) { 67 // TODO(nick): Right now, we only use the most recently edited file. 68 // There might be other conditions that make more sense, like 3 most-recent. 69 mostRecentFile := "" 70 if len(files) > 0 { 71 mostRecentFile = files[0].name 72 } 73 74 if len(cmds) == 0 { 75 return cmds 76 } 77 78 remainder := append([]WatCommand{}, cmds...) 79 cond := Condition{EditedFile: mostRecentFile} 80 for len(result) < n && len(remainder) > 0 { 81 // Find the maximum-gain test in the remainder list. 82 max := remainder[0] 83 maxGain := ds.CostSensitiveGain(max, cond) 84 85 // More than one index may have the same cost. 86 maxIndices := []int{0} 87 88 for i := 1; i < len(remainder); i++ { 89 cmd := remainder[i] 90 gain := ds.CostSensitiveGain(cmd, cond) 91 if gain > maxGain { 92 max = cmd 93 maxIndices = []int{i} 94 maxGain = gain 95 } else if gain == maxGain { 96 maxIndices = append(maxIndices, i) 97 } 98 } 99 100 // Grab all the commands with the same maximum gain-per-cost. 101 group := []WatCommand{} 102 for _, idx := range maxIndices { 103 group = append(group, remainder[idx]) 104 } 105 106 // If they're enough to satisfy the request, grab all of them. 107 // Otherwise, only grab the first one. 108 if len(group)+len(result) < n { 109 group = group[:1] 110 maxIndices = maxIndices[:1] 111 } 112 113 // Remove from the remainder array in reverse order, 114 // so that the removals don't affect later indices. 115 for j := len(maxIndices) - 1; j >= 0; j-- { 116 idx := maxIndices[j] 117 remainder = append(remainder[:idx], remainder[idx+1:]...) 118 } 119 120 // Use the second-tier sort to sort the commands that have the same priority. 121 group = secondTierDecideWith(group, ds, files, n) 122 result = append(result, group...) 123 124 // On the next iteration of the loop, find the best test command Y 125 // given that the current test command X succeeded. 126 cond = cond.WithSuccess(group[0].Command) 127 } 128 129 if len(result) > n { 130 result = result[:n] 131 } 132 133 return result 134 } 135 136 // All the "dumb" deciding (the non-ML deciding) 137 func secondTierDecideWith(cmds []WatCommand, ds DecisionStore, files []fileInfo, n int) (results []WatCommand) { 138 // first, decide only based on recency. 139 recencyResults, cmds := recencyDecideWith(cmds, files, n) 140 results = append(results, recencyResults...) 141 if len(results) >= n { 142 return results 143 } 144 145 // if we don't have enough results, try picking the cheapest commands 146 cheapestResults, cmds := cheapestDecideWith(cmds, ds, n-len(results)) 147 results = append(results, cheapestResults...) 148 if len(results) >= n { 149 return results 150 } 151 152 // if we still don't have enough results, naively pick the first commands. 153 naiveResults := naiveDecideWith(cmds, n-len(results)) 154 return append(results, naiveResults...) 155 } 156 157 // Choose the top N commands to run. 158 // 159 // This is a super-simple version that just looks at commands associated with recently 160 // edited files. 161 // 162 // cmds: The list of commands to decide from 163 // files: The list of files in this workspace, in sorted order from most 164 // recently modified. 165 // 166 // Returns two sets: the commands we chose, and the commands left. 167 // This makes it easy to chain with other decision algorithms. 168 func recencyDecideWith(cmds []WatCommand, files []fileInfo, n int) (result []WatCommand, remainder []WatCommand) { 169 result = make([]WatCommand, 0, n) 170 171 // We're going to modify the command array, so we need to clone it first. 172 remainder = append([]WatCommand{}, cmds...) 173 174 for _, f := range files { 175 for i, cmd := range remainder { 176 // TODO(nick): Maybe ospath should have a utility for memoizing parsing of 177 // patterns? This is probably not worth optimizing tho. 178 matcher, err := ospath.NewMatcherFromPattern(cmd.FilePattern) 179 if err != nil { 180 continue 181 } 182 183 if !matcher.Match(f.name) { 184 continue 185 } 186 187 result = append(result, cmd) 188 if len(result) >= n { 189 return result, remainder 190 } 191 192 // Remove commands from the array, so that we don't 193 // re-consider it on future iterations. 194 remainder = append(remainder[:i], remainder[i+1:]...) 195 196 // Move onto the next file 197 break 198 } 199 } 200 201 return result, remainder 202 } 203 204 // Choose the top N commands to run. 205 // 206 // This chooses the cheapest command to run. 207 // 208 // Returns two sets: the commands we chose, and the commands left. 209 // This makes it easy to chain with other decision algorithms. 210 func cheapestDecideWith(cmds []WatCommand, ds DecisionStore, n int) (result []WatCommand, remainder []WatCommand) { 211 sorter := WatCommandCostSort{DS: ds} 212 for _, c := range cmds { 213 if ds.HasCost(c) { 214 sorter.Commands = append(sorter.Commands, c) 215 } else { 216 remainder = append(remainder, c) 217 } 218 } 219 sort.Sort(sorter) 220 221 // Pick the N cheapest commands. 222 if n > len(sorter.Commands) { 223 n = len(sorter.Commands) 224 } 225 result = append(result, sorter.Commands[:n]...) 226 remainder = append(remainder, sorter.Commands[n:]...) 227 return result, remainder 228 } 229 230 // Naively pick the first n commands from the list. 231 func naiveDecideWith(cmds []WatCommand, n int) []WatCommand { 232 if n > len(cmds) { 233 n = len(cmds) 234 } 235 return cmds[:n] 236 } 237 238 type DecisionStore struct { 239 costs map[string]CostEstimate 240 history map[CommandWithCondition]ResultHistory 241 } 242 243 func (s DecisionStore) HasCost(cmd WatCommand) bool { 244 return s.costs[cmd.Command].Count != 0 245 } 246 247 func (s DecisionStore) Cost(cmd WatCommand) time.Duration { 248 return s.costs[cmd.Command].Duration 249 } 250 251 // A gain metric. Currently expressed as a unit of gain / cost 252 // Gain is directly proportional to failure probability, as explained in the design doc. 253 // Cost is expressed in seconds 254 // We weight gain higher than cost as gain ^ 2 / cost 255 func (s DecisionStore) CostSensitiveGain(cmd WatCommand, cond Condition) float64 { 256 dur := s.costs[cmd.Command].Duration 257 gain := s.FailureProbability(cmd, cond) 258 return gain * gain / dur.Seconds() 259 } 260 261 func (s DecisionStore) FailureProbability(cmd WatCommand, cond Condition) float64 { 262 results, ok := s.history[CommandWithCondition{Command: cmd.Command, Condition: cond}] 263 if !ok { 264 ancestors := cond.Ancestors() 265 for _, a := range ancestors { 266 results, ok = s.history[CommandWithCondition{Command: cmd.Command, Condition: a}] 267 if ok { 268 break 269 } 270 } 271 } 272 273 zeroCase := failProbabilityZeroCase 274 275 // If the user is editing a file related to this command 276 // (as described by FilePattern), boost the zero case way up. 277 editedFile := cond.EditedFile 278 cmdPattern := cmd.FilePattern 279 if editedFile != "" && cmdPattern != "" { 280 matcher, err := ospath.NewMatcherFromPattern(cmdPattern) 281 if err == nil && matcher.Match(editedFile) { 282 zeroCase = 1 283 } 284 } 285 286 fail := float64(results.FailCount) 287 success := float64(results.SuccessCount) 288 if fail == 0 { 289 fail = zeroCase 290 } 291 if success == 0 { 292 success = zeroCase 293 } 294 return fail / (fail + success) 295 } 296 297 func (s DecisionStore) addCommandCost(l CommandLog, ctx LogContext) { 298 s.costs[l.Command] = s.costs[l.Command].Add(l, ctx) 299 } 300 301 // Add the history of successes and failures for command against a specific environment condition. 302 // The condition must NOT express recent edits, because that information is expressed in LogContext. 303 func (s DecisionStore) addCommandHistory(l CommandLog, ctx LogContext, cond Condition) { 304 if cond.EditedFile != "" { 305 panic("Called addCommandHistory with malformed condition") 306 } 307 308 // Increment the history in the null condition where there are no recently changed files. 309 cmdWithCond := CommandWithCondition{Command: l.Command, Condition: cond} 310 history := s.history[cmdWithCond] 311 s.history[cmdWithCond] = history.Add(l.Success) 312 313 for _, recent := range ctx.RecentEdits { 314 // Increment the history in the condition where a file has been edited recently. 315 cmdWithCond.Condition = cond.WithEditedFile(recent) 316 history := s.history[cmdWithCond] 317 s.history[cmdWithCond] = history.Add(l.Success) 318 } 319 } 320 321 func (s DecisionStore) AddCommandLogGroup(g CommandLogGroup) { 322 logs := g.Logs 323 ctx := g.Context 324 325 for i, log := range logs { 326 s.addCommandCost(log, ctx) 327 s.addCommandHistory(log, ctx, Condition{}) 328 329 // Build up correlations between commands. 330 for j := i + 1; j < len(g.Logs); j++ { 331 logJ := g.Logs[j] 332 if log.Success { 333 s.addCommandHistory(logJ, ctx, Condition{}.WithSuccess(log.Command)) 334 } 335 336 if logJ.Success { 337 s.addCommandHistory(log, ctx, Condition{}.WithSuccess(logJ.Command)) 338 } 339 } 340 } 341 342 } 343 344 func (s DecisionStore) AddCommandLogGroups(logGroups []CommandLogGroup) { 345 for _, g := range logGroups { 346 s.AddCommandLogGroup(g) 347 } 348 } 349 350 func newDecisionStore() DecisionStore { 351 return DecisionStore{ 352 costs: make(map[string]CostEstimate), 353 history: make(map[CommandWithCondition]ResultHistory), 354 } 355 } 356 357 type CostEstimate struct { 358 Duration time.Duration 359 Count int 360 361 // If false, we've only seen bootstrapped durations 362 Real bool 363 } 364 365 // Creates a new cost estimate after working in the old cost estimate. 366 func (c CostEstimate) Add(log CommandLog, ctx LogContext) CostEstimate { 367 isRealLog := ctx.Source != LogSourceBootstrap 368 if isRealLog && !c.Real { 369 // This is the first real log data 370 return CostEstimate{Duration: log.Duration, Count: 1, Real: true} 371 } else if c.Real && !isRealLog { 372 // If we already have real logs, ignore the bootstrap log. 373 return c 374 } 375 376 // Otherwise, fold in new data with a weighted average, so that 377 // new data is worth at least 20%. 378 oldCount := float64(c.Count) 379 newCount := oldCount + 1 380 oldWeight := oldCount/newCount - newCostExtraWeight 381 newWeight := float64(1)/newCount + newCostExtraWeight 382 newDuration := time.Duration( 383 oldWeight*float64(c.Duration.Nanoseconds()) + 384 newWeight*float64(log.Duration.Nanoseconds())) 385 return CostEstimate{ 386 Duration: newDuration, 387 Real: c.Real, 388 Count: c.Count + 1, 389 } 390 } 391 392 type WatCommandCostSort struct { 393 Commands []WatCommand 394 DS DecisionStore 395 } 396 397 func (s WatCommandCostSort) Less(i, j int) bool { 398 return s.DS.Cost(s.Commands[i]) < s.DS.Cost(s.Commands[j]) 399 } 400 401 func (s WatCommandCostSort) Swap(i, j int) { 402 s.Commands[i], s.Commands[j] = s.Commands[j], s.Commands[i] 403 } 404 405 func (s WatCommandCostSort) Len() int { 406 return len(s.Commands) 407 } 408 409 type CommandWithCondition struct { 410 Condition Condition 411 Command string 412 } 413 414 // The environment that a test is run in. 415 // 416 // Must be a value struct so that we can use it as a key in a map. 417 type Condition struct { 418 // A known recently-edited file. 419 EditedFile string 420 421 // A known successful command. 422 SuccessCommand string 423 } 424 425 func (c Condition) WithEditedFile(f string) Condition { 426 c.EditedFile = f 427 return c 428 } 429 430 func (c Condition) WithSuccess(cmd string) Condition { 431 c.SuccessCommand = cmd 432 return c 433 } 434 435 // Get all the conditions that are "ancestors" of this condition, 436 // from most narrow to most broad. 437 func (c Condition) Ancestors() []Condition { 438 results := make([]Condition, 3) 439 hasCommand := c.SuccessCommand != "" 440 hasEditedFile := c.EditedFile != "" 441 if hasCommand { 442 results = append(results, c.WithSuccess("")) 443 } 444 if hasEditedFile { 445 results = append(results, c.WithEditedFile("")) 446 } 447 if hasCommand && hasEditedFile { 448 results = append(results, Condition{}) 449 } 450 return results 451 } 452 453 type ResultHistory struct { 454 SuccessCount uint32 455 FailCount uint32 456 } 457 458 func (h ResultHistory) Add(success bool) ResultHistory { 459 successAdd := uint32(0) 460 failAdd := uint32(0) 461 if success { 462 successAdd = 1 463 } else { 464 failAdd = 1 465 } 466 return ResultHistory{ 467 SuccessCount: h.SuccessCount + successAdd, 468 FailCount: h.FailCount + failAdd, 469 } 470 }