github.com/windmilleng/wat@v0.0.2-0.20180626175338-9349b638e250/cli/wat/decide.go

github.com/windmilleng/wat@v0.0.2-0.20180626175338-9349b638e250/cli/wat/decide.go (about)

     1  package wat
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sort"
     7  	"time"
     8  
     9  	"github.com/windmilleng/wat/os/ospath"
    10  )
    11  
    12  // The maximum number of commands that decide should return.
    13  // In the future, this might be specified by a flag.
    14  const nDecideCommands = 3
    15  
    16  // The extra weight of new duration data, to ensure new data
    17  // isn't drowned out by old data.
    18  // Should be a float64 between 0.0 and 0.5, not inclusive.
    19  // We guarantee that a new piece of data will never have less than this weight.
    20  const newCostExtraWeight = 0.2
    21  
    22  // The extra weight to add if successCount or failCount is zero
    23  const failProbabilityZeroCase = 0.1
    24  
    25  func Decide(ctx context.Context, ws WatWorkspace, n int) ([]WatCommand, error) {
    26  	t := time.Now()
    27  	cmdList, err := List(ctx, ws, listTTL)
    28  	if err != nil {
    29  		return nil, fmt.Errorf("List: %v", err)
    30  	}
    31  
    32  	files, err := ws.WalkRoot()
    33  	if err != nil {
    34  		return nil, fmt.Errorf("ws.WalkRoot: %v", err)
    35  	}
    36  
    37  	cmds := cmdList.Commands
    38  	logGroups, err := Train(ctx, ws, cmds, trainTTL)
    39  	if err != nil {
    40  		return nil, fmt.Errorf("Train: %v", err)
    41  	}
    42  
    43  	sort.Sort(sort.Reverse(fileInfos(files)))
    44  	res := decideWith(cmds, logGroups, files, n)
    45  	ws.a.Timer(timerDecide, time.Since(t), nil)
    46  	return res, nil
    47  }
    48  
    49  // Choose the top N commands to run.
    50  //
    51  // Delegates out to an appropriage algorithm.
    52  //
    53  // cmds: The list of commands to decide from
    54  // logGroups: The history of runs
    55  // files: The list of files in this workspace, in sorted order from most
    56  //    recently modified
    57  func decideWith(cmds []WatCommand, logGroups []CommandLogGroup, files []fileInfo, n int) []WatCommand {
    58  	ds := newDecisionStore()
    59  	ds.AddCommandLogGroups(logGroups)
    60  
    61  	// pick the most likely to fail given recent edits.
    62  	return gainDecideWith(cmds, ds, files, n)
    63  }
    64  
    65  // Choose the top N commands with the highest gain.
    66  func gainDecideWith(cmds []WatCommand, ds DecisionStore, files []fileInfo, n int) (result []WatCommand) {
    67  	// TODO(nick): Right now, we only use the most recently edited file.
    68  	// There might be other conditions that make more sense, like 3 most-recent.
    69  	mostRecentFile := ""
    70  	if len(files) > 0 {
    71  		mostRecentFile = files[0].name
    72  	}
    73  
    74  	if len(cmds) == 0 {
    75  		return cmds
    76  	}
    77  
    78  	remainder := append([]WatCommand{}, cmds...)
    79  	cond := Condition{EditedFile: mostRecentFile}
    80  	for len(result) < n && len(remainder) > 0 {
    81  		// Find the maximum-gain test in the remainder list.
    82  		max := remainder[0]
    83  		maxGain := ds.CostSensitiveGain(max, cond)
    84  
    85  		// More than one index may have the same cost.
    86  		maxIndices := []int{0}
    87  
    88  		for i := 1; i < len(remainder); i++ {
    89  			cmd := remainder[i]
    90  			gain := ds.CostSensitiveGain(cmd, cond)
    91  			if gain > maxGain {
    92  				max = cmd
    93  				maxIndices = []int{i}
    94  				maxGain = gain
    95  			} else if gain == maxGain {
    96  				maxIndices = append(maxIndices, i)
    97  			}
    98  		}
    99  
   100  		// Grab all the commands with the same maximum gain-per-cost.
   101  		group := []WatCommand{}
   102  		for _, idx := range maxIndices {
   103  			group = append(group, remainder[idx])
   104  		}
   105  
   106  		// If they're enough to satisfy the request, grab all of them.
   107  		// Otherwise, only grab the first one.
   108  		if len(group)+len(result) < n {
   109  			group = group[:1]
   110  			maxIndices = maxIndices[:1]
   111  		}
   112  
   113  		// Remove from the remainder array in reverse order,
   114  		// so that the removals don't affect later indices.
   115  		for j := len(maxIndices) - 1; j >= 0; j-- {
   116  			idx := maxIndices[j]
   117  			remainder = append(remainder[:idx], remainder[idx+1:]...)
   118  		}
   119  
   120  		// Use the second-tier sort to sort the commands that have the same priority.
   121  		group = secondTierDecideWith(group, ds, files, n)
   122  		result = append(result, group...)
   123  
   124  		// On the next iteration of the loop, find the best test command Y
   125  		// given that the current test command X succeeded.
   126  		cond = cond.WithSuccess(group[0].Command)
   127  	}
   128  
   129  	if len(result) > n {
   130  		result = result[:n]
   131  	}
   132  
   133  	return result
   134  }
   135  
   136  // All the "dumb" deciding (the non-ML deciding)
   137  func secondTierDecideWith(cmds []WatCommand, ds DecisionStore, files []fileInfo, n int) (results []WatCommand) {
   138  	// first, decide only based on recency.
   139  	recencyResults, cmds := recencyDecideWith(cmds, files, n)
   140  	results = append(results, recencyResults...)
   141  	if len(results) >= n {
   142  		return results
   143  	}
   144  
   145  	// if we don't have enough results, try picking the cheapest commands
   146  	cheapestResults, cmds := cheapestDecideWith(cmds, ds, n-len(results))
   147  	results = append(results, cheapestResults...)
   148  	if len(results) >= n {
   149  		return results
   150  	}
   151  
   152  	// if we still don't have enough results, naively pick the first commands.
   153  	naiveResults := naiveDecideWith(cmds, n-len(results))
   154  	return append(results, naiveResults...)
   155  }
   156  
   157  // Choose the top N commands to run.
   158  //
   159  // This is a super-simple version that just looks at commands associated with recently
   160  // edited files.
   161  //
   162  // cmds: The list of commands to decide from
   163  // files: The list of files in this workspace, in sorted order from most
   164  //    recently modified.
   165  //
   166  // Returns two sets: the commands we chose, and the commands left.
   167  // This makes it easy to chain with other decision algorithms.
   168  func recencyDecideWith(cmds []WatCommand, files []fileInfo, n int) (result []WatCommand, remainder []WatCommand) {
   169  	result = make([]WatCommand, 0, n)
   170  
   171  	// We're going to modify the command array, so we need to clone it first.
   172  	remainder = append([]WatCommand{}, cmds...)
   173  
   174  	for _, f := range files {
   175  		for i, cmd := range remainder {
   176  			// TODO(nick): Maybe ospath should have a utility for memoizing parsing of
   177  			// patterns? This is probably not worth optimizing tho.
   178  			matcher, err := ospath.NewMatcherFromPattern(cmd.FilePattern)
   179  			if err != nil {
   180  				continue
   181  			}
   182  
   183  			if !matcher.Match(f.name) {
   184  				continue
   185  			}
   186  
   187  			result = append(result, cmd)
   188  			if len(result) >= n {
   189  				return result, remainder
   190  			}
   191  
   192  			// Remove commands from the array, so that we don't
   193  			// re-consider it on future iterations.
   194  			remainder = append(remainder[:i], remainder[i+1:]...)
   195  
   196  			// Move onto the next file
   197  			break
   198  		}
   199  	}
   200  
   201  	return result, remainder
   202  }
   203  
   204  // Choose the top N commands to run.
   205  //
   206  // This chooses the cheapest command to run.
   207  //
   208  // Returns two sets: the commands we chose, and the commands left.
   209  // This makes it easy to chain with other decision algorithms.
   210  func cheapestDecideWith(cmds []WatCommand, ds DecisionStore, n int) (result []WatCommand, remainder []WatCommand) {
   211  	sorter := WatCommandCostSort{DS: ds}
   212  	for _, c := range cmds {
   213  		if ds.HasCost(c) {
   214  			sorter.Commands = append(sorter.Commands, c)
   215  		} else {
   216  			remainder = append(remainder, c)
   217  		}
   218  	}
   219  	sort.Sort(sorter)
   220  
   221  	// Pick the N cheapest commands.
   222  	if n > len(sorter.Commands) {
   223  		n = len(sorter.Commands)
   224  	}
   225  	result = append(result, sorter.Commands[:n]...)
   226  	remainder = append(remainder, sorter.Commands[n:]...)
   227  	return result, remainder
   228  }
   229  
   230  // Naively pick the first n commands from the list.
   231  func naiveDecideWith(cmds []WatCommand, n int) []WatCommand {
   232  	if n > len(cmds) {
   233  		n = len(cmds)
   234  	}
   235  	return cmds[:n]
   236  }
   237  
   238  type DecisionStore struct {
   239  	costs   map[string]CostEstimate
   240  	history map[CommandWithCondition]ResultHistory
   241  }
   242  
   243  func (s DecisionStore) HasCost(cmd WatCommand) bool {
   244  	return s.costs[cmd.Command].Count != 0
   245  }
   246  
   247  func (s DecisionStore) Cost(cmd WatCommand) time.Duration {
   248  	return s.costs[cmd.Command].Duration
   249  }
   250  
   251  // A gain metric. Currently expressed as a unit of gain / cost
   252  // Gain is directly proportional to failure probability, as explained in the design doc.
   253  // Cost is expressed in seconds
   254  // We weight gain higher than cost as gain ^ 2 / cost
   255  func (s DecisionStore) CostSensitiveGain(cmd WatCommand, cond Condition) float64 {
   256  	dur := s.costs[cmd.Command].Duration
   257  	gain := s.FailureProbability(cmd, cond)
   258  	return gain * gain / dur.Seconds()
   259  }
   260  
   261  func (s DecisionStore) FailureProbability(cmd WatCommand, cond Condition) float64 {
   262  	results, ok := s.history[CommandWithCondition{Command: cmd.Command, Condition: cond}]
   263  	if !ok {
   264  		ancestors := cond.Ancestors()
   265  		for _, a := range ancestors {
   266  			results, ok = s.history[CommandWithCondition{Command: cmd.Command, Condition: a}]
   267  			if ok {
   268  				break
   269  			}
   270  		}
   271  	}
   272  
   273  	zeroCase := failProbabilityZeroCase
   274  
   275  	// If the user is editing a file related to this command
   276  	// (as described by FilePattern), boost the zero case way up.
   277  	editedFile := cond.EditedFile
   278  	cmdPattern := cmd.FilePattern
   279  	if editedFile != "" && cmdPattern != "" {
   280  		matcher, err := ospath.NewMatcherFromPattern(cmdPattern)
   281  		if err == nil && matcher.Match(editedFile) {
   282  			zeroCase = 1
   283  		}
   284  	}
   285  
   286  	fail := float64(results.FailCount)
   287  	success := float64(results.SuccessCount)
   288  	if fail == 0 {
   289  		fail = zeroCase
   290  	}
   291  	if success == 0 {
   292  		success = zeroCase
   293  	}
   294  	return fail / (fail + success)
   295  }
   296  
   297  func (s DecisionStore) addCommandCost(l CommandLog, ctx LogContext) {
   298  	s.costs[l.Command] = s.costs[l.Command].Add(l, ctx)
   299  }
   300  
   301  // Add the history of successes and failures for command against a specific environment condition.
   302  // The condition must NOT express recent edits, because that information is expressed in LogContext.
   303  func (s DecisionStore) addCommandHistory(l CommandLog, ctx LogContext, cond Condition) {
   304  	if cond.EditedFile != "" {
   305  		panic("Called addCommandHistory with malformed condition")
   306  	}
   307  
   308  	// Increment the history in the null condition where there are no recently changed files.
   309  	cmdWithCond := CommandWithCondition{Command: l.Command, Condition: cond}
   310  	history := s.history[cmdWithCond]
   311  	s.history[cmdWithCond] = history.Add(l.Success)
   312  
   313  	for _, recent := range ctx.RecentEdits {
   314  		// Increment the history in the condition where a file has been edited recently.
   315  		cmdWithCond.Condition = cond.WithEditedFile(recent)
   316  		history := s.history[cmdWithCond]
   317  		s.history[cmdWithCond] = history.Add(l.Success)
   318  	}
   319  }
   320  
   321  func (s DecisionStore) AddCommandLogGroup(g CommandLogGroup) {
   322  	logs := g.Logs
   323  	ctx := g.Context
   324  
   325  	for i, log := range logs {
   326  		s.addCommandCost(log, ctx)
   327  		s.addCommandHistory(log, ctx, Condition{})
   328  
   329  		// Build up correlations between commands.
   330  		for j := i + 1; j < len(g.Logs); j++ {
   331  			logJ := g.Logs[j]
   332  			if log.Success {
   333  				s.addCommandHistory(logJ, ctx, Condition{}.WithSuccess(log.Command))
   334  			}
   335  
   336  			if logJ.Success {
   337  				s.addCommandHistory(log, ctx, Condition{}.WithSuccess(logJ.Command))
   338  			}
   339  		}
   340  	}
   341  
   342  }
   343  
   344  func (s DecisionStore) AddCommandLogGroups(logGroups []CommandLogGroup) {
   345  	for _, g := range logGroups {
   346  		s.AddCommandLogGroup(g)
   347  	}
   348  }
   349  
   350  func newDecisionStore() DecisionStore {
   351  	return DecisionStore{
   352  		costs:   make(map[string]CostEstimate),
   353  		history: make(map[CommandWithCondition]ResultHistory),
   354  	}
   355  }
   356  
   357  type CostEstimate struct {
   358  	Duration time.Duration
   359  	Count    int
   360  
   361  	// If false, we've only seen bootstrapped durations
   362  	Real bool
   363  }
   364  
   365  // Creates a new cost estimate after working in the old cost estimate.
   366  func (c CostEstimate) Add(log CommandLog, ctx LogContext) CostEstimate {
   367  	isRealLog := ctx.Source != LogSourceBootstrap
   368  	if isRealLog && !c.Real {
   369  		// This is the first real log data
   370  		return CostEstimate{Duration: log.Duration, Count: 1, Real: true}
   371  	} else if c.Real && !isRealLog {
   372  		// If we already have real logs, ignore the bootstrap log.
   373  		return c
   374  	}
   375  
   376  	// Otherwise, fold in new data with a weighted average, so that
   377  	// new data is worth at least 20%.
   378  	oldCount := float64(c.Count)
   379  	newCount := oldCount + 1
   380  	oldWeight := oldCount/newCount - newCostExtraWeight
   381  	newWeight := float64(1)/newCount + newCostExtraWeight
   382  	newDuration := time.Duration(
   383  		oldWeight*float64(c.Duration.Nanoseconds()) +
   384  			newWeight*float64(log.Duration.Nanoseconds()))
   385  	return CostEstimate{
   386  		Duration: newDuration,
   387  		Real:     c.Real,
   388  		Count:    c.Count + 1,
   389  	}
   390  }
   391  
   392  type WatCommandCostSort struct {
   393  	Commands []WatCommand
   394  	DS       DecisionStore
   395  }
   396  
   397  func (s WatCommandCostSort) Less(i, j int) bool {
   398  	return s.DS.Cost(s.Commands[i]) < s.DS.Cost(s.Commands[j])
   399  }
   400  
   401  func (s WatCommandCostSort) Swap(i, j int) {
   402  	s.Commands[i], s.Commands[j] = s.Commands[j], s.Commands[i]
   403  }
   404  
   405  func (s WatCommandCostSort) Len() int {
   406  	return len(s.Commands)
   407  }
   408  
   409  type CommandWithCondition struct {
   410  	Condition Condition
   411  	Command   string
   412  }
   413  
   414  // The environment that a test is run in.
   415  //
   416  // Must be a value struct so that we can use it as a key in a map.
   417  type Condition struct {
   418  	// A known recently-edited file.
   419  	EditedFile string
   420  
   421  	// A known successful command.
   422  	SuccessCommand string
   423  }
   424  
   425  func (c Condition) WithEditedFile(f string) Condition {
   426  	c.EditedFile = f
   427  	return c
   428  }
   429  
   430  func (c Condition) WithSuccess(cmd string) Condition {
   431  	c.SuccessCommand = cmd
   432  	return c
   433  }
   434  
   435  // Get all the conditions that are "ancestors" of this condition,
   436  // from most narrow to most broad.
   437  func (c Condition) Ancestors() []Condition {
   438  	results := make([]Condition, 3)
   439  	hasCommand := c.SuccessCommand != ""
   440  	hasEditedFile := c.EditedFile != ""
   441  	if hasCommand {
   442  		results = append(results, c.WithSuccess(""))
   443  	}
   444  	if hasEditedFile {
   445  		results = append(results, c.WithEditedFile(""))
   446  	}
   447  	if hasCommand && hasEditedFile {
   448  		results = append(results, Condition{})
   449  	}
   450  	return results
   451  }
   452  
   453  type ResultHistory struct {
   454  	SuccessCount uint32
   455  	FailCount    uint32
   456  }
   457  
   458  func (h ResultHistory) Add(success bool) ResultHistory {
   459  	successAdd := uint32(0)
   460  	failAdd := uint32(0)
   461  	if success {
   462  		successAdd = 1
   463  	} else {
   464  		failAdd = 1
   465  	}
   466  	return ResultHistory{
   467  		SuccessCount: h.SuccessCount + successAdd,
   468  		FailCount:    h.FailCount + failAdd,
   469  	}
   470  }