github.com/grailbio/base@v0.0.11/traverse/time_estimate_reporter.go (about)

     1  package traverse
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"os"
     7  	"sync"
     8  	"time"
     9  )
    10  
    11  type timeEstimateReporter struct {
    12  	name string
    13  
    14  	mu sync.Mutex
    15  
    16  	numWorkers int32
    17  	numQueued  int32
    18  	numRunning int32
    19  	numDone    int32
    20  	// start time of the Traverse
    21  	startTime time.Time
    22  
    23  	cumulativeRuntime time.Duration
    24  	ticker            *time.Ticker
    25  
    26  	startTimes map[int]time.Time
    27  
    28  	// used to prevent race conditions with printStatus and startTimes queue
    29  }
    30  
    31  // NewTimeEstimateReporter returns a reporter that reports the number
    32  // of jobs queued, running, and done, as well as the running time of
    33  // the Traverse and an estimate for the amount of time remaining.
    34  // Note: for estimation, it assumes jobs have roughly equal running
    35  // time and are FIFO-ish (that is, it does not try to account for the
    36  // bias of shorter jobs finishing first and therefore skewing the
    37  // average estimated job run time).
    38  func NewTimeEstimateReporter(name string) Reporter {
    39  	return &timeEstimateReporter{
    40  		name:       name,
    41  		startTimes: make(map[int]time.Time),
    42  	}
    43  }
    44  
    45  func (r *timeEstimateReporter) Init(n int) {
    46  	r.numQueued = int32(n)
    47  	r.numWorkers = 1
    48  	r.startTime = time.Now()
    49  	r.ticker = time.NewTicker(time.Second)
    50  
    51  	go func() {
    52  		for range r.ticker.C {
    53  			r.mu.Lock()
    54  			r.printStatus()
    55  			r.mu.Unlock()
    56  		}
    57  		fmt.Fprintf(os.Stderr, "\n")
    58  	}()
    59  }
    60  
    61  func (r *timeEstimateReporter) Complete() {
    62  	r.ticker.Stop()
    63  }
    64  
    65  func (r *timeEstimateReporter) Begin(i int) {
    66  	r.mu.Lock()
    67  	defer r.mu.Unlock()
    68  	r.startTimes[i] = time.Now()
    69  	r.numQueued--
    70  	r.numRunning++
    71  	if r.numRunning > r.numWorkers {
    72  		r.numWorkers = r.numRunning
    73  	}
    74  	r.printStatus()
    75  }
    76  
    77  func (r *timeEstimateReporter) End(i int) {
    78  	r.mu.Lock()
    79  	defer r.mu.Unlock()
    80  	start, ok := r.startTimes[i]
    81  	if !ok {
    82  		panic("end called without start")
    83  	}
    84  	delete(r.startTimes, i)
    85  	r.numRunning--
    86  	r.numDone++
    87  	r.cumulativeRuntime += time.Since(start)
    88  
    89  	r.printStatus()
    90  }
    91  
    92  func (r *timeEstimateReporter) printStatus() {
    93  	timeLeftStr := r.buildTimeLeftStr(time.Now())
    94  
    95  	fmt.Fprintf(os.Stderr, "%s: (queued: %d -> running: %d -> done: %d) %v %s \r",
    96  		r.name, r.numQueued, r.numRunning, r.numDone,
    97  		time.Since(r.startTime).Round(time.Second), timeLeftStr)
    98  }
    99  
   100  func (r *timeEstimateReporter) buildTimeLeftStr(currentTime time.Time) string {
   101  	// If some jobs have finished, use their running time for the estimate.  Otherwise, use the duration
   102  	// that the first job has been running.
   103  	var modifier string
   104  	var avgRunTime time.Duration
   105  	if r.cumulativeRuntime > 0 {
   106  		modifier = "~"
   107  		avgRunTime = r.cumulativeRuntime / time.Duration(r.numDone)
   108  	} else if r.numRunning > 0 {
   109  		modifier = ">"
   110  		for _, t := range r.startTimes {
   111  			avgRunTime += currentTime.Sub(t)
   112  		}
   113  		avgRunTime /= time.Duration(len(r.startTimes))
   114  	}
   115  
   116  	runningJobsTimeLeft := time.Duration(r.numRunning)*avgRunTime - r.sumCurrentRunningTimes(currentTime)
   117  	if r.numRunning > 0 {
   118  		runningJobsTimeLeft /= time.Duration(r.numRunning)
   119  	}
   120  	if runningJobsTimeLeft < 0 {
   121  		runningJobsTimeLeft = time.Duration(0)
   122  	}
   123  	queuedJobsTimeLeft := time.Duration(math.Ceil(float64(r.numQueued)/float64(r.numWorkers))) * avgRunTime
   124  
   125  	return fmt.Sprintf("(%s%v left  %v avg)", modifier,
   126  		(queuedJobsTimeLeft + runningJobsTimeLeft).Round(time.Second),
   127  		avgRunTime.Round(time.Second))
   128  }
   129  
   130  func (r *timeEstimateReporter) sumCurrentRunningTimes(currentTime time.Time) time.Duration {
   131  	var totalRunningTime time.Duration
   132  	for _, startTime := range r.startTimes {
   133  		totalRunningTime += currentTime.Sub(startTime)
   134  	}
   135  	return totalRunningTime
   136  }