github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/file/s3file/metrics.go (about) 1 package s3file 2 3 import ( 4 "expvar" 5 "flag" 6 "fmt" 7 "io" 8 "strings" 9 "sync" 10 "time" 11 12 "github.com/Schaudge/grailbase/log" 13 ) 14 15 var ( 16 metricAutologOnce sync.Once 17 metricAutologPeriod = flag.Duration("s3file.metric_log_period", 0, 18 "Interval for logging S3 operation metrics. Zero disables logging.") 19 ) 20 21 func metricAutolog() { 22 metricAutologOnce.Do(func() { 23 if period := *metricAutologPeriod; period > 0 { 24 go logMetricsLoop(period) 25 } 26 }) 27 } 28 29 type metricOpMap struct{ m sync.Map } 30 31 func (m *metricOpMap) Op(key string) *metricOp { 32 var init metricOp 33 got, _ := m.m.LoadOrStore(key, &init) 34 return got.(*metricOp) 35 } 36 37 func (m *metricOpMap) VisitAndReset(f func(string, *metricOp)) { 38 m.m.Range(func(key, value interface{}) bool { 39 m.m.Delete(key) 40 f(key.(string), value.(*metricOp)) 41 return true 42 }) 43 } 44 45 var ( 46 metrics metricOpMap 47 metricRemoteAddrs expvar.Map 48 ) 49 50 type metricOp struct { 51 Count expvar.Int 52 53 Retry1 expvar.Int 54 Retry2 expvar.Int 55 Retry4 expvar.Int 56 Retry8 expvar.Int 57 58 DurationFast expvar.Int 59 Duration1Ms expvar.Int 60 Duration10Ms expvar.Int 61 Duration100Ms expvar.Int 62 Duration1S expvar.Int 63 Duration10S expvar.Int 64 Duration100S expvar.Int 65 66 Bytes expvar.Int 67 } 68 69 type metricOpProgress struct { 70 parent *metricOp 71 start time.Time 72 retries int // == 0 if first try succeeds 73 } 74 75 func (m *metricOp) Start() *metricOpProgress { 76 m.Count.Add(1) 77 return &metricOpProgress{m, time.Now(), 0} 78 } 79 80 func (m *metricOpProgress) Retry() { m.retries++ } 81 82 func (m *metricOpProgress) Bytes(b int) { m.parent.Bytes.Add(int64(b)) } 83 84 func (m *metricOpProgress) Done() { 85 switch { 86 case m.retries >= 8: 87 m.parent.Retry8.Add(1) 88 case m.retries >= 4: 89 m.parent.Retry4.Add(1) 90 case m.retries >= 2: 91 m.parent.Retry2.Add(1) 92 case m.retries >= 1: 93 m.parent.Retry1.Add(1) 94 } 95 96 took := time.Since(m.start) 97 switch { 98 case took > 100*time.Second: 99 m.parent.Duration100S.Add(1) 100 case took > 10*time.Second: 101 m.parent.Duration10S.Add(1) 102 case took > time.Second: 103 m.parent.Duration1S.Add(1) 104 case took > 100*time.Millisecond: 105 m.parent.Duration100Ms.Add(1) 106 case took > 10*time.Millisecond: 107 m.parent.Duration10Ms.Add(1) 108 case took > 1*time.Millisecond: 109 m.parent.Duration1Ms.Add(1) 110 default: 111 m.parent.DurationFast.Add(1) 112 } 113 } 114 115 func (m *metricOp) Write(w io.Writer, period time.Duration) (int, error) { 116 perMinute := 60 / period.Seconds() 117 return fmt.Fprintf(w, "n:%d r:%d/%d/%d/%d t:%d/%d/%d/%d/%d/%d/%d mib:%d [/min]", 118 int(float64(m.Count.Value())*perMinute), 119 int(float64(m.Retry1.Value())*perMinute), 120 int(float64(m.Retry2.Value())*perMinute), 121 int(float64(m.Retry4.Value())*perMinute), 122 int(float64(m.Retry8.Value())*perMinute), 123 int(float64(m.DurationFast.Value())*perMinute), 124 int(float64(m.Duration1Ms.Value())*perMinute), 125 int(float64(m.Duration10Ms.Value())*perMinute), 126 int(float64(m.Duration100Ms.Value())*perMinute), 127 int(float64(m.Duration1S.Value())*perMinute), 128 int(float64(m.Duration10S.Value())*perMinute), 129 int(float64(m.Duration100S.Value())*perMinute), 130 int(float64(m.Bytes.Value())/(1<<20)*perMinute), 131 ) 132 } 133 134 func logMetricsLoop(period time.Duration) { 135 ticker := time.NewTicker(period) 136 defer ticker.Stop() 137 var buf strings.Builder 138 for { 139 select { 140 case <-ticker.C: 141 metrics.VisitAndReset(func(op string, metrics *metricOp) { 142 buf.Reset() 143 fmt.Fprintf(&buf, "s3file metrics: op:%s ", op) 144 _, _ = metrics.Write(&buf, period) 145 log.Print(buf.String()) 146 }) 147 } 148 } 149 }