github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/metric/profiling_metric.go (about) 1 // Copyright 2023 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package metric 16 17 import ( 18 "bufio" 19 "errors" 20 "fmt" 21 "os" 22 "strings" 23 "time" 24 25 "github.com/MerlinKodo/gvisor/pkg/atomicbitops" 26 "github.com/MerlinKodo/gvisor/pkg/log" 27 "github.com/MerlinKodo/gvisor/pkg/prometheus" 28 ) 29 30 const ( 31 snapshotBufferSize = 1000 32 snapshotRingbufferSize = 16 33 ) 34 35 var ( 36 // ProfilingMetricWriter is the output destination to which 37 // ProfilingMetrics will be written to in TSV format. 38 ProfilingMetricWriter *os.File 39 // profilingMetricsStarted indicates whether StartProfilingMetrics has 40 // been called. 41 profilingMetricsStarted atomicbitops.Bool 42 // stopProfilingMetrics is used to signal to the profiling metrics 43 // goroutine to stop recording and writing metrics. 44 stopProfilingMetrics chan bool 45 // doneProfilingMetrics is used to signal that the profiling metrics 46 // goroutines are finished. 47 doneProfilingMetrics chan bool 48 // definedProfilingMetrics is the set of metrics known to be created for 49 // profiling (see condmetric_profiling.go). 50 definedProfilingMetrics []string 51 ) 52 53 // snapshots is used to as temporary storage of metric data 54 // before it's written to the ProfilingMetricWriter. 55 type snapshots struct { 56 numMetrics int 57 // ringbuffer is used to store metric data. 58 ringbuffer [][]uint64 59 // curWriterIndex is the ringbuffer index currently being read by the 60 // writer. It should not be used by the collector. 61 curWriterIndex atomicbitops.Int32 62 } 63 64 // writeReq is the message sent between from the collector to the writer. 65 type writeReq struct { 66 ringbufferIdx int 67 // numLines indicates how many data lines are filled in the buffer. 68 numLines int 69 } 70 71 // StartProfilingMetrics checks the ProfilingMetrics runsc flags and creates 72 // goroutines responsible for outputting the profiling metric data. 73 // 74 // Preconditions: 75 // - All metrics are registered. 76 // - Initialize/Disable has been called. 77 func StartProfilingMetrics(profilingMetrics string, profilingRate time.Duration) error { 78 if !initialized.Load() { 79 // Wait for initialization to complete to make sure that all 80 // metrics are registered. 81 return errors.New("metric initialization is not complete") 82 } 83 if ProfilingMetricWriter == nil { 84 return errors.New("tried to initialize profiling metrics without log file") 85 } 86 87 var values []func(fieldValues ...*FieldValue) uint64 88 header := strings.Builder{} 89 header.WriteString("Time (ns)") 90 numMetrics := 0 91 92 if len(profilingMetrics) > 0 { 93 metrics := strings.Split(profilingMetrics, ",") 94 numMetrics = len(metrics) 95 96 for _, name := range metrics { 97 name := strings.TrimSpace(name) 98 m, ok := allMetrics.uint64Metrics[name] 99 if !ok { 100 return fmt.Errorf("given profiling metric name '%s' does not correspond to a registered Uint64 metric", name) 101 } 102 if len(m.fields) > 0 { 103 // TODO(b/240280155): Add support for field values. 104 return fmt.Errorf("will not profile metric '%s' because it has metric fields which are not supported", name) 105 } 106 header.WriteRune('\t') 107 header.WriteString(name) 108 values = append(values, m.value) 109 } 110 111 header.WriteRune('\n') 112 } else { 113 if len(definedProfilingMetrics) > 0 { 114 return fmt.Errorf("a value for --profiling-metrics was not specified; consider using a subset of '--profiling-metrics=%s'", strings.Join(definedProfilingMetrics, ",")) 115 } 116 return fmt.Errorf("a value for --profiling-metrics was not specified; also no conditionally compiled metrics found, consider compiling runsc with --go_tag=condmetric_profiling") 117 } 118 119 if !profilingMetricsStarted.CompareAndSwap(0, 1) { 120 return errors.New("profiling metrics have already been started") 121 } 122 s := snapshots{ 123 numMetrics: numMetrics, 124 ringbuffer: make([][]uint64, snapshotRingbufferSize), 125 // curWriterIndex is initialized to a valid index so that the 126 // collector cannot use up all indices before the writer even has 127 // a chance to start (as unlikely as that is). 128 curWriterIndex: atomicbitops.FromInt32(snapshotRingbufferSize - 1), 129 } 130 for i := 0; i < snapshotRingbufferSize; i++ { 131 s.ringbuffer[i] = make([]uint64, snapshotBufferSize*(numMetrics+1)) 132 } 133 134 stopProfilingMetrics = make(chan bool, 1) 135 doneProfilingMetrics = make(chan bool, 1) 136 writeCh := make(chan writeReq, snapshotRingbufferSize) 137 go collectProfilingMetrics(&s, values, profilingRate, writeCh) 138 go writeProfilingMetrics(&s, header.String(), writeCh) 139 140 return nil 141 } 142 143 // collectProfilingMetrics will send metrics to the writeCh until it receives a 144 // signal via the stopProfilingMetrics channel. 145 func collectProfilingMetrics(s *snapshots, values []func(fieldValues ...*FieldValue) uint64, profilingRate time.Duration, writeCh chan<- writeReq) { 146 defer close(writeCh) 147 148 numEntries := s.numMetrics + 1 // to account for the timestamp 149 ringbufferIdx := 0 150 curSnapshot := 0 151 startTime := CheapNowNano() 152 // getNewRingbufferIdx will block until the writer indicates that some part 153 // of the ringbuffer is available for writing. 154 getNewRingbufferIdx := func() { 155 for { 156 nextIdx := (ringbufferIdx + 1) % snapshotRingbufferSize 157 if nextIdx != int(s.curWriterIndex.Load()) { 158 ringbufferIdx = nextIdx 159 break 160 } 161 // Going too fast, stop collecting for a bit. 162 log.Warningf("Profiling metrics collector exhausted the entire ringbuffer... backing off to let writer catch up.") 163 time.Sleep(profilingRate * 100) 164 } 165 } 166 167 stopCollecting := false 168 for nextCollection := CheapNowNano() + profilingRate.Nanoseconds(); !stopCollecting; nextCollection += profilingRate.Nanoseconds() { 169 now := CheapNowNano() 170 if now < nextCollection { 171 time.Sleep(time.Duration(nextCollection-now) * time.Nanosecond) 172 } else { 173 // Skip collection since we just did one anyway. 174 continue 175 } 176 177 select { 178 case <-stopProfilingMetrics: 179 stopCollecting = true 180 // Collect one last time before stopping. 181 default: 182 } 183 184 collectStart := CheapNowNano() 185 timestamp := time.Duration(collectStart - startTime) 186 base := curSnapshot * numEntries 187 s.ringbuffer[ringbufferIdx][base] = uint64(timestamp) 188 for i := 1; i < numEntries; i++ { 189 s.ringbuffer[ringbufferIdx][base+i] = values[i-1]() 190 } 191 curSnapshot++ 192 193 if curSnapshot == snapshotBufferSize { 194 writeCh <- writeReq{ringbufferIdx: ringbufferIdx, numLines: curSnapshot} 195 curSnapshot = 0 196 getNewRingbufferIdx() 197 } 198 } 199 if curSnapshot != 0 { 200 writeCh <- writeReq{ringbufferIdx: ringbufferIdx, numLines: curSnapshot} 201 } 202 } 203 204 // writeProfilingMetrics will write to the ProfilingMetricsWriter on every 205 // request via writeReqs, until writeReqs is closed. 206 func writeProfilingMetrics(s *snapshots, header string, writeReqs <-chan writeReq) { 207 numEntries := s.numMetrics + 1 208 209 out := bufio.NewWriter(ProfilingMetricWriter) 210 out.WriteString(header) 211 212 for req := range writeReqs { 213 s.curWriterIndex.Store(int32(req.ringbufferIdx)) 214 215 for i := 0; i < req.numLines; i++ { 216 base := i * numEntries 217 // Write the time 218 prometheus.WriteInteger(out, int64(s.ringbuffer[req.ringbufferIdx][base])) 219 // Then everything else 220 for j := 1; j < numEntries; j++ { 221 out.WriteRune('\t') 222 prometheus.WriteInteger(out, int64(s.ringbuffer[req.ringbufferIdx][base+j])) 223 } 224 out.WriteRune('\n') 225 } 226 } 227 228 out.Flush() 229 ProfilingMetricWriter.Close() 230 doneProfilingMetrics <- true 231 close(doneProfilingMetrics) 232 233 profilingMetricsStarted.Store(false) 234 } 235 236 // StopProfilingMetrics stops the profiling metrics goroutines. Call to make sure 237 // all metric data has been flushed. 238 // Note that calling this function prior to StartProfilingMetrics has no effect. 239 func StopProfilingMetrics() { 240 if !profilingMetricsStarted.Load() { 241 return 242 } 243 244 select { 245 case stopProfilingMetrics <- true: 246 <-doneProfilingMetrics 247 default: // Stop signal was already sent 248 } 249 }