// github.com/pachyderm/pachyderm@v1.13.4/src/server/pkg/log/log.go
package log

import (
	"encoding/json"
	"fmt"
	"runtime"
	"strings"
	"sync"
	"time"

	"github.com/fatih/camelcase"
	"github.com/pachyderm/pachyderm/src/client/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/sirupsen/logrus"
)

const (
	bucketFactor = 2.0
	bucketCount  = 20 // Which makes the max bucket 2^20 seconds or ~12 days in size
)

// This needs to be a global var, not a field on the logger, because multiple servers
// create new loggers, and the prometheus registration uses a global namespace
var reportMetricGauge prometheus.Gauge
var reportMetricsOnce sync.Once

// Logger is a helper for emitting our grpc API logs
type Logger interface {
	Log(request interface{}, response interface{}, err error, duration time.Duration)
	LogAtLevelFromDepth(request interface{}, response interface{}, err error, duration time.Duration, level logrus.Level, depth int)
}

type logger struct {
	*logrus.Entry
	histogram   map[string]*prometheus.HistogramVec
	counter     map[string]prometheus.Counter
	mutex       *sync.Mutex // synchronizes access to both histogram and counter maps
	exportStats bool
	service     string
}

// NewLogger creates a new logger
func NewLogger(service string) Logger {
	return newLogger(service, true)
}

// NewLocalLogger creates a new logger for local testing (which does not report prometheus metrics)
func NewLocalLogger(service string) Logger {
	return newLogger(service, false)
}

func newLogger(service string, exportStats bool) Logger {
	l := logrus.StandardLogger()
	l.Formatter = FormatterFunc(Pretty)
	newLogger := &logger{
		l.WithFields(logrus.Fields{"service": service}),
		make(map[string]*prometheus.HistogramVec),
		make(map[string]prometheus.Counter),
		&sync.Mutex{},
		exportStats,
		service,
	}
	if exportStats {
		reportMetricsOnce.Do(func() {
			newReportMetricGauge := prometheus.NewGauge(
				prometheus.GaugeOpts{
					Namespace: "pachyderm",
					Subsystem: "pachd",
					Name:      "report_metric",
					Help:      "gauge of number of calls to ReportMetric()",
				},
			)
			if err := prometheus.Register(newReportMetricGauge); err != nil {
				// metrics may be redundantly registered; ignore these errors
				if !errors.As(err, &prometheus.AlreadyRegisteredError{}) {
					entry := newLogger.WithFields(logrus.Fields{"method": "NewLogger"})
					newLogger.LogAtLevel(entry, logrus.WarnLevel, fmt.Sprintf("error registering prometheus metric: %v", newReportMetricGauge), err)
				}
			} else {
				reportMetricGauge = newReportMetricGauge
			}
		})
	}
	return newLogger
}

// Log is a helper function used to log requests and responses from our gRPC
// method implementations
func (l *logger) Log(request interface{}, response interface{}, err error, duration time.Duration) {
	if err != nil {
		l.LogAtLevelFromDepth(request, response, err, duration, logrus.ErrorLevel, 4)
	} else {
		l.LogAtLevelFromDepth(request, response, err, duration, logrus.InfoLevel, 4)
	}
	// We have to grab the method's name here before we
	// enter the goro's stack
	go l.ReportMetric(getMethodName(), duration, err)
}

func getMethodName() string {
	depth := 4
	pc := make([]uintptr, depth)
	runtime.Callers(depth, pc)
	split := strings.Split(runtime.FuncForPC(pc[0]).Name(), ".")
	return split[len(split)-1]
}
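
// A hypothetical caller of Log (sketch only; apiServer, InspectRepo, and the
// pfs request/response types are illustrative names, not part of this
// package, and apiServer is assumed to embed Logger). The fixed depth of 4
// used above assumes Log is invoked from a deferred closure inside the gRPC
// handler itself, so the stack walk resolves the handler's name:
//
//	func (a *apiServer) InspectRepo(ctx context.Context, req *pfs.InspectRepoRequest) (resp *pfs.RepoInfo, retErr error) {
//		start := time.Now()
//		defer func() { a.Log(req, resp, retErr, time.Since(start)) }()
//		// ... handler body ...
//		return resp, nil
//	}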

func (l *logger) ReportMetric(method string, duration time.Duration, err error) {
	if !l.exportStats {
		return
	}
	// Count the number of ReportMetric() goros in case we start to leak them
	if reportMetricGauge != nil {
		reportMetricGauge.Inc()
	}
	defer func() {
		if reportMetricGauge != nil {
			reportMetricGauge.Dec()
		}
	}()
	l.mutex.Lock() // for concurrent map access (histogram, counter)
	defer l.mutex.Unlock()
	state := "started"
	if err != nil {
		state = "errored"
	} else {
		if duration.Seconds() > 0 {
			state = "finished"
		}
	}
	entry := l.WithFields(logrus.Fields{"method": method})

	var tokens []string
	for _, token := range camelcase.Split(method) {
		tokens = append(tokens, strings.ToLower(token))
	}
	rootStatName := strings.Join(tokens, "_")

	// Recording the distribution of started times is meaningless
	if state != "started" {
		runTimeName := fmt.Sprintf("%v_time", rootStatName)
		runTime, ok := l.histogram[runTimeName]
		if !ok {
			runTime = prometheus.NewHistogramVec(
				prometheus.HistogramOpts{
					Namespace: "pachyderm",
					Subsystem: fmt.Sprintf("pachd_%v", topLevelService(l.service)),
					Name:      runTimeName,
					Help:      fmt.Sprintf("Run time of %v", method),
					Buckets:   prometheus.ExponentialBuckets(1.0, bucketFactor, bucketCount),
				},
				[]string{
					"state", // Since both finished and errored API calls can have run times
				},
			)
			if err := prometheus.Register(runTime); err != nil {
				// metrics may be redundantly registered; ignore these errors
				if !errors.As(err, &prometheus.AlreadyRegisteredError{}) {
					l.LogAtLevel(entry, logrus.WarnLevel, fmt.Sprintf("error registering prometheus metric %v: %v", runTime, runTimeName), err)
				}
			} else {
				l.histogram[runTimeName] = runTime
			}
		}
		if hist, err := runTime.GetMetricWithLabelValues(state); err != nil {
			l.LogAtLevel(entry, logrus.WarnLevel, fmt.Sprintf("failed to get histogram with labels: state (%v) with error %v", state, err))
		} else {
			hist.Observe(duration.Seconds())
		}
	}

	secondsCountName := fmt.Sprintf("%v_seconds_count", rootStatName)
	secondsCount, ok := l.counter[secondsCountName]
	if !ok {
		secondsCount = prometheus.NewCounter(
			prometheus.CounterOpts{
				Namespace: "pachyderm",
				Subsystem: fmt.Sprintf("pachd_%v", topLevelService(l.service)),
				Name:      secondsCountName,
				Help:      fmt.Sprintf("cumulative number of seconds spent in %v", method),
			},
		)
		if err := prometheus.Register(secondsCount); err != nil {
			// metrics may be redundantly registered; ignore these errors
			if !errors.As(err, &prometheus.AlreadyRegisteredError{}) {
				l.LogAtLevel(entry, logrus.WarnLevel, fmt.Sprintf("error registering prometheus metric %v: %v", secondsCount, secondsCountName), err)
			}
		} else {
			l.counter[secondsCountName] = secondsCount
		}
	}
	secondsCount.Add(duration.Seconds())
}
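
// As a concrete illustration (names derived from the code above, assuming a
// logger created with NewLogger("pfs.API") and a gRPC method named
// "InspectRepo"), ReportMetric registers and updates metrics that Prometheus
// exposes roughly as:
//
//	pachyderm_pachd_pfs_inspect_repo_time_bucket{state="finished",le="..."}  # run-time histogram
//	pachyderm_pachd_pfs_inspect_repo_seconds_count                           # cumulative seconds spent in the call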

func (l *logger) LogAtLevel(entry *logrus.Entry, level logrus.Level, args ...interface{}) {
	entry.Log(level, args...)
}

func (l *logger) LogAtLevelFromDepth(request interface{}, response interface{}, err error, duration time.Duration, level logrus.Level, depth int) {
	// We're only interested in 1 stack frame, however due to weirdness with
	// inlining sometimes you need to get more than 1 caller so that
	// CallersFrames can resolve the first function. 2 seems to be enough, but
	// we've set it to 5 to insulate us more, because this at one point broke
	// due to some compile optimization changes.
	pc := make([]uintptr, 5)
	runtime.Callers(depth, pc)
	frames := runtime.CallersFrames(pc)
	frame, ok := frames.Next()
	fields := logrus.Fields{}
	if ok {
		split := strings.Split(frame.Function, ".")
		method := split[len(split)-1]
		fields["method"] = method
	} else {
		fields["warn"] = "failed to resolve method"
	}
	fields["request"] = request
	if response != nil {
		fields["response"] = response
	}
	if err != nil {
		// "err" itself might be a code or even an empty struct
		fields["error"] = err.Error()
		var frames []string
		errors.ForEachStackFrame(err, func(frame errors.Frame) {
			frames = append(frames, fmt.Sprintf("%+v", frame))
		})
		fields["stack"] = frames
	}
	if duration > 0 {
		fields["duration"] = duration
	}
	l.LogAtLevel(l.WithFields(fields), level)
}

func topLevelService(fullyQualifiedService string) string {
	tokens := strings.Split(fullyQualifiedService, ".")
	return tokens[0]
}

// FormatterFunc is a type alias for a function that satisfies logrus'
// `Formatter` interface
type FormatterFunc func(entry *logrus.Entry) ([]byte, error)

// Format proxies the closure in order to satisfy `logrus.Formatter`'s
// interface.
func (f FormatterFunc) Format(entry *logrus.Entry) ([]byte, error) {
	return f(entry)
}

// Pretty formats a logrus entry like so:
// ```
// 2019-02-11T16:02:02Z INFO pfs.API.InspectRepo {"request":{"repo":{"name":"images"}}} []
// ```
func Pretty(entry *logrus.Entry) ([]byte, error) {
	serialized := []byte(
		fmt.Sprintf(
			"%v %v ",
			entry.Time.Format(time.RFC3339),
			strings.ToUpper(entry.Level.String()),
		),
	)
	if entry.Data["service"] != nil && entry.Data["method"] != nil {
		serialized = append(serialized, []byte(fmt.Sprintf("%v.%v ", entry.Data["service"], entry.Data["method"]))...)
		delete(entry.Data, "service")
		delete(entry.Data, "method")
	}
	if len(entry.Data) > 0 {
		if entry.Data["duration"] != nil {
			entry.Data["duration"] = entry.Data["duration"].(time.Duration).Seconds()
		}
		data, err := json.Marshal(entry.Data)
		if err != nil {
			return nil, errors.Wrapf(err, "failed to marshal fields to JSON")
		}
		serialized = append(serialized, data...)
		serialized = append(serialized, ' ')
	}

	serialized = append(serialized, []byte(entry.Message)...)
	serialized = append(serialized, '\n')
	return serialized, nil
}

// GRPCLogWriter proxies gRPC- and etcd-produced log messages to a logrus
// logger. Because it implements `io.Writer`, it could be used anywhere
// `io.Writer`s are used, but it has some logic specifically designed to
// handle gRPC-formatted logs.
type GRPCLogWriter struct {
	logger *logrus.Logger
	source string
}

// NewGRPCLogWriter creates a new GRPC log writer. `logger` specifies the
// underlying logger, and `source` specifies where these logs are coming from;
// it is added as an entry field for all log messages.
func NewGRPCLogWriter(logger *logrus.Logger, source string) *GRPCLogWriter {
	return &GRPCLogWriter{
		logger: logger,
		source: source,
	}
}
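
// A minimal wiring sketch (an assumption, not shown in this file): the writer
// can be handed to gRPC's logging package so that gRPC's internal messages
// flow through logrus, e.g. with google.golang.org/grpc/grpclog:
//
//	w := NewGRPCLogWriter(logrus.StandardLogger(), "grpc")
//	grpclog.SetLoggerV2(grpclog.NewLoggerV2(w, w, w))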

// Write allows `GRPCLogWriter` to implement the `io.Writer` interface. This
// will take gRPC logs, which look something like this:
// ```
// INFO: 2019/02/18 12:21:54 ClientConn switching balancer to "pick_first"
// ```
// strip out redundant content, and print the message at the appropriate log
// level in logrus. Any parse errors of the log message will be reported in
// logrus as well.
func (l *GRPCLogWriter) Write(p []byte) (int, error) {
	parts := strings.SplitN(string(p), " ", 4)
	entry := l.logger.WithField("source", l.source)

	if len(parts) == 4 {
		// parts[1] and parts[2] contain the date and time, but logrus already
		// adds this under the `time` entry field, so it's not needed (though
		// the time will presumably be marginally ahead of the original log
		// message)
		level := parts[0]
		message := strings.TrimSpace(parts[3])

		if level == "INFO:" {
			entry.Info(message)
		} else if level == "ERROR:" {
			entry.Error(message)
		} else if level == "WARNING:" {
			entry.Warning(message)
		} else if level == "FATAL:" {
			// no need to call fatal ourselves because gRPC will exit the
			// process
			entry.Error(message)
		} else {
			entry.Error(message)
			entry.Errorf("entry had unknown log level prefix: '%s'; this is a bug, please report it along with the previous log entry", level)
		}
	} else {
		// can't format the message -- just display the contents
		entry.Error(string(p))
		entry.Error("entry had unexpected format; this is a bug, please report it along with the previous log entry")
	}

	return len(p), nil
}
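
// End-to-end usage sketch (illustrative only; the service name and request
// value below are assumptions, not part of this package):
//
//	logger := NewLocalLogger("pfs.API") // local testing: no prometheus reporting
//	start := time.Now()
//	// ... do some work ...
//	logger.Log(map[string]string{"repo": "images"}, nil, nil, time.Since(start))
//
// With the Pretty formatter installed by newLogger, this renders roughly as
// `2019-02-11T16:02:02Z INFO pfs.API.<resolved-method> {"duration":...,"request":{"repo":"images"}}`,
// where the method name is whatever function the stack walk in
// LogAtLevelFromDepth resolves for the caller.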