go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/common/logging/sdlogger/logger.go (about)

     1  // Copyright 2018 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package sdlogger is a logger that formats entries as JSON understood by
    16  // Stackdriver log collectors.
    17  //
    18  // The JSON format is defined by the Google Cloud Logging plugin for fluentd,
    19  // which is mainly used by Google Kubernetes Engine + Stackdriver stack.
    20  // See https://github.com/GoogleCloudPlatform/fluent-plugin-google-cloud
    21  //
    22  // Though not properly documented, GAE environment also understands subset of
    23  // this format.
    24  package sdlogger
    25  
    26  import (
    27  	"bytes"
    28  	"context"
    29  	"encoding/json"
    30  	"fmt"
    31  	"io"
    32  	"net/http"
    33  	"regexp"
    34  	"runtime"
    35  	"strings"
    36  	"sync"
    37  	"time"
    38  
    39  	"cloud.google.com/go/errorreporting"
    40  
    41  	"go.chromium.org/luci/common/clock"
    42  	"go.chromium.org/luci/common/errors"
    43  	"go.chromium.org/luci/common/logging"
    44  )
    45  
    46  var errStackRe = regexp.MustCompile(`(.*)[\r\n]+` + `(` + errors.RenderedStackDumpRe + `[\s\S]*)`)
    47  
    48  // Severity is a string denoting the logging level of the entry.
    49  //
    50  // Must be one of the predefined constants, otherwise Stackdriver may not
    51  // recognize it.
    52  type Severity string
    53  
    54  // All allowed severities. Using other values results in undefined behavior.
    55  const (
    56  	UnknownSeverity Severity = ""
    57  	DebugSeverity   Severity = "DEBUG"
    58  	InfoSeverity    Severity = "INFO"
    59  	WarningSeverity Severity = "WARNING"
    60  	ErrorSeverity   Severity = "ERROR"
    61  )
    62  
    63  // LevelToSeverity converts logging level to the corresponding severity.
    64  func LevelToSeverity(l logging.Level) Severity {
    65  	switch l {
    66  	case logging.Debug:
    67  		return DebugSeverity
    68  	case logging.Info:
    69  		return InfoSeverity
    70  	case logging.Warning:
    71  		return WarningSeverity
    72  	case logging.Error:
    73  		return ErrorSeverity
    74  	default:
    75  		return UnknownSeverity
    76  	}
    77  }
    78  
    79  // LogEntry defines a structure of JSON log entries recognized by google-fluentd
    80  // plugin on GKE.
    81  //
    82  // See https://cloud.google.com/logging/docs/agent/configuration#process-payload
    83  // for a list of possible magical fields. All other fields are exposed as part
    84  // of jsonPayload in Stackdriver logs.
    85  type LogEntry struct {
    86  	// Severity is a string denoting the logging level of the entry.
    87  	Severity Severity `json:"severity"`
    88  	// Message is a single line human readable string of the log message.
    89  	Message string `json:"message,omitempty"`
    90  	// Timestamp is the unix timestamp of when the entry was produced.
    91  	Timestamp Timestamp `json:"timestamp,omitempty"`
    92  	// TraceID is 32-byte hex string with Stackdriver trace ID.
    93  	TraceID string `json:"logging.googleapis.com/trace,omitempty"`
    94  	// TraceSampled is true if this trace will be uploaded to Stackdriver.
    95  	TraceSampled bool `json:"logging.googleapis.com/trace_sampled,omitempty"`
    96  	// SpanID is a 16-byte hex string with Stackdriver span ID.
    97  	SpanID string `json:"logging.googleapis.com/spanId,omitempty"`
    98  	// Operation is used to group log lines from a single request together.
    99  	Operation *Operation `json:"logging.googleapis.com/operation,omitempty"`
   100  	// RequestInfo is information about the handled HTTP request.
   101  	RequestInfo *RequestInfo `json:"httpRequest,omitempty"`
   102  	// Fields are extra structured data that may be tagged in the log entry.
   103  	Fields logging.Fields `json:"fields,omitempty"`
   104  }
   105  
   106  // Timestamp represents a unix timestamp.
   107  type Timestamp struct {
   108  	Seconds int64 `json:"seconds"`
   109  	Nanos   int64 `json:"nanos"`
   110  }
   111  
   112  // ToTimestamp converts time.Time to Timestamp.
   113  func ToTimestamp(t time.Time) Timestamp {
   114  	ts := t.UnixNano()
   115  	return Timestamp{
   116  		Seconds: ts / 1e9,
   117  		Nanos:   ts % 1e9,
   118  	}
   119  }
   120  
   121  // Operation is used to group log lines from a single request together.
   122  type Operation struct {
   123  	ID string `json:"id"`
   124  }
   125  
   126  // RequestInfo contains information about handled HTTP request.
   127  //
   128  // See following definition for all fields we can possibly expose here:
   129  // https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#HttpRequest
   130  type RequestInfo struct {
   131  	Method       string `json:"requestMethod"` // e.g. "GET"
   132  	URL          string `json:"requestUrl"`    // e.g. "http://host/path"
   133  	Status       int    `json:"status"`        // e.g. 200
   134  	RequestSize  string `json:"requestSize"`   // e.g. "123123"
   135  	ResponseSize string `json:"responseSize"`  // e.g. "2324"
   136  	UserAgent    string `json:"userAgent"`     // e.g. "Mozilla/4.0 ..."
   137  	RemoteIP     string `json:"remoteIp"`      // e.g. "192.168.1.1"
   138  	Latency      string `json:"latency"`       // e.g. "3.5s"
   139  }
   140  
   141  // LogEntryMutator may mutate log entries before they are written.
   142  type LogEntryMutator func(context.Context, *LogEntry)
   143  
   144  // LogEntryWriter knows how to write LogEntries to some output.
   145  type LogEntryWriter interface {
   146  	// Write appends a log entry to the output.
   147  	Write(*LogEntry)
   148  }
   149  
   150  // Sink takes care of JSON-serializing log entries and synchronizing writes
   151  // to an io.Writer (usually stdout or stderr).
   152  //
   153  // Implements LogEntryWriter.
   154  //
   155  // Sink can either be used directly for fine-grain control of what is getting
   156  // logged, or via a logging.Logger for interoperability with most of LUCI code.
   157  //
   158  // There should be at most one Sink instance assigned to a given io.Writer,
   159  // shared by all Logger instances writing to it. Violating this requirement may
   160  // cause malformed log lines.
   161  type Sink struct {
   162  	Out io.Writer
   163  
   164  	l sync.Mutex
   165  }
   166  
   167  // Write appends a log entry to the output by JSON-serializing and writing it.
   168  //
   169  // Panics if the LogEntry can't be serialized (this should not be possible).
   170  // Ignores errors from io.Writer.
   171  func (s *Sink) Write(l *LogEntry) {
   172  	buf, err := json.Marshal(l)
   173  	if err != nil {
   174  		panic(err)
   175  	}
   176  	s.l.Lock()
   177  	defer s.l.Unlock()
   178  	s.Out.Write(buf)
   179  	s.Out.Write([]byte("\n"))
   180  }
   181  
   182  // CloudErrorsSink wraps a LogEntryWriter and an errorreporting.Client.
   183  type CloudErrorsSink struct {
   184  	Client  *errorreporting.Client
   185  	Request *http.Request
   186  	Out     LogEntryWriter
   187  }
   188  
   189  // Write appends a log entry to the given writer and reports the message to the
   190  // cloud error reporting for the Error severity.
   191  func (s *CloudErrorsSink) Write(l *LogEntry) {
   192  	if l.Severity == ErrorSeverity {
   193  		s.Client.Report(prepErrorReportingEntry(l, s.Request))
   194  	}
   195  	s.Out.Write(l)
   196  }
   197  
   198  // prepErrorReportingEntry prepares an errorreporting.Entry from the LogEntry message.
   199  func prepErrorReportingEntry(l *LogEntry, req *http.Request) errorreporting.Entry {
   200  	// Append the trace id to make it easier to search in Cloud Logs Explorer.
   201  	if msgWithStack := errStackRe.FindStringSubmatch(l.Message); msgWithStack != nil {
   202  		return errorreporting.Entry{
   203  			Req:   req,
   204  			Error: errors.New(strings.TrimSpace(msgWithStack[1]) + " (Log Trace ID: " + l.TraceID + ")"),
   205  			Stack: []byte(msgWithStack[2]),
   206  		}
   207  	}
   208  	// Capture the stack where the error happened. Limit the stack trace to 16K
   209  	// and remove all stack frames that belong to the logging library itself.
   210  	var buf [16 * 1024]byte
   211  	stack := buf[0:runtime.Stack(buf[:], false)]
   212  	stack = cleanupStack(stack)
   213  	return errorreporting.Entry{
   214  		Req:   req,
   215  		Error: errors.New(l.Message + " (Log Trace ID: " + l.TraceID + ")"),
   216  		Stack: stack,
   217  	}
   218  }
   219  
   220  // cleanupStack removes internal logging stack frames from the stack trace.
   221  //
   222  // That way the data reported to Cloud Error Reporting is cleaner.
   223  func cleanupStack(s []byte) []byte {
   224  	// `s` has form:
   225  	//
   226  	//  <first line with some overall message>\n
   227  	//  <symbolname>(<args>)\n
   228  	//  \t<filepath> <offset>\n
   229  	//  <symbolname>(<args>)\n
   230  	//  \t<filepath> <offset>\n
   231  	//  ...
   232  	//
   233  	// We need to preserve the first line, but remove all head entries (pairs of
   234  	// lines) for symbols in go.chromium.org/luci/common/logging package.
   235  	const skipPrefix = "go.chromium.org/luci/common/logging"
   236  
   237  	readLine := func() (line []byte) {
   238  		if idx := bytes.IndexByte(s, '\n'); idx != -1 {
   239  			line, s = s[:idx+1], s[idx+1:]
   240  		}
   241  		return line
   242  	}
   243  
   244  	firstLine := readLine()
   245  	if firstLine == nil {
   246  		return s
   247  	}
   248  
   249  	// Write the first line to the output as is.
   250  	out := bytes.Buffer{}
   251  	out.Grow(len(firstLine) + len(s))
   252  	_, _ = out.Write(firstLine)
   253  
   254  	// Skip pairs of lines we want filtered out.
   255  	for len(s) > 0 {
   256  		// Read <symbolname>(<args>)\n.
   257  		if sym := readLine(); !bytes.HasPrefix(sym, []byte(skipPrefix)) {
   258  			// Something unexpected (or just EOF). Dump it all to the output.
   259  			_, _ = out.Write(sym)
   260  			break
   261  		}
   262  		// Skip the following line with "\t<filepath> <offset>\n" as well.
   263  		_ = readLine()
   264  	}
   265  
   266  	// Dump the rest as is.
   267  	_, _ = out.Write(s)
   268  	return out.Bytes()
   269  }
   270  
   271  // Factory returns a factory of logger.Logger instances that log to the given
   272  // LogEntryWriter (usually &Sink{...}) using the LogEntry (if any) as a
   273  // prototype for log entries.
   274  //
   275  // For each log message, makes a copy of 'prototype', overwrites its Severity,
   276  // Message, Time and Fields (keeping other fields as they are), calls 'mut'
   277  // callback (which is allowed to mutate the log entry), and writes the result to
   278  // LogEntryWriter. This allows to "prepopulate" fields like TraceID or
   279  // OperationID in all log entries emitted by logging.Logger.
   280  //
   281  // Such factory can be installed in the context via logging.SetFactory.
   282  func Factory(w LogEntryWriter, prototype LogEntry, mut LogEntryMutator) func(context.Context) logging.Logger {
   283  	return func(ctx context.Context) logging.Logger {
   284  		return &jsonLogger{
   285  			ctx:       ctx,
   286  			w:         w,
   287  			prototype: prototype,
   288  			mut:       mut,
   289  		}
   290  	}
   291  }
   292  
   293  type jsonLogger struct {
   294  	ctx       context.Context
   295  	w         LogEntryWriter
   296  	prototype LogEntry
   297  	mut       LogEntryMutator
   298  
   299  	once      sync.Once      // used to initializes fields from 'ctx' on demand
   300  	fields    logging.Fields // in a structured form for LogEntry.Fields
   301  	fieldsStr string         // in a string form for LogEntry.Message
   302  }
   303  
   304  func (l *jsonLogger) Debugf(format string, args ...any) {
   305  	l.LogCall(logging.Debug, 1, format, args)
   306  }
   307  
   308  func (l *jsonLogger) Infof(format string, args ...any) {
   309  	l.LogCall(logging.Info, 1, format, args)
   310  }
   311  
   312  func (l *jsonLogger) Warningf(format string, args ...any) {
   313  	l.LogCall(logging.Warning, 1, format, args)
   314  }
   315  
   316  func (l *jsonLogger) Errorf(format string, args ...any) {
   317  	l.LogCall(logging.Error, 1, format, args)
   318  }
   319  
   320  func (l *jsonLogger) LogCall(lvl logging.Level, calldepth int, format string, args []any) {
   321  	if !logging.IsLogging(l.ctx, lvl) {
   322  		return
   323  	}
   324  
   325  	// Within a single context.Context fields are static and we can strinfigy them
   326  	// once when really necessary.
   327  	l.once.Do(func() {
   328  		fields := logging.GetFields(l.ctx)
   329  		if len(fields) == 0 {
   330  			return
   331  		}
   332  
   333  		// logging.ErrorKey usually points to a value that implements 'error'
   334  		// interface, which is not JSON-serializable. Convert it to a string. Note
   335  		// that mutating the result of logging.GetFields in place is not allowed, so
   336  		// we'll make a copy.
   337  		if err, ok := fields[logging.ErrorKey].(error); ok {
   338  			fields = logging.NewFields(fields)
   339  			fields[logging.ErrorKey] = err.Error()
   340  		}
   341  
   342  		l.fields = fields
   343  		l.fieldsStr = " :: " + fields.String()
   344  	})
   345  
   346  	e := l.prototype
   347  	e.Severity = LevelToSeverity(lvl)
   348  	e.Message = fmt.Sprintf(format, args...) + l.fieldsStr
   349  	e.Timestamp = ToTimestamp(clock.Now(l.ctx))
   350  	e.Fields = l.fields
   351  
   352  	if l.mut != nil {
   353  		l.mut(l.ctx, &e)
   354  	}
   355  
   356  	l.w.Write(&e)
   357  }