github.com/pachyderm/pachyderm@v1.13.4/src/server/worker/logs/logger.go (about)

     1  package logs
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"log"
     9  	"os"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/gogo/protobuf/jsonpb"
    15  	"github.com/gogo/protobuf/types"
    16  	"golang.org/x/sync/errgroup"
    17  
    18  	"github.com/pachyderm/pachyderm/src/client"
    19  	"github.com/pachyderm/pachyderm/src/client/pfs"
    20  	"github.com/pachyderm/pachyderm/src/client/pkg/errors"
    21  	"github.com/pachyderm/pachyderm/src/client/pkg/grpcutil"
    22  	"github.com/pachyderm/pachyderm/src/client/pps"
    23  	"github.com/pachyderm/pachyderm/src/server/worker/common"
    24  )
    25  
    26  const (
    27  	logBuffer = 25
    28  )
    29  
    30  // TaggedLogger is an interface providing logging functionality for use by the
    31  // worker, worker master, and user code processes
    32  type TaggedLogger interface {
    33  	// The TaggedLogger is usable as a io.Writer so that it can be set as the
    34  	// user code's subprocess stdout/stderr and still provide the same guarantees
    35  	io.Writer
    36  
    37  	// Logf logs to stdout and object storage (if stats are enabled), including
    38  	// metadata about the current pipeline and job
    39  	Logf(formatString string, args ...interface{})
    40  	// Errf logs only to stderr
    41  	Errf(formatString string, args ...interface{})
    42  
    43  	// LogStep will log before and after the given callback function runs, using
    44  	// the name provided
    45  	LogStep(name string, cb func() error) error
    46  
    47  	// These helpers will clone the current logger and construct a new logger that
    48  	// includes the given metadata in log messages.
    49  	WithJob(jobID string) TaggedLogger
    50  	WithData(data []*common.Input) TaggedLogger
    51  	WithUserCode() TaggedLogger
    52  
    53  	JobID() string
    54  
    55  	// Close will flush any writes to object storage and return information about
    56  	// where log statements were stored in object storage.
    57  	Close() (*pfs.Object, int64, error)
    58  }
    59  
    60  type taggedLogger struct {
    61  	// Mutex is used to ensure no loggers are writing to msgCh when it's closed
    62  	sync.RWMutex
    63  	template  pps.LogMessage
    64  	stderrLog *log.Logger
    65  	marshaler *jsonpb.Marshaler
    66  
    67  	// Used for mirroring log statements to object storage
    68  	putObjClient pfs.ObjectAPI_PutObjectClient
    69  	objSize      int64
    70  	msgCh        chan string
    71  	buffer       bytes.Buffer
    72  	eg           errgroup.Group
    73  }
    74  
    75  func newLogger(pipelineInfo *pps.PipelineInfo) *taggedLogger {
    76  	name := ""
    77  	if pipelineInfo != nil {
    78  		name = pipelineInfo.Pipeline.Name
    79  	}
    80  
    81  	return &taggedLogger{
    82  		template: pps.LogMessage{
    83  			PipelineName: name,
    84  			WorkerID:     os.Getenv(client.PPSPodNameEnv),
    85  		},
    86  		stderrLog: log.New(os.Stderr, "", log.LstdFlags|log.Llongfile),
    87  		marshaler: &jsonpb.Marshaler{},
    88  		msgCh:     make(chan string, logBuffer),
    89  	}
    90  }
    91  
    92  // NewLogger constructs a TaggedLogger for the given pipeline, optionally
    93  // including mirroring log statements to object storage.
    94  //
    95  // If a pachClient is passed in and stats are enabled on the pipeline, log
    96  // statements will be chunked and written to the Object API on the given
    97  // client.
    98  //
    99  // TODO: the whole object api interface here is bad, there are a few shortcomings:
   100  //  - it's only used under the worker function when stats are enabled
   101  //  - 'Close' is used to end the object, but it must be explicitly called
   102  //  - the 'eg', 'putObjClient', 'msgCh', 'buffer', and 'objSize' don't play well
   103  //      with cloned loggers.
   104  // Abstract this into a separate object with a more explicit lifetime?
   105  func NewLogger(pipelineInfo *pps.PipelineInfo, pachClient *client.APIClient) (TaggedLogger, error) {
   106  	result := newLogger(pipelineInfo)
   107  
   108  	if pachClient != nil && pipelineInfo.EnableStats {
   109  		putObjClient, err := pachClient.ObjectAPIClient.PutObject(pachClient.Ctx())
   110  		if err != nil {
   111  			return nil, err
   112  		}
   113  		result.putObjClient = putObjClient
   114  		result.eg.Go(func() error {
   115  			for msg := range result.msgCh {
   116  				for _, chunk := range grpcutil.Chunk([]byte(msg)) {
   117  					if err := result.putObjClient.Send(&pfs.PutObjectRequest{
   118  						Value: chunk,
   119  					}); err != nil && !errors.Is(err, io.EOF) {
   120  						return err
   121  					}
   122  				}
   123  				result.objSize += int64(len(msg))
   124  			}
   125  			return nil
   126  		})
   127  	}
   128  	return result, nil
   129  }
   130  
   131  // NewStatlessLogger constructs a TaggedLogger for the given pipeline.  This is
   132  // typically used outside of the worker/master main path.
   133  func NewStatlessLogger(pipelineInfo *pps.PipelineInfo) TaggedLogger {
   134  	return newLogger(pipelineInfo)
   135  }
   136  
   137  // NewMasterLogger constructs a TaggedLogger for the given pipeline that
   138  // includes the 'Master' flag.  This is typically used for all logging from the
   139  // worker master goroutine. The 'User' flag is set to false to maintain the
   140  // invariant that 'User' and 'Master' are mutually exclusive, which is done to
   141  // make it easier to query for specific logs.
   142  func NewMasterLogger(pipelineInfo *pps.PipelineInfo) TaggedLogger {
   143  	result := newLogger(pipelineInfo) // master loggers don't log stats
   144  	result.template.Master = true
   145  	result.template.User = false
   146  	return result
   147  }
   148  
   149  // WithJob clones the current logger and returns a new one that will include
   150  // the given job ID in log statement metadata.
   151  func (logger *taggedLogger) WithJob(jobID string) TaggedLogger {
   152  	result := logger.clone()
   153  	result.template.JobID = jobID
   154  	return result
   155  }
   156  
   157  // WithJob clones the current logger and returns a new one that will include
   158  // the given data inputs in log statement metadata.
   159  func (logger *taggedLogger) WithData(data []*common.Input) TaggedLogger {
   160  	result := logger.clone()
   161  
   162  	// Add inputs' details to log metadata, so we can find these logs later
   163  	result.template.Data = []*pps.InputFile{}
   164  	for _, d := range data {
   165  		result.template.Data = append(result.template.Data, &pps.InputFile{
   166  			Path: d.FileInfo.File.Path,
   167  			Hash: d.FileInfo.Hash,
   168  		})
   169  	}
   170  
   171  	// This is the same ID used in the stats tree for the datum
   172  	result.template.DatumID = common.DatumID(data)
   173  	return result
   174  }
   175  
   176  // WithUserCode clones the current logger and returns a new one that will
   177  // include the 'User' flag in log statement metadata. The 'Master' flag is set
   178  // to false to maintain the invariant that 'User' and 'Master' are mutually
   179  // exclusive, which is done to make it easier to query for specific logs.
   180  func (logger *taggedLogger) WithUserCode() TaggedLogger {
   181  	result := logger.clone()
   182  	result.template.User = true
   183  	result.template.Master = false
   184  	return result
   185  }
   186  
   187  // JobID returns the current job that the logger is configured with.
   188  func (logger *taggedLogger) JobID() string {
   189  	return logger.template.JobID
   190  }
   191  
   192  func (logger *taggedLogger) clone() *taggedLogger {
   193  	return &taggedLogger{
   194  		template:     logger.template,  // Copy struct
   195  		stderrLog:    logger.stderrLog, // logger should be goroutine-safe
   196  		marshaler:    &jsonpb.Marshaler{},
   197  		putObjClient: logger.putObjClient,
   198  		msgCh:        logger.msgCh,
   199  	}
   200  }
   201  
   202  // Logf logs the line Sprintf(formatString, args...), but formatted as a json
   203  // message and annotated with all of the metadata stored in 'loginfo'.
   204  //
   205  // Note: this is not thread-safe, as it modifies fields of 'logger.template'
   206  func (logger *taggedLogger) Logf(formatString string, args ...interface{}) {
   207  	logger.template.Message = fmt.Sprintf(formatString, args...)
   208  	if ts, err := types.TimestampProto(time.Now()); err == nil {
   209  		logger.template.Ts = ts
   210  	} else {
   211  		logger.Errf("could not generate logging timestamp: %s\n", err)
   212  		return
   213  	}
   214  	msg, err := logger.marshaler.MarshalToString(&logger.template)
   215  	if err != nil {
   216  		logger.Errf("could not marshal %v for logging: %s\n", &logger.template, err)
   217  		return
   218  	}
   219  	fmt.Println(msg)
   220  	logger.RLock()
   221  	defer logger.RUnlock()
   222  	if logger.putObjClient != nil {
   223  		logger.msgCh <- msg + "\n"
   224  	}
   225  }
   226  
   227  // LogStep will log before and after the given callback function runs, using
   228  // the name provided
   229  func (logger *taggedLogger) LogStep(name string, cb func() error) (retErr error) {
   230  	logger.Logf("started %v", name)
   231  	defer func() {
   232  		if retErr != nil {
   233  			logger.Logf("errored %v: %v", name, retErr)
   234  		} else {
   235  			logger.Logf("finished %v", name)
   236  		}
   237  	}()
   238  	return cb()
   239  }
   240  
   241  // Errf writes the given line to the stderr of the worker process.  This does
   242  // not go to a persistent log.
   243  func (logger *taggedLogger) Errf(formatString string, args ...interface{}) {
   244  	logger.stderrLog.Printf(formatString, args...)
   245  }
   246  
   247  // This is provided so that taggedLogger can be used as a io.Writer for stdout
   248  // and stderr when running user code.
   249  func (logger *taggedLogger) Write(p []byte) (_ int, retErr error) {
   250  	// never errors
   251  	logger.buffer.Write(p)
   252  	r := bufio.NewReader(&logger.buffer)
   253  	for {
   254  		message, err := r.ReadString('\n')
   255  		if err != nil {
   256  			if errors.Is(err, io.EOF) {
   257  				logger.buffer.Write([]byte(message))
   258  				return len(p), nil
   259  			}
   260  			// this shouldn't technically be possible to hit - io.EOF should be
   261  			// the only error bufio.Reader can return when using a buffer.
   262  			return 0, errors.Wrap(err, "ReadString")
   263  		}
   264  		// We don't want to make this call as:
   265  		// logger.Logf(message)
   266  		// because if the message has format characters like %s in it those
   267  		// will result in errors being logged.
   268  		logger.Logf("%s", strings.TrimSuffix(message, "\n"))
   269  	}
   270  }
   271  
   272  // Close flushes and closes the object storage client used to mirror log
   273  // statements to object storage.  Returns a pointer to the generated pfs.Object
   274  // as well as the total size of all written messages.
   275  func (logger *taggedLogger) Close() (*pfs.Object, int64, error) {
   276  	logger.Lock()
   277  	defer logger.Unlock()
   278  	if logger.putObjClient != nil {
   279  		close(logger.msgCh)
   280  		if err := logger.eg.Wait(); err != nil {
   281  			return nil, 0, err
   282  		}
   283  		object, err := logger.putObjClient.CloseAndRecv()
   284  		// we set putObjClient to nil so that future calls to Logf won't send
   285  		// msg down logger.msgCh as we've just closed that channel.
   286  		logger.putObjClient = nil
   287  		return object, logger.objSize, err
   288  	}
   289  	return nil, 0, nil
   290  }