github.com/bigcommerce/nomad@v0.9.3-bc/client/allocrunner/taskrunner/logmon_hook.go (about)

     1  package taskrunner
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"fmt"
     7  	"path/filepath"
     8  	"runtime"
     9  	"time"
    10  
    11  	hclog "github.com/hashicorp/go-hclog"
    12  	plugin "github.com/hashicorp/go-plugin"
    13  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
    14  	"github.com/hashicorp/nomad/client/logmon"
    15  	"github.com/hashicorp/nomad/helper/uuid"
    16  	"github.com/hashicorp/nomad/nomad/structs"
    17  	bstructs "github.com/hashicorp/nomad/plugins/base/structs"
    18  	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
    19  	"google.golang.org/grpc"
    20  	"google.golang.org/grpc/codes"
    21  )
    22  
    23  const (
    24  	// logmonReattachKey is the HookData key where logmon's reattach config
    25  	// is stored.
    26  	logmonReattachKey = "reattach_config"
    27  )
    28  
    29  // logmonHook launches logmon and manages task logging
    30  type logmonHook struct {
    31  	// logmon is the handle to the log monitor process for the task.
    32  	logmon             logmon.LogMon
    33  	logmonPluginClient *plugin.Client
    34  
    35  	config *logmonHookConfig
    36  
    37  	logger hclog.Logger
    38  }
    39  
    40  type logmonHookConfig struct {
    41  	logDir     string
    42  	stdoutFifo string
    43  	stderrFifo string
    44  }
    45  
    46  func newLogMonHook(cfg *logmonHookConfig, logger hclog.Logger) *logmonHook {
    47  	hook := &logmonHook{
    48  		config: cfg,
    49  		logger: logger,
    50  	}
    51  
    52  	return hook
    53  }
    54  
    55  func newLogMonHookConfig(taskName, logDir string) *logmonHookConfig {
    56  	cfg := &logmonHookConfig{
    57  		logDir: logDir,
    58  	}
    59  	if runtime.GOOS == "windows" {
    60  		id := uuid.Generate()[:8]
    61  		cfg.stdoutFifo = fmt.Sprintf("//./pipe/%s-%s.stdout", taskName, id)
    62  		cfg.stderrFifo = fmt.Sprintf("//./pipe/%s-%s.stderr", taskName, id)
    63  	} else {
    64  		cfg.stdoutFifo = filepath.Join(logDir, fmt.Sprintf(".%s.stdout.fifo", taskName))
    65  		cfg.stderrFifo = filepath.Join(logDir, fmt.Sprintf(".%s.stderr.fifo", taskName))
    66  	}
    67  	return cfg
    68  }
    69  
    70  func (*logmonHook) Name() string {
    71  	return "logmon"
    72  }
    73  
    74  func (h *logmonHook) launchLogMon(reattachConfig *plugin.ReattachConfig) error {
    75  	l, c, err := logmon.LaunchLogMon(h.logger, reattachConfig)
    76  	if err != nil {
    77  		return err
    78  	}
    79  
    80  	h.logmon = l
    81  	h.logmonPluginClient = c
    82  	return nil
    83  }
    84  
    85  func reattachConfigFromHookData(data map[string]string) (*plugin.ReattachConfig, error) {
    86  	if data == nil || data[logmonReattachKey] == "" {
    87  		return nil, nil
    88  	}
    89  
    90  	var cfg pstructs.ReattachConfig
    91  	err := json.Unmarshal([]byte(data[logmonReattachKey]), &cfg)
    92  	if err != nil {
    93  		return nil, err
    94  	}
    95  
    96  	return pstructs.ReattachConfigToGoPlugin(&cfg)
    97  }
    98  
    99  func (h *logmonHook) Prestart(ctx context.Context,
   100  	req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error {
   101  
   102  	attempts := 0
   103  	for {
   104  		err := h.prestartOneLoop(ctx, req)
   105  		if err == bstructs.ErrPluginShutdown || grpc.Code(err) == codes.Unavailable {
   106  			h.logger.Warn("logmon shutdown while making request", "error", err)
   107  
   108  			if attempts > 3 {
   109  				h.logger.Warn("logmon shutdown while making request; giving up", "attempts", attempts, "error", err)
   110  				return err
   111  			}
   112  
   113  			// retry after killing process and ensure we start a new logmon process
   114  			attempts++
   115  			h.logger.Warn("logmon shutdown while making request; retrying", "attempts", attempts, "error", err)
   116  			h.logmonPluginClient.Kill()
   117  			time.Sleep(1 * time.Second)
   118  			continue
   119  		} else if err != nil {
   120  			return err
   121  		}
   122  
   123  		rCfg := pstructs.ReattachConfigFromGoPlugin(h.logmonPluginClient.ReattachConfig())
   124  		jsonCfg, err := json.Marshal(rCfg)
   125  		if err != nil {
   126  			return err
   127  		}
   128  		resp.State = map[string]string{logmonReattachKey: string(jsonCfg)}
   129  		return nil
   130  	}
   131  }
   132  
   133  func (h *logmonHook) prestartOneLoop(ctx context.Context, req *interfaces.TaskPrestartRequest) error {
   134  	// attach to a running logmon if state indicates one
   135  	if h.logmonPluginClient == nil {
   136  		reattachConfig, err := reattachConfigFromHookData(req.PreviousState)
   137  		if err != nil {
   138  			h.logger.Error("failed to load reattach config", "error", err)
   139  			return err
   140  		}
   141  		if reattachConfig != nil {
   142  			if err := h.launchLogMon(reattachConfig); err != nil {
   143  				h.logger.Warn("failed to reattach to logmon process", "error", err)
   144  				// if we failed to launch logmon, try again below
   145  			}
   146  		}
   147  
   148  	}
   149  
   150  	// create a new client in initial starts, failed reattachment, or if we detect exits
   151  	if h.logmonPluginClient == nil || h.logmonPluginClient.Exited() {
   152  		if err := h.launchLogMon(nil); err != nil {
   153  			// Retry errors launching logmon as logmon may have crashed on start and
   154  			// subsequent attempts will start a new one.
   155  			h.logger.Error("failed to launch logmon process", "error", err)
   156  			return structs.NewRecoverableError(err, true)
   157  		}
   158  	}
   159  
   160  	err := h.logmon.Start(&logmon.LogConfig{
   161  		LogDir:        h.config.logDir,
   162  		StdoutLogFile: fmt.Sprintf("%s.stdout", req.Task.Name),
   163  		StderrLogFile: fmt.Sprintf("%s.stderr", req.Task.Name),
   164  		StdoutFifo:    h.config.stdoutFifo,
   165  		StderrFifo:    h.config.stderrFifo,
   166  		MaxFiles:      req.Task.LogConfig.MaxFiles,
   167  		MaxFileSizeMB: req.Task.LogConfig.MaxFileSizeMB,
   168  	})
   169  	if err != nil {
   170  		h.logger.Error("failed to start logmon", "error", err)
   171  		return err
   172  	}
   173  
   174  	return nil
   175  }
   176  
   177  func (h *logmonHook) Stop(_ context.Context, req *interfaces.TaskStopRequest, _ *interfaces.TaskStopResponse) error {
   178  
   179  	// It's possible that Stop was called without calling Prestart on agent
   180  	// restarts. Attempt to reattach to an existing logmon.
   181  	if h.logmon == nil || h.logmonPluginClient == nil {
   182  		if err := h.reattach(req); err != nil {
   183  			h.logger.Trace("error reattaching to logmon when stopping", "error", err)
   184  		}
   185  	}
   186  
   187  	if h.logmon != nil {
   188  		h.logmon.Stop()
   189  	}
   190  	if h.logmonPluginClient != nil {
   191  		h.logmonPluginClient.Kill()
   192  	}
   193  
   194  	return nil
   195  }
   196  
   197  // reattach to a running logmon if possible. Will not start a new logmon.
   198  func (h *logmonHook) reattach(req *interfaces.TaskStopRequest) error {
   199  	reattachConfig, err := reattachConfigFromHookData(req.ExistingState)
   200  	if err != nil {
   201  		return err
   202  	}
   203  
   204  	// Give up if there's no reattach config
   205  	if reattachConfig == nil {
   206  		return nil
   207  	}
   208  
   209  	return h.launchLogMon(reattachConfig)
   210  }