github.com/manicqin/nomad@v0.9.5/client/allocrunner/taskrunner/logmon_hook.go (about)

     1  package taskrunner
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"fmt"
     7  	"path/filepath"
     8  	"runtime"
     9  	"time"
    10  
    11  	hclog "github.com/hashicorp/go-hclog"
    12  	plugin "github.com/hashicorp/go-plugin"
    13  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
    14  	"github.com/hashicorp/nomad/client/logmon"
    15  	"github.com/hashicorp/nomad/helper/uuid"
    16  	"github.com/hashicorp/nomad/nomad/structs"
    17  	bstructs "github.com/hashicorp/nomad/plugins/base/structs"
    18  	"github.com/hashicorp/nomad/plugins/drivers"
    19  	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
    20  	"google.golang.org/grpc"
    21  	"google.golang.org/grpc/codes"
    22  )
    23  
    24  const (
    25  	// logmonReattachKey is the HookData key where logmon's reattach config
    26  	// is stored.
    27  	logmonReattachKey = "reattach_config"
    28  )
    29  
// logmonHook launches logmon and manages task logging
type logmonHook struct {
	// runner is the task runner that owns this hook; its driver is consulted
	// to decide whether log collection is disabled.
	runner *TaskRunner

	// logmon is the handle to the log monitor process for the task.
	logmon             logmon.LogMon
	// logmonPluginClient is the go-plugin client backing logmon. It is used
	// to detect exits, kill the process, and obtain the reattach config.
	logmonPluginClient *plugin.Client

	// config holds the log directory and fifo paths handed to logmon on start.
	config *logmonHookConfig

	// logger receives this hook's diagnostic output.
	logger hclog.Logger
}
    42  
    43  type logmonHookConfig struct {
    44  	logDir     string
    45  	stdoutFifo string
    46  	stderrFifo string
    47  }
    48  
    49  func newLogMonHook(tr *TaskRunner, logger hclog.Logger) *logmonHook {
    50  	hook := &logmonHook{
    51  		runner: tr,
    52  		config: tr.logmonHookConfig,
    53  		logger: logger,
    54  	}
    55  
    56  	return hook
    57  }
    58  
    59  func newLogMonHookConfig(taskName, logDir string) *logmonHookConfig {
    60  	cfg := &logmonHookConfig{
    61  		logDir: logDir,
    62  	}
    63  	if runtime.GOOS == "windows" {
    64  		id := uuid.Generate()[:8]
    65  		cfg.stdoutFifo = fmt.Sprintf("//./pipe/%s-%s.stdout", taskName, id)
    66  		cfg.stderrFifo = fmt.Sprintf("//./pipe/%s-%s.stderr", taskName, id)
    67  	} else {
    68  		cfg.stdoutFifo = filepath.Join(logDir, fmt.Sprintf(".%s.stdout.fifo", taskName))
    69  		cfg.stderrFifo = filepath.Join(logDir, fmt.Sprintf(".%s.stderr.fifo", taskName))
    70  	}
    71  	return cfg
    72  }
    73  
    74  func (*logmonHook) Name() string {
    75  	return "logmon"
    76  }
    77  
    78  func (h *logmonHook) launchLogMon(reattachConfig *plugin.ReattachConfig) error {
    79  	l, c, err := logmon.LaunchLogMon(h.logger, reattachConfig)
    80  	if err != nil {
    81  		return err
    82  	}
    83  
    84  	h.logmon = l
    85  	h.logmonPluginClient = c
    86  	return nil
    87  }
    88  
    89  func reattachConfigFromHookData(data map[string]string) (*plugin.ReattachConfig, error) {
    90  	if data == nil || data[logmonReattachKey] == "" {
    91  		return nil, nil
    92  	}
    93  
    94  	var cfg pstructs.ReattachConfig
    95  	err := json.Unmarshal([]byte(data[logmonReattachKey]), &cfg)
    96  	if err != nil {
    97  		return nil, err
    98  	}
    99  
   100  	return pstructs.ReattachConfigToGoPlugin(&cfg)
   101  }
   102  
   103  func (h *logmonHook) Prestart(ctx context.Context,
   104  	req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error {
   105  
   106  	if h.isLoggingDisabled() {
   107  		h.logger.Debug("logging is disabled by driver")
   108  		return nil
   109  	}
   110  
   111  	attempts := 0
   112  	for {
   113  		err := h.prestartOneLoop(ctx, req)
   114  		if err == bstructs.ErrPluginShutdown || grpc.Code(err) == codes.Unavailable {
   115  			h.logger.Warn("logmon shutdown while making request", "error", err)
   116  
   117  			if attempts > 3 {
   118  				h.logger.Warn("logmon shutdown while making request; giving up", "attempts", attempts, "error", err)
   119  				return err
   120  			}
   121  
   122  			// retry after killing process and ensure we start a new logmon process
   123  			attempts++
   124  			h.logger.Warn("logmon shutdown while making request; retrying", "attempts", attempts, "error", err)
   125  			h.logmonPluginClient.Kill()
   126  			time.Sleep(1 * time.Second)
   127  			continue
   128  		} else if err != nil {
   129  			return err
   130  		}
   131  
   132  		rCfg := pstructs.ReattachConfigFromGoPlugin(h.logmonPluginClient.ReattachConfig())
   133  		jsonCfg, err := json.Marshal(rCfg)
   134  		if err != nil {
   135  			return err
   136  		}
   137  		resp.State = map[string]string{logmonReattachKey: string(jsonCfg)}
   138  		return nil
   139  	}
   140  }
   141  
   142  func (h *logmonHook) isLoggingDisabled() bool {
   143  	ic, ok := h.runner.driver.(drivers.InternalCapabilitiesDriver)
   144  	if !ok {
   145  		return false
   146  	}
   147  
   148  	caps := ic.InternalCapabilities()
   149  	return caps.DisableLogCollection
   150  }
   151  
   152  func (h *logmonHook) prestartOneLoop(ctx context.Context, req *interfaces.TaskPrestartRequest) error {
   153  	// attach to a running logmon if state indicates one
   154  	if h.logmonPluginClient == nil {
   155  		reattachConfig, err := reattachConfigFromHookData(req.PreviousState)
   156  		if err != nil {
   157  			h.logger.Error("failed to load reattach config", "error", err)
   158  			return err
   159  		}
   160  		if reattachConfig != nil {
   161  			if err := h.launchLogMon(reattachConfig); err != nil {
   162  				h.logger.Warn("failed to reattach to logmon process", "error", err)
   163  				// if we failed to launch logmon, try again below
   164  			}
   165  		}
   166  
   167  	}
   168  
   169  	// create a new client in initial starts, failed reattachment, or if we detect exits
   170  	if h.logmonPluginClient == nil || h.logmonPluginClient.Exited() {
   171  		if err := h.launchLogMon(nil); err != nil {
   172  			// Retry errors launching logmon as logmon may have crashed on start and
   173  			// subsequent attempts will start a new one.
   174  			h.logger.Error("failed to launch logmon process", "error", err)
   175  			return structs.NewRecoverableError(err, true)
   176  		}
   177  	}
   178  
   179  	err := h.logmon.Start(&logmon.LogConfig{
   180  		LogDir:        h.config.logDir,
   181  		StdoutLogFile: fmt.Sprintf("%s.stdout", req.Task.Name),
   182  		StderrLogFile: fmt.Sprintf("%s.stderr", req.Task.Name),
   183  		StdoutFifo:    h.config.stdoutFifo,
   184  		StderrFifo:    h.config.stderrFifo,
   185  		MaxFiles:      req.Task.LogConfig.MaxFiles,
   186  		MaxFileSizeMB: req.Task.LogConfig.MaxFileSizeMB,
   187  		FileExtension: req.Task.LogConfig.FileExtension,
   188  	})
   189  	if err != nil {
   190  		h.logger.Error("failed to start logmon", "error", err)
   191  		return err
   192  	}
   193  
   194  	return nil
   195  }
   196  
   197  func (h *logmonHook) Stop(_ context.Context, req *interfaces.TaskStopRequest, _ *interfaces.TaskStopResponse) error {
   198  
   199  	// It's possible that Stop was called without calling Prestart on agent
   200  	// restarts. Attempt to reattach to an existing logmon.
   201  	if h.logmon == nil || h.logmonPluginClient == nil {
   202  		if err := h.reattach(req); err != nil {
   203  			h.logger.Trace("error reattaching to logmon when stopping", "error", err)
   204  		}
   205  	}
   206  
   207  	if h.logmon != nil {
   208  		h.logmon.Stop()
   209  	}
   210  	if h.logmonPluginClient != nil {
   211  		h.logmonPluginClient.Kill()
   212  	}
   213  
   214  	return nil
   215  }
   216  
   217  // reattach to a running logmon if possible. Will not start a new logmon.
   218  func (h *logmonHook) reattach(req *interfaces.TaskStopRequest) error {
   219  	reattachConfig, err := reattachConfigFromHookData(req.ExistingState)
   220  	if err != nil {
   221  		return err
   222  	}
   223  
   224  	// Give up if there's no reattach config
   225  	if reattachConfig == nil {
   226  		return nil
   227  	}
   228  
   229  	return h.launchLogMon(reattachConfig)
   230  }