github.com/manicqin/nomad@v0.9.5/client/allocrunner/taskrunner/logmon_hook.go (about) 1 package taskrunner 2 3 import ( 4 "context" 5 "encoding/json" 6 "fmt" 7 "path/filepath" 8 "runtime" 9 "time" 10 11 hclog "github.com/hashicorp/go-hclog" 12 plugin "github.com/hashicorp/go-plugin" 13 "github.com/hashicorp/nomad/client/allocrunner/interfaces" 14 "github.com/hashicorp/nomad/client/logmon" 15 "github.com/hashicorp/nomad/helper/uuid" 16 "github.com/hashicorp/nomad/nomad/structs" 17 bstructs "github.com/hashicorp/nomad/plugins/base/structs" 18 "github.com/hashicorp/nomad/plugins/drivers" 19 pstructs "github.com/hashicorp/nomad/plugins/shared/structs" 20 "google.golang.org/grpc" 21 "google.golang.org/grpc/codes" 22 ) 23 24 const ( 25 // logmonReattachKey is the HookData key where logmon's reattach config 26 // is stored. 27 logmonReattachKey = "reattach_config" 28 ) 29 30 // logmonHook launches logmon and manages task logging 31 type logmonHook struct { 32 runner *TaskRunner 33 34 // logmon is the handle to the log monitor process for the task. 35 logmon logmon.LogMon 36 logmonPluginClient *plugin.Client 37 38 config *logmonHookConfig 39 40 logger hclog.Logger 41 } 42 43 type logmonHookConfig struct { 44 logDir string 45 stdoutFifo string 46 stderrFifo string 47 } 48 49 func newLogMonHook(tr *TaskRunner, logger hclog.Logger) *logmonHook { 50 hook := &logmonHook{ 51 runner: tr, 52 config: tr.logmonHookConfig, 53 logger: logger, 54 } 55 56 return hook 57 } 58 59 func newLogMonHookConfig(taskName, logDir string) *logmonHookConfig { 60 cfg := &logmonHookConfig{ 61 logDir: logDir, 62 } 63 if runtime.GOOS == "windows" { 64 id := uuid.Generate()[:8] 65 cfg.stdoutFifo = fmt.Sprintf("//./pipe/%s-%s.stdout", taskName, id) 66 cfg.stderrFifo = fmt.Sprintf("//./pipe/%s-%s.stderr", taskName, id) 67 } else { 68 cfg.stdoutFifo = filepath.Join(logDir, fmt.Sprintf(".%s.stdout.fifo", taskName)) 69 cfg.stderrFifo = filepath.Join(logDir, fmt.Sprintf(".%s.stderr.fifo", taskName)) 70 } 71 return cfg 72 } 73 74 func (*logmonHook) Name() string { 75 return "logmon" 76 } 77 78 func (h *logmonHook) launchLogMon(reattachConfig *plugin.ReattachConfig) error { 79 l, c, err := logmon.LaunchLogMon(h.logger, reattachConfig) 80 if err != nil { 81 return err 82 } 83 84 h.logmon = l 85 h.logmonPluginClient = c 86 return nil 87 } 88 89 func reattachConfigFromHookData(data map[string]string) (*plugin.ReattachConfig, error) { 90 if data == nil || data[logmonReattachKey] == "" { 91 return nil, nil 92 } 93 94 var cfg pstructs.ReattachConfig 95 err := json.Unmarshal([]byte(data[logmonReattachKey]), &cfg) 96 if err != nil { 97 return nil, err 98 } 99 100 return pstructs.ReattachConfigToGoPlugin(&cfg) 101 } 102 103 func (h *logmonHook) Prestart(ctx context.Context, 104 req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error { 105 106 if h.isLoggingDisabled() { 107 h.logger.Debug("logging is disabled by driver") 108 return nil 109 } 110 111 attempts := 0 112 for { 113 err := h.prestartOneLoop(ctx, req) 114 if err == bstructs.ErrPluginShutdown || grpc.Code(err) == codes.Unavailable { 115 h.logger.Warn("logmon shutdown while making request", "error", err) 116 117 if attempts > 3 { 118 h.logger.Warn("logmon shutdown while making request; giving up", "attempts", attempts, "error", err) 119 return err 120 } 121 122 // retry after killing process and ensure we start a new logmon process 123 attempts++ 124 h.logger.Warn("logmon shutdown while making request; retrying", "attempts", attempts, "error", err) 125 h.logmonPluginClient.Kill() 126 time.Sleep(1 * time.Second) 127 continue 128 } else if err != nil { 129 return err 130 } 131 132 rCfg := pstructs.ReattachConfigFromGoPlugin(h.logmonPluginClient.ReattachConfig()) 133 jsonCfg, err := json.Marshal(rCfg) 134 if err != nil { 135 return err 136 } 137 resp.State = map[string]string{logmonReattachKey: string(jsonCfg)} 138 return nil 139 } 140 } 141 142 func (h *logmonHook) isLoggingDisabled() bool { 143 ic, ok := h.runner.driver.(drivers.InternalCapabilitiesDriver) 144 if !ok { 145 return false 146 } 147 148 caps := ic.InternalCapabilities() 149 return caps.DisableLogCollection 150 } 151 152 func (h *logmonHook) prestartOneLoop(ctx context.Context, req *interfaces.TaskPrestartRequest) error { 153 // attach to a running logmon if state indicates one 154 if h.logmonPluginClient == nil { 155 reattachConfig, err := reattachConfigFromHookData(req.PreviousState) 156 if err != nil { 157 h.logger.Error("failed to load reattach config", "error", err) 158 return err 159 } 160 if reattachConfig != nil { 161 if err := h.launchLogMon(reattachConfig); err != nil { 162 h.logger.Warn("failed to reattach to logmon process", "error", err) 163 // if we failed to launch logmon, try again below 164 } 165 } 166 167 } 168 169 // create a new client in initial starts, failed reattachment, or if we detect exits 170 if h.logmonPluginClient == nil || h.logmonPluginClient.Exited() { 171 if err := h.launchLogMon(nil); err != nil { 172 // Retry errors launching logmon as logmon may have crashed on start and 173 // subsequent attempts will start a new one. 174 h.logger.Error("failed to launch logmon process", "error", err) 175 return structs.NewRecoverableError(err, true) 176 } 177 } 178 179 err := h.logmon.Start(&logmon.LogConfig{ 180 LogDir: h.config.logDir, 181 StdoutLogFile: fmt.Sprintf("%s.stdout", req.Task.Name), 182 StderrLogFile: fmt.Sprintf("%s.stderr", req.Task.Name), 183 StdoutFifo: h.config.stdoutFifo, 184 StderrFifo: h.config.stderrFifo, 185 MaxFiles: req.Task.LogConfig.MaxFiles, 186 MaxFileSizeMB: req.Task.LogConfig.MaxFileSizeMB, 187 FileExtension: req.Task.LogConfig.FileExtension, 188 }) 189 if err != nil { 190 h.logger.Error("failed to start logmon", "error", err) 191 return err 192 } 193 194 return nil 195 } 196 197 func (h *logmonHook) Stop(_ context.Context, req *interfaces.TaskStopRequest, _ *interfaces.TaskStopResponse) error { 198 199 // It's possible that Stop was called without calling Prestart on agent 200 // restarts. Attempt to reattach to an existing logmon. 201 if h.logmon == nil || h.logmonPluginClient == nil { 202 if err := h.reattach(req); err != nil { 203 h.logger.Trace("error reattaching to logmon when stopping", "error", err) 204 } 205 } 206 207 if h.logmon != nil { 208 h.logmon.Stop() 209 } 210 if h.logmonPluginClient != nil { 211 h.logmonPluginClient.Kill() 212 } 213 214 return nil 215 } 216 217 // reattach to a running logmon if possible. Will not start a new logmon. 218 func (h *logmonHook) reattach(req *interfaces.TaskStopRequest) error { 219 reattachConfig, err := reattachConfigFromHookData(req.ExistingState) 220 if err != nil { 221 return err 222 } 223 224 // Give up if there's no reattach config 225 if reattachConfig == nil { 226 return nil 227 } 228 229 return h.launchLogMon(reattachConfig) 230 }