github.com/bigcommerce/nomad@v0.9.3-bc/client/allocrunner/taskrunner/logmon_hook.go (about) 1 package taskrunner 2 3 import ( 4 "context" 5 "encoding/json" 6 "fmt" 7 "path/filepath" 8 "runtime" 9 "time" 10 11 hclog "github.com/hashicorp/go-hclog" 12 plugin "github.com/hashicorp/go-plugin" 13 "github.com/hashicorp/nomad/client/allocrunner/interfaces" 14 "github.com/hashicorp/nomad/client/logmon" 15 "github.com/hashicorp/nomad/helper/uuid" 16 "github.com/hashicorp/nomad/nomad/structs" 17 bstructs "github.com/hashicorp/nomad/plugins/base/structs" 18 pstructs "github.com/hashicorp/nomad/plugins/shared/structs" 19 "google.golang.org/grpc" 20 "google.golang.org/grpc/codes" 21 ) 22 23 const ( 24 // logmonReattachKey is the HookData key where logmon's reattach config 25 // is stored. 26 logmonReattachKey = "reattach_config" 27 ) 28 29 // logmonHook launches logmon and manages task logging 30 type logmonHook struct { 31 // logmon is the handle to the log monitor process for the task. 32 logmon logmon.LogMon 33 logmonPluginClient *plugin.Client 34 35 config *logmonHookConfig 36 37 logger hclog.Logger 38 } 39 40 type logmonHookConfig struct { 41 logDir string 42 stdoutFifo string 43 stderrFifo string 44 } 45 46 func newLogMonHook(cfg *logmonHookConfig, logger hclog.Logger) *logmonHook { 47 hook := &logmonHook{ 48 config: cfg, 49 logger: logger, 50 } 51 52 return hook 53 } 54 55 func newLogMonHookConfig(taskName, logDir string) *logmonHookConfig { 56 cfg := &logmonHookConfig{ 57 logDir: logDir, 58 } 59 if runtime.GOOS == "windows" { 60 id := uuid.Generate()[:8] 61 cfg.stdoutFifo = fmt.Sprintf("//./pipe/%s-%s.stdout", taskName, id) 62 cfg.stderrFifo = fmt.Sprintf("//./pipe/%s-%s.stderr", taskName, id) 63 } else { 64 cfg.stdoutFifo = filepath.Join(logDir, fmt.Sprintf(".%s.stdout.fifo", taskName)) 65 cfg.stderrFifo = filepath.Join(logDir, fmt.Sprintf(".%s.stderr.fifo", taskName)) 66 } 67 return cfg 68 } 69 70 func (*logmonHook) Name() string { 71 return "logmon" 72 } 73 74 func (h *logmonHook) launchLogMon(reattachConfig *plugin.ReattachConfig) error { 75 l, c, err := logmon.LaunchLogMon(h.logger, reattachConfig) 76 if err != nil { 77 return err 78 } 79 80 h.logmon = l 81 h.logmonPluginClient = c 82 return nil 83 } 84 85 func reattachConfigFromHookData(data map[string]string) (*plugin.ReattachConfig, error) { 86 if data == nil || data[logmonReattachKey] == "" { 87 return nil, nil 88 } 89 90 var cfg pstructs.ReattachConfig 91 err := json.Unmarshal([]byte(data[logmonReattachKey]), &cfg) 92 if err != nil { 93 return nil, err 94 } 95 96 return pstructs.ReattachConfigToGoPlugin(&cfg) 97 } 98 99 func (h *logmonHook) Prestart(ctx context.Context, 100 req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error { 101 102 attempts := 0 103 for { 104 err := h.prestartOneLoop(ctx, req) 105 if err == bstructs.ErrPluginShutdown || grpc.Code(err) == codes.Unavailable { 106 h.logger.Warn("logmon shutdown while making request", "error", err) 107 108 if attempts > 3 { 109 h.logger.Warn("logmon shutdown while making request; giving up", "attempts", attempts, "error", err) 110 return err 111 } 112 113 // retry after killing process and ensure we start a new logmon process 114 attempts++ 115 h.logger.Warn("logmon shutdown while making request; retrying", "attempts", attempts, "error", err) 116 h.logmonPluginClient.Kill() 117 time.Sleep(1 * time.Second) 118 continue 119 } else if err != nil { 120 return err 121 } 122 123 rCfg := pstructs.ReattachConfigFromGoPlugin(h.logmonPluginClient.ReattachConfig()) 124 jsonCfg, err := json.Marshal(rCfg) 125 if err != nil { 126 return err 127 } 128 resp.State = map[string]string{logmonReattachKey: string(jsonCfg)} 129 return nil 130 } 131 } 132 133 func (h *logmonHook) prestartOneLoop(ctx context.Context, req *interfaces.TaskPrestartRequest) error { 134 // attach to a running logmon if state indicates one 135 if h.logmonPluginClient == nil { 136 reattachConfig, err := reattachConfigFromHookData(req.PreviousState) 137 if err != nil { 138 h.logger.Error("failed to load reattach config", "error", err) 139 return err 140 } 141 if reattachConfig != nil { 142 if err := h.launchLogMon(reattachConfig); err != nil { 143 h.logger.Warn("failed to reattach to logmon process", "error", err) 144 // if we failed to launch logmon, try again below 145 } 146 } 147 148 } 149 150 // create a new client in initial starts, failed reattachment, or if we detect exits 151 if h.logmonPluginClient == nil || h.logmonPluginClient.Exited() { 152 if err := h.launchLogMon(nil); err != nil { 153 // Retry errors launching logmon as logmon may have crashed on start and 154 // subsequent attempts will start a new one. 155 h.logger.Error("failed to launch logmon process", "error", err) 156 return structs.NewRecoverableError(err, true) 157 } 158 } 159 160 err := h.logmon.Start(&logmon.LogConfig{ 161 LogDir: h.config.logDir, 162 StdoutLogFile: fmt.Sprintf("%s.stdout", req.Task.Name), 163 StderrLogFile: fmt.Sprintf("%s.stderr", req.Task.Name), 164 StdoutFifo: h.config.stdoutFifo, 165 StderrFifo: h.config.stderrFifo, 166 MaxFiles: req.Task.LogConfig.MaxFiles, 167 MaxFileSizeMB: req.Task.LogConfig.MaxFileSizeMB, 168 }) 169 if err != nil { 170 h.logger.Error("failed to start logmon", "error", err) 171 return err 172 } 173 174 return nil 175 } 176 177 func (h *logmonHook) Stop(_ context.Context, req *interfaces.TaskStopRequest, _ *interfaces.TaskStopResponse) error { 178 179 // It's possible that Stop was called without calling Prestart on agent 180 // restarts. Attempt to reattach to an existing logmon. 181 if h.logmon == nil || h.logmonPluginClient == nil { 182 if err := h.reattach(req); err != nil { 183 h.logger.Trace("error reattaching to logmon when stopping", "error", err) 184 } 185 } 186 187 if h.logmon != nil { 188 h.logmon.Stop() 189 } 190 if h.logmonPluginClient != nil { 191 h.logmonPluginClient.Kill() 192 } 193 194 return nil 195 } 196 197 // reattach to a running logmon if possible. Will not start a new logmon. 198 func (h *logmonHook) reattach(req *interfaces.TaskStopRequest) error { 199 reattachConfig, err := reattachConfigFromHookData(req.ExistingState) 200 if err != nil { 201 return err 202 } 203 204 // Give up if there's no reattach config 205 if reattachConfig == nil { 206 return nil 207 } 208 209 return h.launchLogMon(reattachConfig) 210 }