github.com/justinjmoses/evergreen@v0.0.0-20170530173719-1d50e381ff0d/agent/comm/http.go (about) 1 package comm 2 3 import ( 4 "bytes" 5 "crypto/tls" 6 "crypto/x509" 7 "encoding/json" 8 "fmt" 9 "io/ioutil" 10 "net/http" 11 "os" 12 "strconv" 13 "time" 14 15 "github.com/evergreen-ci/evergreen" 16 "github.com/evergreen-ci/evergreen/apimodels" 17 "github.com/evergreen-ci/evergreen/model" 18 "github.com/evergreen-ci/evergreen/model/distro" 19 "github.com/evergreen-ci/evergreen/model/task" 20 "github.com/evergreen-ci/evergreen/model/version" 21 "github.com/evergreen-ci/evergreen/util" 22 "github.com/mongodb/grip/slogger" 23 "github.com/pkg/errors" 24 ) 25 26 const httpMaxAttempts = 10 27 28 var HeartbeatTimeout = time.Minute 29 30 var HTTPConflictError = errors.New("Conflict") 31 32 // HTTPCommunicator handles communication with the API server. An HTTPCommunicator 33 // is scoped to a single task, and all communication performed by it is 34 // only relevant to that running task. 35 type HTTPCommunicator struct { 36 ServerURLRoot string 37 TaskId string 38 TaskSecret string 39 HostId string 40 HostSecret string 41 MaxAttempts int 42 RetrySleep time.Duration 43 SignalChan chan Signal 44 Logger *slogger.Logger 45 HttpsCert string 46 httpClient *http.Client 47 // TODO only use one Client after global locking is removed 48 heartbeatClient *http.Client 49 } 50 51 // NewHTTPCommunicator returns an initialized HTTPCommunicator. 52 // The cert parameter may be blank if default system certificates are being used. 53 func NewHTTPCommunicator(serverURL, hostId, hostSecret, cert string) (*HTTPCommunicator, error) { 54 agentCommunicator := &HTTPCommunicator{ 55 ServerURLRoot: fmt.Sprintf("%v/api/%v", serverURL, evergreen.AgentAPIVersion), 56 HostId: hostId, 57 HostSecret: hostSecret, 58 MaxAttempts: httpMaxAttempts, 59 RetrySleep: time.Second * 2, 60 HttpsCert: cert, 61 } 62 63 if agentCommunicator.HttpsCert != "" { 64 pool := x509.NewCertPool() 65 if !pool.AppendCertsFromPEM([]byte(agentCommunicator.HttpsCert)) { 66 return nil, errors.New("failed to append HttpsCert to new cert pool") 67 } 68 tc := &tls.Config{RootCAs: pool} 69 tr := &http.Transport{TLSClientConfig: tc} 70 agentCommunicator.httpClient = &http.Client{Transport: tr} 71 agentCommunicator.heartbeatClient = &http.Client{Transport: tr, Timeout: HeartbeatTimeout} 72 } else { 73 agentCommunicator.httpClient = &http.Client{} 74 agentCommunicator.heartbeatClient = &http.Client{Timeout: HeartbeatTimeout} 75 } 76 return agentCommunicator, nil 77 } 78 79 func (h *HTTPCommunicator) SetSignalChan(communicatorChan chan Signal) { 80 h.SignalChan = communicatorChan 81 } 82 83 func (h *HTTPCommunicator) SetLogger(logger *slogger.Logger) { 84 h.Logger = logger 85 } 86 87 // Heartbeat encapsulates heartbeat behavior (i.e., pinging the API server at regular 88 // intervals to ensure that communication hasn't broken down). 89 type Heartbeat interface { 90 Heartbeat() (bool, error) 91 } 92 93 // Start marks the communicator's task as started. 94 func (h *HTTPCommunicator) Start() error { 95 pidStr := strconv.Itoa(os.Getpid()) 96 taskStartRequest := &apimodels.TaskStartRequest{Pid: pidStr} 97 resp, retryFail, err := h.postJSON("start", taskStartRequest) 98 if resp != nil { 99 defer resp.Body.Close() 100 } 101 if err != nil { 102 if retryFail { 103 err = errors.Wrapf(err, "task start failed after %v tries", h.MaxAttempts) 104 } else { 105 err = errors.Wrap(err, "failed to start task") 106 } 107 h.Logger.Logf(slogger.ERROR, err.Error()) 108 return err 109 } 110 return nil 111 } 112 113 // End marks the communicator's task as finished with the given status. 114 func (h *HTTPCommunicator) End(detail *apimodels.TaskEndDetail) (*apimodels.EndTaskResponse, error) { 115 taskEndResp := &apimodels.EndTaskResponse{} 116 resp, retryFail, err := h.postJSON("end", detail) 117 if resp == nil { 118 return nil, errors.New("empty response when trying to end task") 119 } 120 defer resp.Body.Close() 121 if err != nil { 122 if retryFail { 123 var bodyMsg []byte 124 if resp != nil { 125 bodyMsg, _ = ioutil.ReadAll(resp.Body) 126 } 127 err = errors.Wrapf(err, "task end failed after %v tries: %v", h.MaxAttempts, bodyMsg) 128 } else { 129 err = errors.Wrap(err, "failed to end task") 130 } 131 h.Logger.Logf(slogger.ERROR, err.Error()) 132 return nil, err 133 } 134 135 if resp != nil { 136 if err = util.ReadJSONInto(resp.Body, taskEndResp); err != nil { 137 message := fmt.Sprintf("Error unmarshalling task end response: %v", 138 err) 139 h.Logger.Logf(slogger.ERROR, message) 140 return nil, errors.New(message) 141 } 142 if resp.StatusCode != http.StatusOK { 143 message := fmt.Sprintf("unexpected status code in task end "+ 144 "request (%v): %v", resp.StatusCode, taskEndResp.Message) 145 return nil, errors.New(message) 146 } 147 err = nil 148 } else { 149 err = errors.New("received nil response from API server") 150 } 151 h.Logger.Logf(slogger.INFO, "task's end response received: %s", taskEndResp.Message) 152 return taskEndResp, err 153 } 154 155 // Log sends a batch of log messages for the task's logs to the API server. 156 func (h *HTTPCommunicator) Log(messages []model.LogMessage) error { 157 158 outgoingData := model.TaskLog{ 159 TaskId: h.TaskId, 160 Timestamp: time.Now(), 161 MessageCount: len(messages), 162 Messages: messages, 163 } 164 165 retriableLog := util.RetriableFunc( 166 func() error { 167 resp, err := h.TryTaskPost("log", outgoingData) 168 if resp != nil { 169 defer resp.Body.Close() 170 } 171 if err != nil { 172 return util.RetriableError{errors.WithStack(err)} 173 } 174 if resp.StatusCode == http.StatusInternalServerError { 175 return util.RetriableError{errors.Errorf("http status %v response body %v", resp.StatusCode, resp.Body)} 176 } 177 return nil 178 }, 179 ) 180 retryFail, err := util.Retry(retriableLog, h.MaxAttempts, h.RetrySleep) 181 if retryFail { 182 return errors.Wrapf(err, "logging failed after %vtries: %v", h.MaxAttempts) 183 } 184 return err 185 } 186 187 // GetTask returns the communicator's task. 188 func (h *HTTPCommunicator) GetTask() (*task.Task, error) { 189 task := &task.Task{} 190 retriableGet := util.RetriableFunc( 191 func() error { 192 resp, err := h.TryTaskGet("") 193 if resp != nil { 194 defer resp.Body.Close() 195 } 196 if resp != nil && resp.StatusCode == http.StatusConflict { 197 // Something very wrong, fail now with no retry. 198 return errors.New("conflict; wrong secret") 199 } 200 if err != nil { 201 // Some generic error trying to connect - try again 202 return util.RetriableError{err} 203 } 204 if resp == nil { 205 return util.RetriableError{errors.New("empty response")} 206 } else { 207 err = util.ReadJSONInto(resp.Body, task) 208 if err != nil { 209 fmt.Printf("error3, retrying: %v\n", err) 210 return util.RetriableError{err} 211 } 212 return nil 213 } 214 }, 215 ) 216 217 retryFail, err := util.Retry(retriableGet, h.MaxAttempts, h.RetrySleep) 218 if retryFail { 219 return nil, errors.Wrapf(err, "getting task failed after %v tries", h.MaxAttempts) 220 } 221 return task, nil 222 } 223 224 // GetDistro returns the distro for the communicator's task. 225 func (h *HTTPCommunicator) GetDistro() (*distro.Distro, error) { 226 d := &distro.Distro{} 227 retriableGet := util.RetriableFunc( 228 func() error { 229 resp, err := h.TryTaskGet("distro") 230 if resp == nil { 231 return util.RetriableError{errors.New("empty response")} 232 } 233 234 defer resp.Body.Close() 235 if err != nil { 236 // Some generic error trying to connect - try again 237 return util.RetriableError{err} 238 } 239 240 if resp != nil && resp.StatusCode == http.StatusConflict { 241 // Something very wrong, fail now with no retry. 242 return errors.New("conflict; wrong secret") 243 } 244 if resp.StatusCode != http.StatusOK { 245 return util.RetriableError{errors.Errorf("bad status: %s", resp.Status)} 246 } 247 248 err = util.ReadJSONInto(resp.Body, d) 249 if err != nil { 250 err = errors.Wrap(err, "unable to read distro response") 251 h.Logger.Logf(slogger.ERROR, err.Error()) 252 return util.RetriableError{err} 253 } 254 return nil 255 }, 256 ) 257 258 retryFail, err := util.Retry(retriableGet, h.MaxAttempts, h.RetrySleep) 259 if retryFail { 260 return nil, errors.Wrapf(err, "getting distro failed after %d tries", h.MaxAttempts) 261 } 262 return d, nil 263 } 264 265 // GetNextTask returns a next task response by getting the next task for a given host. 266 func (h *HTTPCommunicator) GetNextTask() (*apimodels.NextTaskResponse, error) { 267 taskResponse := &apimodels.NextTaskResponse{} 268 retriableGet := util.RetriableFunc( 269 func() error { 270 resp, err := h.TryGet("agent/next_task") 271 if resp == nil { 272 return util.RetriableError{fmt.Errorf("empty response")} 273 } 274 defer resp.Body.Close() 275 if resp.StatusCode == http.StatusConflict { 276 return fmt.Errorf("conflict - wrong secret!") 277 } 278 if err != nil { 279 return util.RetriableError{err} 280 } 281 err = util.ReadJSONInto(resp.Body, taskResponse) 282 if err != nil { 283 return util.RetriableError{err} 284 } 285 return nil 286 }) 287 retryFail, err := util.Retry(retriableGet, h.MaxAttempts, h.RetrySleep) 288 if retryFail { 289 return nil, fmt.Errorf("getting next task failed after %v tries: %v", h.MaxAttempts, err) 290 } 291 return taskResponse, nil 292 293 } 294 295 // GetProjectConfig loads the communicator's task's project from the API server. 296 func (h *HTTPCommunicator) GetProjectRef() (*model.ProjectRef, error) { 297 projectRef := &model.ProjectRef{} 298 retriableGet := util.RetriableFunc( 299 func() error { 300 resp, err := h.TryTaskGet("project_ref") 301 if resp != nil { 302 defer resp.Body.Close() 303 } 304 if resp != nil && resp.StatusCode == http.StatusConflict { 305 // Something very wrong, fail now with no retry. 306 return errors.New("conflict; wrong secret") 307 } 308 if err != nil { 309 // Some generic error trying to connect - try again 310 return util.RetriableError{err} 311 } 312 if resp == nil { 313 return util.RetriableError{errors.New("empty response")} 314 } 315 316 err = util.ReadJSONInto(resp.Body, projectRef) 317 if err != nil { 318 return util.RetriableError{err} 319 } 320 return nil 321 }, 322 ) 323 324 retryFail, err := util.Retry(retriableGet, h.MaxAttempts, h.RetrySleep) 325 if retryFail { 326 return nil, errors.Wrapf(err, "getting project ref failed after %d tries", h.MaxAttempts) 327 } 328 return projectRef, nil 329 } 330 331 // GetVersion loads the communicator's task's version from the API server. 332 func (h *HTTPCommunicator) GetVersion() (*version.Version, error) { 333 v := &version.Version{} 334 retriableGet := util.RetriableFunc( 335 func() error { 336 resp, err := h.TryTaskGet("version") 337 if resp != nil { 338 defer resp.Body.Close() 339 340 if resp.StatusCode == http.StatusConflict { 341 // Something very wrong, fail now with no retry. 342 return errors.New("conflict; wrong secret") 343 } 344 if resp.StatusCode != http.StatusOK { 345 msg, _ := ioutil.ReadAll(resp.Body) // ignore ReadAll error 346 return util.RetriableError{ 347 errors.Errorf("bad status code %v: %s", 348 resp.StatusCode, string(msg)), 349 } 350 } 351 } 352 353 if err != nil { 354 // Some generic error trying to connect - try again 355 return util.RetriableError{errors.WithStack(err)} 356 } 357 358 if resp == nil { 359 return util.RetriableError{errors.New("empty response")} 360 } 361 362 err = util.ReadJSONInto(resp.Body, v) 363 if err != nil { 364 err := errors.Wrap(err, "unable to read project version response") 365 h.Logger.Logf(slogger.ERROR, err.Error()) 366 return err 367 } 368 return nil 369 }, 370 ) 371 372 retryFail, err := util.Retry(retriableGet, h.MaxAttempts, h.RetrySleep) 373 if retryFail { 374 return nil, errors.Wrapf(err, "getting project configuration failed after %d tries", 375 h.MaxAttempts) 376 } 377 return v, nil 378 } 379 380 // Heartbeat sends a heartbeat to the API server. The server can respond with 381 // and "abort" response. This function returns true if the agent should abort. 382 func (h *HTTPCommunicator) Heartbeat() (bool, error) { 383 h.Logger.Logf(slogger.INFO, "Sending heartbeat.") 384 data := interface{}("heartbeat") 385 resp, err := h.tryRequestWithClient(h.getTaskPath("heartbeat"), "POST", h.heartbeatClient, &data) 386 if resp != nil { 387 defer resp.Body.Close() 388 } 389 if err != nil { 390 err = errors.Wrap(err, "error sending heartbeat") 391 h.Logger.Logf(slogger.ERROR, err.Error()) 392 return false, err 393 } 394 if resp.StatusCode == http.StatusConflict { 395 h.Logger.Logf(slogger.ERROR, "wrong secret (409) sending heartbeat") 396 h.SignalChan <- IncorrectSecret 397 return false, errors.Errorf("unauthorized - wrong secret") 398 } 399 if resp.StatusCode != http.StatusOK { 400 return false, errors.Errorf("unexpected status code doing heartbeat: %v", 401 resp.StatusCode) 402 } 403 404 heartbeatResponse := &apimodels.HeartbeatResponse{} 405 if err = util.ReadJSONInto(resp.Body, heartbeatResponse); err != nil { 406 err = errors.Wrap(err, "Error unmarshaling heartbeat response") 407 h.Logger.Logf(slogger.ERROR, err.Error()) 408 return false, err 409 } 410 return heartbeatResponse.Abort, nil 411 } 412 413 func (h *HTTPCommunicator) SetTask(taskId, taskSecret string) { 414 h.TaskId = taskId 415 h.TaskSecret = taskSecret 416 } 417 418 func (h *HTTPCommunicator) GetCurrentTaskId() string { 419 return h.TaskId 420 } 421 422 func (h *HTTPCommunicator) Reset(commSignal chan Signal, timeoutWatcher *TimeoutWatcher) (*APILogger, *StreamLogger, error) { 423 424 h.SignalChan = commSignal 425 // set up logger to API server 426 apiLogger := NewAPILogger(h) 427 428 // set up timeout logger, local and API logger streams 429 streamLogger, err := NewStreamLogger(timeoutWatcher, apiLogger) 430 if err != nil { 431 return nil, nil, err 432 } 433 h.Logger = streamLogger.Execution 434 return apiLogger, streamLogger, nil 435 436 } 437 438 // getTaskPath is a helper to create a path that can be used for task specific calls 439 func (h *HTTPCommunicator) getTaskPath(path string) string { 440 return fmt.Sprintf("task/%v/%v", h.TaskId, path) 441 } 442 443 func (h *HTTPCommunicator) TryGet(path string) (*http.Response, error) { 444 resp, err := h.tryRequestWithClient(path, "GET", h.httpClient, nil) 445 return resp, errors.WithStack(err) 446 } 447 448 func (h *HTTPCommunicator) TryTaskGet(path string) (*http.Response, error) { 449 resp, err := h.tryRequestWithClient(h.getTaskPath(path), "GET", h.httpClient, nil) 450 return resp, errors.WithStack(err) 451 } 452 453 func (h *HTTPCommunicator) TryTaskPost(path string, data interface{}) (*http.Response, error) { 454 resp, err := h.tryRequestWithClient(h.getTaskPath(path), "POST", h.httpClient, &data) 455 return resp, errors.WithStack(err) 456 } 457 458 func (h *HTTPCommunicator) TryPostJSON(path string, data interface{}) (*http.Response, error) { 459 resp, err := h.tryRequestWithClient(path, "POST", h.httpClient, &data) 460 return resp, errors.WithStack(err) 461 } 462 463 // tryRequestWithClient does the given task HTTP request using the provided client, allowing 464 // requests to be done with multiple client configurations/timeouts. 465 func (h *HTTPCommunicator) tryRequestWithClient(path string, method string, client *http.Client, 466 data *interface{}) (*http.Response, error) { 467 endpointUrl := fmt.Sprintf("%s/%s", h.ServerURLRoot, path) 468 req, err := http.NewRequest(method, endpointUrl, nil) 469 err = errors.WithStack(err) 470 if err != nil { 471 return nil, err 472 } 473 474 if data != nil { 475 var out []byte 476 out, err = json.Marshal(*data) 477 if err != nil { 478 return nil, err 479 } 480 req.Body = ioutil.NopCloser(bytes.NewReader(out)) 481 } 482 req.Header.Add(evergreen.TaskSecretHeader, h.TaskSecret) 483 req.Header.Add(evergreen.HostHeader, h.HostId) 484 req.Header.Add(evergreen.HostSecretHeader, h.HostSecret) 485 req.Header.Add("Content-Type", "application/json") 486 487 resp, err := client.Do(req) 488 return resp, errors.WithStack(err) 489 } 490 491 func (h *HTTPCommunicator) postJSON(path string, data interface{}) ( 492 resp *http.Response, retryFail bool, err error) { 493 retriablePost := util.RetriableFunc( 494 func() error { 495 resp, err = h.TryTaskPost(path, data) 496 if resp == nil { 497 h.Logger.Logf(slogger.ERROR, "nil response") 498 return errors.New("response is nil") 499 } 500 if err != nil { 501 h.Logger.Logf(slogger.ERROR, "HTTP Post failed on '%v': %v", 502 path, err) 503 return util.RetriableError{err} 504 } 505 if resp.StatusCode == http.StatusOK { 506 return nil 507 } 508 if resp.StatusCode == http.StatusConflict { 509 h.Logger.Logf(slogger.ERROR, "received 409 conflict error") 510 return HTTPConflictError 511 } 512 h.Logger.Logf(slogger.ERROR, "bad response '%v' posting to "+ 513 "'%v'", resp.StatusCode, path) 514 err = errors.Errorf("unexpected status code: %v", resp.StatusCode) 515 return util.RetriableError{err} 516 }, 517 ) 518 retryFail, err = util.Retry(retriablePost, h.MaxAttempts, h.RetrySleep) 519 520 return resp, retryFail, err 521 } 522 523 // FetchExpansionVars loads expansions for a communicator's task from the API server. 524 func (h *HTTPCommunicator) FetchExpansionVars() (*apimodels.ExpansionVars, error) { 525 resultVars := &apimodels.ExpansionVars{} 526 retriableGet := util.RetriableFunc( 527 func() error { 528 resp, err := h.TryTaskGet("fetch_vars") 529 if resp != nil { 530 defer resp.Body.Close() 531 } 532 if err != nil { 533 // Some generic error trying to connect - try again 534 h.Logger.Logf(slogger.ERROR, "failed trying to call fetch GET: %v", err) 535 return util.RetriableError{err} 536 } 537 if resp.StatusCode == http.StatusUnauthorized { 538 err = errors.Errorf("fetching expansions failed: got 'unauthorized' response.") 539 h.Logger.Logf(slogger.ERROR, err.Error()) 540 return err 541 } 542 if resp.StatusCode != http.StatusOK { 543 err = errors.Errorf("failed trying fetch GET, got bad response code: %v", resp.StatusCode) 544 h.Logger.Logf(slogger.ERROR, err.Error()) 545 return util.RetriableError{err} 546 } 547 if resp == nil { 548 err = errors.New("empty response fetching expansions") 549 h.Logger.Logf(slogger.ERROR, err.Error()) 550 return util.RetriableError{err} 551 } 552 553 // got here safely, so all is good - read the results 554 err = util.ReadJSONInto(resp.Body, resultVars) 555 if err != nil { 556 err = errors.Wrap(err, "failed to read vars from response") 557 h.Logger.Logf(slogger.ERROR, err.Error()) 558 return err 559 } 560 return nil 561 }, 562 ) 563 564 retryFail, err := util.Retry(retriableGet, httpMaxAttempts, 1*time.Second) 565 err = errors.WithStack(err) 566 if err != nil { 567 // stop trying to make fetch happen, it's not going to happen 568 if retryFail { 569 h.Logger.Logf(slogger.ERROR, "Fetching vars used up all retries.") 570 } 571 return nil, err 572 } 573 return resultVars, err 574 }