github.com/justinjmoses/evergreen@v0.0.0-20170530173719-1d50e381ff0d/agent/comm/http.go (about)

     1  package comm
     2  
     3  import (
     4  	"bytes"
     5  	"crypto/tls"
     6  	"crypto/x509"
     7  	"encoding/json"
     8  	"fmt"
     9  	"io/ioutil"
    10  	"net/http"
    11  	"os"
    12  	"strconv"
    13  	"time"
    14  
    15  	"github.com/evergreen-ci/evergreen"
    16  	"github.com/evergreen-ci/evergreen/apimodels"
    17  	"github.com/evergreen-ci/evergreen/model"
    18  	"github.com/evergreen-ci/evergreen/model/distro"
    19  	"github.com/evergreen-ci/evergreen/model/task"
    20  	"github.com/evergreen-ci/evergreen/model/version"
    21  	"github.com/evergreen-ci/evergreen/util"
    22  	"github.com/mongodb/grip/slogger"
    23  	"github.com/pkg/errors"
    24  )
    25  
    26  const httpMaxAttempts = 10
    27  
    28  var HeartbeatTimeout = time.Minute
    29  
    30  var HTTPConflictError = errors.New("Conflict")
    31  
    32  // HTTPCommunicator handles communication with the API server. An HTTPCommunicator
    33  // is scoped to a single task, and all communication performed by it is
    34  // only relevant to that running task.
    35  type HTTPCommunicator struct {
    36  	ServerURLRoot string
    37  	TaskId        string
    38  	TaskSecret    string
    39  	HostId        string
    40  	HostSecret    string
    41  	MaxAttempts   int
    42  	RetrySleep    time.Duration
    43  	SignalChan    chan Signal
    44  	Logger        *slogger.Logger
    45  	HttpsCert     string
    46  	httpClient    *http.Client
    47  	// TODO only use one Client after global locking is removed
    48  	heartbeatClient *http.Client
    49  }
    50  
    51  // NewHTTPCommunicator returns an initialized HTTPCommunicator.
    52  // The cert parameter may be blank if default system certificates are being used.
    53  func NewHTTPCommunicator(serverURL, hostId, hostSecret, cert string) (*HTTPCommunicator, error) {
    54  	agentCommunicator := &HTTPCommunicator{
    55  		ServerURLRoot: fmt.Sprintf("%v/api/%v", serverURL, evergreen.AgentAPIVersion),
    56  		HostId:        hostId,
    57  		HostSecret:    hostSecret,
    58  		MaxAttempts:   httpMaxAttempts,
    59  		RetrySleep:    time.Second * 2,
    60  		HttpsCert:     cert,
    61  	}
    62  
    63  	if agentCommunicator.HttpsCert != "" {
    64  		pool := x509.NewCertPool()
    65  		if !pool.AppendCertsFromPEM([]byte(agentCommunicator.HttpsCert)) {
    66  			return nil, errors.New("failed to append HttpsCert to new cert pool")
    67  		}
    68  		tc := &tls.Config{RootCAs: pool}
    69  		tr := &http.Transport{TLSClientConfig: tc}
    70  		agentCommunicator.httpClient = &http.Client{Transport: tr}
    71  		agentCommunicator.heartbeatClient = &http.Client{Transport: tr, Timeout: HeartbeatTimeout}
    72  	} else {
    73  		agentCommunicator.httpClient = &http.Client{}
    74  		agentCommunicator.heartbeatClient = &http.Client{Timeout: HeartbeatTimeout}
    75  	}
    76  	return agentCommunicator, nil
    77  }
    78  
    79  func (h *HTTPCommunicator) SetSignalChan(communicatorChan chan Signal) {
    80  	h.SignalChan = communicatorChan
    81  }
    82  
    83  func (h *HTTPCommunicator) SetLogger(logger *slogger.Logger) {
    84  	h.Logger = logger
    85  }
    86  
    87  // Heartbeat encapsulates heartbeat behavior (i.e., pinging the API server at regular
    88  // intervals to ensure that communication hasn't broken down).
    89  type Heartbeat interface {
    90  	Heartbeat() (bool, error)
    91  }
    92  
    93  // Start marks the communicator's task as started.
    94  func (h *HTTPCommunicator) Start() error {
    95  	pidStr := strconv.Itoa(os.Getpid())
    96  	taskStartRequest := &apimodels.TaskStartRequest{Pid: pidStr}
    97  	resp, retryFail, err := h.postJSON("start", taskStartRequest)
    98  	if resp != nil {
    99  		defer resp.Body.Close()
   100  	}
   101  	if err != nil {
   102  		if retryFail {
   103  			err = errors.Wrapf(err, "task start failed after %v tries", h.MaxAttempts)
   104  		} else {
   105  			err = errors.Wrap(err, "failed to start task")
   106  		}
   107  		h.Logger.Logf(slogger.ERROR, err.Error())
   108  		return err
   109  	}
   110  	return nil
   111  }
   112  
   113  // End marks the communicator's task as finished with the given status.
   114  func (h *HTTPCommunicator) End(detail *apimodels.TaskEndDetail) (*apimodels.EndTaskResponse, error) {
   115  	taskEndResp := &apimodels.EndTaskResponse{}
   116  	resp, retryFail, err := h.postJSON("end", detail)
   117  	if resp == nil {
   118  		return nil, errors.New("empty response when trying to end task")
   119  	}
   120  	defer resp.Body.Close()
   121  	if err != nil {
   122  		if retryFail {
   123  			var bodyMsg []byte
   124  			if resp != nil {
   125  				bodyMsg, _ = ioutil.ReadAll(resp.Body)
   126  			}
   127  			err = errors.Wrapf(err, "task end failed after %v tries: %v", h.MaxAttempts, bodyMsg)
   128  		} else {
   129  			err = errors.Wrap(err, "failed to end task")
   130  		}
   131  		h.Logger.Logf(slogger.ERROR, err.Error())
   132  		return nil, err
   133  	}
   134  
   135  	if resp != nil {
   136  		if err = util.ReadJSONInto(resp.Body, taskEndResp); err != nil {
   137  			message := fmt.Sprintf("Error unmarshalling task end response: %v",
   138  				err)
   139  			h.Logger.Logf(slogger.ERROR, message)
   140  			return nil, errors.New(message)
   141  		}
   142  		if resp.StatusCode != http.StatusOK {
   143  			message := fmt.Sprintf("unexpected status code in task end "+
   144  				"request (%v): %v", resp.StatusCode, taskEndResp.Message)
   145  			return nil, errors.New(message)
   146  		}
   147  		err = nil
   148  	} else {
   149  		err = errors.New("received nil response from API server")
   150  	}
   151  	h.Logger.Logf(slogger.INFO, "task's end response received: %s", taskEndResp.Message)
   152  	return taskEndResp, err
   153  }
   154  
   155  // Log sends a batch of log messages for the task's logs to the API server.
   156  func (h *HTTPCommunicator) Log(messages []model.LogMessage) error {
   157  
   158  	outgoingData := model.TaskLog{
   159  		TaskId:       h.TaskId,
   160  		Timestamp:    time.Now(),
   161  		MessageCount: len(messages),
   162  		Messages:     messages,
   163  	}
   164  
   165  	retriableLog := util.RetriableFunc(
   166  		func() error {
   167  			resp, err := h.TryTaskPost("log", outgoingData)
   168  			if resp != nil {
   169  				defer resp.Body.Close()
   170  			}
   171  			if err != nil {
   172  				return util.RetriableError{errors.WithStack(err)}
   173  			}
   174  			if resp.StatusCode == http.StatusInternalServerError {
   175  				return util.RetriableError{errors.Errorf("http status %v response body %v", resp.StatusCode, resp.Body)}
   176  			}
   177  			return nil
   178  		},
   179  	)
   180  	retryFail, err := util.Retry(retriableLog, h.MaxAttempts, h.RetrySleep)
   181  	if retryFail {
   182  		return errors.Wrapf(err, "logging failed after %vtries: %v", h.MaxAttempts)
   183  	}
   184  	return err
   185  }
   186  
   187  // GetTask returns the communicator's task.
   188  func (h *HTTPCommunicator) GetTask() (*task.Task, error) {
   189  	task := &task.Task{}
   190  	retriableGet := util.RetriableFunc(
   191  		func() error {
   192  			resp, err := h.TryTaskGet("")
   193  			if resp != nil {
   194  				defer resp.Body.Close()
   195  			}
   196  			if resp != nil && resp.StatusCode == http.StatusConflict {
   197  				// Something very wrong, fail now with no retry.
   198  				return errors.New("conflict; wrong secret")
   199  			}
   200  			if err != nil {
   201  				// Some generic error trying to connect - try again
   202  				return util.RetriableError{err}
   203  			}
   204  			if resp == nil {
   205  				return util.RetriableError{errors.New("empty response")}
   206  			} else {
   207  				err = util.ReadJSONInto(resp.Body, task)
   208  				if err != nil {
   209  					fmt.Printf("error3, retrying: %v\n", err)
   210  					return util.RetriableError{err}
   211  				}
   212  				return nil
   213  			}
   214  		},
   215  	)
   216  
   217  	retryFail, err := util.Retry(retriableGet, h.MaxAttempts, h.RetrySleep)
   218  	if retryFail {
   219  		return nil, errors.Wrapf(err, "getting task failed after %v tries", h.MaxAttempts)
   220  	}
   221  	return task, nil
   222  }
   223  
   224  // GetDistro returns the distro for the communicator's task.
   225  func (h *HTTPCommunicator) GetDistro() (*distro.Distro, error) {
   226  	d := &distro.Distro{}
   227  	retriableGet := util.RetriableFunc(
   228  		func() error {
   229  			resp, err := h.TryTaskGet("distro")
   230  			if resp == nil {
   231  				return util.RetriableError{errors.New("empty response")}
   232  			}
   233  
   234  			defer resp.Body.Close()
   235  			if err != nil {
   236  				// Some generic error trying to connect - try again
   237  				return util.RetriableError{err}
   238  			}
   239  
   240  			if resp != nil && resp.StatusCode == http.StatusConflict {
   241  				// Something very wrong, fail now with no retry.
   242  				return errors.New("conflict; wrong secret")
   243  			}
   244  			if resp.StatusCode != http.StatusOK {
   245  				return util.RetriableError{errors.Errorf("bad status: %s", resp.Status)}
   246  			}
   247  
   248  			err = util.ReadJSONInto(resp.Body, d)
   249  			if err != nil {
   250  				err = errors.Wrap(err, "unable to read distro response")
   251  				h.Logger.Logf(slogger.ERROR, err.Error())
   252  				return util.RetriableError{err}
   253  			}
   254  			return nil
   255  		},
   256  	)
   257  
   258  	retryFail, err := util.Retry(retriableGet, h.MaxAttempts, h.RetrySleep)
   259  	if retryFail {
   260  		return nil, errors.Wrapf(err, "getting distro failed after %d tries", h.MaxAttempts)
   261  	}
   262  	return d, nil
   263  }
   264  
   265  // GetNextTask returns a next task response by getting the next task for a given host.
   266  func (h *HTTPCommunicator) GetNextTask() (*apimodels.NextTaskResponse, error) {
   267  	taskResponse := &apimodels.NextTaskResponse{}
   268  	retriableGet := util.RetriableFunc(
   269  		func() error {
   270  			resp, err := h.TryGet("agent/next_task")
   271  			if resp == nil {
   272  				return util.RetriableError{fmt.Errorf("empty response")}
   273  			}
   274  			defer resp.Body.Close()
   275  			if resp.StatusCode == http.StatusConflict {
   276  				return fmt.Errorf("conflict - wrong secret!")
   277  			}
   278  			if err != nil {
   279  				return util.RetriableError{err}
   280  			}
   281  			err = util.ReadJSONInto(resp.Body, taskResponse)
   282  			if err != nil {
   283  				return util.RetriableError{err}
   284  			}
   285  			return nil
   286  		})
   287  	retryFail, err := util.Retry(retriableGet, h.MaxAttempts, h.RetrySleep)
   288  	if retryFail {
   289  		return nil, fmt.Errorf("getting next task failed after %v tries: %v", h.MaxAttempts, err)
   290  	}
   291  	return taskResponse, nil
   292  
   293  }
   294  
   295  // GetProjectConfig loads the communicator's task's project from the API server.
   296  func (h *HTTPCommunicator) GetProjectRef() (*model.ProjectRef, error) {
   297  	projectRef := &model.ProjectRef{}
   298  	retriableGet := util.RetriableFunc(
   299  		func() error {
   300  			resp, err := h.TryTaskGet("project_ref")
   301  			if resp != nil {
   302  				defer resp.Body.Close()
   303  			}
   304  			if resp != nil && resp.StatusCode == http.StatusConflict {
   305  				// Something very wrong, fail now with no retry.
   306  				return errors.New("conflict; wrong secret")
   307  			}
   308  			if err != nil {
   309  				// Some generic error trying to connect - try again
   310  				return util.RetriableError{err}
   311  			}
   312  			if resp == nil {
   313  				return util.RetriableError{errors.New("empty response")}
   314  			}
   315  
   316  			err = util.ReadJSONInto(resp.Body, projectRef)
   317  			if err != nil {
   318  				return util.RetriableError{err}
   319  			}
   320  			return nil
   321  		},
   322  	)
   323  
   324  	retryFail, err := util.Retry(retriableGet, h.MaxAttempts, h.RetrySleep)
   325  	if retryFail {
   326  		return nil, errors.Wrapf(err, "getting project ref failed after %d tries", h.MaxAttempts)
   327  	}
   328  	return projectRef, nil
   329  }
   330  
   331  // GetVersion loads the communicator's task's version from the API server.
   332  func (h *HTTPCommunicator) GetVersion() (*version.Version, error) {
   333  	v := &version.Version{}
   334  	retriableGet := util.RetriableFunc(
   335  		func() error {
   336  			resp, err := h.TryTaskGet("version")
   337  			if resp != nil {
   338  				defer resp.Body.Close()
   339  
   340  				if resp.StatusCode == http.StatusConflict {
   341  					// Something very wrong, fail now with no retry.
   342  					return errors.New("conflict; wrong secret")
   343  				}
   344  				if resp.StatusCode != http.StatusOK {
   345  					msg, _ := ioutil.ReadAll(resp.Body) // ignore ReadAll error
   346  					return util.RetriableError{
   347  						errors.Errorf("bad status code %v: %s",
   348  							resp.StatusCode, string(msg)),
   349  					}
   350  				}
   351  			}
   352  
   353  			if err != nil {
   354  				// Some generic error trying to connect - try again
   355  				return util.RetriableError{errors.WithStack(err)}
   356  			}
   357  
   358  			if resp == nil {
   359  				return util.RetriableError{errors.New("empty response")}
   360  			}
   361  
   362  			err = util.ReadJSONInto(resp.Body, v)
   363  			if err != nil {
   364  				err := errors.Wrap(err, "unable to read project version response")
   365  				h.Logger.Logf(slogger.ERROR, err.Error())
   366  				return err
   367  			}
   368  			return nil
   369  		},
   370  	)
   371  
   372  	retryFail, err := util.Retry(retriableGet, h.MaxAttempts, h.RetrySleep)
   373  	if retryFail {
   374  		return nil, errors.Wrapf(err, "getting project configuration failed after %d tries",
   375  			h.MaxAttempts)
   376  	}
   377  	return v, nil
   378  }
   379  
   380  // Heartbeat sends a heartbeat to the API server. The server can respond with
   381  // and "abort" response. This function returns true if the agent should abort.
   382  func (h *HTTPCommunicator) Heartbeat() (bool, error) {
   383  	h.Logger.Logf(slogger.INFO, "Sending heartbeat.")
   384  	data := interface{}("heartbeat")
   385  	resp, err := h.tryRequestWithClient(h.getTaskPath("heartbeat"), "POST", h.heartbeatClient, &data)
   386  	if resp != nil {
   387  		defer resp.Body.Close()
   388  	}
   389  	if err != nil {
   390  		err = errors.Wrap(err, "error sending heartbeat")
   391  		h.Logger.Logf(slogger.ERROR, err.Error())
   392  		return false, err
   393  	}
   394  	if resp.StatusCode == http.StatusConflict {
   395  		h.Logger.Logf(slogger.ERROR, "wrong secret (409) sending heartbeat")
   396  		h.SignalChan <- IncorrectSecret
   397  		return false, errors.Errorf("unauthorized - wrong secret")
   398  	}
   399  	if resp.StatusCode != http.StatusOK {
   400  		return false, errors.Errorf("unexpected status code doing heartbeat: %v",
   401  			resp.StatusCode)
   402  	}
   403  
   404  	heartbeatResponse := &apimodels.HeartbeatResponse{}
   405  	if err = util.ReadJSONInto(resp.Body, heartbeatResponse); err != nil {
   406  		err = errors.Wrap(err, "Error unmarshaling heartbeat response")
   407  		h.Logger.Logf(slogger.ERROR, err.Error())
   408  		return false, err
   409  	}
   410  	return heartbeatResponse.Abort, nil
   411  }
   412  
   413  func (h *HTTPCommunicator) SetTask(taskId, taskSecret string) {
   414  	h.TaskId = taskId
   415  	h.TaskSecret = taskSecret
   416  }
   417  
   418  func (h *HTTPCommunicator) GetCurrentTaskId() string {
   419  	return h.TaskId
   420  }
   421  
   422  func (h *HTTPCommunicator) Reset(commSignal chan Signal, timeoutWatcher *TimeoutWatcher) (*APILogger, *StreamLogger, error) {
   423  
   424  	h.SignalChan = commSignal
   425  	// set up logger to API server
   426  	apiLogger := NewAPILogger(h)
   427  
   428  	// set up timeout logger, local and API logger streams
   429  	streamLogger, err := NewStreamLogger(timeoutWatcher, apiLogger)
   430  	if err != nil {
   431  		return nil, nil, err
   432  	}
   433  	h.Logger = streamLogger.Execution
   434  	return apiLogger, streamLogger, nil
   435  
   436  }
   437  
   438  // getTaskPath is a helper to create a path that can be used for task specific calls
   439  func (h *HTTPCommunicator) getTaskPath(path string) string {
   440  	return fmt.Sprintf("task/%v/%v", h.TaskId, path)
   441  }
   442  
   443  func (h *HTTPCommunicator) TryGet(path string) (*http.Response, error) {
   444  	resp, err := h.tryRequestWithClient(path, "GET", h.httpClient, nil)
   445  	return resp, errors.WithStack(err)
   446  }
   447  
   448  func (h *HTTPCommunicator) TryTaskGet(path string) (*http.Response, error) {
   449  	resp, err := h.tryRequestWithClient(h.getTaskPath(path), "GET", h.httpClient, nil)
   450  	return resp, errors.WithStack(err)
   451  }
   452  
   453  func (h *HTTPCommunicator) TryTaskPost(path string, data interface{}) (*http.Response, error) {
   454  	resp, err := h.tryRequestWithClient(h.getTaskPath(path), "POST", h.httpClient, &data)
   455  	return resp, errors.WithStack(err)
   456  }
   457  
   458  func (h *HTTPCommunicator) TryPostJSON(path string, data interface{}) (*http.Response, error) {
   459  	resp, err := h.tryRequestWithClient(path, "POST", h.httpClient, &data)
   460  	return resp, errors.WithStack(err)
   461  }
   462  
   463  // tryRequestWithClient does the given task HTTP request using the provided client, allowing
   464  // requests to be done with multiple client configurations/timeouts.
   465  func (h *HTTPCommunicator) tryRequestWithClient(path string, method string, client *http.Client,
   466  	data *interface{}) (*http.Response, error) {
   467  	endpointUrl := fmt.Sprintf("%s/%s", h.ServerURLRoot, path)
   468  	req, err := http.NewRequest(method, endpointUrl, nil)
   469  	err = errors.WithStack(err)
   470  	if err != nil {
   471  		return nil, err
   472  	}
   473  
   474  	if data != nil {
   475  		var out []byte
   476  		out, err = json.Marshal(*data)
   477  		if err != nil {
   478  			return nil, err
   479  		}
   480  		req.Body = ioutil.NopCloser(bytes.NewReader(out))
   481  	}
   482  	req.Header.Add(evergreen.TaskSecretHeader, h.TaskSecret)
   483  	req.Header.Add(evergreen.HostHeader, h.HostId)
   484  	req.Header.Add(evergreen.HostSecretHeader, h.HostSecret)
   485  	req.Header.Add("Content-Type", "application/json")
   486  
   487  	resp, err := client.Do(req)
   488  	return resp, errors.WithStack(err)
   489  }
   490  
   491  func (h *HTTPCommunicator) postJSON(path string, data interface{}) (
   492  	resp *http.Response, retryFail bool, err error) {
   493  	retriablePost := util.RetriableFunc(
   494  		func() error {
   495  			resp, err = h.TryTaskPost(path, data)
   496  			if resp == nil {
   497  				h.Logger.Logf(slogger.ERROR, "nil response")
   498  				return errors.New("response is nil")
   499  			}
   500  			if err != nil {
   501  				h.Logger.Logf(slogger.ERROR, "HTTP Post failed on '%v': %v",
   502  					path, err)
   503  				return util.RetriableError{err}
   504  			}
   505  			if resp.StatusCode == http.StatusOK {
   506  				return nil
   507  			}
   508  			if resp.StatusCode == http.StatusConflict {
   509  				h.Logger.Logf(slogger.ERROR, "received 409 conflict error")
   510  				return HTTPConflictError
   511  			}
   512  			h.Logger.Logf(slogger.ERROR, "bad response '%v' posting to "+
   513  				"'%v'", resp.StatusCode, path)
   514  			err = errors.Errorf("unexpected status code: %v", resp.StatusCode)
   515  			return util.RetriableError{err}
   516  		},
   517  	)
   518  	retryFail, err = util.Retry(retriablePost, h.MaxAttempts, h.RetrySleep)
   519  
   520  	return resp, retryFail, err
   521  }
   522  
   523  // FetchExpansionVars loads expansions for a communicator's task from the API server.
   524  func (h *HTTPCommunicator) FetchExpansionVars() (*apimodels.ExpansionVars, error) {
   525  	resultVars := &apimodels.ExpansionVars{}
   526  	retriableGet := util.RetriableFunc(
   527  		func() error {
   528  			resp, err := h.TryTaskGet("fetch_vars")
   529  			if resp != nil {
   530  				defer resp.Body.Close()
   531  			}
   532  			if err != nil {
   533  				// Some generic error trying to connect - try again
   534  				h.Logger.Logf(slogger.ERROR, "failed trying to call fetch GET: %v", err)
   535  				return util.RetriableError{err}
   536  			}
   537  			if resp.StatusCode == http.StatusUnauthorized {
   538  				err = errors.Errorf("fetching expansions failed: got 'unauthorized' response.")
   539  				h.Logger.Logf(slogger.ERROR, err.Error())
   540  				return err
   541  			}
   542  			if resp.StatusCode != http.StatusOK {
   543  				err = errors.Errorf("failed trying fetch GET, got bad response code: %v", resp.StatusCode)
   544  				h.Logger.Logf(slogger.ERROR, err.Error())
   545  				return util.RetriableError{err}
   546  			}
   547  			if resp == nil {
   548  				err = errors.New("empty response fetching expansions")
   549  				h.Logger.Logf(slogger.ERROR, err.Error())
   550  				return util.RetriableError{err}
   551  			}
   552  
   553  			// got here safely, so all is good - read the results
   554  			err = util.ReadJSONInto(resp.Body, resultVars)
   555  			if err != nil {
   556  				err = errors.Wrap(err, "failed to read vars from response")
   557  				h.Logger.Logf(slogger.ERROR, err.Error())
   558  				return err
   559  			}
   560  			return nil
   561  		},
   562  	)
   563  
   564  	retryFail, err := util.Retry(retriableGet, httpMaxAttempts, 1*time.Second)
   565  	err = errors.WithStack(err)
   566  	if err != nil {
   567  		// stop trying to make fetch happen, it's not going to happen
   568  		if retryFail {
   569  			h.Logger.Logf(slogger.ERROR, "Fetching vars used up all retries.")
   570  		}
   571  		return nil, err
   572  	}
   573  	return resultVars, err
   574  }