git.colasdn.top/newrelic/go-agent@v3.26.0+incompatible/internal/collector.go (about)

     1  // Copyright 2020 New Relic Corporation. All rights reserved.
     2  // SPDX-License-Identifier: Apache-2.0
     3  
     4  package internal
     5  
     6  import (
     7  	"encoding/json"
     8  	"errors"
     9  	"fmt"
    10  	"io/ioutil"
    11  	"net/http"
    12  	"net/url"
    13  	"os"
    14  	"regexp"
    15  	"strconv"
    16  	"sync"
    17  	"time"
    18  
    19  	"github.com/newrelic/go-agent/internal/logger"
    20  )
    21  
const (
	// ProcotolVersion is the protocol version used to communicate with NR
	// backend.  It is sent as the "protocol_version" query parameter of
	// every collector URL.  Note that the misspelling ("Procotol") is part
	// of the exported identifier itself.
	ProcotolVersion = 17
	// userAgentPrefix is prepended to the agent version to form the
	// User-Agent header of collector requests.
	userAgentPrefix = "NewRelic-Go-Agent/"

	// Methods used in collector communication.  A method name is carried in
	// RpmCmd.Name and sent as the "method" query parameter.  Only
	// cmdPreconnect and cmdConnect are referenced in this part of the file;
	// the remaining names are presumably used by the harvest code.
	cmdPreconnect   = "preconnect"
	cmdConnect      = "connect"
	cmdMetrics      = "metric_data"
	cmdCustomEvents = "custom_event_data"
	cmdTxnEvents    = "analytic_event_data"
	cmdErrorEvents  = "error_event_data"
	cmdErrorData    = "error_data"
	cmdTxnTraces    = "transaction_sample_data"
	cmdSlowSQLs     = "sql_trace_data"
	cmdSpanEvents   = "span_event_data"
)
    40  
// RpmCmd contains fields specific to an individual call made to RPM.
type RpmCmd struct {
	// Name is the collector method to invoke; it is sent as the "method"
	// query parameter.
	Name string
	// Collector is the host the request is sent to.
	Collector string
	// RunID, when non-empty, is sent as the "run_id" query parameter.
	RunID string
	// Data is the uncompressed payload.  It is compressed before being sent
	// as the body of the POST request.
	Data []byte
	// RequestHeadersMap holds additional headers that are added to the
	// request after the standard ones.
	RequestHeadersMap map[string]string
	// MaxPayloadSize is the largest compressed payload length, in bytes,
	// that will be sent.  Larger payloads are rejected with an error before
	// any request is made.
	MaxPayloadSize int
}
    50  
// RpmControls contains fields which will be the same for all calls made
// by the same application.
type RpmControls struct {
	// License is sent as the "license_key" query parameter and is also
	// used to pick the preconnect host.
	License string
	// Client performs the HTTP requests.
	Client *http.Client
	// Logger receives debug-level records of each request and its outcome.
	Logger logger.Logger
	// AgentVersion is appended to userAgentPrefix to form the User-Agent
	// header.
	AgentVersion string
	// GzipWriterPool is handed to compress when the payload is compressed;
	// presumably it pools gzip writers for reuse.
	GzipWriterPool *sync.Pool
}
    60  
    61  // RPMResponse contains a NR endpoint response.
    62  //
    63  // Agent Behavior Summary:
    64  //
    65  // on connect/preconnect:
    66  //     410 means shutdown
    67  //     200, 202 mean success (start run)
    68  //     all other response codes and errors mean try after backoff
    69  //
    70  // on harvest:
    71  //     410 means shutdown
    72  //     401, 409 mean restart run
    73  //     408, 429, 500, 503 mean save data for next harvest
    74  //     all other response codes and errors discard the data and continue the current harvest
    75  type RPMResponse struct {
    76  	statusCode int
    77  	body       []byte
    78  	// Err indicates whether or not the call was successful: newRPMResponse
    79  	// should be used to avoid mismatch between statusCode and Err.
    80  	Err                      error
    81  	disconnectSecurityPolicy bool
    82  }
    83  
    84  func newRPMResponse(statusCode int) RPMResponse {
    85  	var err error
    86  	if statusCode != 200 && statusCode != 202 {
    87  		err = fmt.Errorf("response code: %d", statusCode)
    88  	}
    89  	return RPMResponse{statusCode: statusCode, Err: err}
    90  }
    91  
    92  // IsDisconnect indicates that the agent should disconnect.
    93  func (resp RPMResponse) IsDisconnect() bool {
    94  	return resp.statusCode == 410 || resp.disconnectSecurityPolicy
    95  }
    96  
    97  // IsRestartException indicates that the agent should restart.
    98  func (resp RPMResponse) IsRestartException() bool {
    99  	return resp.statusCode == 401 ||
   100  		resp.statusCode == 409
   101  }
   102  
   103  // ShouldSaveHarvestData indicates that the agent should save the data and try
   104  // to send it in the next harvest.
   105  func (resp RPMResponse) ShouldSaveHarvestData() bool {
   106  	switch resp.statusCode {
   107  	case 408, 429, 500, 503:
   108  		return true
   109  	default:
   110  		return false
   111  	}
   112  }
   113  
   114  func rpmURL(cmd RpmCmd, cs RpmControls) string {
   115  	var u url.URL
   116  
   117  	u.Host = cmd.Collector
   118  	u.Path = "agent_listener/invoke_raw_method"
   119  	u.Scheme = "https"
   120  
   121  	query := url.Values{}
   122  	query.Set("marshal_format", "json")
   123  	query.Set("protocol_version", strconv.Itoa(ProcotolVersion))
   124  	query.Set("method", cmd.Name)
   125  	query.Set("license_key", cs.License)
   126  
   127  	if len(cmd.RunID) > 0 {
   128  		query.Set("run_id", cmd.RunID)
   129  	}
   130  
   131  	u.RawQuery = query.Encode()
   132  	return u.String()
   133  }
   134  
   135  func collectorRequestInternal(url string, cmd RpmCmd, cs RpmControls) RPMResponse {
   136  	compressed, err := compress(cmd.Data, cs.GzipWriterPool)
   137  	if nil != err {
   138  		return RPMResponse{Err: err}
   139  	}
   140  
   141  	if l := compressed.Len(); l > cmd.MaxPayloadSize {
   142  		return RPMResponse{Err: fmt.Errorf("Payload size for %s too large: %d greater than %d", cmd.Name, l, cmd.MaxPayloadSize)}
   143  	}
   144  
   145  	req, err := http.NewRequest("POST", url, compressed)
   146  	if nil != err {
   147  		return RPMResponse{Err: err}
   148  	}
   149  
   150  	req.Header.Add("Accept-Encoding", "identity, deflate")
   151  	req.Header.Add("Content-Type", "application/octet-stream")
   152  	req.Header.Add("User-Agent", userAgentPrefix+cs.AgentVersion)
   153  	req.Header.Add("Content-Encoding", "gzip")
   154  	for k, v := range cmd.RequestHeadersMap {
   155  		req.Header.Add(k, v)
   156  	}
   157  
   158  	resp, err := cs.Client.Do(req)
   159  	if err != nil {
   160  		return RPMResponse{Err: err}
   161  	}
   162  
   163  	defer resp.Body.Close()
   164  
   165  	r := newRPMResponse(resp.StatusCode)
   166  
   167  	// Read the entire response, rather than using resp.Body as input to json.NewDecoder to
   168  	// avoid the issue described here:
   169  	// https://github.com/google/go-github/pull/317
   170  	// https://ahmetalpbalkan.com/blog/golang-json-decoder-pitfalls/
   171  	// Also, collector JSON responses are expected to be quite small.
   172  	body, err := ioutil.ReadAll(resp.Body)
   173  	if nil == r.Err {
   174  		r.Err = err
   175  	}
   176  	r.body = body
   177  
   178  	return r
   179  }
   180  
   181  // CollectorRequest makes a request to New Relic.
   182  func CollectorRequest(cmd RpmCmd, cs RpmControls) RPMResponse {
   183  	url := rpmURL(cmd, cs)
   184  
   185  	if cs.Logger.DebugEnabled() {
   186  		cs.Logger.Debug("rpm request", map[string]interface{}{
   187  			"command": cmd.Name,
   188  			"url":     url,
   189  			"payload": JSONString(cmd.Data),
   190  		})
   191  	}
   192  
   193  	resp := collectorRequestInternal(url, cmd, cs)
   194  
   195  	if cs.Logger.DebugEnabled() {
   196  		if err := resp.Err; err != nil {
   197  			cs.Logger.Debug("rpm failure", map[string]interface{}{
   198  				"command":  cmd.Name,
   199  				"url":      url,
   200  				"response": string(resp.body), // Body might not be JSON on failure.
   201  				"error":    err.Error(),
   202  			})
   203  		} else {
   204  			cs.Logger.Debug("rpm response", map[string]interface{}{
   205  				"command":  cmd.Name,
   206  				"url":      url,
   207  				"response": JSONString(resp.body),
   208  			})
   209  		}
   210  	}
   211  
   212  	return resp
   213  }
   214  
   215  const (
   216  	// NEW_RELIC_HOST can be used to override the New Relic endpoint.  This
   217  	// is useful for testing.
   218  	envHost = "NEW_RELIC_HOST"
   219  )
   220  
   221  var (
   222  	preconnectHostOverride       = os.Getenv(envHost)
   223  	preconnectHostDefault        = "collector.newrelic.com"
   224  	preconnectRegionLicenseRegex = regexp.MustCompile(`(^.+?)x`)
   225  )
   226  
   227  func calculatePreconnectHost(license, overrideHost string) string {
   228  	if "" != overrideHost {
   229  		return overrideHost
   230  	}
   231  	m := preconnectRegionLicenseRegex.FindStringSubmatch(license)
   232  	if len(m) > 1 {
   233  		return "collector." + m[1] + ".nr-data.net"
   234  	}
   235  	return preconnectHostDefault
   236  }
   237  
// ConnectJSONCreator allows the creation of the connect payload JSON to be
// deferred until the SecurityPolicies are acquired and vetted.
type ConnectJSONCreator interface {
	// CreateConnectJSON returns the body of the connect request.
	// ConnectAttempt passes it the security policies from the preconnect
	// reply (the result of PointerIfPopulated) and aborts the attempt if
	// an error is returned.
	CreateConnectJSON(*SecurityPolicies) ([]byte, error)
}
   243  
// preconnectRequest is the payload of the preconnect call.  ConnectAttempt
// marshals it as the only element of a JSON array.
type preconnectRequest struct {
	// SecurityPoliciesToken is left out of the JSON when empty.
	SecurityPoliciesToken string `json:"security_policies_token,omitempty"`
	// HighSecurity is always present in the JSON.
	HighSecurity bool `json:"high_security"`
}
   248  
var (
	// errMissingAgentRunID is returned by ConnectAttempt when the connect
	// reply parses successfully but carries an empty run id.
	errMissingAgentRunID = errors.New("connect reply missing agent run id")
)
   252  
   253  // ConnectAttempt tries to connect an application.
   254  func ConnectAttempt(config ConnectJSONCreator, securityPoliciesToken string, highSecurity bool, cs RpmControls) (*ConnectReply, RPMResponse) {
   255  	preconnectData, err := json.Marshal([]preconnectRequest{{
   256  		SecurityPoliciesToken: securityPoliciesToken,
   257  		HighSecurity:          highSecurity,
   258  	}})
   259  	if nil != err {
   260  		return nil, RPMResponse{Err: fmt.Errorf("unable to marshal preconnect data: %v", err)}
   261  	}
   262  
   263  	call := RpmCmd{
   264  		Name:           cmdPreconnect,
   265  		Collector:      calculatePreconnectHost(cs.License, preconnectHostOverride),
   266  		Data:           preconnectData,
   267  		MaxPayloadSize: maxPayloadSizeInBytes,
   268  	}
   269  
   270  	resp := CollectorRequest(call, cs)
   271  	if nil != resp.Err {
   272  		return nil, resp
   273  	}
   274  
   275  	var preconnect struct {
   276  		Preconnect PreconnectReply `json:"return_value"`
   277  	}
   278  	err = json.Unmarshal(resp.body, &preconnect)
   279  	if nil != err {
   280  		// Certain security policy errors must be treated as a disconnect.
   281  		return nil, RPMResponse{
   282  			Err:                      fmt.Errorf("unable to process preconnect reply: %v", err),
   283  			disconnectSecurityPolicy: isDisconnectSecurityPolicyError(err),
   284  		}
   285  	}
   286  
   287  	js, err := config.CreateConnectJSON(preconnect.Preconnect.SecurityPolicies.PointerIfPopulated())
   288  	if nil != err {
   289  		return nil, RPMResponse{Err: fmt.Errorf("unable to create connect data: %v", err)}
   290  	}
   291  
   292  	call.Collector = preconnect.Preconnect.Collector
   293  	call.Data = js
   294  	call.Name = cmdConnect
   295  
   296  	resp = CollectorRequest(call, cs)
   297  	if nil != resp.Err {
   298  		return nil, resp
   299  	}
   300  
   301  	reply, err := ConstructConnectReply(resp.body, preconnect.Preconnect)
   302  	if nil != err {
   303  		return nil, RPMResponse{Err: err}
   304  	}
   305  
   306  	// Note:  This should never happen.  It would mean the collector
   307  	// response is malformed.  This exists merely as extra defensiveness.
   308  	if "" == reply.RunID {
   309  		return nil, RPMResponse{Err: errMissingAgentRunID}
   310  	}
   311  
   312  	return reply, resp
   313  }
   314  
   315  // ConstructConnectReply takes the body of a Connect reply, in the form of bytes, and a
   316  // PreconnectReply, and converts it into a *ConnectReply
   317  func ConstructConnectReply(body []byte, preconnect PreconnectReply) (*ConnectReply, error) {
   318  	var reply struct {
   319  		Reply *ConnectReply `json:"return_value"`
   320  	}
   321  	reply.Reply = ConnectReplyDefaults()
   322  	err := json.Unmarshal(body, &reply)
   323  	if nil != err {
   324  		return nil, fmt.Errorf("unable to parse connect reply: %v", err)
   325  	}
   326  
   327  	reply.Reply.PreconnectReply = preconnect
   328  
   329  	reply.Reply.AdaptiveSampler = NewAdaptiveSampler(
   330  		time.Duration(reply.Reply.SamplingTargetPeriodInSeconds)*time.Second,
   331  		reply.Reply.SamplingTarget,
   332  		time.Now())
   333  	reply.Reply.rulesCache = newRulesCache(txnNameCacheLimit)
   334  
   335  	return reply.Reply, nil
   336  }