github.com/instana/go-sensor@v1.62.2-0.20240520081010-4919868049e1/gcr_agent.go (about)

     1  // (c) Copyright IBM Corp. 2021
     2  // (c) Copyright Instana Inc. 2020
     3  
     4  package instana
     5  
     6  import (
     7  	"bytes"
     8  	"context"
     9  	"encoding/json"
    10  	"fmt"
    11  	"io"
    12  	"io/ioutil"
    13  	"net/http"
    14  	"os"
    15  	"strconv"
    16  	"strings"
    17  	"sync"
    18  	"time"
    19  
    20  	"github.com/instana/go-sensor/acceptor"
    21  	"github.com/instana/go-sensor/autoprofile"
    22  	"github.com/instana/go-sensor/gcloud"
    23  )
    24  
// googleCloudRunMetadataURL is the default base URL passed to the compute
// metadata provider. newGCRAgent uses it unless the
// GOOGLE_CLOUD_RUN_METADATA_ENDPOINT environment variable is set.
const googleCloudRunMetadataURL = "http://metadata.google.internal"
    26  
// gcrMetadata combines the compute metadata returned by the metadata provider
// with the service details that collectSnapshot reads from the process
// environment.
type gcrMetadata struct {
	gcloud.ComputeMetadata

	// Service is populated from the K_SERVICE environment variable.
	Service       string
	// Configuration is populated from the K_CONFIGURATION environment variable.
	Configuration string
	// Revision is populated from the K_REVISION environment variable.
	Revision      string
	// Port is populated from the PORT environment variable.
	Port          string
}
    35  
// gcrSnapshot is the agent's view of the running instance: the generic
// serverless snapshot used for the process plugin payload and request headers,
// plus the metadata it was built from.
type gcrSnapshot struct {
	// Service holds the entity ID, host, PID, start time and container details.
	Service  serverlessSnapshot
	// Metadata is the metadata the snapshot was created from.
	Metadata gcrMetadata
}
    40  
    41  func newGCRSnapshot(pid int, md gcrMetadata) gcrSnapshot {
    42  	return gcrSnapshot{
    43  		Service: serverlessSnapshot{
    44  			EntityID:  md.Instance.ID,
    45  			Host:      "gcp:cloud-run:revision:" + md.Revision,
    46  			PID:       pid,
    47  			StartedAt: processStartedAt,
    48  			Container: containerSnapshot{
    49  				ID:   md.Instance.ID,
    50  				Type: "gcpCloudRunInstance",
    51  			},
    52  		},
    53  		Metadata: md,
    54  	}
    55  }
    56  
    57  func newGCRServiceRevisionInstancePluginPayload(snapshot gcrSnapshot) acceptor.PluginPayload {
    58  	regionName := snapshot.Metadata.Instance.Region
    59  	if ind := strings.LastIndexByte(regionName, '/'); ind >= 0 {
    60  		// truncate projects/<projectID>/regions/ prefix to extract the region
    61  		// from a fully-qualified name
    62  		regionName = regionName[ind+1:]
    63  	}
    64  
    65  	return acceptor.NewGCRServiceRevisionInstancePluginPayload(snapshot.Service.EntityID, acceptor.GCRServiceRevisionInstanceData{
    66  		Runtime:          "go",
    67  		Region:           regionName,
    68  		Service:          snapshot.Metadata.Service,
    69  		Configuration:    snapshot.Metadata.Configuration,
    70  		Revision:         snapshot.Metadata.Revision,
    71  		InstanceID:       snapshot.Metadata.Instance.ID,
    72  		Port:             snapshot.Metadata.Port,
    73  		NumericProjectID: snapshot.Metadata.Project.NumericProjectID,
    74  		ProjectID:        snapshot.Metadata.Project.ProjectID,
    75  	})
    76  }
    77  
// gcrAgent is the agent client used when running on Google Cloud Run. It
// queues spans and sends them together with metrics to the acceptor endpoint.
type gcrAgent struct {
	// Endpoint is the base URL that "/bundle" and "/traces" are appended to.
	Endpoint string
	// Key is sent with every request in the X-Instana-Key header.
	Key      string
	// PID is the ID of the current process as reported by os.Getpid().
	PID      int
	// Zone is read from the INSTANA_ZONE environment variable.
	Zone     string
	// Tags are parsed from the INSTANA_TAGS environment variable.
	Tags     map[string]interface{}

	// snapshot is the most recently collected snapshot; it is refreshed by the
	// goroutine started in newGCRAgent.
	snapshot         gcrSnapshot
	// lastProcessStats are the process stats included in the last SendMetrics
	// call that returned no error.
	lastProcessStats processStats

	// mu is held while spanQueue is appended to or drained.
	mu        sync.Mutex
	// spanQueue holds the spans accepted by SendSpans until they are sent.
	spanQueue []Span

	runtimeSnapshot *SnapshotCollector
	processStats    *processStatsCollector
	// gcr provides the compute metadata used to build snapshots.
	gcr             *gcloud.ComputeMetadataProvider
	// client performs the HTTP requests to the acceptor endpoint.
	client          *http.Client
	logger          LeveledLogger
}
    97  
    98  func newGCRAgent(
    99  	serviceName, acceptorEndpoint, agentKey string,
   100  	client *http.Client,
   101  	logger LeveledLogger,
   102  ) *gcrAgent {
   103  	if logger == nil {
   104  		logger = defaultLogger
   105  	}
   106  
   107  	if client == nil {
   108  		client = http.DefaultClient
   109  	}
   110  
   111  	logger.Debug("initializing google cloud run agent")
   112  
   113  	// allow overriding the metadata URL endpoint for testing purposes
   114  	mdURL, ok := os.LookupEnv("GOOGLE_CLOUD_RUN_METADATA_ENDPOINT")
   115  	if !ok {
   116  		mdURL = googleCloudRunMetadataURL
   117  	}
   118  
   119  	agent := &gcrAgent{
   120  		Endpoint: acceptorEndpoint,
   121  		Key:      agentKey,
   122  		PID:      os.Getpid(),
   123  		Zone:     os.Getenv("INSTANA_ZONE"),
   124  		Tags:     parseInstanaTags(os.Getenv("INSTANA_TAGS")),
   125  		runtimeSnapshot: &SnapshotCollector{
   126  			CollectionInterval: snapshotCollectionInterval,
   127  			ServiceName:        serviceName,
   128  		},
   129  		processStats: &processStatsCollector{
   130  			logger: logger,
   131  		},
   132  		gcr:    gcloud.NewComputeMetadataProvider(mdURL, client),
   133  		client: client,
   134  		logger: logger,
   135  	}
   136  
   137  	go func() {
   138  		for {
   139  			for i := 0; i < maximumRetries; i++ {
   140  				snapshot, ok := agent.collectSnapshot(context.Background())
   141  				if ok {
   142  					agent.snapshot = snapshot
   143  					break
   144  				}
   145  
   146  				time.Sleep(expDelay(i + 1))
   147  			}
   148  			time.Sleep(snapshotCollectionInterval)
   149  		}
   150  	}()
   151  	go agent.processStats.Run(context.Background(), time.Second)
   152  
   153  	return agent
   154  }
   155  
   156  func (a *gcrAgent) Ready() bool { return a.snapshot.Service.EntityID != "" }
   157  
   158  func (a *gcrAgent) SendMetrics(data acceptor.Metrics) (err error) {
   159  	processStats := a.processStats.Collect()
   160  	defer func() {
   161  		if err == nil {
   162  			// only update the last sent stats if they were transmitted successfully
   163  			// since they are updated on the backend incrementally using received
   164  			// deltas
   165  			a.lastProcessStats = processStats
   166  		}
   167  	}()
   168  
   169  	payload := struct {
   170  		Metrics metricsPayload `json:"metrics,omitempty"`
   171  		Spans   []Span         `json:"spans,omitempty"`
   172  	}{
   173  		Metrics: metricsPayload{
   174  			Plugins: []acceptor.PluginPayload{
   175  				newGCRServiceRevisionInstancePluginPayload(a.snapshot),
   176  				newProcessPluginPayload(a.snapshot.Service, a.lastProcessStats, processStats),
   177  				acceptor.NewGoProcessPluginPayload(acceptor.GoProcessData{
   178  					PID:      a.PID,
   179  					Snapshot: a.runtimeSnapshot.Collect(),
   180  					Metrics:  data,
   181  				}),
   182  			},
   183  		},
   184  	}
   185  
   186  	a.mu.Lock()
   187  	if len(a.spanQueue) > 0 {
   188  		payload.Spans = make([]Span, len(a.spanQueue))
   189  		copy(payload.Spans, a.spanQueue)
   190  		a.spanQueue = a.spanQueue[:0]
   191  	}
   192  	a.mu.Unlock()
   193  
   194  	buf := bytes.NewBuffer(nil)
   195  	if err := json.NewEncoder(buf).Encode(payload); err != nil {
   196  		return fmt.Errorf("failed to marshal metrics payload: %s", err)
   197  	}
   198  
   199  	req, err := http.NewRequest(http.MethodPost, a.Endpoint+"/bundle", buf)
   200  	if err != nil {
   201  		return fmt.Errorf("failed to prepare send metrics request: %s", err)
   202  	}
   203  
   204  	req.Header.Set("Content-Type", "application/json")
   205  
   206  	return a.sendRequest(req)
   207  }
   208  
   209  func (a *gcrAgent) SendEvent(event *EventData) error { return nil }
   210  
   211  func (a *gcrAgent) SendSpans(spans []Span) error {
   212  	from := newServerlessAgentFromS(a.snapshot.Service.EntityID, "gcp")
   213  	for i := range spans {
   214  		spans[i].From = from
   215  	}
   216  
   217  	// enqueue the spans to send them in a bundle with metrics instead of sending immediately
   218  	a.mu.Lock()
   219  	a.spanQueue = append(a.spanQueue, spans...)
   220  	a.mu.Unlock()
   221  
   222  	return nil
   223  }
   224  
   225  func (a *gcrAgent) SendProfiles(profiles []autoprofile.Profile) error { return nil }
   226  
   227  func (a *gcrAgent) Flush(ctx context.Context) error {
   228  	if len(a.spanQueue) == 0 {
   229  		return nil
   230  	}
   231  
   232  	if !a.Ready() {
   233  		return ErrAgentNotReady
   234  	}
   235  
   236  	a.mu.Lock()
   237  	defer a.mu.Unlock()
   238  
   239  	buf := bytes.NewBuffer(nil)
   240  	if err := json.NewEncoder(buf).Encode(a.spanQueue); err != nil {
   241  		return fmt.Errorf("failed to marshal traces payload: %s", err)
   242  	}
   243  	a.spanQueue = a.spanQueue[:0]
   244  
   245  	req, err := http.NewRequest(http.MethodPost, a.Endpoint+"/traces", buf)
   246  	if err != nil {
   247  		return fmt.Errorf("failed to prepare send traces request: %s", err)
   248  	}
   249  
   250  	req.Header.Set("Content-Type", "application/json")
   251  
   252  	return a.sendRequest(req.WithContext(ctx))
   253  }
   254  
   255  func (a *gcrAgent) sendRequest(req *http.Request) error {
   256  	req.Header.Set("X-Instana-Host", a.snapshot.Service.Host)
   257  	req.Header.Set("X-Instana-Key", a.Key)
   258  	req.Header.Set("X-Instana-Time", strconv.FormatInt(time.Now().UnixNano()/int64(time.Millisecond), 10))
   259  
   260  	resp, err := a.client.Do(req)
   261  	if err != nil {
   262  		return fmt.Errorf("failed to send request to the serverless agent: %s", err)
   263  	}
   264  
   265  	defer resp.Body.Close()
   266  
   267  	if resp.StatusCode >= http.StatusBadRequest {
   268  		respBody, err := ioutil.ReadAll(resp.Body)
   269  		if err != nil {
   270  			a.logger.Debug("failed to read serverless agent response: ", err)
   271  			return nil
   272  		}
   273  
   274  		a.logger.Info("serverless agent has responded with ", resp.Status, ": ", string(respBody))
   275  		return nil
   276  	}
   277  
   278  	io.CopyN(ioutil.Discard, resp.Body, 1<<20)
   279  
   280  	return nil
   281  }
   282  
   283  func (a *gcrAgent) collectSnapshot(ctx context.Context) (gcrSnapshot, bool) {
   284  	md, err := a.gcr.ComputeMetadata(ctx)
   285  	if err != nil {
   286  		a.logger.Warn("failed to get service metadata: ", err)
   287  		return gcrSnapshot{}, false
   288  	}
   289  
   290  	snapshot := newGCRSnapshot(a.PID, gcrMetadata{
   291  		ComputeMetadata: md,
   292  		Service:         os.Getenv("K_SERVICE"),
   293  		Configuration:   os.Getenv("K_CONFIGURATION"),
   294  		Revision:        os.Getenv("K_REVISION"),
   295  		Port:            os.Getenv("PORT"),
   296  	})
   297  	snapshot.Service.Zone = a.Zone
   298  	snapshot.Service.Tags = a.Tags
   299  
   300  	a.logger.Debug("collected snapshot")
   301  
   302  	return snapshot, true
   303  }