github.com/rudderlabs/rudder-go-kit@v0.30.0/stats/statsd.go

github.com/rudderlabs/rudder-go-kit@v0.30.0/stats/statsd.go (about)

     1  package stats
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"runtime"
     7  	"strings"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/cenkalti/backoff/v4"
    12  	"go.opentelemetry.io/otel/trace"
    13  	"gopkg.in/alexcesaro/statsd.v2"
    14  
    15  	"github.com/rudderlabs/rudder-go-kit/logger"
    16  )
    17  
    18  // statsdStats is the statsd-specific implementation of Stats
    19  type statsdStats struct {
    20  	config                     statsConfig
    21  	statsdConfig               statsdConfig
    22  	state                      *statsdState
    23  	logger                     logger.Logger
    24  	backgroundCollectionCtx    context.Context
    25  	backgroundCollectionCancel func()
    26  
    27  	// tracing not supported when using stats with StatsD
    28  	tracer trace.Tracer
    29  }
    30  
    31  func (s *statsdStats) Start(ctx context.Context, goFactory GoRoutineFactory) error {
    32  	if !s.config.enabled.Load() {
    33  		return nil
    34  	}
    35  
    36  	s.state.conn = statsd.Address(s.statsdConfig.statsdServerURL)
    37  	// since, we don't want setup to be a blocking call, creating a separate `go routine` for retry to get statsd client.
    38  
    39  	// NOTE: this is to get at least a dummy client, even if there is a failure.
    40  	// So, that nil pointer error is not received when client is called.
    41  	var err error
    42  	s.state.client.statsd, err = statsd.New(s.state.conn, s.statsdConfig.statsdTagsFormat(), s.statsdConfig.statsdDefaultTags())
    43  	if err == nil {
    44  		s.logger.Info("StatsD client setup succeeded.")
    45  		s.state.clientsLock.Lock()
    46  		s.state.connEstablished = true
    47  		s.state.clientsLock.Unlock()
    48  	}
    49  
    50  	goFactory.Go(func() {
    51  		if err != nil {
    52  			s.logger.Info("retrying StatsD client creation in the background...")
    53  			var c *statsd.Client
    54  			c, err = s.getNewStatsdClientWithExpoBackoff(
    55  				ctx,
    56  				s.state.conn,
    57  				s.statsdConfig.statsdTagsFormat(),
    58  				s.statsdConfig.statsdDefaultTags(),
    59  			)
    60  			if err != nil {
    61  				s.config.enabled.Store(false)
    62  				s.logger.Errorf("error while creating new StatsD client, giving up: %v", err)
    63  			} else {
    64  				s.state.clientsLock.Lock()
    65  				s.state.client.statsd = c
    66  				for _, client := range s.state.pendingClients {
    67  					client.statsdMu.Lock()
    68  					client.statsd = s.state.client.statsd.Clone(
    69  						s.state.conn,
    70  						s.statsdConfig.statsdTagsFormat(),
    71  						s.statsdConfig.statsdDefaultTags(),
    72  						statsd.Tags(client.tags...),
    73  						statsd.SampleRate(client.samplingRate),
    74  					)
    75  					client.statsdMu.Unlock()
    76  				}
    77  
    78  				s.logger.Info("StatsD client setup succeeded.")
    79  				s.state.connEstablished = true
    80  				s.state.pendingClients = nil
    81  				s.state.clientsLock.Unlock()
    82  			}
    83  		}
    84  		if err == nil && ctx.Err() == nil {
    85  			s.collectPeriodicStats(goFactory)
    86  		}
    87  	})
    88  
    89  	s.logger.Infof("Stats started successfully in mode %q with address %q", "StatsD", s.statsdConfig.statsdServerURL)
    90  
    91  	return nil
    92  }
    93  
    94  // NewTracer creates a new Tracer
    95  func (s *statsdStats) NewTracer(_ string) Tracer { return &tracer{tracer: s.tracer} }
    96  
    97  func (s *statsdStats) getNewStatsdClientWithExpoBackoff(ctx context.Context, opts ...statsd.Option) (*statsd.Client, error) {
    98  	bo := backoff.NewExponentialBackOff()
    99  	bo.MaxInterval = time.Minute
   100  	bo.MaxElapsedTime = 0
   101  	boCtx := backoff.WithContext(bo, ctx)
   102  	var err error
   103  	var c *statsd.Client
   104  	op := func() error {
   105  		c, err = statsd.New(opts...)
   106  		if err != nil {
   107  			s.logger.Errorf("error while creating new StatsD client: %v", err)
   108  		}
   109  		return err
   110  	}
   111  
   112  	err = backoff.Retry(op, boCtx)
   113  	return c, err
   114  }
   115  
   116  func (s *statsdStats) collectPeriodicStats(goFactory GoRoutineFactory) {
   117  	gaugeFunc := func(key string, val uint64) {
   118  		s.NewStat("runtime_"+key, GaugeType).Gauge(val)
   119  	}
   120  	s.state.rc = newRuntimeStatsCollector(gaugeFunc)
   121  	s.state.rc.PauseDur = time.Duration(s.config.periodicStatsConfig.statsCollectionInterval) * time.Second
   122  	s.state.rc.EnableCPU = s.config.periodicStatsConfig.enableCPUStats
   123  	s.state.rc.EnableMem = s.config.periodicStatsConfig.enableMemStats
   124  	s.state.rc.EnableGC = s.config.periodicStatsConfig.enableGCStats
   125  
   126  	s.state.mc = newMetricStatsCollector(s, s.config.periodicStatsConfig.metricManager)
   127  	if s.config.periodicStatsConfig.enabled {
   128  		var wg sync.WaitGroup
   129  		wg.Add(2)
   130  		goFactory.Go(func() {
   131  			defer wg.Done()
   132  			s.state.rc.run(s.backgroundCollectionCtx)
   133  		})
   134  		goFactory.Go(func() {
   135  			defer wg.Done()
   136  			s.state.mc.run(s.backgroundCollectionCtx)
   137  		})
   138  		wg.Wait()
   139  	}
   140  }
   141  
   142  // Stop stops periodic collection of stats.
   143  func (s *statsdStats) Stop() {
   144  	s.state.clientsLock.RLock()
   145  	defer s.state.clientsLock.RUnlock()
   146  
   147  	if !s.config.enabled.Load() || !s.state.connEstablished {
   148  		return
   149  	}
   150  
   151  	s.backgroundCollectionCancel()
   152  	if !s.config.periodicStatsConfig.enabled {
   153  		return
   154  	}
   155  
   156  	if s.state.rc.done != nil {
   157  		<-s.state.rc.done
   158  	}
   159  	if s.state.mc.done != nil {
   160  		<-s.state.mc.done
   161  	}
   162  }
   163  
   164  // NewStat creates a new Measurement with provided Name and Type
   165  func (s *statsdStats) NewStat(name, statType string) (m Measurement) {
   166  	return s.internalNewTaggedStat(name, statType, nil, 1)
   167  }
   168  
   169  func (s *statsdStats) NewTaggedStat(Name, StatType string, tags Tags) (m Measurement) {
   170  	return s.internalNewTaggedStat(Name, StatType, tags, 1)
   171  }
   172  
   173  func (s *statsdStats) NewSampledTaggedStat(Name, StatType string, tags Tags) (m Measurement) {
   174  	return s.internalNewTaggedStat(Name, StatType, tags, s.statsdConfig.samplingRate)
   175  }
   176  
   177  func (s *statsdStats) internalNewTaggedStat(name, statType string, tags Tags, samplingRate float32) (m Measurement) {
   178  	// If stats is not enabled, returning a dummy struct
   179  	if !s.config.enabled.Load() {
   180  		return s.newStatsdMeasurement(name, statType, &statsdClient{})
   181  	}
   182  
   183  	// Clean up tags based on deployment type. No need to send workspace id tag for free tier customers.
   184  	newTags := make(Tags)
   185  	for k, v := range tags {
   186  		if strings.Trim(k, " ") == "" {
   187  			s.logger.Warnf("removing empty tag key with value %q for measurement %q", v, name)
   188  			continue
   189  		}
   190  		if _, ok := s.config.excludedTags[k]; ok {
   191  			continue
   192  		}
   193  		sanitizedKey := sanitizeTagKey(k)
   194  		if _, ok := s.config.excludedTags[sanitizedKey]; ok {
   195  			continue
   196  		}
   197  		newTags[sanitizedKey] = v
   198  	}
   199  
   200  	// key comprises the measurement type plus all tag-value pairs
   201  	taggedClientKey := newTags.String() + fmt.Sprintf("%f", samplingRate)
   202  
   203  	s.state.clientsLock.RLock()
   204  	taggedClient, found := s.state.clients[taggedClientKey]
   205  	s.state.clientsLock.RUnlock()
   206  
   207  	if !found {
   208  		s.state.clientsLock.Lock()
   209  		if taggedClient, found = s.state.clients[taggedClientKey]; !found { // double check for race
   210  			tagVals := newTags.Strings()
   211  			taggedClient = &statsdClient{samplingRate: samplingRate, tags: tagVals}
   212  			if s.state.connEstablished {
   213  				taggedClient.statsd = s.state.client.statsd.Clone(
   214  					s.state.conn,
   215  					s.statsdConfig.statsdTagsFormat(),
   216  					s.statsdConfig.statsdDefaultTags(),
   217  					statsd.Tags(tagVals...),
   218  					statsd.SampleRate(samplingRate),
   219  				)
   220  			} else {
   221  				// new statsd clients will be created when connection is established for all pending clients
   222  				s.state.pendingClients[taggedClientKey] = taggedClient
   223  			}
   224  			s.state.clients[taggedClientKey] = taggedClient
   225  		}
   226  		s.state.clientsLock.Unlock()
   227  	}
   228  
   229  	return s.newStatsdMeasurement(name, statType, taggedClient)
   230  }
   231  
   232  // newStatsdMeasurement creates a new measurement of the specific type
   233  func (s *statsdStats) newStatsdMeasurement(name, statType string, client *statsdClient) Measurement {
   234  	if strings.Trim(name, " ") == "" {
   235  		byteArr := make([]byte, 2048)
   236  		n := runtime.Stack(byteArr, false)
   237  		stackTrace := string(byteArr[:n])
   238  		s.logger.Warnf("detected missing stat measurement name, using 'novalue':\n%v", stackTrace)
   239  		name = "novalue"
   240  	}
   241  	baseMeasurement := &statsdMeasurement{
   242  		enabled:            s.config.enabled.Load(),
   243  		name:               name,
   244  		client:             client,
   245  		genericMeasurement: genericMeasurement{statType: statType},
   246  	}
   247  	switch statType {
   248  	case CountType:
   249  		return &statsdCounter{baseMeasurement}
   250  	case GaugeType:
   251  		return &statsdGauge{baseMeasurement}
   252  	case TimerType:
   253  		return &statsdTimer{statsdMeasurement: baseMeasurement}
   254  	case HistogramType:
   255  		return &statsdHistogram{baseMeasurement}
   256  	default:
   257  		panic(fmt.Errorf("unsupported measurement type %s", statType))
   258  	}
   259  }
   260  
   261  type statsdConfig struct {
   262  	tagsFormat          string
   263  	statsdServerURL     string
   264  	samplingRate        float32
   265  	instanceName        string
   266  	namespaceIdentifier string
   267  }
   268  
   269  // statsdDefaultTags returns the default tags to use for statsd
   270  func (c *statsdConfig) statsdDefaultTags() statsd.Option {
   271  	var tags []string
   272  	if c.instanceName != "" {
   273  		tags = append(tags, "instanceName", c.instanceName)
   274  	}
   275  	if c.namespaceIdentifier != "" {
   276  		tags = append(tags, "namespace", c.namespaceIdentifier)
   277  	}
   278  	return statsd.Tags(tags...)
   279  }
   280  
   281  // statsdTagsFormat returns the tags format to use for statsd
   282  func (c *statsdConfig) statsdTagsFormat() statsd.Option {
   283  	switch c.tagsFormat {
   284  	case "datadog":
   285  		return statsd.TagsFormat(statsd.Datadog)
   286  	default:
   287  		return statsd.TagsFormat(statsd.InfluxDB)
   288  	}
   289  }
   290  
   291  type statsdState struct {
   292  	conn   statsd.Option
   293  	client *statsdClient
   294  	rc     runtimeStatsCollector
   295  	mc     metricStatsCollector
   296  
   297  	clientsLock     sync.RWMutex // protects the following
   298  	connEstablished bool
   299  	clients         map[string]*statsdClient
   300  	pendingClients  map[string]*statsdClient
   301  }
   302  
   303  // statsdClient is a wrapper around statsd.Client.
   304  // We use this wrapper to allow for filling the actual statsd client at a later stage,
   305  // in case a connection cannot be established immediately at startup.
   306  type statsdClient struct {
   307  	samplingRate float32
   308  	tags         []string
   309  
   310  	statsdMu sync.RWMutex // protects the following
   311  	statsd   *statsd.Client
   312  }
   313  
   314  // ready returns true if the statsd client is ready to be used (not nil).
   315  //
   316  // sc.statsdMu.RLock should be held when calling this method.
   317  func (sc *statsdClient) ready() bool {
   318  	return sc.statsd != nil
   319  }