github.com/rudderlabs/rudder-go-kit@v0.30.0/stats/statsd.go (about) 1 package stats 2 3 import ( 4 "context" 5 "fmt" 6 "runtime" 7 "strings" 8 "sync" 9 "time" 10 11 "github.com/cenkalti/backoff/v4" 12 "go.opentelemetry.io/otel/trace" 13 "gopkg.in/alexcesaro/statsd.v2" 14 15 "github.com/rudderlabs/rudder-go-kit/logger" 16 ) 17 18 // statsdStats is the statsd-specific implementation of Stats 19 type statsdStats struct { 20 config statsConfig 21 statsdConfig statsdConfig 22 state *statsdState 23 logger logger.Logger 24 backgroundCollectionCtx context.Context 25 backgroundCollectionCancel func() 26 27 // tracing not supported when using stats with StatsD 28 tracer trace.Tracer 29 } 30 31 func (s *statsdStats) Start(ctx context.Context, goFactory GoRoutineFactory) error { 32 if !s.config.enabled.Load() { 33 return nil 34 } 35 36 s.state.conn = statsd.Address(s.statsdConfig.statsdServerURL) 37 // since, we don't want setup to be a blocking call, creating a separate `go routine` for retry to get statsd client. 38 39 // NOTE: this is to get at least a dummy client, even if there is a failure. 40 // So, that nil pointer error is not received when client is called. 41 var err error 42 s.state.client.statsd, err = statsd.New(s.state.conn, s.statsdConfig.statsdTagsFormat(), s.statsdConfig.statsdDefaultTags()) 43 if err == nil { 44 s.logger.Info("StatsD client setup succeeded.") 45 s.state.clientsLock.Lock() 46 s.state.connEstablished = true 47 s.state.clientsLock.Unlock() 48 } 49 50 goFactory.Go(func() { 51 if err != nil { 52 s.logger.Info("retrying StatsD client creation in the background...") 53 var c *statsd.Client 54 c, err = s.getNewStatsdClientWithExpoBackoff( 55 ctx, 56 s.state.conn, 57 s.statsdConfig.statsdTagsFormat(), 58 s.statsdConfig.statsdDefaultTags(), 59 ) 60 if err != nil { 61 s.config.enabled.Store(false) 62 s.logger.Errorf("error while creating new StatsD client, giving up: %v", err) 63 } else { 64 s.state.clientsLock.Lock() 65 s.state.client.statsd = c 66 for _, client := range s.state.pendingClients { 67 client.statsdMu.Lock() 68 client.statsd = s.state.client.statsd.Clone( 69 s.state.conn, 70 s.statsdConfig.statsdTagsFormat(), 71 s.statsdConfig.statsdDefaultTags(), 72 statsd.Tags(client.tags...), 73 statsd.SampleRate(client.samplingRate), 74 ) 75 client.statsdMu.Unlock() 76 } 77 78 s.logger.Info("StatsD client setup succeeded.") 79 s.state.connEstablished = true 80 s.state.pendingClients = nil 81 s.state.clientsLock.Unlock() 82 } 83 } 84 if err == nil && ctx.Err() == nil { 85 s.collectPeriodicStats(goFactory) 86 } 87 }) 88 89 s.logger.Infof("Stats started successfully in mode %q with address %q", "StatsD", s.statsdConfig.statsdServerURL) 90 91 return nil 92 } 93 94 // NewTracer creates a new Tracer 95 func (s *statsdStats) NewTracer(_ string) Tracer { return &tracer{tracer: s.tracer} } 96 97 func (s *statsdStats) getNewStatsdClientWithExpoBackoff(ctx context.Context, opts ...statsd.Option) (*statsd.Client, error) { 98 bo := backoff.NewExponentialBackOff() 99 bo.MaxInterval = time.Minute 100 bo.MaxElapsedTime = 0 101 boCtx := backoff.WithContext(bo, ctx) 102 var err error 103 var c *statsd.Client 104 op := func() error { 105 c, err = statsd.New(opts...) 106 if err != nil { 107 s.logger.Errorf("error while creating new StatsD client: %v", err) 108 } 109 return err 110 } 111 112 err = backoff.Retry(op, boCtx) 113 return c, err 114 } 115 116 func (s *statsdStats) collectPeriodicStats(goFactory GoRoutineFactory) { 117 gaugeFunc := func(key string, val uint64) { 118 s.NewStat("runtime_"+key, GaugeType).Gauge(val) 119 } 120 s.state.rc = newRuntimeStatsCollector(gaugeFunc) 121 s.state.rc.PauseDur = time.Duration(s.config.periodicStatsConfig.statsCollectionInterval) * time.Second 122 s.state.rc.EnableCPU = s.config.periodicStatsConfig.enableCPUStats 123 s.state.rc.EnableMem = s.config.periodicStatsConfig.enableMemStats 124 s.state.rc.EnableGC = s.config.periodicStatsConfig.enableGCStats 125 126 s.state.mc = newMetricStatsCollector(s, s.config.periodicStatsConfig.metricManager) 127 if s.config.periodicStatsConfig.enabled { 128 var wg sync.WaitGroup 129 wg.Add(2) 130 goFactory.Go(func() { 131 defer wg.Done() 132 s.state.rc.run(s.backgroundCollectionCtx) 133 }) 134 goFactory.Go(func() { 135 defer wg.Done() 136 s.state.mc.run(s.backgroundCollectionCtx) 137 }) 138 wg.Wait() 139 } 140 } 141 142 // Stop stops periodic collection of stats. 143 func (s *statsdStats) Stop() { 144 s.state.clientsLock.RLock() 145 defer s.state.clientsLock.RUnlock() 146 147 if !s.config.enabled.Load() || !s.state.connEstablished { 148 return 149 } 150 151 s.backgroundCollectionCancel() 152 if !s.config.periodicStatsConfig.enabled { 153 return 154 } 155 156 if s.state.rc.done != nil { 157 <-s.state.rc.done 158 } 159 if s.state.mc.done != nil { 160 <-s.state.mc.done 161 } 162 } 163 164 // NewStat creates a new Measurement with provided Name and Type 165 func (s *statsdStats) NewStat(name, statType string) (m Measurement) { 166 return s.internalNewTaggedStat(name, statType, nil, 1) 167 } 168 169 func (s *statsdStats) NewTaggedStat(Name, StatType string, tags Tags) (m Measurement) { 170 return s.internalNewTaggedStat(Name, StatType, tags, 1) 171 } 172 173 func (s *statsdStats) NewSampledTaggedStat(Name, StatType string, tags Tags) (m Measurement) { 174 return s.internalNewTaggedStat(Name, StatType, tags, s.statsdConfig.samplingRate) 175 } 176 177 func (s *statsdStats) internalNewTaggedStat(name, statType string, tags Tags, samplingRate float32) (m Measurement) { 178 // If stats is not enabled, returning a dummy struct 179 if !s.config.enabled.Load() { 180 return s.newStatsdMeasurement(name, statType, &statsdClient{}) 181 } 182 183 // Clean up tags based on deployment type. No need to send workspace id tag for free tier customers. 184 newTags := make(Tags) 185 for k, v := range tags { 186 if strings.Trim(k, " ") == "" { 187 s.logger.Warnf("removing empty tag key with value %q for measurement %q", v, name) 188 continue 189 } 190 if _, ok := s.config.excludedTags[k]; ok { 191 continue 192 } 193 sanitizedKey := sanitizeTagKey(k) 194 if _, ok := s.config.excludedTags[sanitizedKey]; ok { 195 continue 196 } 197 newTags[sanitizedKey] = v 198 } 199 200 // key comprises the measurement type plus all tag-value pairs 201 taggedClientKey := newTags.String() + fmt.Sprintf("%f", samplingRate) 202 203 s.state.clientsLock.RLock() 204 taggedClient, found := s.state.clients[taggedClientKey] 205 s.state.clientsLock.RUnlock() 206 207 if !found { 208 s.state.clientsLock.Lock() 209 if taggedClient, found = s.state.clients[taggedClientKey]; !found { // double check for race 210 tagVals := newTags.Strings() 211 taggedClient = &statsdClient{samplingRate: samplingRate, tags: tagVals} 212 if s.state.connEstablished { 213 taggedClient.statsd = s.state.client.statsd.Clone( 214 s.state.conn, 215 s.statsdConfig.statsdTagsFormat(), 216 s.statsdConfig.statsdDefaultTags(), 217 statsd.Tags(tagVals...), 218 statsd.SampleRate(samplingRate), 219 ) 220 } else { 221 // new statsd clients will be created when connection is established for all pending clients 222 s.state.pendingClients[taggedClientKey] = taggedClient 223 } 224 s.state.clients[taggedClientKey] = taggedClient 225 } 226 s.state.clientsLock.Unlock() 227 } 228 229 return s.newStatsdMeasurement(name, statType, taggedClient) 230 } 231 232 // newStatsdMeasurement creates a new measurement of the specific type 233 func (s *statsdStats) newStatsdMeasurement(name, statType string, client *statsdClient) Measurement { 234 if strings.Trim(name, " ") == "" { 235 byteArr := make([]byte, 2048) 236 n := runtime.Stack(byteArr, false) 237 stackTrace := string(byteArr[:n]) 238 s.logger.Warnf("detected missing stat measurement name, using 'novalue':\n%v", stackTrace) 239 name = "novalue" 240 } 241 baseMeasurement := &statsdMeasurement{ 242 enabled: s.config.enabled.Load(), 243 name: name, 244 client: client, 245 genericMeasurement: genericMeasurement{statType: statType}, 246 } 247 switch statType { 248 case CountType: 249 return &statsdCounter{baseMeasurement} 250 case GaugeType: 251 return &statsdGauge{baseMeasurement} 252 case TimerType: 253 return &statsdTimer{statsdMeasurement: baseMeasurement} 254 case HistogramType: 255 return &statsdHistogram{baseMeasurement} 256 default: 257 panic(fmt.Errorf("unsupported measurement type %s", statType)) 258 } 259 } 260 261 type statsdConfig struct { 262 tagsFormat string 263 statsdServerURL string 264 samplingRate float32 265 instanceName string 266 namespaceIdentifier string 267 } 268 269 // statsdDefaultTags returns the default tags to use for statsd 270 func (c *statsdConfig) statsdDefaultTags() statsd.Option { 271 var tags []string 272 if c.instanceName != "" { 273 tags = append(tags, "instanceName", c.instanceName) 274 } 275 if c.namespaceIdentifier != "" { 276 tags = append(tags, "namespace", c.namespaceIdentifier) 277 } 278 return statsd.Tags(tags...) 279 } 280 281 // statsdTagsFormat returns the tags format to use for statsd 282 func (c *statsdConfig) statsdTagsFormat() statsd.Option { 283 switch c.tagsFormat { 284 case "datadog": 285 return statsd.TagsFormat(statsd.Datadog) 286 default: 287 return statsd.TagsFormat(statsd.InfluxDB) 288 } 289 } 290 291 type statsdState struct { 292 conn statsd.Option 293 client *statsdClient 294 rc runtimeStatsCollector 295 mc metricStatsCollector 296 297 clientsLock sync.RWMutex // protects the following 298 connEstablished bool 299 clients map[string]*statsdClient 300 pendingClients map[string]*statsdClient 301 } 302 303 // statsdClient is a wrapper around statsd.Client. 304 // We use this wrapper to allow for filling the actual statsd client at a later stage, 305 // in case a connection cannot be established immediately at startup. 306 type statsdClient struct { 307 samplingRate float32 308 tags []string 309 310 statsdMu sync.RWMutex // protects the following 311 statsd *statsd.Client 312 } 313 314 // ready returns true if the statsd client is ready to be used (not nil). 315 // 316 // sc.statsdMu.RLock should be held when calling this method. 317 func (sc *statsdClient) ready() bool { 318 return sc.statsd != nil 319 }