github.com/grafana/pyroscope@v1.18.0/pkg/usagestats/reporter.go (about) 1 package usagestats 2 3 import ( 4 "bytes" 5 "context" 6 "errors" 7 "flag" 8 "io" 9 "math" 10 "time" 11 12 "github.com/go-kit/log" 13 "github.com/go-kit/log/level" 14 "github.com/google/uuid" 15 "github.com/grafana/dskit/backoff" 16 "github.com/grafana/dskit/kv" 17 "github.com/grafana/dskit/multierror" 18 "github.com/grafana/dskit/services" 19 "github.com/prometheus/client_golang/prometheus" 20 "github.com/thanos-io/objstore" 21 22 "github.com/grafana/pyroscope/pkg/util/build" 23 ) 24 25 const ( 26 // File name for the cluster seed file. 27 ClusterSeedFileName = "pyroscope_cluster_seed.json" 28 // attemptNumber how many times we will try to read a corrupted cluster seed before deleting it. 29 attemptNumber = 4 30 // seedKey is the key for the cluster seed to use with the kv store. 31 seedKey = "usagestats_token" 32 ) 33 34 var ( 35 reportCheckInterval = time.Minute 36 reportInterval = 4 * time.Hour 37 38 stabilityCheckInterval = 5 * time.Second 39 stabilityMinimunRequired = 6 40 ) 41 42 type Config struct { 43 Enabled bool `yaml:"reporting_enabled"` 44 Leader bool `yaml:"-"` 45 } 46 47 // RegisterFlags adds the flags required to config this to the given FlagSet 48 func (cfg *Config) RegisterFlags(f *flag.FlagSet) { 49 f.BoolVar(&cfg.Enabled, "usage-stats.enabled", true, "Enable anonymous usage statistics collection. For more details about usage statistics, refer to https://grafana.com/docs/pyroscope/latest/configure-server/anonymous-usage-statistics-reporting/") 50 } 51 52 type Reporter struct { 53 logger log.Logger 54 bucket objstore.Bucket 55 reg prometheus.Registerer 56 57 services.Service 58 59 conf Config 60 kvConfig kv.Config 61 cluster ClusterSeed 62 lastReport time.Time 63 } 64 65 func NewReporter(config Config, kvConfig kv.Config, objectClient objstore.Bucket, logger log.Logger, reg prometheus.Registerer) (*Reporter, error) { 66 if !config.Enabled { 67 return nil, nil 68 } 69 r := &Reporter{ 70 logger: logger, 71 bucket: objectClient, 72 conf: config, 73 kvConfig: kvConfig, 74 reg: reg, 75 } 76 r.Service = services.NewBasicService(nil, r.running, nil) 77 return r, nil 78 } 79 80 func (rep *Reporter) initLeader(ctx context.Context) ClusterSeed { 81 kvClient, err := kv.NewClient(rep.kvConfig, JSONCodec, nil, rep.logger) 82 if err != nil { 83 level.Info(rep.logger).Log("msg", "failed to create kv client", "err", err) 84 return ClusterSeed{} 85 } 86 // Try to become leader via the kv client 87 backoff := backoff.New(ctx, backoff.Config{ 88 MinBackoff: time.Second, 89 MaxBackoff: time.Minute, 90 MaxRetries: 0, 91 }) 92 for backoff.Ongoing() { 93 // create a new cluster seed 94 seed := ClusterSeed{ 95 UID: uuid.NewString(), 96 PrometheusVersion: build.GetVersion(), 97 CreatedAt: time.Now(), 98 } 99 if err := kvClient.CAS(ctx, seedKey, func(in interface{}) (out interface{}, retry bool, err error) { 100 // The key is already set, so we don't need to do anything 101 if in != nil { 102 if kvSeed, ok := in.(*ClusterSeed); ok && kvSeed != nil && kvSeed.UID != seed.UID { 103 seed = *kvSeed 104 return nil, false, nil 105 } 106 } 107 108 return seed.Clone(), true, nil 109 }); err != nil { 110 level.Info(rep.logger).Log("msg", "failed to CAS cluster seed key", "err", err) 111 continue 112 } 113 // ensure stability of the cluster seed 114 stableSeed := ensureStableKey(ctx, kvClient, rep.logger) 115 seed = *stableSeed 116 // Fetch the remote cluster seed. 117 remoteSeed, err := rep.fetchSeed(ctx, 118 func(err error) bool { 119 // we only want to retry if the error is not an object not found error 120 return !rep.bucket.IsObjNotFoundErr(err) 121 }) 122 if err != nil { 123 if rep.bucket.IsObjNotFoundErr(err) { 124 // we are the leader and we need to save the file. 125 if err := rep.writeSeedFile(ctx, seed); err != nil { 126 level.Info(rep.logger).Log("msg", "failed to CAS cluster seed key", "err", err) 127 backoff.Wait() 128 continue 129 } 130 return seed 131 } 132 backoff.Wait() 133 continue 134 } 135 return remoteSeed 136 } 137 return ClusterSeed{} 138 } 139 140 // ensureStableKey ensures that the cluster seed is stable for at least 30seconds. 141 // This is required when using gossiping kv client like memberlist which will never have the same seed 142 // but will converge eventually. 143 func ensureStableKey(ctx context.Context, kvClient kv.Client, logger log.Logger) *ClusterSeed { 144 var ( 145 previous *ClusterSeed 146 stableCount int 147 ) 148 for { 149 time.Sleep(stabilityCheckInterval) 150 value, err := kvClient.Get(ctx, seedKey) 151 if err != nil { 152 level.Debug(logger).Log("msg", "failed to get cluster seed key for stability check", "err", err) 153 continue 154 } 155 if seed, ok := value.(*ClusterSeed); ok && seed != nil { 156 if previous == nil { 157 previous = seed 158 continue 159 } 160 if previous.UID != seed.UID { 161 previous = seed 162 stableCount = 0 163 continue 164 } 165 stableCount++ 166 if stableCount > stabilityMinimunRequired { 167 return seed 168 } 169 } 170 } 171 } 172 173 func (rep *Reporter) init(ctx context.Context) { 174 if rep.conf.Leader { 175 rep.cluster = rep.initLeader(ctx) 176 return 177 } 178 // follower only wait for the cluster seed to be set. 179 // it will try forever to fetch the cluster seed. 180 seed, _ := rep.fetchSeed(ctx, nil) 181 rep.cluster = seed 182 } 183 184 // fetchSeed fetches the cluster seed from the object store and try until it succeeds. 185 // continueFn allow you to decide if we should continue retrying. Nil means always retry 186 func (rep *Reporter) fetchSeed(ctx context.Context, continueFn func(err error) bool) (ClusterSeed, error) { 187 var ( 188 backoff = backoff.New(ctx, backoff.Config{ 189 MinBackoff: time.Second, 190 MaxBackoff: time.Minute, 191 MaxRetries: 0, 192 }) 193 readingErr = 0 194 ) 195 for backoff.Ongoing() { 196 seed, err := rep.readSeedFile(ctx) 197 if err != nil { 198 if !rep.bucket.IsObjNotFoundErr(err) { 199 readingErr++ 200 } 201 level.Debug(rep.logger).Log("msg", "failed to read cluster seed file", "err", err) 202 if readingErr > attemptNumber { 203 if err := rep.bucket.Delete(ctx, ClusterSeedFileName); err != nil { 204 level.Error(rep.logger).Log("msg", "failed to delete corrupted cluster seed file, deleting it", "err", err) 205 } 206 readingErr = 0 207 } 208 if continueFn == nil || continueFn(err) { 209 backoff.Wait() 210 continue 211 } 212 return ClusterSeed{}, err 213 } 214 return seed, nil 215 } 216 return ClusterSeed{}, backoff.Err() 217 } 218 219 // readSeedFile reads the cluster seed file from the object store. 220 func (rep *Reporter) readSeedFile(ctx context.Context) (ClusterSeed, error) { 221 reader, err := rep.bucket.Get(ctx, ClusterSeedFileName) 222 if err != nil { 223 return ClusterSeed{}, err 224 } 225 defer func() { 226 if err := reader.Close(); err != nil { 227 level.Error(rep.logger).Log("msg", "failed to close reader", "err", err) 228 } 229 }() 230 data, err := io.ReadAll(reader) 231 if err != nil { 232 return ClusterSeed{}, err 233 } 234 seed, err := JSONCodec.Decode(data) 235 if err != nil { 236 return ClusterSeed{}, err 237 } 238 return *(seed.(*ClusterSeed)), nil 239 } 240 241 // writeSeedFile writes the cluster seed to the object store. 242 func (rep *Reporter) writeSeedFile(ctx context.Context, seed ClusterSeed) error { 243 data, err := JSONCodec.Encode(seed) 244 if err != nil { 245 return err 246 } 247 return rep.bucket.Upload(ctx, ClusterSeedFileName, bytes.NewReader(data)) 248 } 249 250 // running inits the reporter seed and start sending report for every interval 251 func (rep *Reporter) running(ctx context.Context) error { 252 rep.init(ctx) 253 254 if rep.cluster.UID == "" { 255 <-ctx.Done() 256 if err := ctx.Err(); !errors.Is(err, context.Canceled) { 257 return err 258 } 259 return nil 260 } 261 // check every minute if we should report. 262 ticker := time.NewTicker(reportCheckInterval) 263 defer ticker.Stop() 264 265 // find when to send the next report. 266 next := nextReport(reportInterval, rep.cluster.CreatedAt, time.Now()) 267 if rep.lastReport.IsZero() { 268 // if we never reported assumed it was the last interval. 269 rep.lastReport = next.Add(-reportInterval) 270 } 271 for { 272 select { 273 case <-ticker.C: 274 now := time.Now() 275 if !next.Equal(now) && now.Sub(rep.lastReport) < reportInterval { 276 continue 277 } 278 level.Debug(rep.logger).Log("msg", "reporting cluster stats", "date", time.Now()) 279 if err := rep.reportUsage(ctx, next); err != nil { 280 level.Info(rep.logger).Log("msg", "failed to report usage", "err", err) 281 continue 282 } 283 rep.lastReport = next 284 next = next.Add(reportInterval) 285 case <-ctx.Done(): 286 if err := ctx.Err(); !errors.Is(err, context.Canceled) { 287 return err 288 } 289 return nil 290 } 291 } 292 } 293 294 // reportUsage reports the usage to grafana.com. 295 func (rep *Reporter) reportUsage(ctx context.Context, interval time.Time) error { 296 backoff := backoff.New(ctx, backoff.Config{ 297 MinBackoff: time.Second, 298 MaxBackoff: 30 * time.Second, 299 MaxRetries: 5, 300 }) 301 var errs multierror.MultiError 302 for backoff.Ongoing() { 303 if err := sendReport(ctx, rep.cluster, interval); err != nil { 304 level.Info(rep.logger).Log("msg", "failed to send usage report", "retries", backoff.NumRetries(), "err", err) 305 errs.Add(err) 306 backoff.Wait() 307 continue 308 } 309 level.Debug(rep.logger).Log("msg", "usage report sent with success") 310 return nil 311 } 312 return errs.Err() 313 } 314 315 // nextReport compute the next report time based on the interval. 316 // The interval is based off the creation of the cluster seed to avoid all cluster reporting at the same time. 317 func nextReport(interval time.Duration, createdAt, now time.Time) time.Time { 318 // createdAt * (x * interval ) >= now 319 return createdAt.Add(time.Duration(math.Ceil(float64(now.Sub(createdAt))/float64(interval))) * interval) 320 }