github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/usagestats/reporter.go

package usagestats

import (
	"bytes"
	"context"
	"errors"
	"flag"
	"io"
	"math"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/google/uuid"
	"github.com/grafana/dskit/backoff"
	"github.com/grafana/dskit/kv"
	"github.com/grafana/dskit/multierror"
	"github.com/grafana/dskit/services"
	"github.com/prometheus/client_golang/prometheus"

	"github.com/grafana/loki/pkg/storage/chunk/client"
	"github.com/grafana/loki/pkg/util/build"
)

const (
	// ClusterSeedFileName is the file name for the cluster seed file.
	ClusterSeedFileName = "loki_cluster_seed.json"
	// attemptNumber is how many times we will try to read a corrupted cluster seed before deleting it.
	attemptNumber = 4
	// seedKey is the key for the cluster seed to use with the kv store.
	seedKey = "usagestats_token"
)

var (
	reportCheckInterval = time.Minute
	reportInterval      = 4 * time.Hour

	stabilityCheckInterval   = 5 * time.Second
	stabilityMinimumRequired = 6
)

type Config struct {
	Enabled bool `yaml:"reporting_enabled"`
	Leader  bool `yaml:"-"`
}

// RegisterFlags adds the flags required to configure this to the given FlagSet.
func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
	f.BoolVar(&cfg.Enabled, "reporting.enabled", true, "Enable anonymous usage reporting.")
}

type Reporter struct {
	logger       log.Logger
	objectClient client.ObjectClient
	reg          prometheus.Registerer

	services.Service

	conf       Config
	kvConfig   kv.Config
	cluster    *ClusterSeed
	lastReport time.Time
}

func NewReporter(config Config, kvConfig kv.Config, objectClient client.ObjectClient, logger log.Logger, reg prometheus.Registerer) (*Reporter, error) {
	if !config.Enabled {
		return nil, nil
	}
	r := &Reporter{
		logger:       logger,
		objectClient: objectClient,
		conf:         config,
		kvConfig:     kvConfig,
		reg:          reg,
	}
	r.Service = services.NewBasicService(nil, r.running, nil)
	return r, nil
}

func (rep *Reporter) initLeader(ctx context.Context) *ClusterSeed {
	kvClient, err := kv.NewClient(rep.kvConfig, JSONCodec, nil, rep.logger)
	if err != nil {
		level.Info(rep.logger).Log("msg", "failed to create kv client", "err", err)
		return nil
	}
	// Try to become leader via the kv client.
	backoff := backoff.New(ctx, backoff.Config{
		MinBackoff: time.Second,
		MaxBackoff: time.Minute,
		MaxRetries: 0,
	})
	for backoff.Ongoing() {
		// Create a new cluster seed.
		seed := ClusterSeed{
			UID:               uuid.NewString(),
			PrometheusVersion: build.GetVersion(),
			CreatedAt:         time.Now(),
		}
		if err := kvClient.CAS(ctx, seedKey, func(in interface{}) (out interface{}, retry bool, err error) {
			// The key is already set: adopt the existing seed instead of ours.
			if in != nil {
				if kvSeed, ok := in.(*ClusterSeed); ok && kvSeed != nil && kvSeed.UID != seed.UID {
					seed = *kvSeed
					return nil, false, nil
				}
			}
			return &seed, true, nil
		}); err != nil {
			level.Info(rep.logger).Log("msg", "failed to CAS cluster seed key", "err", err)
			// Back off before retrying, like the other error paths in this loop.
			backoff.Wait()
			continue
		}
		// Ensure stability of the cluster seed.
		stableSeed := ensureStableKey(ctx, kvClient, rep.logger)
		seed = *stableSeed
		// Fetch the remote cluster seed.
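		// The object store is the source of truth: if a previous leader already
		// wrote a seed file, that remote seed takes precedence over the one we
		// just agreed on through the kv store.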
		remoteSeed, err := rep.fetchSeed(ctx,
			func(err error) bool {
				// We only want to retry if the error is not an "object not found" error.
				return !rep.objectClient.IsObjectNotFoundErr(err)
			})
		if err != nil {
			if rep.objectClient.IsObjectNotFoundErr(err) {
				// We are the leader and we need to save the file.
				if err := rep.writeSeedFile(ctx, seed); err != nil {
					level.Info(rep.logger).Log("msg", "failed to write cluster seed file", "err", err)
					backoff.Wait()
					continue
				}
				return &seed
			}
			backoff.Wait()
			continue
		}
		return remoteSeed
	}
	return nil
}

// ensureStableKey ensures that the cluster seed is stable for at least 30 seconds.
// This is required when using a gossiping kv client like memberlist, which will
// not report the same seed everywhere at once but will converge eventually.
func ensureStableKey(ctx context.Context, kvClient kv.Client, logger log.Logger) *ClusterSeed {
	var (
		previous    *ClusterSeed
		stableCount int
	)
	for {
		time.Sleep(stabilityCheckInterval)
		value, err := kvClient.Get(ctx, seedKey)
		if err != nil {
			level.Debug(logger).Log("msg", "failed to get cluster seed key for stability check", "err", err)
			continue
		}
		if seed, ok := value.(*ClusterSeed); ok && seed != nil {
			if previous == nil {
				previous = seed
				continue
			}
			if previous.UID != seed.UID {
				previous = seed
				stableCount = 0
				continue
			}
			stableCount++
			if stableCount > stabilityMinimumRequired {
				return seed
			}
		}
	}
}

func (rep *Reporter) init(ctx context.Context) {
	if rep.conf.Leader {
		rep.cluster = rep.initLeader(ctx)
		return
	}
	// Followers only wait for the cluster seed to be set;
	// they will try forever to fetch the cluster seed.
	seed, _ := rep.fetchSeed(ctx, nil)
	rep.cluster = seed
}

// fetchSeed fetches the cluster seed from the object store, retrying until it succeeds.
// continueFn lets the caller decide whether to keep retrying after an error; nil means always retry.
func (rep *Reporter) fetchSeed(ctx context.Context, continueFn func(err error) bool) (*ClusterSeed, error) {
	var (
		backoff = backoff.New(ctx, backoff.Config{
			MinBackoff: time.Second,
			MaxBackoff: time.Minute,
			MaxRetries: 0,
		})
		readingErr = 0
	)
	for backoff.Ongoing() {
		seed, err := rep.readSeedFile(ctx)
		if err != nil {
			if !rep.objectClient.IsObjectNotFoundErr(err) {
				readingErr++
			}
			level.Debug(rep.logger).Log("msg", "failed to read cluster seed file", "err", err)
			if readingErr > attemptNumber {
				// The seed file is most likely corrupted: delete it so the leader can recreate it.
				if err := rep.objectClient.DeleteObject(ctx, ClusterSeedFileName); err != nil {
					level.Error(rep.logger).Log("msg", "failed to delete corrupted cluster seed file", "err", err)
				}
				readingErr = 0
			}
			if continueFn == nil || continueFn(err) {
				backoff.Wait()
				continue
			}
			return nil, err
		}
		return seed, nil
	}
	return nil, backoff.Err()
}
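// In the callers above, the leader passes a continueFn that stops retrying as
// soon as the object store reports the seed file missing (so it can create the
// file itself), while followers pass nil and wait indefinitely for the leader
// to write it.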
// readSeedFile reads the cluster seed file from the object store.
func (rep *Reporter) readSeedFile(ctx context.Context) (*ClusterSeed, error) {
	reader, _, err := rep.objectClient.GetObject(ctx, ClusterSeedFileName)
	if err != nil {
		return nil, err
	}
	defer func() {
		if err := reader.Close(); err != nil {
			level.Error(rep.logger).Log("msg", "failed to close reader", "err", err)
		}
	}()
	data, err := io.ReadAll(reader)
	if err != nil {
		return nil, err
	}
	seed, err := JSONCodec.Decode(data)
	if err != nil {
		return nil, err
	}
	return seed.(*ClusterSeed), nil
}

// writeSeedFile writes the cluster seed to the object store.
func (rep *Reporter) writeSeedFile(ctx context.Context, seed ClusterSeed) error {
	data, err := JSONCodec.Encode(seed)
	if err != nil {
		return err
	}
	return rep.objectClient.PutObject(ctx, ClusterSeedFileName, bytes.NewReader(data))
}

// running initializes the reporter seed, then sends a report every interval.
func (rep *Reporter) running(ctx context.Context) error {
	rep.init(ctx)

	if rep.cluster == nil {
		<-ctx.Done()
		if err := ctx.Err(); !errors.Is(err, context.Canceled) {
			return err
		}
		return nil
	}
	// Check every minute whether we should report.
	ticker := time.NewTicker(reportCheckInterval)
	defer ticker.Stop()

	// Find when to send the next report.
	next := nextReport(reportInterval, rep.cluster.CreatedAt, time.Now())
	if rep.lastReport.IsZero() {
		// If we never reported, assume the last report happened one interval ago.
		rep.lastReport = next.Add(-reportInterval)
	}
	for {
		select {
		case <-ticker.C:
			now := time.Now()
			if !next.Equal(now) && now.Sub(rep.lastReport) < reportInterval {
				continue
			}
			level.Debug(rep.logger).Log("msg", "reporting cluster stats", "date", time.Now())
			if err := rep.reportUsage(ctx, next); err != nil {
				level.Info(rep.logger).Log("msg", "failed to report usage", "err", err)
				continue
			}
			rep.lastReport = next
			next = next.Add(reportInterval)
		case <-ctx.Done():
			if err := ctx.Err(); !errors.Is(err, context.Canceled) {
				return err
			}
			return nil
		}
	}
}

// reportUsage reports the usage to grafana.com.
func (rep *Reporter) reportUsage(ctx context.Context, interval time.Time) error {
	backoff := backoff.New(ctx, backoff.Config{
		MinBackoff: time.Second,
		MaxBackoff: 30 * time.Second,
		MaxRetries: 5,
	})
	var errs multierror.MultiError
	for backoff.Ongoing() {
		if err := sendReport(ctx, rep.cluster, interval); err != nil {
			level.Info(rep.logger).Log("msg", "failed to send usage report", "retries", backoff.NumRetries(), "err", err)
			errs.Add(err)
			backoff.Wait()
			continue
		}
		level.Debug(rep.logger).Log("msg", "usage report sent successfully")
		return nil
	}
	return errs.Err()
}

// nextReport computes the next report time based on the interval.
// The schedule is anchored to the creation time of the cluster seed to avoid
// all clusters reporting at the same time.
func nextReport(interval time.Duration, createdAt, now time.Time) time.Time {
	// Find the smallest integer x such that createdAt + x*interval >= now.
	return createdAt.Add(time.Duration(math.Ceil(float64(now.Sub(createdAt))/float64(interval))) * interval)
}
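// The function below is an illustrative sketch, not part of the original file:
// a hypothetical helper showing nextReport with concrete values. With a 4-hour
// interval and a seed created at 01:00 UTC, a "now" 9 hours later yields
// ceil(9h/4h) = 3 intervals, so the next report fires at 01:00 + 12h = 13:00 UTC.
func nextReportExample() time.Time {
	createdAt := time.Date(2022, 8, 17, 1, 0, 0, 0, time.UTC)
	now := createdAt.Add(9 * time.Hour)            // 10:00 UTC
	return nextReport(4*time.Hour, createdAt, now) // returns 13:00 UTC
}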