go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/server/internal/tracesampler.go (about)

     1  // Copyright 2019 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package internal
    16  
import (
	"context"
	"fmt"
	"math"
	"math/rand"
	"strconv"
	"strings"
	"sync"
	"time"

	"go.opentelemetry.io/otel/sdk/trace"
	oteltrace "go.opentelemetry.io/otel/trace"
)
    29  
    30  // BaseSampler constructs an object that decides how often to sample traces.
    31  //
    32  // The spec is a string in one of the forms:
    33  //   - `X%` - to sample approximately X percent of requests.
    34  //   - `Xqps` - to produce approximately X samples per second.
    35  //
    36  // Returns an error if the spec can't be parsed.
    37  func BaseSampler(spec string) (trace.Sampler, error) {
    38  	switch spec = strings.ToLower(spec); {
    39  	case strings.HasSuffix(spec, "%"):
    40  		percent, err := strconv.ParseFloat(strings.TrimSuffix(spec, "%"), 64)
    41  		if err != nil {
    42  			return nil, fmt.Errorf("not a float percent %q", spec)
    43  		}
    44  		// Note: TraceIDRatioBased takes care of <=0.0 && >=1.0 cases.
    45  		return trace.TraceIDRatioBased(percent / 100.0), nil
    46  
    47  	case strings.HasSuffix(spec, "qps"):
    48  		qps, err := strconv.ParseFloat(strings.TrimSuffix(spec, "qps"), 64)
    49  		if err != nil {
    50  			return nil, fmt.Errorf("not a float QPS %q", spec)
    51  		}
    52  		if qps <= 0.0000001 {
    53  			// Semantically the same, but slightly faster.
    54  			return trace.TraceIDRatioBased(0), nil
    55  		}
    56  		return &qpsSampler{
    57  			period: time.Duration(float64(time.Second) / qps),
    58  			now:    time.Now,
    59  			rnd:    rand.New(rand.NewSource(rand.Int63())),
    60  		}, nil
    61  
    62  	default:
    63  		return nil, fmt.Errorf("unrecognized sampling spec string %q - should be either 'X%%' or 'Xqps'", spec)
    64  	}
    65  }
    66  
    67  // GateSampler returns a sampler that calls the callback to decide if the span
    68  // should be sampled.
    69  //
    70  // If the callback returns false, the span will not be sampled.
    71  //
    72  // If the callback returns true, the decision will be handed over to the given
    73  // base sampler.
    74  func GateSampler(base trace.Sampler, cb func(context.Context) bool) trace.Sampler {
    75  	return &gateSampler{base, cb}
    76  }
    77  
// qpsSampler asks to sample a trace approximately each 'period'.
//
// Adds some random jitter to desynchronize cycles running concurrently across
// many processes.
type qpsSampler struct {
	// m guards next. ShouldSample reads next under the read lock and updates
	// it (drawing from rnd) under the write lock.
	m      sync.RWMutex
	// next is the moment after which the next trace gets sampled. It stays
	// zero until the first ShouldSample call.
	next   time.Time
	// period is the target interval between samples; BaseSampler sets it to
	// one second divided by the requested QPS.
	period time.Duration
	now    func() time.Time // for mocking time
	rnd    *rand.Rand       // for random jitter
}
    89  
    90  func (s *qpsSampler) ShouldSample(p trace.SamplingParameters) trace.SamplingResult {
    91  	now := s.now()
    92  
    93  	s.m.RLock()
    94  	sample := s.next.IsZero() || now.After(s.next)
    95  	s.m.RUnlock()
    96  
    97  	if sample {
    98  		s.m.Lock()
    99  		switch {
   100  		case s.next.IsZero():
   101  			// Start the cycle at some random offset.
   102  			s.next = now.Add(s.randomDurationLocked(0, s.period))
   103  		case now.After(s.next):
   104  			// Add random jitter to the cycle length.
   105  			jitter := s.period / 10
   106  			s.next = now.Add(s.randomDurationLocked(s.period-jitter, s.period+jitter))
   107  		}
   108  		s.m.Unlock()
   109  	}
   110  
   111  	decision := trace.Drop
   112  	if sample {
   113  		decision = trace.RecordAndSample
   114  	}
   115  	return trace.SamplingResult{
   116  		Decision:   decision,
   117  		Tracestate: oteltrace.SpanContextFromContext(p.ParentContext).TraceState(),
   118  	}
   119  }
   120  
   121  func (s *qpsSampler) Description() string {
   122  	return fmt.Sprintf("qpsSampler{period:%s}", s.period)
   123  }
   124  
   125  func (s *qpsSampler) randomDurationLocked(min, max time.Duration) time.Duration {
   126  	return min + time.Duration(s.rnd.Int63n(int64(max-min)))
   127  }
   128  
// gateSampler is a sampler that calls the callback to decide if the span
// should be sampled.
type gateSampler struct {
	// base makes the sampling decision for spans that cb lets through.
	base trace.Sampler
	// cb is called with the span's parent context; returning false drops the
	// span without consulting base.
	cb   func(context.Context) bool
}
   135  
   136  func (s *gateSampler) ShouldSample(p trace.SamplingParameters) trace.SamplingResult {
   137  	if !s.cb(p.ParentContext) {
   138  		return trace.SamplingResult{
   139  			Decision:   trace.Drop,
   140  			Tracestate: oteltrace.SpanContextFromContext(p.ParentContext).TraceState(),
   141  		}
   142  	}
   143  	return s.base.ShouldSample(p)
   144  }
   145  
   146  func (s *gateSampler) Description() string {
   147  	return fmt.Sprintf("gateSampler{base:%s}", s.base.Description())
   148  }