github.com/grafana/pyroscope@v1.18.0/pkg/api/version/version.go (about)

     1  package version
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"math"
     7  	"net"
     8  	"strconv"
     9  	"sync"
    10  	"time"
    11  
    12  	"connectrpc.com/connect"
    13  	"github.com/go-kit/log"
    14  	"github.com/go-kit/log/level"
    15  	"github.com/gogo/protobuf/proto"
    16  	"github.com/grafana/dskit/kv"
    17  	"github.com/grafana/dskit/kv/codec"
    18  	"github.com/grafana/dskit/kv/memberlist"
    19  	"github.com/grafana/dskit/ring"
    20  	"github.com/grafana/dskit/services"
    21  	"github.com/pkg/errors"
    22  	"github.com/prometheus/client_golang/prometheus"
    23  
    24  	versionv1 "github.com/grafana/pyroscope/api/gen/proto/go/version/v1"
    25  	"github.com/grafana/pyroscope/pkg/util"
    26  )
    27  
    28  // currentQuerierVersion is the current version of the querier API.
    29  // It is used to check if the query path is compatible a new change.
    30  // Increase this number when a new API change is introduced.
    31  const currentQuerierVersion = uint64(1)
    32  
    33  var (
    34  	heartbeatInterval                      = 15 * time.Second
    35  	instanceTimeout                        = 1 * time.Minute
    36  	_                 memberlist.Mergeable = (*Versions)(nil)
    37  	_                 services.Service     = (*Service)(nil)
    38  	now                                    = time.Now
    39  )
    40  
    41  func GetCodec() codec.Codec {
    42  	return codec.NewProtoCodec("versions", newVersions)
    43  }
    44  
    45  func newVersions() proto.Message {
    46  	return &Versions{
    47  		Versions: &versionv1.Versions{
    48  			Instances: make(map[string]*versionv1.InstanceVersion),
    49  		},
    50  	}
    51  }
    52  
    53  type Versions struct {
    54  	*versionv1.Versions
    55  }
    56  
    57  // Implements proto.Unmarshaler.
    58  func (v *Versions) Unmarshal(in []byte) error {
    59  	return v.UnmarshalVT(in)
    60  }
    61  
    62  // Implements proto.Marshaler.
    63  func (v *Versions) Marshal() ([]byte, error) {
    64  	return v.MarshalVT()
    65  }
    66  
    67  // Merge merges two versions. This is used when CASing or merging versions from other nodes.
    68  // v is the local version and should be mutated to include the changes from incoming.
    69  // The function should only returned changed instances.
    70  func (v *Versions) Merge(incoming memberlist.Mergeable, localCAS bool) (memberlist.Mergeable, error) {
    71  	if incoming == nil {
    72  		return nil, nil
    73  	}
    74  	other, ok := incoming.(*Versions)
    75  	if !ok {
    76  		return nil, fmt.Errorf("expected *Versions, got %T", incoming)
    77  	}
    78  	if other == nil {
    79  		return nil, nil
    80  	}
    81  	if v == nil {
    82  		v = &Versions{
    83  			Versions: other.CloneVT(),
    84  		}
    85  		return other, nil
    86  	}
    87  	if other.EqualVT(v.Versions) {
    88  		return nil, nil
    89  	}
    90  	if v.Instances == nil {
    91  		v.Instances = make(map[string]*versionv1.InstanceVersion)
    92  	}
    93  	var updated []string
    94  
    95  	// Copy over all the instances with newer timestamps.
    96  	for k, new := range other.Instances {
    97  		current, ok := v.Instances[k]
    98  		if !ok || new.Timestamp > current.Timestamp {
    99  			v.Instances[k] = new.CloneVT()
   100  			updated = append(updated, k)
   101  		} else if new.Timestamp == current.Timestamp && !current.Left && new.Left {
   102  			v.Instances[k] = new.CloneVT()
   103  			updated = append(updated, k)
   104  		}
   105  
   106  	}
   107  
   108  	if localCAS {
   109  		// Mark left all the instances that are not in the other.
   110  		for k, current := range v.Instances {
   111  			if _, ok := other.Instances[k]; !ok && !current.Left {
   112  				current.Left = true
   113  				current.Timestamp = now().UnixNano()
   114  				updated = append(updated, k)
   115  			}
   116  		}
   117  	}
   118  	// No updated members, no need to broadcast.
   119  	if len(updated) == 0 {
   120  		return nil, nil
   121  	}
   122  	// Return the changes to broadcast.
   123  	changes := newVersions().(*Versions)
   124  	for _, k := range updated {
   125  		changes.Instances[k] = v.Instances[k].CloneVT()
   126  	}
   127  	return changes, nil
   128  }
   129  
   130  // MergeContent describes content of this Mergeable.
   131  // Versions simply returns list of component that it includes.
   132  func (d *Versions) MergeContent() []string {
   133  	result := []string(nil)
   134  	for k := range d.Instances {
   135  		result = append(result, k)
   136  	}
   137  	return result
   138  }
   139  
   140  // RemoveTombstones is not required for version keys.
   141  func (v *Versions) RemoveTombstones(limit time.Time) (total, removed int) {
   142  	for n, inst := range v.Instances {
   143  		if inst.Left {
   144  			if limit.IsZero() || time.Unix(0, inst.Timestamp).Before(limit) {
   145  				// remove it
   146  				delete(v.Instances, n)
   147  				removed++
   148  			} else {
   149  				total++
   150  			}
   151  		}
   152  	}
   153  	return
   154  }
   155  
   156  // Implements memberlist.Mergeable.
   157  func (v *Versions) Clone() memberlist.Mergeable {
   158  	return &Versions{
   159  		Versions: v.CloneVT(),
   160  	}
   161  }
   162  
   163  type Service struct {
   164  	*services.BasicService
   165  	store    kv.Client
   166  	cfg      util.CommonRingConfig
   167  	logger   log.Logger
   168  	cancel   context.CancelFunc
   169  	ctx      context.Context
   170  	wg       sync.WaitGroup
   171  	addr, id string
   172  
   173  	version uint64
   174  }
   175  
   176  // New creates a new version service.
   177  func New(cfg util.CommonRingConfig, logger log.Logger, reg prometheus.Registerer) (*Service, error) {
   178  	client, err := kv.NewClient(cfg.KVStore, GetCodec(), kv.RegistererWithKVName(reg, "versions"), logger)
   179  	if err != nil {
   180  		return nil, errors.Wrap(err, "failed to initialize versions' KV store")
   181  	}
   182  
   183  	instanceAddr, err := ring.GetInstanceAddr(cfg.InstanceAddr, cfg.InstanceInterfaceNames, logger, cfg.EnableIPv6)
   184  	if err != nil {
   185  		return nil, err
   186  	}
   187  	ctx, cancel := context.WithCancel(context.Background())
   188  	instancePort := ring.GetInstancePort(cfg.InstancePort, cfg.ListenPort)
   189  	svc := &Service{
   190  		store:   client,
   191  		id:      cfg.InstanceID,
   192  		addr:    net.JoinHostPort(instanceAddr, strconv.Itoa(instancePort)),
   193  		cfg:     cfg,
   194  		logger:  log.With(logger, "component", "versions"),
   195  		cancel:  cancel,
   196  		ctx:     ctx,
   197  		version: currentQuerierVersion,
   198  	}
   199  	// The service is simple only has a running function.
   200  	// Stopping is manual to ensure we stop as part of the shutdown process.
   201  	svc.BasicService = services.NewBasicService(
   202  		func(_ context.Context) error { return nil },
   203  		svc.running,
   204  		func(_ error) error { return nil },
   205  	)
   206  	return svc, nil
   207  }
   208  
   209  func (svc *Service) running(ctx context.Context) error {
   210  	go svc.loop()
   211  	<-ctx.Done()
   212  	return nil
   213  }
   214  
   215  func (svc *Service) loop() {
   216  	svc.wg.Add(1)
   217  	ticker := time.NewTicker(heartbeatInterval)
   218  	defer func() {
   219  		ticker.Stop()
   220  		svc.wg.Done()
   221  	}()
   222  
   223  	for {
   224  		select {
   225  		case <-ticker.C:
   226  			if err := svc.heartbeat(svc.ctx); err != nil {
   227  				level.Error(svc.logger).Log("msg", "failed to heartbeat", "err", err)
   228  			}
   229  
   230  		case <-svc.ctx.Done():
   231  			level.Info(svc.logger).Log("msg", "versions is shutting down")
   232  			return
   233  		}
   234  	}
   235  }
   236  
   237  func (svc *Service) heartbeat(ctx context.Context) error {
   238  	return svc.store.CAS(ctx, "versions", func(in interface{}) (out interface{}, retry bool, err error) {
   239  		var versions *versionv1.Versions
   240  		if in == nil {
   241  			versions = newVersions().(*Versions).Versions
   242  		} else {
   243  			versions = in.(*Versions).Versions
   244  		}
   245  		current, ok := versions.Instances[svc.id]
   246  		if !ok {
   247  			current = &versionv1.InstanceVersion{}
   248  			versions.Instances[svc.id] = current
   249  		}
   250  		current.Addr = svc.addr
   251  		current.ID = svc.id
   252  		current.Timestamp = now().UnixNano()
   253  		current.QuerierAPI = svc.version
   254  		// Now prune old instances.
   255  		for id, instance := range versions.Instances {
   256  			lastHeartbeat := time.Unix(0, instance.GetTimestamp())
   257  			if time.Since(lastHeartbeat) > instanceTimeout {
   258  				level.Warn(svc.logger).Log("msg", "auto-forgetting instance from the versions because it is unhealthy for a long time", "instance", id, "last_heartbeat", lastHeartbeat.String())
   259  				delete(versions.Instances, id)
   260  			}
   261  		}
   262  		return &Versions{
   263  			Versions: versions,
   264  		}, true, nil
   265  	})
   266  }
   267  
   268  func (svc *Service) Version(ctx context.Context, req *connect.Request[versionv1.VersionRequest]) (*connect.Response[versionv1.VersionResponse], error) {
   269  	value, err := svc.store.Get(ctx, "versions")
   270  	if err != nil {
   271  		return nil, err
   272  	}
   273  	versions, ok := value.(*Versions)
   274  	if !ok {
   275  		// we don't have any versions yet.
   276  		return connect.NewResponse(&versionv1.VersionResponse{}), nil
   277  	}
   278  	// collect the minimum querier version.
   279  	minQuerierVersion := uint64(math.MaxUint64)
   280  	for _, instance := range versions.Instances {
   281  		if instance.QuerierAPI < minQuerierVersion {
   282  			minQuerierVersion = instance.QuerierAPI
   283  		}
   284  	}
   285  	return connect.NewResponse(&versionv1.VersionResponse{
   286  		QuerierAPI: minQuerierVersion,
   287  	}), nil
   288  }
   289  
// Shutdown stops version reports: it cancels the service's internal context,
// which makes the heartbeat loop return, and then blocks on the WaitGroup
// until that goroutine has finished.
// This should only be called when the service is fully shutting down.
func (svc *Service) Shutdown() {
	svc.cancel()
	svc.wg.Wait()
}
   295  }