github.com/weaviate/weaviate@v1.24.6/usecases/telemetry/telemetry.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package telemetry
    13  
    14  import (
    15  	"bytes"
    16  	"context"
    17  	"encoding/base64"
    18  	"encoding/json"
    19  	"fmt"
    20  	"io"
    21  	"net/http"
    22  	"runtime"
    23  	"sort"
    24  	"strings"
    25  	"time"
    26  
    27  	enterrors "github.com/weaviate/weaviate/entities/errors"
    28  
    29  	"github.com/go-openapi/strfmt"
    30  	"github.com/google/uuid"
    31  	"github.com/sirupsen/logrus"
    32  	"github.com/weaviate/weaviate/entities/models"
    33  	"github.com/weaviate/weaviate/entities/verbosity"
    34  	"github.com/weaviate/weaviate/usecases/config"
    35  )
    36  
    37  const (
    38  	defaultConsumer = "aHR0cHM6Ly90ZWxlbWV0cnkud2Vhdmlh" +
    39  		"dGUuaW8vd2VhdmlhdGUtdGVsZW1ldHJ5"
    40  	defaultPushInterval = 24 * time.Hour
    41  )
    42  
    43  type nodesStatusGetter interface {
    44  	LocalNodeStatus(ctx context.Context, className, output string) *models.NodeStatus
    45  }
    46  
    47  type modulesProvider interface {
    48  	GetMeta() (map[string]interface{}, error)
    49  }
    50  
    51  // Telemeter is responsible for managing the transmission of telemetry data
    52  type Telemeter struct {
    53  	machineID         strfmt.UUID
    54  	nodesStatusGetter nodesStatusGetter
    55  	modulesProvider   modulesProvider
    56  	logger            logrus.FieldLogger
    57  	shutdown          chan struct{}
    58  	failedToStart     bool
    59  	consumer          string
    60  	pushInterval      time.Duration
    61  }
    62  
    63  // New creates a new Telemeter instance
    64  func New(nodesStatusGetter nodesStatusGetter, modulesProvider modulesProvider,
    65  	logger logrus.FieldLogger,
    66  ) *Telemeter {
    67  	tel := &Telemeter{
    68  		machineID:         strfmt.UUID(uuid.NewString()),
    69  		nodesStatusGetter: nodesStatusGetter,
    70  		modulesProvider:   modulesProvider,
    71  		logger:            logger,
    72  		shutdown:          make(chan struct{}),
    73  		consumer:          defaultConsumer,
    74  		pushInterval:      defaultPushInterval,
    75  	}
    76  	return tel
    77  }
    78  
    79  // Start begins telemetry for the node
    80  func (tel *Telemeter) Start(ctx context.Context) error {
    81  	payload, err := tel.push(ctx, PayloadType.Init)
    82  	if err != nil {
    83  		tel.failedToStart = true
    84  		return fmt.Errorf("push: %w", err)
    85  	}
    86  	f := func() {
    87  		t := time.NewTicker(tel.pushInterval)
    88  		defer t.Stop()
    89  		for {
    90  			select {
    91  			case <-tel.shutdown:
    92  				return
    93  			case <-t.C:
    94  				payload, err = tel.push(ctx, PayloadType.Update)
    95  				if err != nil {
    96  					tel.logger.
    97  						WithField("action", "telemetry_push").
    98  						WithField("payload", fmt.Sprintf("%+v", payload)).
    99  						WithField("retry_at", time.Now().Add(tel.pushInterval).Format(time.RFC3339)).
   100  						Error(err.Error())
   101  					continue
   102  				}
   103  				tel.logger.
   104  					WithField("action", "telemetry_push").
   105  					WithField("payload", fmt.Sprintf("%+v", payload)).
   106  					Info("telemetry update")
   107  			}
   108  		}
   109  	}
   110  	enterrors.GoWrapper(f, tel.logger)
   111  
   112  	tel.logger.
   113  		WithField("action", "telemetry_push").
   114  		WithField("payload", fmt.Sprintf("%+v", payload)).
   115  		Info("telemetry started")
   116  	return nil
   117  }
   118  
   119  // Stop shuts down the telemeter
   120  func (tel *Telemeter) Stop(ctx context.Context) error {
   121  	if tel.failedToStart {
   122  		return nil
   123  	}
   124  
   125  	select {
   126  	case <-ctx.Done():
   127  		return fmt.Errorf("shutdown telemetry: %w", ctx.Err())
   128  	case tel.shutdown <- struct{}{}:
   129  		payload, err := tel.push(ctx, PayloadType.Terminate)
   130  		if err != nil {
   131  			tel.logger.
   132  				WithField("action", "telemetry_push").
   133  				WithField("payload", fmt.Sprintf("%+v", payload)).
   134  				Error(err.Error())
   135  			return err
   136  		}
   137  		tel.logger.
   138  			WithField("action", "telemetry_push").
   139  			WithField("payload", fmt.Sprintf("%+v", payload)).
   140  			Info("telemetry terminated")
   141  		return nil
   142  	}
   143  }
   144  
   145  // push sends telemetry data to the consumer url
   146  func (tel *Telemeter) push(ctx context.Context, payloadType string) (*Payload, error) {
   147  	payload, err := tel.buildPayload(ctx, payloadType)
   148  	if err != nil {
   149  		return nil, fmt.Errorf("build payload: %w", err)
   150  	}
   151  
   152  	b, err := json.Marshal(payload)
   153  	if err != nil {
   154  		return nil, fmt.Errorf("marshal payload: %w", err)
   155  	}
   156  
   157  	url, err := base64.StdEncoding.DecodeString(tel.consumer)
   158  	if err != nil {
   159  		return nil, fmt.Errorf("decode url: %w", err)
   160  	}
   161  
   162  	resp, err := http.Post(string(url), "application/json", bytes.NewReader(b))
   163  	if err != nil {
   164  		return nil, fmt.Errorf("failed to send request: %w", err)
   165  	}
   166  	defer resp.Body.Close()
   167  	if resp.StatusCode != http.StatusOK {
   168  		body, _ := io.ReadAll(resp.Body)
   169  		return nil, fmt.Errorf("request unsuccessful, status code: %d, body: %s", resp.StatusCode, string(body))
   170  	}
   171  	return payload, nil
   172  }
   173  
   174  func (tel *Telemeter) buildPayload(ctx context.Context, payloadType string) (*Payload, error) {
   175  	mods, err := tel.getEnabledModules()
   176  	if err != nil {
   177  		return nil, fmt.Errorf("get enabled modules: %w", err)
   178  	}
   179  
   180  	var objs int64
   181  	// The first payload should not include object count,
   182  	// because all the shards may not be loaded yet. We
   183  	// don't want to force load for telemetry alone
   184  	if payloadType != PayloadType.Init {
   185  		objs, err = tel.getObjectCount(ctx)
   186  		if err != nil {
   187  			return nil, fmt.Errorf("get object count: %w", err)
   188  		}
   189  	}
   190  
   191  	return &Payload{
   192  		MachineID:  tel.machineID,
   193  		Type:       payloadType,
   194  		Version:    config.ServerVersion,
   195  		Modules:    mods,
   196  		NumObjects: objs,
   197  		OS:         runtime.GOOS,
   198  		Arch:       runtime.GOARCH,
   199  	}, nil
   200  }
   201  
   202  func (tel *Telemeter) getEnabledModules() (string, error) {
   203  	modMeta, err := tel.modulesProvider.GetMeta()
   204  	if err != nil {
   205  		return "", fmt.Errorf("meta from modules provider: %w", err)
   206  	}
   207  	if len(modMeta) == 0 {
   208  		return "", nil
   209  	}
   210  	mods, i := make([]string, len(modMeta)), 0
   211  	for name := range modMeta {
   212  		mods[i], i = name, i+1
   213  	}
   214  	sort.Strings(mods)
   215  	return strings.Join(mods, ","), nil
   216  }
   217  
   218  func (tel *Telemeter) getObjectCount(ctx context.Context) (int64, error) {
   219  	status := tel.nodesStatusGetter.LocalNodeStatus(ctx, "", verbosity.OutputVerbose)
   220  	if status == nil || status.Stats == nil {
   221  		return 0, fmt.Errorf("received nil node stats")
   222  	}
   223  	return status.Stats.ObjectCount, nil
   224  }