github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ts/server.go

     1  // Copyright 2015 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package ts
    12  
    13  import (
    14  	"context"
    15  	"math"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/kv"
    18  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
    19  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    20  	"github.com/cockroachdb/cockroach/pkg/ts/tspb"
    21  	"github.com/cockroachdb/cockroach/pkg/util/log"
    22  	"github.com/cockroachdb/cockroach/pkg/util/mon"
    23  	"github.com/cockroachdb/cockroach/pkg/util/quotapool"
    24  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    25  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    26  	gwruntime "github.com/grpc-ecosystem/grpc-gateway/runtime"
    27  	"google.golang.org/grpc"
    28  	"google.golang.org/grpc/codes"
    29  	"google.golang.org/grpc/status"
    30  )
    31  
    32  const (
    33  	// URLPrefix is the prefix for all time series endpoints hosted by the
    34  	// server.
    35  	URLPrefix = "/ts/"
    36  	// queryWorkerMax is the default maximum number of worker goroutines that
    37  	// the time series server can use to service incoming queries.
    38  	queryWorkerMax = 8
     39  	// queryMemoryMax is a soft limit for the amount of total memory used by
     40  	// time series queries. It is not a hard cap: it sizes the per-worker
     41  	// budgets handed to queries and the worker memory monitor's log threshold.
    42  	queryMemoryMax = int64(64 * 1024 * 1024) // 64MiB
    43  	// dumpBatchSize is the number of keys processed in each batch by the dump
    44  	// command.
    45  	dumpBatchSize = 100
    46  )
    47  
    48  // ClusterNodeCountFn is a function that returns the number of nodes active on
    49  // the cluster.
    50  type ClusterNodeCountFn func() int64
    51  
    52  // ServerConfig provides a means for tests to override settings in the time
    53  // series server.
    54  type ServerConfig struct {
    55  	// The maximum number of query workers used by the server. If this
    56  	// value is zero, a default non-zero value is used instead.
    57  	QueryWorkerMax int
    58  	// The maximum amount of memory that should be used for processing queries
    59  	// across all workers. If this value is zero, a default non-zero value is
    60  	// used instead.
    61  	QueryMemoryMax int64
    62  }
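
// exampleServerConfig is an editor's illustrative sketch, not part of the
// original file: it shows how a test might use ServerConfig to override the
// defaults. Zero-valued fields fall back to queryWorkerMax and queryMemoryMax
// inside MakeServer below; the values chosen here are hypothetical.
func exampleServerConfig() ServerConfig {
	return ServerConfig{
		QueryWorkerMax: 2, // hypothetical: throttle a test to two workers.
		// QueryMemoryMax is left zero, so the 64 MiB default applies.
	}
}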
    63  
    64  // Server handles incoming external requests related to time series data.
    65  //
    66  // The server attempts to constrain the total amount of memory it uses for
    67  // processing incoming queries. This is accomplished with a multi-pronged
    68  // strategy:
    69  // + The server has a worker memory limit, which is a quota for the amount of
    70  //   memory that can be used across all currently executing queries.
    71  // + The server also has a pre-set limit on the number of parallel workers that
    72  //   can be executing at one time. Each worker is given an even share of the
    73  //   server's total memory limit, which it should not exceed.
    74  // + Each worker breaks its task into chunks which it will process sequentially;
    75  //   the size of each chunk is calculated to avoid exceeding the memory limit.
    76  //
    77  // In addition to this strategy, the server uses a memory monitor to track the
    78  // amount of memory being used in reality by worker tasks. This is intended to
    79  // verify the calculations of the individual workers are correct.
    80  //
    81  // A second memory monitor is used to track the space used by the results of
    82  // query workers, which are longer lived; an incoming request may utilize
    83  // several workers, but the results of each worker cannot be released until
    84  // being returned to the requestor. Result memory is not currently limited,
    85  // as in practical usage it is dwarfed by the memory needed by workers to
    86  // generate the results.
    87  type Server struct {
    88  	log.AmbientContext
    89  	db               *DB
    90  	stopper          *stop.Stopper
    91  	nodeCountFn      ClusterNodeCountFn
    92  	queryMemoryMax   int64
    93  	queryWorkerMax   int
    94  	workerMemMonitor mon.BytesMonitor
    95  	resultMemMonitor mon.BytesMonitor
    96  	workerSem        *quotapool.IntPool
    97  }
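
// workerBudgetSketch is an editor's illustrative sketch, not part of the
// original file: it restates the first two prongs of the strategy described
// above as code. The soft memory limit is split evenly across the worker
// pool, and each worker sizes its chunks to stay within that share.
func workerBudgetSketch(s *Server) int64 {
	// With the defaults, this is 64 MiB / 8 workers = 8 MiB per worker, the
	// same value passed as BudgetBytes in Query below.
	return s.queryMemoryMax / int64(s.queryWorkerMax)
}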
    98  
    99  // MakeServer instantiates a new Server which services requests with data from
   100  // the supplied DB.
   101  func MakeServer(
   102  	ambient log.AmbientContext,
   103  	db *DB,
   104  	nodeCountFn ClusterNodeCountFn,
   105  	cfg ServerConfig,
   106  	stopper *stop.Stopper,
   107  ) Server {
   108  	ambient.AddLogTag("ts-srv", nil)
   109  
   110  	// Override default values from configuration.
   111  	queryWorkerMax := queryWorkerMax
   112  	if cfg.QueryWorkerMax != 0 {
   113  		queryWorkerMax = cfg.QueryWorkerMax
   114  	}
   115  	queryMemoryMax := queryMemoryMax
   116  	if cfg.QueryMemoryMax != 0 {
   117  		queryMemoryMax = cfg.QueryMemoryMax
   118  	}
   119  	workerSem := quotapool.NewIntPool("ts.Server worker", uint64(queryWorkerMax))
   120  	stopper.AddCloser(workerSem.Closer("stopper"))
   121  	return Server{
   122  		AmbientContext: ambient,
   123  		db:             db,
   124  		stopper:        stopper,
   125  		nodeCountFn:    nodeCountFn,
   126  		workerMemMonitor: mon.MakeUnlimitedMonitor(
   127  			context.Background(),
   128  			"timeseries-workers",
   129  			mon.MemoryResource,
   130  			nil,
   131  			nil,
   132  			// Begin logging messages if we exceed our planned memory usage by
   133  			// more than double.
   134  			queryMemoryMax*2,
   135  			db.st,
   136  		),
   137  		resultMemMonitor: mon.MakeUnlimitedMonitor(
   138  			context.Background(),
   139  			"timeseries-results",
   140  			mon.MemoryResource,
   141  			nil,
   142  			nil,
   143  			math.MaxInt64,
   144  			db.st,
   145  		),
   146  		queryMemoryMax: queryMemoryMax,
   147  		queryWorkerMax: queryWorkerMax,
   148  		workerSem:      workerSem,
   149  	}
   150  }
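
// makeServerSketch is an editor's illustrative sketch, not part of the
// original file: it shows how the pieces above are wired together. The DB and
// stopper are assumed to come from node startup, and the three-node count
// returned by the closure is hypothetical.
func makeServerSketch(db *DB, stopper *stop.Stopper) Server {
	return MakeServer(
		log.AmbientContext{},      // in real use, the node's ambient context
		db,                        // the time series DB backed by the cluster's kv layer
		func() int64 { return 3 }, // hypothetical cluster node count
		ServerConfig{},            // zero value: use the defaults defined above
		stopper,
	)
}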
   151  
   152  // RegisterService registers the GRPC service.
   153  func (s *Server) RegisterService(g *grpc.Server) {
   154  	tspb.RegisterTimeSeriesServer(g, s)
   155  }
   156  
   157  // RegisterGateway starts the gateway (i.e. reverse proxy) that proxies HTTP requests
   158  // to the appropriate gRPC endpoints.
   159  func (s *Server) RegisterGateway(
   160  	ctx context.Context, mux *gwruntime.ServeMux, conn *grpc.ClientConn,
   161  ) error {
   162  	return tspb.RegisterTimeSeriesHandler(ctx, mux, conn)
   163  }
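
// registerSketch is an editor's illustrative sketch, not part of the original
// file: the node's server code is assumed to register the gRPC service and
// then point the HTTP gateway at a loopback gRPC connection, so that requests
// under URLPrefix are proxied to the same endpoints.
func registerSketch(
	ctx context.Context, s *Server, g *grpc.Server, mux *gwruntime.ServeMux, conn *grpc.ClientConn,
) error {
	s.RegisterService(g) // expose the tspb.TimeSeriesServer implementation over gRPC.
	return s.RegisterGateway(ctx, mux, conn)
}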
   164  
   165  // Query is an endpoint that returns data for one or more metrics over a
   166  // specific time span.
   167  func (s *Server) Query(
   168  	ctx context.Context, request *tspb.TimeSeriesQueryRequest,
   169  ) (*tspb.TimeSeriesQueryResponse, error) {
   170  	ctx = s.AnnotateCtx(ctx)
   171  	if len(request.Queries) == 0 {
   172  		return nil, status.Errorf(codes.InvalidArgument, "Queries cannot be empty")
   173  	}
   174  
   175  	// If not set, sampleNanos should default to ten second resolution.
   176  	sampleNanos := request.SampleNanos
   177  	if sampleNanos == 0 {
   178  		sampleNanos = Resolution10s.SampleDuration()
   179  	}
   180  
   181  	// For the interpolation limit, use the time limit until stores are considered
   182  	// dead. This is a conservatively long span, but gives us a good indication of
   183  	// when a gap likely indicates an outage (and thus missing values should not
   184  	// be interpolated).
   185  	interpolationLimit := kvserver.TimeUntilStoreDead.Get(&s.db.st.SV).Nanoseconds()
   186  
   187  	// Get the estimated number of nodes on the cluster, used to compute more
   188  	// accurate memory usage estimates. Set a minimum of 1 in order to avoid
   189  	// divide-by-zero panics.
   190  	estimatedClusterNodeCount := s.nodeCountFn()
   191  	if estimatedClusterNodeCount == 0 {
   192  		estimatedClusterNodeCount = 1
   193  	}
   194  
   195  	response := tspb.TimeSeriesQueryResponse{
   196  		Results: make([]tspb.TimeSeriesQueryResponse_Result, len(request.Queries)),
   197  	}
   198  
   199  	// Defer cancellation of context passed to worker tasks; if main task
   200  	// returns early, worker tasks should be torn down quickly.
   201  	ctx, cancel := context.WithCancel(ctx)
   202  	defer cancel()
   203  
   204  	// Channel which workers use to report their result, which is either an
   205  	// error or nil (when successful).
   206  	workerOutput := make(chan error)
   207  
   208  	// Create a separate memory management context for each query, allowing them
   209  	// to be run in parallel.
   210  	memContexts := make([]QueryMemoryContext, len(request.Queries))
   211  	defer func() {
   212  		for idx := range memContexts {
   213  			memContexts[idx].Close(ctx)
   214  		}
   215  	}()
   216  
   217  	timespan := QueryTimespan{
   218  		StartNanos:          request.StartNanos,
   219  		EndNanos:            request.EndNanos,
   220  		SampleDurationNanos: sampleNanos,
   221  		NowNanos:            timeutil.Now().UnixNano(),
   222  	}
   223  
   224  	// Start a task which is itself responsible for starting per-query worker
   225  	// tasks. This is needed because RunLimitedAsyncTask can block; in the
   226  	// case where a single request has more queries than the semaphore limit,
   227  	// a deadlock would occur because queries cannot complete until
   228  	// they have written their result to the "output" channel, which is
   229  	// processed later in the main function.
   230  	if err := s.stopper.RunAsyncTask(ctx, "ts.Server: queries", func(ctx context.Context) {
   231  		for queryIdx, query := range request.Queries {
   232  			queryIdx := queryIdx
   233  			query := query
   234  
   235  			if err := s.stopper.RunLimitedAsyncTask(
   236  				ctx,
   237  				"ts.Server: query",
   238  				s.workerSem,
   239  				true, /* wait */
   240  				func(ctx context.Context) {
   241  					// Estimated source count is either the count of requested sources
   242  					// *or* the estimated cluster node count if no sources are specified.
   243  					var estimatedSourceCount int64
   244  					if len(query.Sources) > 0 {
   245  						estimatedSourceCount = int64(len(query.Sources))
   246  					} else {
   247  						estimatedSourceCount = estimatedClusterNodeCount
   248  					}
   249  
   250  					// Create a memory account for the results of this query.
   251  					memContexts[queryIdx] = MakeQueryMemoryContext(
   252  						&s.workerMemMonitor,
   253  						&s.resultMemMonitor,
   254  						QueryMemoryOptions{
   255  							BudgetBytes:             s.queryMemoryMax / int64(s.queryWorkerMax),
   256  							EstimatedSources:        estimatedSourceCount,
   257  							InterpolationLimitNanos: interpolationLimit,
   258  						},
   259  					)
   260  
   261  					datapoints, sources, err := s.db.Query(
   262  						ctx,
   263  						query,
   264  						Resolution10s,
   265  						timespan,
   266  						memContexts[queryIdx],
   267  					)
   268  					if err == nil {
   269  						response.Results[queryIdx] = tspb.TimeSeriesQueryResponse_Result{
   270  							Query:      query,
   271  							Datapoints: datapoints,
   272  						}
   273  						response.Results[queryIdx].Sources = sources
   274  					}
   275  					select {
   276  					case workerOutput <- err:
   277  					case <-ctx.Done():
   278  					}
   279  				},
   280  			); err != nil {
   281  				// Stopper has been closed and is draining. Return an error and
   282  				// exit the worker-spawning loop.
   283  				select {
   284  				case workerOutput <- err:
   285  				case <-ctx.Done():
   286  				}
   287  				return
   288  			}
   289  		}
   290  	}); err != nil {
   291  		return nil, err
   292  	}
   293  
   294  	for range request.Queries {
   295  		select {
   296  		case err := <-workerOutput:
   297  			if err != nil {
   298  				// Return the first error encountered. This will cancel the
   299  				// worker context and cause all other in-progress workers to
   300  				// exit.
   301  				return nil, err
   302  			}
   303  		case <-ctx.Done():
   304  			return nil, ctx.Err()
   305  		}
   306  	}
   307  
   308  	return &response, nil
   309  }
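
// querySketch is an editor's illustrative sketch, not part of the original
// file: it builds a minimal request for the endpoint above. The metric name
// and ten-minute window are hypothetical; SampleNanos is left zero so the
// ten-second default applies, and no Sources are set so every node is queried.
func querySketch(ctx context.Context, s *Server) (*tspb.TimeSeriesQueryResponse, error) {
	now := timeutil.Now().UnixNano()
	const tenMinutes = 10 * 60 * int64(1e9) // nanoseconds
	return s.Query(ctx, &tspb.TimeSeriesQueryRequest{
		StartNanos: now - tenMinutes,
		EndNanos:   now,
		Queries: []tspb.Query{
			{Name: "cr.node.sql.query.count"}, // hypothetical metric name
		},
	})
}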
   310  
   311  // Dump returns a stream of raw timeseries data that has been stored on the
   312  // server. Only data from the 10-second resolution is returned; rollup data is
   313  // not currently returned. Data is returned in the order it is read from disk,
   314  // and is therefore not fully grouped by series.
   315  func (s *Server) Dump(req *tspb.DumpRequest, stream tspb.TimeSeries_DumpServer) error {
   316  	ctx := stream.Context()
   317  	span := &roachpb.Span{
   318  		Key:    roachpb.Key(firstTSRKey),
   319  		EndKey: roachpb.Key(lastTSRKey),
   320  	}
   321  
   322  	for span != nil {
   323  		b := &kv.Batch{}
   324  		b.Header.MaxSpanRequestKeys = dumpBatchSize
   325  		b.Scan(span.Key, span.EndKey)
   326  		err := s.db.db.Run(ctx, b)
   327  		if err != nil {
   328  			return err
   329  		}
   330  		result := b.Results[0]
   331  		span = result.ResumeSpan
   332  		for i := range result.Rows {
   333  			row := &result.Rows[i]
   334  			name, source, resolution, _, err := DecodeDataKey(row.Key)
   335  			if err != nil {
   336  				return err
   337  			}
   338  			if resolution != Resolution10s {
   339  				// Only return the highest resolution data.
   340  				continue
   341  			}
   342  			var idata roachpb.InternalTimeSeriesData
   343  			if err := row.ValueProto(&idata); err != nil {
   344  				return err
   345  			}
   346  
   347  			tsdata := &tspb.TimeSeriesData{
   348  				Name:       name,
   349  				Source:     source,
   350  				Datapoints: make([]tspb.TimeSeriesDatapoint, idata.SampleCount()),
   351  			}
   352  			for i := 0; i < idata.SampleCount(); i++ {
   353  				if idata.IsColumnar() {
   354  					tsdata.Datapoints[i].TimestampNanos = idata.TimestampForOffset(idata.Offset[i])
   355  					tsdata.Datapoints[i].Value = idata.Last[i]
   356  				} else {
   357  					tsdata.Datapoints[i].TimestampNanos = idata.TimestampForOffset(idata.Samples[i].Offset)
   358  					tsdata.Datapoints[i].Value = idata.Samples[i].Sum
   359  				}
   360  			}
   361  			if err := stream.Send(tsdata); err != nil {
   362  				return err
   363  			}
   364  		}
   365  	}
   366  	return nil
   367  }
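
// dumpClientSketch is an editor's illustrative sketch, not part of the
// original file: it consumes the stream produced by Dump through the
// generated gRPC client. It assumes an "io" import (not present in this
// file) for the end-of-stream check and an established connection to a
// node's gRPC port.
func dumpClientSketch(ctx context.Context, conn *grpc.ClientConn) ([]*tspb.TimeSeriesData, error) {
	stream, err := tspb.NewTimeSeriesClient(conn).Dump(ctx, &tspb.DumpRequest{})
	if err != nil {
		return nil, err
	}
	var all []*tspb.TimeSeriesData
	for {
		data, err := stream.Recv()
		if err == io.EOF {
			// The server closed the stream after sending the last batch.
			return all, nil
		}
		if err != nil {
			return nil, err
		}
		all = append(all, data)
	}
}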