github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ts/server.go

// Copyright 2015 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package ts

import (
	"context"
	"math"

	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/ts/tspb"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/mon"
	"github.com/cockroachdb/cockroach/pkg/util/quotapool"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	gwruntime "github.com/grpc-ecosystem/grpc-gateway/runtime"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

const (
	// URLPrefix is the prefix for all time series endpoints hosted by the
	// server.
	URLPrefix = "/ts/"
	// queryWorkerMax is the default maximum number of worker goroutines that
	// the time series server can use to service incoming queries.
	queryWorkerMax = 8
	// queryMemoryMax is a soft limit for the amount of total memory used by
	// time series queries. This is not currently enforced, but is used for
	// monitoring purposes.
	queryMemoryMax = int64(64 * 1024 * 1024) // 64MiB
	// dumpBatchSize is the number of keys processed in each batch by the dump
	// command.
	dumpBatchSize = 100
)

// ClusterNodeCountFn is a function that returns the number of nodes active on
// the cluster.
type ClusterNodeCountFn func() int64

// ServerConfig provides a means for tests to override settings in the time
// series server.
type ServerConfig struct {
	// The maximum number of query workers used by the server. If this
	// value is zero, a default non-zero value is used instead.
	QueryWorkerMax int
	// The maximum amount of memory that should be used for processing queries
	// across all workers. If this value is zero, a default non-zero value is
	// used instead.
	QueryMemoryMax int64
}
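
// Illustrative sketch (not part of the production configuration flow): a test
// might override both fields, for example to force a single worker with a
// small budget. With the defaults above (queryWorkerMax = 8, queryMemoryMax =
// 64MiB), each worker's budget in Query works out to 64MiB / 8 = 8MiB.
var _ = ServerConfig{
	QueryWorkerMax: 1,       // run at most one query worker at a time
	QueryMemoryMax: 1 << 20, // 1MiB budget shared across all workers
}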

// Server handles incoming external requests related to time series data.
//
// The server attempts to constrain the total amount of memory it uses for
// processing incoming queries. This is accomplished with a multi-pronged
// strategy:
// + The server has a worker memory limit, which is a quota for the amount of
//   memory that can be used across all currently executing queries.
// + The server also has a pre-set limit on the number of parallel workers that
//   can be executing at one time. Each worker is given an even share of the
//   server's total memory limit, which it should not exceed.
// + Each worker breaks its task into chunks which it will process sequentially;
//   the size of each chunk is calculated to avoid exceeding the memory limit.
//
// In addition to this strategy, the server uses a memory monitor to track the
// amount of memory being used in reality by worker tasks. This is intended to
// verify the calculations of the individual workers are correct.
//
// A second memory monitor is used to track the space used by the results of
// query workers, which are longer lived; an incoming request may utilize
// several workers, but the results of each worker cannot be released until
// being returned to the requestor. Result memory is not currently limited,
// as in practical usage it is dwarfed by the memory needed by workers to
// generate the results.
type Server struct {
	log.AmbientContext
	db               *DB
	stopper          *stop.Stopper
	nodeCountFn      ClusterNodeCountFn
	queryMemoryMax   int64
	queryWorkerMax   int
	workerMemMonitor mon.BytesMonitor
	resultMemMonitor mon.BytesMonitor
	workerSem        *quotapool.IntPool
}

// MakeServer instantiates a new Server which services requests with data from
// the supplied DB.
func MakeServer(
	ambient log.AmbientContext,
	db *DB,
	nodeCountFn ClusterNodeCountFn,
	cfg ServerConfig,
	stopper *stop.Stopper,
) Server {
	ambient.AddLogTag("ts-srv", nil)

	// Override default values from configuration.
	queryWorkerMax := queryWorkerMax
	if cfg.QueryWorkerMax != 0 {
		queryWorkerMax = cfg.QueryWorkerMax
	}
	queryMemoryMax := queryMemoryMax
	if cfg.QueryMemoryMax != 0 {
		queryMemoryMax = cfg.QueryMemoryMax
	}
	workerSem := quotapool.NewIntPool("ts.Server worker", uint64(queryWorkerMax))
	stopper.AddCloser(workerSem.Closer("stopper"))
	return Server{
		AmbientContext: ambient,
		db:             db,
		stopper:        stopper,
		nodeCountFn:    nodeCountFn,
		workerMemMonitor: mon.MakeUnlimitedMonitor(
			context.Background(),
			"timeseries-workers",
			mon.MemoryResource,
			nil,
			nil,
			// Begin logging messages if we exceed our planned memory usage by
			// more than double.
			queryMemoryMax*2,
			db.st,
		),
		resultMemMonitor: mon.MakeUnlimitedMonitor(
			context.Background(),
			"timeseries-results",
			mon.MemoryResource,
			nil,
			nil,
			math.MaxInt64,
			db.st,
		),
		queryMemoryMax: queryMemoryMax,
		queryWorkerMax: queryWorkerMax,
		workerSem:      workerSem,
	}
}

// RegisterService registers the GRPC service.
func (s *Server) RegisterService(g *grpc.Server) {
	tspb.RegisterTimeSeriesServer(g, s)
}

// RegisterGateway starts the gateway (i.e. reverse proxy) that proxies HTTP
// requests to the appropriate gRPC endpoints.
func (s *Server) RegisterGateway(
	ctx context.Context, mux *gwruntime.ServeMux, conn *grpc.ClientConn,
) error {
	return tspb.RegisterTimeSeriesHandler(ctx, mux, conn)
}
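
// registerTimeSeriesServerExample is an illustrative sketch, not a wiring used
// elsewhere in this package: it shows how the two registration hooks above are
// typically combined when embedding the time series server in a node. The gRPC
// server, gateway mux, and loopback client connection are assumed to be
// constructed by the caller.
func registerTimeSeriesServerExample(
	ctx context.Context, s *Server, g *grpc.Server, mux *gwruntime.ServeMux, conn *grpc.ClientConn,
) error {
	// Serve the tspb.TimeSeries service over gRPC.
	s.RegisterService(g)
	// Register the reverse-proxy handlers that translate incoming HTTP
	// requests into calls on the gRPC endpoints.
	return s.RegisterGateway(ctx, mux, conn)
}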

// Query is an endpoint that returns data for one or more metrics over a
// specific time span.
func (s *Server) Query(
	ctx context.Context, request *tspb.TimeSeriesQueryRequest,
) (*tspb.TimeSeriesQueryResponse, error) {
	ctx = s.AnnotateCtx(ctx)
	if len(request.Queries) == 0 {
		return nil, status.Errorf(codes.InvalidArgument, "Queries cannot be empty")
	}

	// If not set, sampleNanos should default to ten second resolution.
	sampleNanos := request.SampleNanos
	if sampleNanos == 0 {
		sampleNanos = Resolution10s.SampleDuration()
	}

	// For the interpolation limit, use the time limit until stores are considered
	// dead. This is a conservatively long span, but gives us a good indication of
	// when a gap likely indicates an outage (and thus missing values should not
	// be interpolated).
	interpolationLimit := kvserver.TimeUntilStoreDead.Get(&s.db.st.SV).Nanoseconds()

	// Get the estimated number of nodes on the cluster, used to compute more
	// accurate memory usage estimates. Set a minimum of 1 in order to avoid
	// divide-by-zero panics.
	estimatedClusterNodeCount := s.nodeCountFn()
	if estimatedClusterNodeCount == 0 {
		estimatedClusterNodeCount = 1
	}

	response := tspb.TimeSeriesQueryResponse{
		Results: make([]tspb.TimeSeriesQueryResponse_Result, len(request.Queries)),
	}

	// Defer cancellation of context passed to worker tasks; if main task
	// returns early, worker tasks should be torn down quickly.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	// Channel which workers use to report their result, which is either an
	// error or nil (when successful).
	workerOutput := make(chan error)

	// Create a separate memory management context for each query, allowing them
	// to be run in parallel.
	memContexts := make([]QueryMemoryContext, len(request.Queries))
	defer func() {
		for idx := range memContexts {
			memContexts[idx].Close(ctx)
		}
	}()

	timespan := QueryTimespan{
		StartNanos:          request.StartNanos,
		EndNanos:            request.EndNanos,
		SampleDurationNanos: sampleNanos,
		NowNanos:            timeutil.Now().UnixNano(),
	}

	// Start a task which is itself responsible for starting per-query worker
	// tasks. This is needed because RunLimitedAsyncTask can block; in the
	// case where a single request has more queries than the semaphore limit,
	// a deadlock would occur because queries cannot complete until
	// they have written their result to the "output" channel, which is
	// processed later in the main function.
	if err := s.stopper.RunAsyncTask(ctx, "ts.Server: queries", func(ctx context.Context) {
		for queryIdx, query := range request.Queries {
			queryIdx := queryIdx
			query := query

			if err := s.stopper.RunLimitedAsyncTask(
				ctx,
				"ts.Server: query",
				s.workerSem,
				true, /* wait */
				func(ctx context.Context) {
					// Estimated source count is either the count of requested sources
					// *or* the estimated cluster node count if no sources are specified.
					var estimatedSourceCount int64
					if len(query.Sources) > 0 {
						estimatedSourceCount = int64(len(query.Sources))
					} else {
						estimatedSourceCount = estimatedClusterNodeCount
					}

					// Create a memory account for the results of this query.
					memContexts[queryIdx] = MakeQueryMemoryContext(
						&s.workerMemMonitor,
						&s.resultMemMonitor,
						QueryMemoryOptions{
							BudgetBytes:             s.queryMemoryMax / int64(s.queryWorkerMax),
							EstimatedSources:        estimatedSourceCount,
							InterpolationLimitNanos: interpolationLimit,
						},
					)

					datapoints, sources, err := s.db.Query(
						ctx,
						query,
						Resolution10s,
						timespan,
						memContexts[queryIdx],
					)
					if err == nil {
						response.Results[queryIdx] = tspb.TimeSeriesQueryResponse_Result{
							Query:      query,
							Datapoints: datapoints,
						}
						response.Results[queryIdx].Sources = sources
					}
					select {
					case workerOutput <- err:
					case <-ctx.Done():
					}
				},
			); err != nil {
				// Stopper has been closed and is draining. Return an error and
				// exit the worker-spawning loop.
				select {
				case workerOutput <- err:
				case <-ctx.Done():
				}
				return
			}
		}
	}); err != nil {
		return nil, err
	}

	for range request.Queries {
		select {
		case err := <-workerOutput:
			if err != nil {
				// Return the first error encountered. This will cancel the
				// worker context and cause all other in-progress workers to
				// exit.
				return nil, err
			}
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}

	return &response, nil
}
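
// queryLastTenMinutesExample is an illustrative sketch, not an API used
// elsewhere in this package: it shows roughly how a caller might invoke the
// Query endpoint through the generated tspb client. The connection is assumed
// to be an established gRPC connection to a node serving this service, and the
// metric name is supplied by the caller.
func queryLastTenMinutesExample(
	ctx context.Context, conn *grpc.ClientConn, metricName string,
) (*tspb.TimeSeriesQueryResponse, error) {
	client := tspb.NewTimeSeriesClient(conn)
	nowNanos := timeutil.Now().UnixNano()
	return client.Query(ctx, &tspb.TimeSeriesQueryRequest{
		// Ask for the last ten minutes of data; SampleNanos is left at zero so
		// the server defaults to ten second resolution, as described above.
		StartNanos: nowNanos - 10*60*1000*1000*1000,
		EndNanos:   nowNanos,
		Queries:    []tspb.Query{{Name: metricName}},
	})
}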

// Dump returns a stream of raw timeseries data that has been stored on the
// server. Only data from the 10-second resolution is returned; rollup data is
// not currently returned. Data is returned in the order it is read from disk,
// and will thus not be totally organized by series.
func (s *Server) Dump(req *tspb.DumpRequest, stream tspb.TimeSeries_DumpServer) error {
	ctx := stream.Context()
	span := &roachpb.Span{
		Key:    roachpb.Key(firstTSRKey),
		EndKey: roachpb.Key(lastTSRKey),
	}

	for span != nil {
		b := &kv.Batch{}
		b.Header.MaxSpanRequestKeys = dumpBatchSize
		b.Scan(span.Key, span.EndKey)
		err := s.db.db.Run(ctx, b)
		if err != nil {
			return err
		}
		result := b.Results[0]
		span = result.ResumeSpan
		for i := range result.Rows {
			row := &result.Rows[i]
			name, source, resolution, _, err := DecodeDataKey(row.Key)
			if err != nil {
				return err
			}
			if resolution != Resolution10s {
				// Only return the highest resolution data.
				continue
			}
			var idata roachpb.InternalTimeSeriesData
			if err := row.ValueProto(&idata); err != nil {
				return err
			}

			tsdata := &tspb.TimeSeriesData{
				Name:       name,
				Source:     source,
				Datapoints: make([]tspb.TimeSeriesDatapoint, idata.SampleCount()),
			}
			for i := 0; i < idata.SampleCount(); i++ {
				if idata.IsColumnar() {
					tsdata.Datapoints[i].TimestampNanos = idata.TimestampForOffset(idata.Offset[i])
					tsdata.Datapoints[i].Value = idata.Last[i]
				} else {
					tsdata.Datapoints[i].TimestampNanos = idata.TimestampForOffset(idata.Samples[i].Offset)
					tsdata.Datapoints[i].Value = idata.Samples[i].Sum
				}
			}
			if err := stream.Send(tsdata); err != nil {
				return err
			}
		}
	}
	return nil
}
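
// An illustrative sketch of consuming the Dump stream from the client side via
// the generated tspb client; the connection setup, the io import, and the
// process helper are assumed and not shown here:
//
//	client := tspb.NewTimeSeriesClient(conn)
//	stream, err := client.Dump(ctx, &tspb.DumpRequest{})
//	if err != nil {
//		return err
//	}
//	for {
//		data, err := stream.Recv()
//		if err == io.EOF {
//			break // the server has sent every batch
//		}
//		if err != nil {
//			return err
//		}
//		process(data) // handle one tspb.TimeSeriesData message
//	}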