github.com/m3db/m3@v1.5.0/src/dbnode/network/server/tchannelthrift/node/service.go 1 // Copyright (c) 2016 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package node 22 23 import ( 24 goctx "context" 25 "errors" 26 "fmt" 27 "runtime" 28 "sort" 29 "sync" 30 "time" 31 32 "github.com/m3db/m3/src/dbnode/client" 33 "github.com/m3db/m3/src/dbnode/generated/thrift/rpc" 34 "github.com/m3db/m3/src/dbnode/namespace" 35 "github.com/m3db/m3/src/dbnode/network/server/tchannelthrift" 36 "github.com/m3db/m3/src/dbnode/network/server/tchannelthrift/convert" 37 tterrors "github.com/m3db/m3/src/dbnode/network/server/tchannelthrift/errors" 38 "github.com/m3db/m3/src/dbnode/storage" 39 "github.com/m3db/m3/src/dbnode/storage/block" 40 "github.com/m3db/m3/src/dbnode/storage/index" 41 idxconvert "github.com/m3db/m3/src/dbnode/storage/index/convert" 42 "github.com/m3db/m3/src/dbnode/storage/limits" 43 "github.com/m3db/m3/src/dbnode/storage/limits/permits" 44 "github.com/m3db/m3/src/dbnode/storage/series" 45 "github.com/m3db/m3/src/dbnode/tracepoint" 46 "github.com/m3db/m3/src/dbnode/ts/writes" 47 "github.com/m3db/m3/src/dbnode/x/xio" 48 "github.com/m3db/m3/src/dbnode/x/xpool" 49 "github.com/m3db/m3/src/m3ninx/index/segment/fst/encoding/docs" 50 "github.com/m3db/m3/src/x/checked" 51 "github.com/m3db/m3/src/x/clock" 52 "github.com/m3db/m3/src/x/context" 53 xdebug "github.com/m3db/m3/src/x/debug" 54 xerrors "github.com/m3db/m3/src/x/errors" 55 "github.com/m3db/m3/src/x/ident" 56 "github.com/m3db/m3/src/x/instrument" 57 xopentracing "github.com/m3db/m3/src/x/opentracing" 58 "github.com/m3db/m3/src/x/pool" 59 xresource "github.com/m3db/m3/src/x/resource" 60 "github.com/m3db/m3/src/x/serialize" 61 xtime "github.com/m3db/m3/src/x/time" 62 63 apachethrift "github.com/apache/thrift/lib/go/thrift" 64 opentracinglog "github.com/opentracing/opentracing-go/log" 65 "github.com/uber-go/tally" 66 "github.com/uber/tchannel-go/thrift" 67 "go.uber.org/zap" 68 ) 69 70 var ( 71 // NB(r): pool sizes are vars to help reduce stress on tests. 72 segmentArrayPoolSize = 65536 73 writeBatchPooledReqPoolSize = 1024 74 ) 75 76 const ( 77 initSegmentArrayPoolLength = 4 78 maxSegmentArrayPooledLength = 32 79 // Any pooled error slices that grow beyond this capacity will be thrown away.
80 writeBatchPooledReqPoolMaxErrorsSliceSize = 4096 81 ) 82 83 var ( 84 // errServerIsOverloaded raised when trying to process a request when the server is overloaded 85 errServerIsOverloaded = errors.New("server is overloaded") 86 87 // errIllegalTagValues raised when the tags specified are incorrect 88 errIllegalTagValues = errors.New("illegal tag values specified") 89 90 // errRequiresDatapoint raised when a datapoint is not provided 91 errRequiresDatapoint = errors.New("requires datapoint") 92 93 // errNodeIsNotBootstrapped raised when the node is not bootstrapped 94 errNodeIsNotBootstrapped = errors.New("node is not bootstrapped") 95 96 // errDatabaseIsNotInitializedYet is raised when an RPC attempt is made before the database 97 // has been set. 98 errDatabaseIsNotInitializedYet = errors.New("database is not yet initialized") 99 100 // errDatabaseHasAlreadyBeenSet is raised when SetDatabase() is called more than one time. 101 errDatabaseHasAlreadyBeenSet = errors.New("database has already been set") 102 103 // errHealthNotSet is raised when server health data structure is not set. 104 errHealthNotSet = errors.New("server health not set") 105 ) 106 107 type serviceMetrics struct { 108 fetch instrument.MethodMetrics 109 fetchTagged instrument.MethodMetrics 110 aggregate instrument.MethodMetrics 111 write instrument.MethodMetrics 112 writeTagged instrument.MethodMetrics 113 fetchBlocks instrument.MethodMetrics 114 fetchBlocksMetadata instrument.MethodMetrics 115 repair instrument.MethodMetrics 116 truncate instrument.MethodMetrics 117 fetchBatchRawRPCS tally.Counter 118 fetchBatchRaw instrument.BatchMethodMetrics 119 writeBatchRawRPCs tally.Counter 120 writeBatchRaw instrument.BatchMethodMetrics 121 writeTaggedBatchRawRPCs tally.Counter 122 writeTaggedBatchRaw instrument.BatchMethodMetrics 123 overloadRejected tally.Counter 124 rpcTotalRead tally.Counter 125 rpcStatusCanceledRead tally.Counter 126 // the series blocks read during a call to fetchTagged 127 fetchTaggedSeriesBlocks tally.Histogram 128 } 129 130 func newServiceMetrics(scope tally.Scope, opts instrument.TimerOptions) serviceMetrics { 131 return serviceMetrics{ 132 fetch: instrument.NewMethodMetrics(scope, "fetch", opts), 133 fetchTagged: instrument.NewMethodMetrics(scope, "fetchTagged", opts), 134 aggregate: instrument.NewMethodMetrics(scope, "aggregate", opts), 135 write: instrument.NewMethodMetrics(scope, "write", opts), 136 writeTagged: instrument.NewMethodMetrics(scope, "writeTagged", opts), 137 fetchBlocks: instrument.NewMethodMetrics(scope, "fetchBlocks", opts), 138 fetchBlocksMetadata: instrument.NewMethodMetrics(scope, "fetchBlocksMetadata", opts), 139 repair: instrument.NewMethodMetrics(scope, "repair", opts), 140 truncate: instrument.NewMethodMetrics(scope, "truncate", opts), 141 fetchBatchRawRPCS: scope.Counter("fetchBatchRaw-rpcs"), 142 fetchBatchRaw: instrument.NewBatchMethodMetrics(scope, "fetchBatchRaw", opts), 143 writeBatchRawRPCs: scope.Counter("writeBatchRaw-rpcs"), 144 writeBatchRaw: instrument.NewBatchMethodMetrics(scope, "writeBatchRaw", opts), 145 writeTaggedBatchRawRPCs: scope.Counter("writeTaggedBatchRaw-rpcs"), 146 writeTaggedBatchRaw: instrument.NewBatchMethodMetrics(scope, "writeTaggedBatchRaw", opts), 147 overloadRejected: scope.Counter("overload-rejected"), 148 rpcTotalRead: scope.Tagged(map[string]string{ 149 "rpc_type": "read", 150 }).Counter("rpc_total"), 151 rpcStatusCanceledRead: scope.Tagged(map[string]string{ 152 "rpc_status": "canceled", 153 "rpc_type": "read", 154 }).Counter("rpc_status"), 155 } 156 } 157 158 // TODO(r):
server side pooling for all return types from service methods 159 type service struct { 160 state serviceState 161 162 logger *zap.Logger 163 164 opts tchannelthrift.Options 165 nowFn clock.NowFn 166 pools pools 167 metrics serviceMetrics 168 queryLimits limits.QueryLimits 169 seriesReadPermits permits.Manager 170 } 171 172 type serviceState struct { 173 sync.RWMutex 174 db storage.Database 175 health *rpc.NodeHealthResult_ 176 177 numOutstandingWriteRPCs int 178 maxOutstandingWriteRPCs int 179 180 numOutstandingReadRPCs int 181 maxOutstandingReadRPCs int 182 183 profiles map[string]*xdebug.ContinuousFileProfile 184 } 185 186 func (s *serviceState) DB() (storage.Database, bool) { 187 s.RLock() 188 v := s.db 189 s.RUnlock() 190 return v, v != nil 191 } 192 193 func (s *serviceState) Health() (*rpc.NodeHealthResult_, bool) { 194 s.RLock() 195 v := s.health 196 s.RUnlock() 197 return v, v != nil 198 } 199 200 func (s *serviceState) DBForWriteRPCWithLimit() ( 201 db storage.Database, dbInitialized bool, rpcDoesNotExceedLimit bool) { 202 s.Lock() 203 defer s.Unlock() 204 205 if s.db == nil { 206 return nil, false, false 207 } 208 if s.numOutstandingWriteRPCs >= s.maxOutstandingWriteRPCs { 209 return nil, true, false 210 } 211 212 v := s.db 213 s.numOutstandingWriteRPCs++ 214 return v, true, true 215 } 216 217 func (s *serviceState) DecNumOutstandingWriteRPCs() { 218 s.Lock() 219 s.numOutstandingWriteRPCs-- 220 s.Unlock() 221 } 222 223 func (s *serviceState) DBForReadRPCWithLimit() ( 224 db storage.Database, dbInitialized bool, requestDoesNotExceedLimit bool) { 225 s.Lock() 226 defer s.Unlock() 227 228 if s.db == nil { 229 return nil, false, false 230 } 231 if s.numOutstandingReadRPCs >= s.maxOutstandingReadRPCs { 232 return nil, true, false 233 } 234 235 v := s.db 236 s.numOutstandingReadRPCs++ 237 return v, true, true 238 } 239 240 func (s *serviceState) DecNumOutstandingReadRPCs() { 241 s.Lock() 242 s.numOutstandingReadRPCs-- 243 s.Unlock() 244 } 245 246 type pools struct { 247 id ident.Pool 248 tagEncoder serialize.TagEncoderPool 249 checkedBytesWrapper xpool.CheckedBytesWrapperPool 250 segmentsArray segmentsArrayPool 251 writeBatchPooledReqPool *writeBatchPooledReqPool 252 blockMetadataV2 tchannelthrift.BlockMetadataV2Pool 253 blockMetadataV2Slice tchannelthrift.BlockMetadataV2SlicePool 254 } 255 256 // ensure `pools` matches a required conversion interface 257 var _ convert.FetchTaggedConversionPools = pools{} 258 259 func (p pools) ID() ident.Pool { return p.id } 260 func (p pools) CheckedBytesWrapper() xpool.CheckedBytesWrapperPool { return p.checkedBytesWrapper } 261 262 // Service is the interface for the node RPC service. 263 type Service interface { 264 rpc.TChanNode 265 266 // FetchTaggedIter returns an iterator for the results of FetchTagged. 267 // It is the responsibility of the caller to close the returned iterator. 268 FetchTaggedIter(ctx context.Context, req *rpc.FetchTaggedRequest) (FetchTaggedResultsIter, error) 269 270 // SetDatabase only safe to be called one time once the service has started. 271 SetDatabase(db storage.Database) error 272 273 // Database returns the current database. 274 Database() (storage.Database, error) 275 276 // SetMetadata sets a metadata key to the given value. 277 SetMetadata(key, value string) 278 279 // Metadata returns the metadata for the given key and a bool indicating 280 // if it is present. 
281 Metadata(key string) (string, bool) 282 } 283 284 // NewService creates a new node TChannel Thrift service 285 func NewService(db storage.Database, opts tchannelthrift.Options) Service { 286 if opts == nil { 287 opts = tchannelthrift.NewOptions() 288 } 289 290 iopts := opts.InstrumentOptions() 291 292 scope := iopts. 293 MetricsScope(). 294 SubScope("service"). 295 Tagged(map[string]string{"service-name": "node"}) 296 297 // Use the new scope in options 298 iopts = iopts.SetMetricsScope(scope) 299 opts = opts.SetInstrumentOptions(iopts) 300 301 segmentPool := newSegmentsArrayPool(segmentsArrayPoolOpts{ 302 Capacity: initSegmentArrayPoolLength, 303 MaxCapacity: maxSegmentArrayPooledLength, 304 Options: pool.NewObjectPoolOptions(). 305 SetSize(segmentArrayPoolSize). 306 SetInstrumentOptions(iopts.SetMetricsScope( 307 scope.SubScope("segment-array-pool"))), 308 }) 309 segmentPool.Init() 310 311 writeBatchPoolSize := writeBatchPooledReqPoolSize 312 if maxWriteReqs := opts.MaxOutstandingWriteRequests(); maxWriteReqs > 0 { 313 // If a limit on the maximum number of outstanding write 314 // requests has been set then we know the exact number 315 // of writeBatchPooledReq objects we need to never have to 316 // allocate one on demand. 317 writeBatchPoolSize = maxWriteReqs 318 } 319 writeBatchPooledReqPool := newWriteBatchPooledReqPool(writeBatchPoolSize, iopts) 320 writeBatchPooledReqPool.Init() 321 322 return &service{ 323 state: serviceState{ 324 db: db, 325 health: &rpc.NodeHealthResult_{ 326 Ok: true, 327 Status: "up", 328 Bootstrapped: false, 329 }, 330 maxOutstandingWriteRPCs: opts.MaxOutstandingWriteRequests(), 331 maxOutstandingReadRPCs: opts.MaxOutstandingReadRequests(), 332 profiles: make(map[string]*xdebug.ContinuousFileProfile), 333 }, 334 logger: iopts.Logger(), 335 opts: opts, 336 nowFn: opts.ClockOptions().NowFn(), 337 metrics: newServiceMetrics(scope, iopts.TimerOptions()), 338 pools: pools{ 339 id: opts.IdentifierPool(), 340 checkedBytesWrapper: opts.CheckedBytesWrapperPool(), 341 tagEncoder: opts.TagEncoderPool(), 342 segmentsArray: segmentPool, 343 writeBatchPooledReqPool: writeBatchPooledReqPool, 344 blockMetadataV2: opts.BlockMetadataV2Pool(), 345 blockMetadataV2Slice: opts.BlockMetadataV2SlicePool(), 346 }, 347 queryLimits: opts.QueryLimits(), 348 seriesReadPermits: opts.PermitsOptions().SeriesReadPermitsManager(), 349 } 350 } 351 352 func (s *service) SetMetadata(key, value string) { 353 s.state.Lock() 354 defer s.state.Unlock() 355 // Copy health state and update single value since in flight 356 // requests might hold ref to current health result.
357 newHealth := &rpc.NodeHealthResult_{} 358 *newHealth = *s.state.health 359 var meta map[string]string 360 if curr := newHealth.Metadata; curr != nil { 361 meta = make(map[string]string, len(curr)+1) 362 for k, v := range curr { 363 meta[k] = v 364 } 365 } else { 366 meta = make(map[string]string, 8) 367 } 368 meta[key] = value 369 newHealth.Metadata = meta 370 s.state.health = newHealth 371 } 372 373 func (s *service) Metadata(key string) (string, bool) { 374 s.state.RLock() 375 md, found := s.state.health.Metadata[key] 376 s.state.RUnlock() 377 return md, found 378 } 379 380 func (s *service) Health(ctx thrift.Context) (*rpc.NodeHealthResult_, error) { 381 health, ok := s.state.Health() 382 if !ok { 383 // Health should always be set 384 return nil, convert.ToRPCError(errHealthNotSet) 385 } 386 387 db, ok := s.state.DB() 388 if !ok { 389 // DB not yet set, just return existing health status 390 return health, nil 391 } 392 393 // Update bootstrapped field if not up to date. Note that we use 394 // IsBootstrappedAndDurable instead of IsBootstrapped to make sure 395 // that in the scenario where a topology change has occurred, none of 396 // our automated tooling will assume a node is healthy until it has 397 // marked all its shards as available and is able to bootstrap all the 398 // shards it owns from its own local disk. 399 bootstrapped := db.IsBootstrappedAndDurable() 400 if health.Bootstrapped != bootstrapped { 401 newHealth := &rpc.NodeHealthResult_{} 402 *newHealth = *health 403 newHealth.Bootstrapped = bootstrapped 404 405 s.state.Lock() 406 s.state.health = newHealth 407 s.state.Unlock() 408 409 // Update response 410 health = newHealth 411 } 412 413 return health, nil 414 } 415 416 // Bootstrapped is designed to be used with cluster management tools like k8s 417 // that expect an endpoint that will return success if the node is 418 // healthy/bootstrapped and an error if not. We added this endpoint because 419 // while the Health endpoint provides the same information, this endpoint does 420 // not require parsing the response to determine if the node is bootstrapped or 421 // not. 422 func (s *service) Bootstrapped(ctx thrift.Context) (*rpc.NodeBootstrappedResult_, error) { 423 db, ok := s.state.DB() 424 if !ok { 425 return nil, convert.ToRPCError(errDatabaseIsNotInitializedYet) 426 } 427 428 // Note that we use IsBootstrappedAndDurable instead of IsBootstrapped to 429 // make sure that in the scenario where a topology change has occurred, none 430 // of our automated tooling will assume a node is healthy until it has 431 // marked all its shards as available and is able to bootstrap all the 432 // shards it owns from its own local disk. 433 if bootstrapped := db.IsBootstrappedAndDurable(); !bootstrapped { 434 return nil, convert.ToRPCError(errNodeIsNotBootstrapped) 435 } 436 437 return &rpc.NodeBootstrappedResult_{}, nil 438 } 439 440 // BootstrappedInPlacementOrNoPlacement is designed to be used with cluster 441 // management tools like k8s that expect an endpoint that will return 442 // success if the node either: 443 // 1) Has no cluster placement set yet. 444 // 2) Is bootstrapped and durable, meaning it is bootstrapped and is able 445 // to bootstrap the shards it owns from its own local disk. 446 // This is useful in addition to the Bootstrapped RPC method as it helps 447 // progress node addition/removal/modifications when no placement is set 448 // at all and therefore the node has not been able to bootstrap yet.
449 func (s *service) BootstrappedInPlacementOrNoPlacement(ctx thrift.Context) (*rpc.NodeBootstrappedInPlacementOrNoPlacementResult_, error) { 450 hasPlacement, err := s.opts.TopologyInitializer().TopologyIsSet() 451 if err != nil { 452 return nil, convert.ToRPCError(err) 453 } 454 455 if !hasPlacement { 456 // No placement at all. 457 return &rpc.NodeBootstrappedInPlacementOrNoPlacementResult_{}, nil 458 } 459 460 db, ok := s.state.DB() 461 if !ok { 462 return nil, convert.ToRPCError(errDatabaseIsNotInitializedYet) 463 } 464 465 if bootstrapped := db.IsBootstrappedAndDurable(); !bootstrapped { 466 return nil, convert.ToRPCError(errNodeIsNotBootstrapped) 467 } 468 469 return &rpc.NodeBootstrappedInPlacementOrNoPlacementResult_{}, nil 470 } 471 472 func (s *service) Query(tctx thrift.Context, req *rpc.QueryRequest) (*rpc.QueryResult_, error) { 473 db, err := s.startReadRPCWithDB() 474 if err != nil { 475 return nil, err 476 } 477 defer s.readRPCCompleted(tctx) 478 479 ctx := addRequestDataToContext(tctx, req.Source, tchannelthrift.Query) 480 ctx, sp, sampled := ctx.StartSampledTraceSpan(tracepoint.Query) 481 if sampled { 482 sp.LogFields( 483 opentracinglog.String("query", req.Query.String()), 484 opentracinglog.String("namespace", req.NameSpace), 485 xopentracing.Time("start", time.Unix(0, req.RangeStart)), 486 xopentracing.Time("end", time.Unix(0, req.RangeEnd)), 487 ) 488 } 489 490 result, err := s.query(ctx, db, req) 491 if sampled && err != nil { 492 sp.LogFields(opentracinglog.Error(err)) 493 } 494 sp.Finish() 495 496 return result, err 497 } 498 499 func (s *service) query(ctx context.Context, db storage.Database, req *rpc.QueryRequest) (*rpc.QueryResult_, error) { 500 start, rangeStartErr := convert.ToTime(req.RangeStart, req.RangeType) 501 end, rangeEndErr := convert.ToTime(req.RangeEnd, req.RangeType) 502 if rangeStartErr != nil || rangeEndErr != nil { 503 return nil, tterrors.NewBadRequestError(xerrors.FirstError(rangeStartErr, rangeEndErr)) 504 } 505 506 q, err := convert.FromRPCQuery(req.Query) 507 if err != nil { 508 return nil, convert.ToRPCError(err) 509 } 510 511 nsID := s.pools.id.GetStringID(ctx, req.NameSpace) 512 opts := index.QueryOptions{ 513 StartInclusive: start, 514 EndExclusive: end, 515 } 516 if l := req.Limit; l != nil { 517 opts.SeriesLimit = int(*l) 518 } 519 if len(req.Source) > 0 { 520 opts.Source = req.Source 521 } 522 queryResult, err := db.QueryIDs(ctx, nsID, index.Query{Query: q}, opts) 523 if err != nil { 524 return nil, convert.ToRPCError(err) 525 } 526 527 result := &rpc.QueryResult_{ 528 Results: make([]*rpc.QueryResultElement, 0, queryResult.Results.Map().Len()), 529 Exhaustive: queryResult.Exhaustive, 530 } 531 fetchData := true 532 if req.NoData != nil && *req.NoData { 533 fetchData = false 534 } 535 // Re-use reader and id for more memory-efficient processing of 536 // tags from doc.Metadata 537 reader := docs.NewEncodedDocumentReader() 538 id := ident.NewReusableBytesID() 539 for _, entry := range queryResult.Results.Map().Iter() { 540 d := entry.Value() 541 metadata, err := docs.MetadataFromDocument(d, reader) 542 if err != nil { 543 return nil, err 544 } 545 tags := idxconvert.ToSeriesTags(metadata, idxconvert.Opts{NoClone: true}) 546 elem := &rpc.QueryResultElement{ 547 ID: string(entry.Key()), 548 Tags: make([]*rpc.Tag, 0, tags.Remaining()), 549 } 550 result.Results = append(result.Results, elem) 551 552 for tags.Next() { 553 tag := tags.Current() 554 elem.Tags = append(elem.Tags, &rpc.Tag{ 555 Name: tag.Name.String(), 556 Value: 
tag.Value.String(), 557 }) 558 } 559 if err := tags.Err(); err != nil { 560 return nil, err 561 } 562 if !fetchData { 563 continue 564 } 565 id.Reset(entry.Key()) 566 datapoints, err := s.readDatapoints(ctx, db, nsID, id, start, end, 567 req.ResultTimeType) 568 if err != nil { 569 return nil, convert.ToRPCError(err) 570 } 571 elem.Datapoints = datapoints 572 } 573 574 return result, nil 575 } 576 577 func (s *service) AggregateTiles(tctx thrift.Context, req *rpc.AggregateTilesRequest) (*rpc.AggregateTilesResult_, error) { 578 db, err := s.startWriteRPCWithDB() 579 if err != nil { 580 return nil, err 581 } 582 defer s.writeRPCCompleted() 583 584 ctx, sp, sampled := tchannelthrift.Context(tctx).StartSampledTraceSpan(tracepoint.AggregateTiles) 585 defer sp.Finish() 586 587 if sampled { 588 sp.LogFields( 589 opentracinglog.String("sourceNamespace", req.SourceNamespace), 590 opentracinglog.String("targetNamespace", req.TargetNamespace), 591 xopentracing.Time("start", time.Unix(0, req.RangeStart)), 592 xopentracing.Time("end", time.Unix(0, req.RangeEnd)), 593 opentracinglog.String("step", req.Step), 594 ) 595 } 596 597 processedTileCount, err := s.aggregateTiles(ctx, db, req) 598 if err != nil { 599 sp.LogFields(opentracinglog.Error(err)) 600 } 601 602 return &rpc.AggregateTilesResult_{ 603 ProcessedTileCount: processedTileCount, 604 }, err 605 } 606 607 func (s *service) aggregateTiles( 608 ctx context.Context, 609 db storage.Database, 610 req *rpc.AggregateTilesRequest, 611 ) (int64, error) { 612 start, err := convert.ToTime(req.RangeStart, req.RangeType) 613 if err != nil { 614 return 0, tterrors.NewBadRequestError(err) 615 } 616 end, err := convert.ToTime(req.RangeEnd, req.RangeType) 617 if err != nil { 618 return 0, tterrors.NewBadRequestError(err) 619 } 620 step, err := time.ParseDuration(req.Step) 621 if err != nil { 622 return 0, tterrors.NewBadRequestError(err) 623 } 624 625 sourceNsID := s.pools.id.GetStringID(ctx, req.SourceNamespace) 626 targetNsID := s.pools.id.GetStringID(ctx, req.TargetNamespace) 627 628 opts, err := storage.NewAggregateTilesOptions( 629 start, end, step, 630 sourceNsID, 631 storage.AggregateTilesAPI, 632 false, false, nil, 633 s.opts.InstrumentOptions()) 634 if err != nil { 635 return 0, tterrors.NewBadRequestError(err) 636 } 637 638 processedTileCount, err := db.AggregateTiles(ctx, sourceNsID, targetNsID, opts) 639 if err != nil { 640 return processedTileCount, convert.ToRPCError(err) 641 } 642 643 return processedTileCount, nil 644 } 645 646 func (s *service) Fetch(tctx thrift.Context, req *rpc.FetchRequest) (*rpc.FetchResult_, error) { 647 db, err := s.startReadRPCWithDB() 648 if err != nil { 649 return nil, err 650 } 651 defer s.readRPCCompleted(tctx) 652 653 var ( 654 callStart = s.nowFn() 655 ctx = addRequestDataToContext(tctx, req.Source, tchannelthrift.Fetch) 656 657 start, rangeStartErr = convert.ToTime(req.RangeStart, req.RangeType) 658 end, rangeEndErr = convert.ToTime(req.RangeEnd, req.RangeType) 659 ) 660 661 if rangeStartErr != nil || rangeEndErr != nil { 662 s.metrics.fetch.ReportError(s.nowFn().Sub(callStart)) 663 return nil, tterrors.NewBadRequestError(xerrors.FirstError(rangeStartErr, rangeEndErr)) 664 } 665 666 tsID := s.pools.id.GetStringID(ctx, req.ID) 667 nsID := s.pools.id.GetStringID(ctx, req.NameSpace) 668 669 // Make datapoints an initialized empty array for JSON serialization as empty array than null 670 datapoints, err := s.readDatapoints(ctx, db, nsID, tsID, start, end, 671 req.ResultTimeType) 672 if err != nil { 673 
s.metrics.fetch.ReportError(s.nowFn().Sub(callStart)) 674 return nil, convert.ToRPCError(err) 675 } 676 677 s.metrics.fetch.ReportSuccess(s.nowFn().Sub(callStart)) 678 return &rpc.FetchResult_{Datapoints: datapoints}, nil 679 } 680 681 func (s *service) readDatapoints( 682 ctx context.Context, 683 db storage.Database, 684 nsID, tsID ident.ID, 685 start, end xtime.UnixNano, 686 timeType rpc.TimeType, 687 ) ([]*rpc.Datapoint, error) { 688 iter, err := db.ReadEncoded(ctx, nsID, tsID, start, end) 689 if err != nil { 690 return nil, err 691 } 692 encoded, err := iter.ToSlices(ctx) 693 if err != nil { 694 return nil, err 695 } 696 697 // Resolve all futures (block reads can be backed by async implementations) and filter out any empty segments. 698 filteredBlockReaderSliceOfSlices, err := xio.FilterEmptyBlockReadersSliceOfSlicesInPlace(encoded) 699 if err != nil { 700 return nil, err 701 } 702 703 // Make datapoints an initialized empty array for JSON serialization as empty array than null 704 datapoints := make([]*rpc.Datapoint, 0) 705 706 multiIt := db.Options().MultiReaderIteratorPool().Get() 707 nsCtx := namespace.NewContextFor(nsID, db.Options().SchemaRegistry()) 708 multiIt.ResetSliceOfSlices( 709 xio.NewReaderSliceOfSlicesFromBlockReadersIterator( 710 filteredBlockReaderSliceOfSlices), nsCtx.Schema) 711 defer multiIt.Close() 712 713 for multiIt.Next() { 714 dp, _, annotation := multiIt.Current() 715 716 timestamp, timestampErr := convert.ToValue(dp.TimestampNanos, timeType) 717 if timestampErr != nil { 718 return nil, xerrors.NewInvalidParamsError(timestampErr) 719 } 720 721 datapoint := rpc.NewDatapoint() 722 datapoint.Timestamp = timestamp 723 datapoint.Value = dp.Value 724 datapoint.Annotation = annotation 725 726 datapoints = append(datapoints, datapoint) 727 } 728 729 if err := multiIt.Err(); err != nil { 730 return nil, err 731 } 732 733 return datapoints, nil 734 } 735 736 func (s *service) FetchTagged(tctx thrift.Context, req *rpc.FetchTaggedRequest) (*rpc.FetchTaggedResult_, error) { 737 ctx := tchannelthrift.Context(tctx) 738 iter, err := s.FetchTaggedIter(ctx, req) 739 if err != nil { 740 return nil, convert.ToRPCError(err) 741 } 742 743 result, err := s.fetchTaggedResult(ctx, iter) 744 iter.Close(err) 745 if err != nil { 746 return nil, convert.ToRPCError(err) 747 } 748 749 return result, nil 750 } 751 752 func (s *service) fetchTaggedResult(ctx context.Context, 753 iter FetchTaggedResultsIter, 754 ) (*rpc.FetchTaggedResult_, error) { 755 response := &rpc.FetchTaggedResult_{ 756 Elements: make([]*rpc.FetchTaggedIDResult_, 0, iter.NumIDs()), 757 Exhaustive: iter.Exhaustive(), 758 } 759 760 for iter.Next(ctx) { 761 cur := iter.Current() 762 tagBytes, err := cur.WriteTags(nil) 763 if err != nil { 764 return nil, err 765 } 766 segments, err := cur.WriteSegments(ctx, nil) 767 if err != nil { 768 return nil, err 769 } 770 response.Elements = append(response.Elements, &rpc.FetchTaggedIDResult_{ 771 ID: cur.ID(), 772 NameSpace: iter.Namespace().Bytes(), 773 EncodedTags: tagBytes, 774 Segments: segments, 775 }) 776 } 777 if iter.Err() != nil { 778 return nil, iter.Err() 779 } 780 781 if v := int64(iter.WaitedIndex()); v > 0 { 782 response.WaitedIndex = &v 783 } 784 if v := int64(iter.WaitedSeriesRead()); v > 0 { 785 response.WaitedSeriesRead = &v 786 } 787 788 return response, nil 789 } 790 791 func (s *service) FetchTaggedIter(ctx context.Context, req *rpc.FetchTaggedRequest) (FetchTaggedResultsIter, error) { 792 callStart := s.nowFn() 793 ctx = addRequestDataToM3Context(ctx, 
req.Source, tchannelthrift.FetchTagged) 794 ctx, sp, sampled := ctx.StartSampledTraceSpan(tracepoint.FetchTagged) 795 if sampled { 796 sp.LogFields( 797 opentracinglog.String("query", string(req.Query)), 798 opentracinglog.String("namespace", string(req.NameSpace)), 799 xopentracing.Time("start", time.Unix(0, req.RangeStart)), 800 xopentracing.Time("end", time.Unix(0, req.RangeEnd)), 801 ) 802 } 803 804 instrumentClose := func(err error) { 805 if sampled && err != nil { 806 sp.LogFields(opentracinglog.Error(err)) 807 } 808 sp.Finish() 809 810 s.metrics.fetchTagged.ReportSuccessOrError(err, s.nowFn().Sub(callStart)) 811 } 812 iter, err := s.fetchTaggedIter(ctx, req, instrumentClose) 813 if err != nil { 814 instrumentClose(err) 815 } 816 return iter, err 817 } 818 819 func (s *service) fetchTaggedIter( 820 ctx context.Context, 821 req *rpc.FetchTaggedRequest, 822 instrumentClose func(error), 823 ) (FetchTaggedResultsIter, error) { 824 db, err := s.startReadRPCWithDB() 825 if err != nil { 826 return nil, err 827 } 828 ctx.RegisterCloser(xresource.SimpleCloserFn(func() { 829 s.readRPCCompleted(ctx.GoContext()) 830 })) 831 832 ns, query, opts, fetchData, err := convert.FromRPCFetchTaggedRequest(req, s.pools) 833 if err != nil { 834 return nil, tterrors.NewBadRequestError(err) 835 } 836 837 queryResult, err := db.QueryIDs(ctx, ns, query, opts) 838 if err != nil { 839 return nil, convert.ToRPCError(err) 840 } 841 842 permits, err := s.seriesReadPermits.NewPermits(ctx) 843 if err != nil { 844 return nil, convert.ToRPCError(err) 845 } 846 847 tagEncoder := s.pools.tagEncoder.Get() 848 ctx.RegisterFinalizer(tagEncoder) 849 850 return newFetchTaggedResultsIter(fetchTaggedResultsIterOpts{ 851 queryResult: queryResult, 852 queryOpts: opts, 853 fetchData: fetchData, 854 db: db, 855 docReader: docs.NewEncodedDocumentReader(), 856 nsID: ns, 857 tagEncoder: tagEncoder, 858 iOpts: s.opts.InstrumentOptions(), 859 instrumentClose: instrumentClose, 860 blockPermits: permits, 861 requireNoWait: req.RequireNoWait, 862 indexWaited: queryResult.Waited, 863 }), nil 864 } 865 866 // FetchTaggedResultsIter iterates over the results from FetchTagged 867 // The iterator is not thread safe and must only be accessed from a single goroutine. 868 type FetchTaggedResultsIter interface { 869 // NumIDs returns the total number of series IDs in the result. 870 NumIDs() int 871 872 // Exhaustive returns true if NumIDs is all IDs that the query could have returned. 873 Exhaustive() bool 874 875 // WaitedIndex counts how many times index querying had to wait for permits. 876 WaitedIndex() int 877 878 // WaitedSeriesRead counts how many times series being read had to wait for permits. 879 WaitedSeriesRead() int 880 881 // Namespace is the namespace. 882 Namespace() ident.ID 883 884 // Next advances to the next element, returning if one exists. 885 // 886 // Iterators that embed this interface should expose a Current() function to return the element retrieved by Next. 887 // If an error occurs this returns false and it can be retrieved with Err. 888 Next(ctx context.Context) bool 889 890 // Err returns a non-nil error if an error occurred when calling Next(). 891 Err() error 892 893 // Current returns the current IDResult fetched with Next. The result is only valid if Err is nil. 894 Current() IDResult 895 896 // Close closes the iterator. The provided error is non-nil if the client of the Iterator encountered an error 897 // while iterating. 
898 Close(err error) 899 } 900 901 type fetchTaggedResultsIter struct { 902 fetchTaggedResultsIterOpts 903 idResults []idResult 904 idx int 905 blockReadIdx int 906 cur IDResult 907 err error 908 permits []permits.Permit 909 unreleasedQuota int64 910 indexWaited int 911 seriesReadWaited int 912 } 913 914 type fetchTaggedResultsIterOpts struct { 915 queryResult index.QueryResult 916 queryOpts index.QueryOptions 917 fetchData bool 918 db storage.Database 919 docReader *docs.EncodedDocumentReader 920 nsID ident.ID 921 tagEncoder serialize.TagEncoder 922 iOpts instrument.Options 923 instrumentClose func(error) 924 blockPermits permits.Permits 925 requireNoWait bool 926 indexWaited int 927 } 928 929 func newFetchTaggedResultsIter(opts fetchTaggedResultsIterOpts) FetchTaggedResultsIter { //nolint: gocritic 930 return &fetchTaggedResultsIter{ 931 fetchTaggedResultsIterOpts: opts, 932 idResults: make([]idResult, 0, opts.queryResult.Results.Map().Len()), 933 permits: make([]permits.Permit, 0), 934 } 935 } 936 937 func (i *fetchTaggedResultsIter) NumIDs() int { 938 return i.queryResult.Results.Map().Len() 939 } 940 941 func (i *fetchTaggedResultsIter) Exhaustive() bool { 942 return i.queryResult.Exhaustive 943 } 944 945 func (i *fetchTaggedResultsIter) WaitedIndex() int { 946 return i.indexWaited 947 } 948 949 func (i *fetchTaggedResultsIter) WaitedSeriesRead() int { 950 return i.seriesReadWaited 951 } 952 953 func (i *fetchTaggedResultsIter) Namespace() ident.ID { 954 return i.nsID 955 } 956 957 func (i *fetchTaggedResultsIter) Next(ctx context.Context) bool { 958 // initialize the iterator state on the first fetch. 959 if i.idx == 0 { 960 for _, entry := range i.queryResult.Results.Map().Iter() { // nolint: gocritic 961 result := idResult{ 962 queryResult: entry, 963 docReader: i.docReader, 964 tagEncoder: i.tagEncoder, 965 iOpts: i.iOpts, 966 } 967 if i.fetchData { 968 // NB(r): Use a bytes ID here so that this ID doesn't need to be 969 // copied by the blockRetriever in the streamRequest method when 970 // it checks if the ID is finalizeable or not with IsNoFinalize. 971 id := ident.BytesID(result.queryResult.Key()) 972 result.blockReadersIter, i.err = i.db.ReadEncoded(ctx, 973 i.nsID, 974 id, 975 i.queryOpts.StartInclusive, 976 i.queryOpts.EndExclusive) 977 if i.err != nil { 978 return false 979 } 980 } 981 i.idResults = append(i.idResults, result) 982 } 983 } else { 984 // release the permits and memory from the previous block readers. 985 i.releaseQuotaUsed(i.idx - 1) 986 i.idResults[i.idx-1].blockReaders = nil 987 } 988 989 if i.idx == i.queryResult.Results.Map().Len() { 990 return false 991 } 992 993 if i.fetchData { 994 // ensure the blockReaders exist for the current series ID. additionally try to prefetch additional blockReaders 995 // for future seriesID to pipeline the disk reads. 996 readBlocks: 997 for i.blockReadIdx < i.queryResult.Results.Map().Len() { 998 currResult := &i.idResults[i.blockReadIdx] 999 blockIter := currResult.blockReadersIter 1000 1001 for blockIter.Next(ctx) { 1002 curr := blockIter.Current() 1003 currResult.blockReaders = append(currResult.blockReaders, curr) 1004 acquired, err := i.acquire(ctx, i.blockReadIdx) 1005 if err != nil { 1006 i.err = err 1007 return false 1008 } 1009 if !acquired { 1010 // if limit met then stop prefetching and resume later from the current point in the iterator. 
1011 break readBlocks 1012 } 1013 } 1014 if blockIter.Err() != nil { 1015 i.err = blockIter.Err() 1016 return false 1017 } 1018 i.blockReadIdx++ 1019 } 1020 } 1021 1022 i.cur = &i.idResults[i.idx] 1023 i.idx++ 1024 return true 1025 } 1026 1027 // acquire a block permit for a series ID. returns true if a permit is available. 1028 func (i *fetchTaggedResultsIter) acquire(ctx context.Context, idx int) (bool, error) { 1029 var curPermit permits.Permit 1030 if len(i.permits) > 0 { 1031 curPermit = i.permits[len(i.permits)-1] 1032 } 1033 if curPermit == nil || curPermit.QuotaRemaining() <= 0 { 1034 if i.idx == idx { 1035 // block acquiring if we need the block readers to fulfill the current fetch. 1036 acquireResult, err := i.blockPermits.Acquire(ctx) 1037 var success bool 1038 defer func() { 1039 // Note: ALWAYS release if we do not successfully return back 1040 // the permit and we checked one out. 1041 if !success && acquireResult.Permit != nil { 1042 i.blockPermits.Release(acquireResult.Permit) 1043 } 1044 }() 1045 if acquireResult.Waited { 1046 i.seriesReadWaited++ 1047 if err == nil && i.requireNoWait { 1048 // Fail iteration if request requires no waiting. 1049 return false, permits.ErrOperationWaitedOnRequireNoWait 1050 } 1051 } 1052 if err != nil { 1053 return false, err 1054 } 1055 success = true 1056 i.permits = append(i.permits, acquireResult.Permit) 1057 curPermit = acquireResult.Permit 1058 } else { 1059 // don't block if we are prefetching for a future seriesID. 1060 permit, err := i.blockPermits.TryAcquire(ctx) 1061 if err != nil { 1062 return false, err 1063 } 1064 if permit == nil { 1065 return false, nil 1066 } 1067 i.permits = append(i.permits, permit) 1068 curPermit = permit 1069 } 1070 } 1071 curPermit.Use(1) 1072 i.idResults[idx].quotaUsed++ 1073 return true, nil 1074 } 1075 1076 // release all the block permits acquired by a series ID that has been processed. 1077 func (i *fetchTaggedResultsIter) releaseQuotaUsed(idx int) { 1078 i.unreleasedQuota += i.idResults[idx].quotaUsed 1079 for i.unreleasedQuota > 0 && i.unreleasedQuota >= i.permits[0].AllowedQuota() { 1080 p := i.permits[0] 1081 i.blockPermits.Release(p) 1082 i.unreleasedQuota -= p.AllowedQuota() 1083 i.permits = i.permits[1:] 1084 } 1085 } 1086 1087 func (i *fetchTaggedResultsIter) Err() error { 1088 return i.err 1089 } 1090 1091 func (i *fetchTaggedResultsIter) Current() IDResult { 1092 return i.cur 1093 } 1094 1095 func (i *fetchTaggedResultsIter) Close(err error) { 1096 i.instrumentClose(err) 1097 for _, p := range i.permits { 1098 i.blockPermits.Release(p) 1099 } 1100 i.blockPermits.Close() 1101 } 1102 1103 // IDResult is the FetchTagged result for a series ID. 1104 type IDResult interface { 1105 // ID returns the series ID. 1106 ID() []byte 1107 1108 // WriteTags writes the encoded tags to provided slice. Callers must use the returned reference in case the slice needs 1109 // to grow, just like append(). 1110 WriteTags(dst []byte) ([]byte, error) 1111 1112 // WriteSegments writes the Segments to the provided slice. Callers must use the returned reference in case the slice 1113 // needs to grow, just like append(). 1114 // This method blocks until segment data is available or the context deadline expires. 
1115 WriteSegments(ctx context.Context, dst []*rpc.Segments) ([]*rpc.Segments, error) 1116 } 1117 1118 type idResult struct { 1119 queryResult index.ResultsMapEntry 1120 docReader *docs.EncodedDocumentReader 1121 tagEncoder serialize.TagEncoder 1122 blockReadersIter series.BlockReaderIter 1123 blockReaders [][]xio.BlockReader 1124 quotaUsed int64 1125 iOpts instrument.Options 1126 } 1127 1128 func (i *idResult) ID() []byte { 1129 return i.queryResult.Key() 1130 } 1131 1132 func (i *idResult) WriteTags(dst []byte) ([]byte, error) { 1133 metadata, err := docs.MetadataFromDocument(i.queryResult.Value(), i.docReader) 1134 if err != nil { 1135 return nil, err 1136 } 1137 tags := idxconvert.ToSeriesTags(metadata, idxconvert.Opts{NoClone: true}) 1138 encodedTags, err := encodeTags(i.tagEncoder, tags, i.iOpts) 1139 if err != nil { // This is an invariant, should never happen 1140 return nil, tterrors.NewInternalError(err) 1141 } 1142 dst = append(dst[:0], encodedTags.Bytes()...) 1143 i.tagEncoder.Reset() 1144 return dst, nil 1145 } 1146 1147 func (i *idResult) WriteSegments(ctx context.Context, dst []*rpc.Segments) ([]*rpc.Segments, error) { 1148 dst = dst[:0] 1149 for _, blockReaders := range i.blockReaders { 1150 segments, err := readEncodedResultSegment(ctx, blockReaders) 1151 if err != nil { 1152 return nil, err 1153 } 1154 if segments != nil { 1155 dst = append(dst, segments) 1156 } 1157 } 1158 return dst, nil 1159 } 1160 1161 func (s *service) Aggregate(tctx thrift.Context, req *rpc.AggregateQueryRequest) (*rpc.AggregateQueryResult_, error) { 1162 db, err := s.startReadRPCWithDB() 1163 if err != nil { 1164 return nil, err 1165 } 1166 defer s.readRPCCompleted(tctx) 1167 1168 callStart := s.nowFn() 1169 ctx := tchannelthrift.Context(tctx) 1170 1171 ns, query, opts, err := convert.FromRPCAggregateQueryRequest(req) 1172 if err != nil { 1173 s.metrics.aggregate.ReportError(s.nowFn().Sub(callStart)) 1174 return nil, tterrors.NewBadRequestError(err) 1175 } 1176 1177 queryResult, err := db.AggregateQuery(ctx, ns, query, opts) 1178 if err != nil { 1179 s.metrics.aggregate.ReportError(s.nowFn().Sub(callStart)) 1180 return nil, convert.ToRPCError(err) 1181 } 1182 1183 response := &rpc.AggregateQueryResult_{ 1184 Exhaustive: queryResult.Exhaustive, 1185 } 1186 results := queryResult.Results 1187 for _, entry := range results.Map().Iter() { 1188 responseElem := &rpc.AggregateQueryResultTagNameElement{ 1189 TagName: entry.Key().String(), 1190 } 1191 tagValues := entry.Value() 1192 tagValuesMap := tagValues.Map() 1193 responseElem.TagValues = make([]*rpc.AggregateQueryResultTagValueElement, 0, tagValuesMap.Len()) 1194 for _, entry := range tagValuesMap.Iter() { 1195 responseElem.TagValues = append(responseElem.TagValues, &rpc.AggregateQueryResultTagValueElement{ 1196 TagValue: entry.Key().String(), 1197 }) 1198 } 1199 response.Results = append(response.Results, responseElem) 1200 } 1201 s.metrics.aggregate.ReportSuccess(s.nowFn().Sub(callStart)) 1202 return response, nil 1203 } 1204 1205 func (s *service) AggregateRaw(tctx thrift.Context, req *rpc.AggregateQueryRawRequest) (*rpc.AggregateQueryRawResult_, error) { 1206 db, err := s.startReadRPCWithDB() 1207 if err != nil { 1208 return nil, err 1209 } 1210 defer s.readRPCCompleted(tctx) 1211 1212 callStart := s.nowFn() 1213 ctx := addRequestDataToContext(tctx, req.Source, tchannelthrift.AggregateRaw) 1214 1215 ns, query, opts, err := convert.FromRPCAggregateQueryRawRequest(req, s.pools) 1216 if err != nil { 1217 
s.metrics.aggregate.ReportError(s.nowFn().Sub(callStart)) 1218 return nil, tterrors.NewBadRequestError(err) 1219 } 1220 1221 queryResult, err := db.AggregateQuery(ctx, ns, query, opts) 1222 if err != nil { 1223 s.metrics.aggregate.ReportError(s.nowFn().Sub(callStart)) 1224 return nil, convert.ToRPCError(err) 1225 } 1226 1227 var WaitedIndex *int64 1228 if v := int64(queryResult.Waited); v > 0 { 1229 WaitedIndex = &v 1230 } 1231 1232 response := &rpc.AggregateQueryRawResult_{ 1233 Exhaustive: queryResult.Exhaustive, 1234 WaitedIndex: WaitedIndex, 1235 } 1236 results := queryResult.Results 1237 for _, entry := range results.Map().Iter() { 1238 responseElem := &rpc.AggregateQueryRawResultTagNameElement{ 1239 TagName: entry.Key().Bytes(), 1240 } 1241 tagValues := entry.Value() 1242 if tagValues.HasValues() { 1243 tagValuesMap := tagValues.Map() 1244 responseElem.TagValues = make([]*rpc.AggregateQueryRawResultTagValueElement, 0, tagValuesMap.Len()) 1245 for _, entry := range tagValuesMap.Iter() { 1246 responseElem.TagValues = append(responseElem.TagValues, &rpc.AggregateQueryRawResultTagValueElement{ 1247 TagValue: entry.Key().Bytes(), 1248 }) 1249 } 1250 } 1251 response.Results = append(response.Results, responseElem) 1252 } 1253 1254 s.metrics.aggregate.ReportSuccess(s.nowFn().Sub(callStart)) 1255 return response, nil 1256 } 1257 1258 func encodeTags( 1259 enc serialize.TagEncoder, 1260 tags ident.TagIterator, 1261 iOpts instrument.Options) (checked.Bytes, error) { 1262 if err := enc.Encode(tags); err != nil { 1263 // should never happen 1264 err = xerrors.NewRenamedError(err, fmt.Errorf("unable to encode tags")) 1265 instrument.EmitAndLogInvariantViolation(iOpts, func(l *zap.Logger) { 1266 l.Error(err.Error()) 1267 }) 1268 return nil, err 1269 } 1270 encodedTags, ok := enc.Data() 1271 if !ok { 1272 // should never happen 1273 err := fmt.Errorf("unable to encode tags: unable to unwrap bytes") 1274 instrument.EmitAndLogInvariantViolation(iOpts, func(l *zap.Logger) { 1275 l.Error(err.Error()) 1276 }) 1277 return nil, err 1278 } 1279 return encodedTags, nil 1280 } 1281 1282 func (s *service) FetchBatchRaw(tctx thrift.Context, req *rpc.FetchBatchRawRequest) (*rpc.FetchBatchRawResult_, error) { 1283 s.metrics.fetchBatchRawRPCS.Inc(1) 1284 db, err := s.startReadRPCWithDB() 1285 if err != nil { 1286 return nil, err 1287 } 1288 defer s.readRPCCompleted(tctx) 1289 1290 callStart := s.nowFn() 1291 ctx := addRequestDataToContext(tctx, req.Source, tchannelthrift.FetchBatchRaw) 1292 1293 start, rangeStartErr := convert.ToTime(req.RangeStart, req.RangeTimeType) 1294 end, rangeEndErr := convert.ToTime(req.RangeEnd, req.RangeTimeType) 1295 1296 if rangeStartErr != nil || rangeEndErr != nil { 1297 s.metrics.fetchBatchRaw.ReportNonRetryableErrors(len(req.Ids)) 1298 s.metrics.fetchBatchRaw.ReportLatency(s.nowFn().Sub(callStart)) 1299 return nil, tterrors.NewBadRequestError(xerrors.FirstError(rangeStartErr, rangeEndErr)) 1300 } 1301 1302 var ( 1303 success int 1304 retryableErrors int 1305 nonRetryableErrors int 1306 ) 1307 nsID := s.newID(ctx, req.NameSpace) 1308 result := rpc.NewFetchBatchRawResult_() 1309 result.Elements = make([]*rpc.FetchRawResult_, len(req.Ids)) 1310 1311 // NB(r): Step 1: read the data using an asynchronous block reader, 1312 // but don't serialize yet so that all block reader requests can 1313 // be issued at once before waiting for their results.
encodedResults := make([]struct { 1315 err error 1316 result [][]xio.BlockReader 1317 }, len(req.Ids)) 1318 for i := range req.Ids { 1319 tsID := s.newID(ctx, req.Ids[i]) 1320 iter, err := db.ReadEncoded(ctx, nsID, tsID, start, end) 1321 if err != nil { 1322 encodedResults[i].err = err 1323 continue 1324 } 1325 encoded, err := iter.ToSlices(ctx) 1326 if err != nil { 1327 encodedResults[i].err = err 1328 continue 1329 } 1330 encodedResults[i].result = encoded 1331 } 1332 1333 // Step 2: Read the results of the asynchronous block readers. 1334 for i := range req.Ids { 1335 rawResult := rpc.NewFetchRawResult_() 1336 result.Elements[i] = rawResult 1337 1338 if err := encodedResults[i].err; err != nil { 1339 rawResult.Err = convert.ToRPCError(err) 1340 continue 1341 } 1342 1343 segments, rpcErr := s.readEncodedResult(ctx, encodedResults[i].result) 1344 if rpcErr != nil { 1345 rawResult.Err = rpcErr 1346 if tterrors.IsBadRequestError(rawResult.Err) { 1347 nonRetryableErrors++ 1348 } else { 1349 retryableErrors++ 1350 } 1351 continue 1352 } 1353 1354 success++ 1355 rawResult.Segments = segments 1356 } 1357 1358 s.metrics.fetchBatchRaw.ReportSuccess(success) 1359 s.metrics.fetchBatchRaw.ReportRetryableErrors(retryableErrors) 1360 s.metrics.fetchBatchRaw.ReportNonRetryableErrors(nonRetryableErrors) 1361 s.metrics.fetchBatchRaw.ReportLatency(s.nowFn().Sub(callStart)) 1362 1363 return result, nil 1364 } 1365 1366 func (s *service) FetchBatchRawV2(tctx thrift.Context, req *rpc.FetchBatchRawV2Request) (*rpc.FetchBatchRawResult_, error) { 1367 s.metrics.fetchBatchRawRPCS.Inc(1) 1368 db, err := s.startReadRPCWithDB() 1369 if err != nil { 1370 return nil, err 1371 } 1372 defer s.readRPCCompleted(tctx) 1373 1374 var ( 1375 callStart = s.nowFn() 1376 ctx = addRequestDataToContext(tctx, req.Source, tchannelthrift.FetchBatchRawV2) 1377 nsIDs = make([]ident.ID, 0, len(req.Elements)) 1378 result = rpc.NewFetchBatchRawResult_() 1379 success int 1380 retryableErrors int 1381 nonRetryableErrors int 1382 ) 1383 1384 for _, nsBytes := range req.NameSpaces { 1385 nsIDs = append(nsIDs, s.newID(ctx, nsBytes)) 1386 } 1387 for _, elem := range req.Elements { 1388 if elem.NameSpace >= int64(len(nsIDs)) { 1389 return nil, fmt.Errorf( 1390 "received fetch request with namespace index: %d, but only %d namespaces were provided", 1391 elem.NameSpace, len(nsIDs)) 1392 } 1393 } 1394 1395 for _, elem := range req.Elements { 1396 start, rangeStartErr := convert.ToTime(elem.RangeStart, elem.RangeTimeType) 1397 end, rangeEndErr := convert.ToTime(elem.RangeEnd, elem.RangeTimeType) 1398 if rangeStartErr != nil || rangeEndErr != nil { 1399 s.metrics.fetchBatchRaw.ReportNonRetryableErrors(len(req.Elements)) 1400 s.metrics.fetchBatchRaw.ReportLatency(s.nowFn().Sub(callStart)) 1401 return nil, tterrors.NewBadRequestError(xerrors.FirstError(rangeStartErr, rangeEndErr)) 1402 } 1403 1404 rawResult := rpc.NewFetchRawResult_() 1405 result.Elements = append(result.Elements, rawResult) 1406 tsID := s.newID(ctx, elem.ID) 1407 1408 nsIdx := nsIDs[int(elem.NameSpace)] 1409 iter, err := db.ReadEncoded(ctx, nsIdx, tsID, start, end) 1410 if err != nil { 1411 rawResult.Err = convert.ToRPCError(err) 1412 if tterrors.IsBadRequestError(rawResult.Err) { 1413 nonRetryableErrors++ 1414 } else { 1415 retryableErrors++ 1416 } 1417 continue 1418 } 1419 encodedResult, err := iter.ToSlices(ctx) 1420 if err != nil { 1421 rawResult.Err = convert.ToRPCError(err) 1422 if tterrors.IsBadRequestError(rawResult.Err) { 1423 nonRetryableErrors++ 1424 } else { 1425
retryableErrors++ 1426 } 1427 continue 1428 } 1429 1430 segments, rpcErr := s.readEncodedResult(ctx, encodedResult) 1431 if rpcErr != nil { 1432 rawResult.Err = rpcErr 1433 if tterrors.IsBadRequestError(rawResult.Err) { 1434 nonRetryableErrors++ 1435 } else { 1436 retryableErrors++ 1437 } 1438 continue 1439 } 1440 1441 success++ 1442 rawResult.Segments = segments 1443 } 1444 1445 s.metrics.fetchBatchRaw.ReportSuccess(success) 1446 s.metrics.fetchBatchRaw.ReportRetryableErrors(retryableErrors) 1447 s.metrics.fetchBatchRaw.ReportNonRetryableErrors(nonRetryableErrors) 1448 s.metrics.fetchBatchRaw.ReportLatency(s.nowFn().Sub(callStart)) 1449 1450 return result, nil 1451 } 1452 1453 func (s *service) FetchBlocksRaw(tctx thrift.Context, req *rpc.FetchBlocksRawRequest) (*rpc.FetchBlocksRawResult_, error) { 1454 db, err := s.startReadRPCWithDB() 1455 if err != nil { 1456 return nil, err 1457 } 1458 defer s.readRPCCompleted(tctx) 1459 1460 var ( 1461 callStart = s.nowFn() 1462 ctx = tchannelthrift.Context(tctx) 1463 nsID = s.newID(ctx, req.NameSpace) 1464 // check if the namespace is known 1465 nsMetadata, ok = db.Namespace(nsID) 1466 ) 1467 if !ok { 1468 return nil, tterrors.NewBadRequestError(fmt.Errorf("unable to find specified namespace: %v", nsID.String())) 1469 } 1470 1471 res := rpc.NewFetchBlocksRawResult_() 1472 res.Elements = make([]*rpc.Blocks, len(req.Elements)) 1473 1474 // Preallocate starts to maximum size since at least one element will likely 1475 // be fetching most blocks for peer bootstrapping 1476 ropts := nsMetadata.Options().RetentionOptions() 1477 blockStarts := make([]xtime.UnixNano, 0, 1478 (ropts.RetentionPeriod()+ropts.FutureRetentionPeriod())/ropts.BlockSize()) 1479 1480 for i, request := range req.Elements { 1481 blockStarts = blockStarts[:0] 1482 1483 for _, start := range request.Starts { 1484 blockStarts = append(blockStarts, xtime.UnixNano(start)) 1485 } 1486 1487 tsID := s.newID(ctx, request.ID) 1488 fetched, err := db.FetchBlocks( 1489 ctx, nsID, uint32(req.Shard), tsID, blockStarts) 1490 if err != nil { 1491 s.metrics.fetchBlocks.ReportError(s.nowFn().Sub(callStart)) 1492 return nil, convert.ToRPCError(err) 1493 } 1494 1495 blocks := rpc.NewBlocks() 1496 blocks.ID = request.ID 1497 blocks.Blocks = make([]*rpc.Block, 0, len(fetched)) 1498 1499 for _, fetchedBlock := range fetched { 1500 block := rpc.NewBlock() 1501 block.Start = int64(fetchedBlock.Start) 1502 if err := fetchedBlock.Err; err != nil { 1503 block.Err = convert.ToRPCError(err) 1504 } else { 1505 var converted convert.ToSegmentsResult 1506 converted, err = convert.ToSegments(ctx, fetchedBlock.Blocks) 1507 if err != nil { 1508 block.Err = convert.ToRPCError(err) 1509 } 1510 if converted.Segments == nil { 1511 // No data for block, skip this block 1512 continue 1513 } 1514 block.Segments = converted.Segments 1515 block.Checksum = converted.Checksum 1516 } 1517 1518 blocks.Blocks = append(blocks.Blocks, block) 1519 } 1520 1521 res.Elements[i] = blocks 1522 } 1523 1524 s.metrics.fetchBlocks.ReportSuccess(s.nowFn().Sub(callStart)) 1525 1526 return res, nil 1527 } 1528 1529 func (s *service) FetchBlocksMetadataRawV2(tctx thrift.Context, req *rpc.FetchBlocksMetadataRawV2Request) (*rpc.FetchBlocksMetadataRawV2Result_, error) { 1530 db, err := s.startReadRPCWithDB() 1531 if err != nil { 1532 return nil, err 1533 } 1534 defer s.readRPCCompleted(tctx) 1535 1536 callStart := s.nowFn() 1537 defer func() { 1538 // No need to report metric anywhere else as we capture all cases here 1539
s.metrics.fetchBlocksMetadata.ReportSuccessOrError(err, s.nowFn().Sub(callStart)) 1540 }() 1541 1542 ctx := tchannelthrift.Context(tctx) 1543 if req.Limit <= 0 { 1544 return nil, nil 1545 } 1546 1547 var opts block.FetchBlocksMetadataOptions 1548 if req.IncludeSizes != nil { 1549 opts.IncludeSizes = *req.IncludeSizes 1550 } 1551 if req.IncludeChecksums != nil { 1552 opts.IncludeChecksums = *req.IncludeChecksums 1553 } 1554 if req.IncludeLastRead != nil { 1555 opts.IncludeLastRead = *req.IncludeLastRead 1556 } 1557 1558 var ( 1559 nsID = s.newID(ctx, req.NameSpace) 1560 start = xtime.UnixNano(req.RangeStart) 1561 end = xtime.UnixNano(req.RangeEnd) 1562 ) 1563 fetchedMetadata, nextPageToken, err := db.FetchBlocksMetadataV2( 1564 ctx, nsID, uint32(req.Shard), start, end, req.Limit, req.PageToken, opts) 1565 if err != nil { 1566 return nil, convert.ToRPCError(err) 1567 } 1568 1569 ctx.RegisterCloser(fetchedMetadata) 1570 1571 result, err := s.getFetchBlocksMetadataRawV2Result(ctx, nextPageToken, opts, fetchedMetadata) 1572 if err != nil { 1573 return nil, convert.ToRPCError(err) 1574 } 1575 1576 ctx.RegisterFinalizer(s.newCloseableMetadataV2Result(result)) 1577 return result, nil 1578 } 1579 1580 func (s *service) getFetchBlocksMetadataRawV2Result( 1581 ctx context.Context, 1582 nextPageToken storage.PageToken, 1583 opts block.FetchBlocksMetadataOptions, 1584 results block.FetchBlocksMetadataResults, 1585 ) (*rpc.FetchBlocksMetadataRawV2Result_, error) { 1586 elements, err := s.getBlocksMetadataV2FromResult(ctx, opts, results) 1587 if err != nil { 1588 return nil, err 1589 } 1590 1591 result := rpc.NewFetchBlocksMetadataRawV2Result_() 1592 result.NextPageToken = nextPageToken 1593 result.Elements = elements 1594 return result, nil 1595 } 1596 1597 func (s *service) getBlocksMetadataV2FromResult( 1598 ctx context.Context, 1599 opts block.FetchBlocksMetadataOptions, 1600 results block.FetchBlocksMetadataResults, 1601 ) ([]*rpc.BlockMetadataV2, error) { 1602 blocks := s.pools.blockMetadataV2Slice.Get() 1603 for _, fetchedMetadata := range results.Results() { 1604 fetchedMetadataBlocks := fetchedMetadata.Blocks.Results() 1605 1606 var ( 1607 id = fetchedMetadata.ID.Bytes() 1608 tags = fetchedMetadata.Tags 1609 encodedTags []byte 1610 ) 1611 if tags != nil && tags.Remaining() > 0 { 1612 enc := s.pools.tagEncoder.Get() 1613 ctx.RegisterFinalizer(enc) 1614 encoded, err := encodeTags(enc, tags, s.opts.InstrumentOptions()) 1615 if err != nil { 1616 return nil, err 1617 } 1618 encodedTags = encoded.Bytes() 1619 } 1620 1621 for _, fetchedMetadataBlock := range fetchedMetadataBlocks { 1622 blockMetadata := s.pools.blockMetadataV2.Get() 1623 blockMetadata.ID = id 1624 blockMetadata.EncodedTags = encodedTags 1625 blockMetadata.Start = int64(fetchedMetadataBlock.Start) 1626 1627 if opts.IncludeSizes { 1628 size := fetchedMetadataBlock.Size 1629 blockMetadata.Size = &size 1630 } else { 1631 blockMetadata.Size = nil 1632 } 1633 1634 checksum := fetchedMetadataBlock.Checksum 1635 if opts.IncludeChecksums && checksum != nil { 1636 value := int64(*checksum) 1637 blockMetadata.Checksum = &value 1638 } else { 1639 blockMetadata.Checksum = nil 1640 } 1641 1642 if opts.IncludeLastRead { 1643 lastRead := int64(fetchedMetadataBlock.LastRead) 1644 blockMetadata.LastRead = &lastRead 1645 blockMetadata.LastReadTimeType = rpc.TimeType_UNIX_NANOSECONDS 1646 } else { 1647 blockMetadata.LastRead = nil 1648 blockMetadata.LastReadTimeType = rpc.TimeType(0) 1649 } 1650 1651 if err := fetchedMetadataBlock.Err; err != nil { 1652 
blockMetadata.Err = convert.ToRPCError(err) 1653 } else { 1654 blockMetadata.Err = nil 1655 } 1656 1657 blocks = append(blocks, blockMetadata) 1658 } 1659 } 1660 1661 return blocks, nil 1662 } 1663 1664 func (s *service) Write(tctx thrift.Context, req *rpc.WriteRequest) error { 1665 db, err := s.startWriteRPCWithDB() 1666 if err != nil { 1667 return err 1668 } 1669 defer s.writeRPCCompleted() 1670 1671 callStart := s.nowFn() 1672 ctx := tchannelthrift.Context(tctx) 1673 1674 if req.Datapoint == nil { 1675 s.metrics.write.ReportError(s.nowFn().Sub(callStart)) 1676 return tterrors.NewBadRequestError(errRequiresDatapoint) 1677 } 1678 1679 dp := req.Datapoint 1680 unit, unitErr := convert.ToUnit(dp.TimestampTimeType) 1681 1682 if unitErr != nil { 1683 s.metrics.write.ReportError(s.nowFn().Sub(callStart)) 1684 return tterrors.NewBadRequestError(unitErr) 1685 } 1686 1687 d, err := unit.Value() 1688 if err != nil { 1689 s.metrics.write.ReportError(s.nowFn().Sub(callStart)) 1690 return tterrors.NewBadRequestError(err) 1691 } 1692 1693 if err = db.Write( 1694 ctx, 1695 s.pools.id.GetStringID(ctx, req.NameSpace), 1696 s.pools.id.GetStringID(ctx, req.ID), 1697 xtime.FromNormalizedTime(dp.Timestamp, d), 1698 dp.Value, 1699 unit, 1700 dp.Annotation, 1701 ); err != nil { 1702 s.metrics.write.ReportError(s.nowFn().Sub(callStart)) 1703 return convert.ToRPCError(err) 1704 } 1705 1706 s.metrics.write.ReportSuccess(s.nowFn().Sub(callStart)) 1707 1708 return nil 1709 } 1710 1711 func (s *service) WriteTagged(tctx thrift.Context, req *rpc.WriteTaggedRequest) error { 1712 db, err := s.startWriteRPCWithDB() 1713 if err != nil { 1714 return err 1715 } 1716 defer s.writeRPCCompleted() 1717 1718 callStart := s.nowFn() 1719 ctx := tchannelthrift.Context(tctx) 1720 1721 if req.Datapoint == nil { 1722 s.metrics.writeTagged.ReportError(s.nowFn().Sub(callStart)) 1723 return tterrors.NewBadRequestError(errRequiresDatapoint) 1724 } 1725 1726 if req.Tags == nil { 1727 s.metrics.writeTagged.ReportError(s.nowFn().Sub(callStart)) 1728 return tterrors.NewBadRequestError(errIllegalTagValues) 1729 } 1730 1731 dp := req.Datapoint 1732 unit, unitErr := convert.ToUnit(dp.TimestampTimeType) 1733 1734 if unitErr != nil { 1735 s.metrics.writeTagged.ReportError(s.nowFn().Sub(callStart)) 1736 return tterrors.NewBadRequestError(unitErr) 1737 } 1738 1739 d, err := unit.Value() 1740 if err != nil { 1741 s.metrics.writeTagged.ReportError(s.nowFn().Sub(callStart)) 1742 return tterrors.NewBadRequestError(err) 1743 } 1744 1745 iter, err := convert.ToTagsIter(req) 1746 if err != nil { 1747 s.metrics.writeTagged.ReportError(s.nowFn().Sub(callStart)) 1748 return tterrors.NewBadRequestError(err) 1749 } 1750 1751 if err = db.WriteTagged(ctx, 1752 s.pools.id.GetStringID(ctx, req.NameSpace), 1753 s.pools.id.GetStringID(ctx, req.ID), 1754 idxconvert.NewTagsIterMetadataResolver(iter), 1755 xtime.UnixNano(dp.Timestamp).FromNormalizedTime(d), 1756 dp.Value, unit, dp.Annotation); err != nil { 1757 s.metrics.writeTagged.ReportError(s.nowFn().Sub(callStart)) 1758 return convert.ToRPCError(err) 1759 } 1760 1761 s.metrics.writeTagged.ReportSuccess(s.nowFn().Sub(callStart)) 1762 1763 return nil 1764 } 1765 1766 func (s *service) WriteBatchRaw(tctx thrift.Context, req *rpc.WriteBatchRawRequest) error { 1767 s.metrics.writeBatchRawRPCs.Inc(1) 1768 db, err := s.startWriteRPCWithDB() 1769 if err != nil { 1770 return err 1771 } 1772 defer s.writeRPCCompleted() 1773 1774 callStart := s.nowFn() 1775 ctx := tchannelthrift.Context(tctx) 1776 1777 // NB(r): Use the 
pooled request tracking to return thrift alloc'd bytes 1778 // to the thrift bytes pool and to return ident.ID wrappers to a pool for 1779 // reuse. We also reduce contention on pools by getting one per batch request 1780 // rather than one per ID. 1781 pooledReq := s.pools.writeBatchPooledReqPool.Get(len(req.Elements)) 1782 pooledReq.writeReq = req 1783 ctx.RegisterFinalizer(pooledReq) 1784 1785 var ( 1786 nsID = s.newPooledID(ctx, req.NameSpace, pooledReq) 1787 retryableErrors int 1788 nonRetryableErrors int 1789 ) 1790 1791 batchWriter, err := db.BatchWriter(nsID, len(req.Elements)) 1792 if err != nil { 1793 return convert.ToRPCError(err) 1794 } 1795 1796 // The lifecycle of the annotations is more involved than the rest of the data 1797 // so we set the annotation pool put method as the finalization function and 1798 // let the database take care of returning them to the pool. 1799 batchWriter.SetFinalizeAnnotationFn(finalizeAnnotationFn) 1800 1801 for i, elem := range req.Elements { 1802 unit, unitErr := convert.ToUnit(elem.Datapoint.TimestampTimeType) 1803 if unitErr != nil { 1804 nonRetryableErrors++ 1805 pooledReq.addError(tterrors.NewBadRequestWriteBatchRawError(i, unitErr)) 1806 continue 1807 } 1808 1809 d, err := unit.Value() 1810 if err != nil { 1811 nonRetryableErrors++ 1812 pooledReq.addError(tterrors.NewBadRequestWriteBatchRawError(i, err)) 1813 continue 1814 } 1815 1816 seriesID := s.newPooledID(ctx, elem.ID, pooledReq) 1817 batchWriter.Add( 1818 i, 1819 seriesID, 1820 xtime.FromNormalizedTime(elem.Datapoint.Timestamp, d), 1821 elem.Datapoint.Value, 1822 unit, 1823 elem.Datapoint.Annotation, 1824 ) 1825 } 1826 1827 err = db.WriteBatch(ctx, nsID, batchWriter.(writes.WriteBatch), 1828 pooledReq) 1829 if err != nil { 1830 return convert.ToRPCError(err) 1831 } 1832 1833 nonRetryableErrors += pooledReq.numNonRetryableErrors() 1834 retryableErrors += pooledReq.numRetryableErrors() 1835 totalErrors := nonRetryableErrors + retryableErrors 1836 1837 s.metrics.writeBatchRaw.ReportSuccess(len(req.Elements) - totalErrors) 1838 s.metrics.writeBatchRaw.ReportRetryableErrors(retryableErrors) 1839 s.metrics.writeBatchRaw.ReportNonRetryableErrors(nonRetryableErrors) 1840 s.metrics.writeBatchRaw.ReportLatency(s.nowFn().Sub(callStart)) 1841 1842 errs := pooledReq.writeBatchRawErrors() 1843 if len(errs) > 0 { 1844 batchErrs := rpc.NewWriteBatchRawErrors() 1845 batchErrs.Errors = errs 1846 return batchErrs 1847 } 1848 1849 return nil 1850 } 1851 1852 func (s *service) WriteBatchRawV2(tctx thrift.Context, req *rpc.WriteBatchRawV2Request) error { 1853 s.metrics.writeBatchRawRPCs.Inc(1) 1854 db, err := s.startWriteRPCWithDB() 1855 if err != nil { 1856 return err 1857 } 1858 defer s.writeRPCCompleted() 1859 1860 callStart := s.nowFn() 1861 ctx := tchannelthrift.Context(tctx) 1862 1863 // Sanity check input. 1864 numNamespaces := int64(len(req.NameSpaces)) 1865 for _, elem := range req.Elements { 1866 if elem.NameSpace >= numNamespaces { 1867 return fmt.Errorf("namespace index: %d is out of range of provided namespaces", elem.NameSpace) 1868 } 1869 } 1870 1871 // Sort the elements so that they're sorted by namespace so we can reuse the same batch writer. 1872 sort.Slice(req.Elements, func(i, j int) bool { 1873 return req.Elements[i].NameSpace < req.Elements[j].NameSpace 1874 }) 1875 1876 // NB(r): Use the pooled request tracking to return thrift alloc'd bytes 1877 // to the thrift bytes pool and to return ident.ID wrappers to a pool for 1878 // reuse. 
We also reduce contention on pools by getting one per batch request 1879 // rather than one per ID. 1880 pooledReq := s.pools.writeBatchPooledReqPool.Get(len(req.Elements)) 1881 pooledReq.writeV2Req = req 1882 ctx.RegisterFinalizer(pooledReq) 1883 1884 var ( 1885 nsID ident.ID 1886 nsIdx int64 1887 batchWriter writes.BatchWriter 1888 1889 retryableErrors int 1890 nonRetryableErrors int 1891 ) 1892 for i, elem := range req.Elements { 1893 if nsID == nil || elem.NameSpace != nsIdx { 1894 if batchWriter != nil { 1895 err = db.WriteBatch(ctx, nsID, batchWriter.(writes.WriteBatch), pooledReq) 1896 if err != nil { 1897 return convert.ToRPCError(err) 1898 } 1899 batchWriter = nil 1900 } 1901 1902 nsID = s.newPooledID(ctx, req.NameSpaces[elem.NameSpace], pooledReq) 1903 nsIdx = elem.NameSpace 1904 1905 batchWriter, err = db.BatchWriter(nsID, len(req.Elements)) 1906 if err != nil { 1907 return convert.ToRPCError(err) 1908 } 1909 // The lifecycle of the annotations is more involved than the rest of the data 1910 // so we set the annotation pool put method as the finalization function and 1911 // let the database take care of returning them to the pool. 1912 batchWriter.SetFinalizeAnnotationFn(finalizeAnnotationFn) 1913 } 1914 1915 unit, unitErr := convert.ToUnit(elem.Datapoint.TimestampTimeType) 1916 if unitErr != nil { 1917 nonRetryableErrors++ 1918 pooledReq.addError(tterrors.NewBadRequestWriteBatchRawError(i, unitErr)) 1919 continue 1920 } 1921 1922 d, err := unit.Value() 1923 if err != nil { 1924 nonRetryableErrors++ 1925 pooledReq.addError(tterrors.NewBadRequestWriteBatchRawError(i, err)) 1926 continue 1927 } 1928 1929 seriesID := s.newPooledID(ctx, elem.ID, pooledReq) 1930 batchWriter.Add( 1931 i, 1932 seriesID, 1933 xtime.FromNormalizedTime(elem.Datapoint.Timestamp, d), 1934 elem.Datapoint.Value, 1935 unit, 1936 elem.Datapoint.Annotation, 1937 ) 1938 } 1939 1940 if batchWriter != nil { 1941 // Write the last batch. 1942 err = db.WriteBatch(ctx, nsID, batchWriter.(writes.WriteBatch), pooledReq) 1943 if err != nil { 1944 return convert.ToRPCError(err) 1945 } 1946 } 1947 1948 nonRetryableErrors += pooledReq.numNonRetryableErrors() 1949 retryableErrors += pooledReq.numRetryableErrors() 1950 totalErrors := nonRetryableErrors + retryableErrors 1951 1952 s.metrics.writeBatchRaw.ReportSuccess(len(req.Elements) - totalErrors) 1953 s.metrics.writeBatchRaw.ReportRetryableErrors(retryableErrors) 1954 s.metrics.writeBatchRaw.ReportNonRetryableErrors(nonRetryableErrors) 1955 s.metrics.writeBatchRaw.ReportLatency(s.nowFn().Sub(callStart)) 1956 1957 errs := pooledReq.writeBatchRawErrors() 1958 if len(errs) > 0 { 1959 batchErrs := rpc.NewWriteBatchRawErrors() 1960 batchErrs.Errors = errs 1961 return batchErrs 1962 } 1963 1964 return nil 1965 } 1966 1967 func (s *service) WriteTaggedBatchRaw(tctx thrift.Context, req *rpc.WriteTaggedBatchRawRequest) error { 1968 s.metrics.writeTaggedBatchRawRPCs.Inc(1) 1969 db, err := s.startWriteRPCWithDB() 1970 if err != nil { 1971 return err 1972 } 1973 defer s.writeRPCCompleted() 1974 1975 callStart := s.nowFn() 1976 ctx := tchannelthrift.Context(tctx) 1977 1978 // NB(r): Use the pooled request tracking to return thrift alloc'd bytes 1979 // to the thrift bytes pool and to return ident.ID wrappers to a pool for 1980 // reuse. We also reduce contention on pools by getting one per batch request 1981 // rather than one per ID. 
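// For context, a minimal sketch (using names defined further down in this
// file) of how the pooled request is reused during this call: besides caching
// pooled ident.IDs, it is handed to db.WriteTaggedBatch below as the
// storage.IndexedErrorHandler, so the storage layer can record per-element
// failures on it without an intermediate []error allocation:
//
//	pooledReq.HandleError(i, err)           // called by storage for a failed element i
//	errs := pooledReq.writeBatchRawErrors() // surfaced to the client as rpc.WriteBatchRawErrors
//
// Finalize() then resets the error slice and returns the thrift bytes and
// pooled IDs before the wrapper goes back to its pool.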
1982 pooledReq := s.pools.writeBatchPooledReqPool.Get(len(req.Elements)) 1983 pooledReq.writeTaggedReq = req 1984 ctx.RegisterFinalizer(pooledReq) 1985 1986 var ( 1987 nsID = s.newPooledID(ctx, req.NameSpace, pooledReq) 1988 retryableErrors int 1989 nonRetryableErrors int 1990 ) 1991 1992 batchWriter, err := db.BatchWriter(nsID, len(req.Elements)) 1993 if err != nil { 1994 return convert.ToRPCError(err) 1995 } 1996 1997 // The lifecycle of the encoded tags and annotations is more involved than 1998 // the rest of the data so we set the encoded tags and annotation pool put 1999 // calls as finalization functions and let the database take care of 2000 // returning them to the pool. 2001 batchWriter.SetFinalizeEncodedTagsFn(finalizeEncodedTagsFn) 2002 batchWriter.SetFinalizeAnnotationFn(finalizeAnnotationFn) 2003 2004 for i, elem := range req.Elements { 2005 unit, unitErr := convert.ToUnit(elem.Datapoint.TimestampTimeType) 2006 if unitErr != nil { 2007 nonRetryableErrors++ 2008 pooledReq.addError(tterrors.NewBadRequestWriteBatchRawError(i, unitErr)) 2009 continue 2010 } 2011 2012 d, err := unit.Value() 2013 if err != nil { 2014 nonRetryableErrors++ 2015 pooledReq.addError(tterrors.NewBadRequestWriteBatchRawError(i, err)) 2016 continue 2017 } 2018 2019 seriesID := s.newPooledID(ctx, elem.ID, pooledReq) 2020 2021 batchWriter.AddTagged( 2022 i, 2023 seriesID, 2024 elem.EncodedTags, 2025 xtime.FromNormalizedTime(elem.Datapoint.Timestamp, d), 2026 elem.Datapoint.Value, 2027 unit, 2028 elem.Datapoint.Annotation) 2029 } 2030 2031 err = db.WriteTaggedBatch(ctx, nsID, batchWriter, pooledReq) 2032 if err != nil { 2033 return convert.ToRPCError(err) 2034 } 2035 2036 nonRetryableErrors += pooledReq.numNonRetryableErrors() 2037 retryableErrors += pooledReq.numRetryableErrors() 2038 totalErrors := nonRetryableErrors + retryableErrors 2039 2040 s.metrics.writeTaggedBatchRaw.ReportSuccess(len(req.Elements) - totalErrors) 2041 s.metrics.writeTaggedBatchRaw.ReportRetryableErrors(retryableErrors) 2042 s.metrics.writeTaggedBatchRaw.ReportNonRetryableErrors(nonRetryableErrors) 2043 s.metrics.writeTaggedBatchRaw.ReportLatency(s.nowFn().Sub(callStart)) 2044 2045 errs := pooledReq.writeBatchRawErrors() 2046 if len(errs) > 0 { 2047 batchErrs := rpc.NewWriteBatchRawErrors() 2048 batchErrs.Errors = errs 2049 return batchErrs 2050 } 2051 2052 return nil 2053 } 2054 2055 func (s *service) WriteTaggedBatchRawV2(tctx thrift.Context, req *rpc.WriteTaggedBatchRawV2Request) error { 2056 s.metrics.writeBatchRawRPCs.Inc(1) 2057 db, err := s.startWriteRPCWithDB() 2058 if err != nil { 2059 return err 2060 } 2061 defer s.writeRPCCompleted() 2062 2063 callStart := s.nowFn() 2064 ctx := tchannelthrift.Context(tctx) 2065 2066 // Sanity check input. 2067 numNamespaces := int64(len(req.NameSpaces)) 2068 for _, elem := range req.Elements { 2069 if elem.NameSpace >= numNamespaces { 2070 return fmt.Errorf("namespace index: %d is out of range of provided namespaces", elem.NameSpace) 2071 } 2072 } 2073 2074 // Sort the elements so that they're sorted by namespace so we can reuse the same batch writer. 2075 sort.Slice(req.Elements, func(i, j int) bool { 2076 return req.Elements[i].NameSpace < req.Elements[j].NameSpace 2077 }) 2078 2079 // NB(r): Use the pooled request tracking to return thrift alloc'd bytes 2080 // to the thrift bytes pool and to return ident.ID wrappers to a pool for 2081 // reuse. We also reduce contention on pools by getting one per batch request 2082 // rather than one per ID. 
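// To illustrate the V2 request shape the loop below relies on (a sketch only;
// the element struct name is assumed from the V1 naming convention), each
// element addresses its namespace by index into req.NameSpaces, and after the
// sort above all elements of a given namespace are contiguous:
//
//	req := &rpc.WriteTaggedBatchRawV2Request{
//		NameSpaces: [][]byte{[]byte("ns-unagg"), []byte("ns-agg")},
//		Elements: []*rpc.WriteTaggedBatchRawV2RequestElement{
//			{NameSpace: 0, ID: idA, EncodedTags: tagsA, Datapoint: dpA},
//			{NameSpace: 0, ID: idB, EncodedTags: tagsB, Datapoint: dpB},
//			{NameSpace: 1, ID: idC, EncodedTags: tagsC, Datapoint: dpC},
//		},
//	}
//
// so a batch writer only needs to be created (and flushed) once per namespace
// rather than once per element.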
2083 pooledReq := s.pools.writeBatchPooledReqPool.Get(len(req.Elements)) 2084 pooledReq.writeTaggedV2Req = req 2085 ctx.RegisterFinalizer(pooledReq) 2086 2087 var ( 2088 nsID ident.ID 2089 nsIdx int64 2090 batchWriter writes.BatchWriter 2091 2092 retryableErrors int 2093 nonRetryableErrors int 2094 ) 2095 for i, elem := range req.Elements { 2096 if nsID == nil || elem.NameSpace != nsIdx { 2097 if batchWriter != nil { 2098 err = db.WriteTaggedBatch(ctx, nsID, batchWriter.(writes.WriteBatch), pooledReq) 2099 if err != nil { 2100 return convert.ToRPCError(err) 2101 } 2102 batchWriter = nil 2103 } 2104 2105 nsID = s.newPooledID(ctx, req.NameSpaces[elem.NameSpace], pooledReq) 2106 nsIdx = elem.NameSpace 2107 2108 batchWriter, err = db.BatchWriter(nsID, len(req.Elements)) 2109 if err != nil { 2110 return convert.ToRPCError(err) 2111 } 2112 // The lifecycle of the encoded tags and annotations is more involved than the 2113 // rest of the data so we set the encoded tags and annotation pool put calls as 2114 // finalization functions and let the database take care of returning them to the pool. 2115 batchWriter.SetFinalizeEncodedTagsFn(finalizeEncodedTagsFn) 2116 batchWriter.SetFinalizeAnnotationFn(finalizeAnnotationFn) 2117 } 2118 2119 unit, unitErr := convert.ToUnit(elem.Datapoint.TimestampTimeType) 2120 if unitErr != nil { 2121 nonRetryableErrors++ 2122 pooledReq.addError(tterrors.NewBadRequestWriteBatchRawError(i, unitErr)) 2123 continue 2124 } 2125 2126 d, err := unit.Value() 2127 if err != nil { 2128 nonRetryableErrors++ 2129 pooledReq.addError(tterrors.NewBadRequestWriteBatchRawError(i, err)) 2130 continue 2131 } 2132 2133 seriesID := s.newPooledID(ctx, elem.ID, pooledReq) 2134 2135 batchWriter.AddTagged( 2136 i, 2137 seriesID, 2138 elem.EncodedTags, 2139 xtime.FromNormalizedTime(elem.Datapoint.Timestamp, d), 2140 elem.Datapoint.Value, 2141 unit, 2142 elem.Datapoint.Annotation, 2143 ) 2144 } 2145 2146 if batchWriter != nil { 2147 // Write the last batch.
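// Because the elements were sorted by namespace above, at most one batch
// writer can still be open here; flush it the same way as the per-namespace
// flushes inside the loop.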
2148 err = db.WriteTaggedBatch(ctx, nsID, batchWriter.(writes.WriteBatch), pooledReq) 2149 if err != nil { 2150 return convert.ToRPCError(err) 2151 } 2152 } 2153 2154 nonRetryableErrors += pooledReq.numNonRetryableErrors() 2155 retryableErrors += pooledReq.numRetryableErrors() 2156 totalErrors := nonRetryableErrors + retryableErrors 2157 2158 s.metrics.writeBatchRaw.ReportSuccess(len(req.Elements) - totalErrors) 2159 s.metrics.writeBatchRaw.ReportRetryableErrors(retryableErrors) 2160 s.metrics.writeBatchRaw.ReportNonRetryableErrors(nonRetryableErrors) 2161 s.metrics.writeBatchRaw.ReportLatency(s.nowFn().Sub(callStart)) 2162 2163 errs := pooledReq.writeBatchRawErrors() 2164 if len(errs) > 0 { 2165 batchErrs := rpc.NewWriteBatchRawErrors() 2166 batchErrs.Errors = errs 2167 return batchErrs 2168 } 2169 2170 return nil 2171 } 2172 2173 func (s *service) Repair(tctx thrift.Context) error { 2174 db, err := s.startRPCWithDB() 2175 if err != nil { 2176 return err 2177 } 2178 2179 callStart := s.nowFn() 2180 2181 if err := db.Repair(); err != nil { 2182 s.metrics.repair.ReportError(s.nowFn().Sub(callStart)) 2183 return convert.ToRPCError(err) 2184 } 2185 2186 s.metrics.repair.ReportSuccess(s.nowFn().Sub(callStart)) 2187 2188 return nil 2189 } 2190 2191 func (s *service) Truncate(tctx thrift.Context, req *rpc.TruncateRequest) (r *rpc.TruncateResult_, err error) { 2192 db, err := s.startRPCWithDB() 2193 if err != nil { 2194 return nil, err 2195 } 2196 2197 callStart := s.nowFn() 2198 ctx := tchannelthrift.Context(tctx) 2199 truncated, err := db.Truncate(s.newID(ctx, req.NameSpace)) 2200 if err != nil { 2201 s.metrics.truncate.ReportError(s.nowFn().Sub(callStart)) 2202 return nil, convert.ToRPCError(err) 2203 } 2204 2205 res := rpc.NewTruncateResult_() 2206 res.NumSeries = truncated 2207 2208 s.metrics.truncate.ReportSuccess(s.nowFn().Sub(callStart)) 2209 2210 return res, nil 2211 } 2212 2213 func (s *service) GetPersistRateLimit( 2214 ctx thrift.Context, 2215 ) (*rpc.NodePersistRateLimitResult_, error) { 2216 db, err := s.startRPCWithDB() 2217 if err != nil { 2218 return nil, err 2219 } 2220 2221 runtimeOptsMgr := db.Options().RuntimeOptionsManager() 2222 opts := runtimeOptsMgr.Get().PersistRateLimitOptions() 2223 limitEnabled := opts.LimitEnabled() 2224 limitMbps := opts.LimitMbps() 2225 limitCheckEvery := int64(opts.LimitCheckEvery()) 2226 result := &rpc.NodePersistRateLimitResult_{ 2227 LimitEnabled: limitEnabled, 2228 LimitMbps: limitMbps, 2229 LimitCheckEvery: limitCheckEvery, 2230 } 2231 return result, nil 2232 } 2233 2234 func (s *service) SetPersistRateLimit( 2235 ctx thrift.Context, 2236 req *rpc.NodeSetPersistRateLimitRequest, 2237 ) (*rpc.NodePersistRateLimitResult_, error) { 2238 db, err := s.startRPCWithDB() 2239 if err != nil { 2240 return nil, err 2241 } 2242 2243 runtimeOptsMgr := db.Options().RuntimeOptionsManager() 2244 runopts := runtimeOptsMgr.Get() 2245 opts := runopts.PersistRateLimitOptions() 2246 if req.LimitEnabled != nil { 2247 opts = opts.SetLimitEnabled(*req.LimitEnabled) 2248 } 2249 if req.LimitMbps != nil { 2250 opts = opts.SetLimitMbps(*req.LimitMbps) 2251 } 2252 if req.LimitCheckEvery != nil { 2253 opts = opts.SetLimitCheckEvery(int(*req.LimitCheckEvery)) 2254 } 2255 2256 runtimeOptsMgr.Update(runopts.SetPersistRateLimitOptions(opts)) 2257 2258 return s.GetPersistRateLimit(ctx) 2259 } 2260 2261 func (s *service) GetWriteNewSeriesAsync( 2262 ctx thrift.Context, 2263 ) (*rpc.NodeWriteNewSeriesAsyncResult_, error) { 2264 db, err := s.startRPCWithDB() 2265 if err != nil { 
2266 return nil, err 2267 } 2268 2269 runtimeOptsMgr := db.Options().RuntimeOptionsManager() 2270 value := runtimeOptsMgr.Get().WriteNewSeriesAsync() 2271 return &rpc.NodeWriteNewSeriesAsyncResult_{ 2272 WriteNewSeriesAsync: value, 2273 }, nil 2274 } 2275 2276 func (s *service) SetWriteNewSeriesAsync( 2277 ctx thrift.Context, 2278 req *rpc.NodeSetWriteNewSeriesAsyncRequest, 2279 ) (*rpc.NodeWriteNewSeriesAsyncResult_, error) { 2280 db, err := s.startRPCWithDB() 2281 if err != nil { 2282 return nil, err 2283 } 2284 2285 runtimeOptsMgr := db.Options().RuntimeOptionsManager() 2286 set := runtimeOptsMgr.Get().SetWriteNewSeriesAsync(req.WriteNewSeriesAsync) 2287 if err := runtimeOptsMgr.Update(set); err != nil { 2288 return nil, tterrors.NewBadRequestError(err) 2289 } 2290 return s.GetWriteNewSeriesAsync(ctx) 2291 } 2292 2293 func (s *service) GetWriteNewSeriesBackoffDuration( 2294 ctx thrift.Context, 2295 ) ( 2296 *rpc.NodeWriteNewSeriesBackoffDurationResult_, 2297 error, 2298 ) { 2299 db, err := s.startRPCWithDB() 2300 if err != nil { 2301 return nil, err 2302 } 2303 2304 runtimeOptsMgr := db.Options().RuntimeOptionsManager() 2305 value := runtimeOptsMgr.Get().WriteNewSeriesBackoffDuration() 2306 return &rpc.NodeWriteNewSeriesBackoffDurationResult_{ 2307 WriteNewSeriesBackoffDuration: int64(value / time.Millisecond), 2308 DurationType: rpc.TimeType_UNIX_MILLISECONDS, 2309 }, nil 2310 } 2311 2312 func (s *service) SetWriteNewSeriesBackoffDuration( 2313 ctx thrift.Context, 2314 req *rpc.NodeSetWriteNewSeriesBackoffDurationRequest, 2315 ) ( 2316 *rpc.NodeWriteNewSeriesBackoffDurationResult_, 2317 error, 2318 ) { 2319 db, err := s.startRPCWithDB() 2320 if err != nil { 2321 return nil, err 2322 } 2323 2324 unit, err := convert.ToDuration(req.DurationType) 2325 if err != nil { 2326 return nil, tterrors.NewBadRequestError(xerrors.NewInvalidParamsError(err)) 2327 } 2328 runtimeOptsMgr := db.Options().RuntimeOptionsManager() 2329 value := time.Duration(req.WriteNewSeriesBackoffDuration) * unit 2330 set := runtimeOptsMgr.Get().SetWriteNewSeriesBackoffDuration(value) 2331 if err := runtimeOptsMgr.Update(set); err != nil { 2332 return nil, tterrors.NewBadRequestError(err) 2333 } 2334 return s.GetWriteNewSeriesBackoffDuration(ctx) 2335 } 2336 2337 func (s *service) GetWriteNewSeriesLimitPerShardPerSecond( 2338 ctx thrift.Context, 2339 ) ( 2340 *rpc.NodeWriteNewSeriesLimitPerShardPerSecondResult_, 2341 error, 2342 ) { 2343 db, err := s.startRPCWithDB() 2344 if err != nil { 2345 return nil, err 2346 } 2347 2348 runtimeOptsMgr := db.Options().RuntimeOptionsManager() 2349 value := runtimeOptsMgr.Get().WriteNewSeriesLimitPerShardPerSecond() 2350 return &rpc.NodeWriteNewSeriesLimitPerShardPerSecondResult_{ 2351 WriteNewSeriesLimitPerShardPerSecond: int64(value), 2352 }, nil 2353 } 2354 2355 func (s *service) SetWriteNewSeriesLimitPerShardPerSecond( 2356 ctx thrift.Context, 2357 req *rpc.NodeSetWriteNewSeriesLimitPerShardPerSecondRequest, 2358 ) ( 2359 *rpc.NodeWriteNewSeriesLimitPerShardPerSecondResult_, 2360 error, 2361 ) { 2362 db, err := s.startRPCWithDB() 2363 if err != nil { 2364 return nil, err 2365 } 2366 2367 runtimeOptsMgr := db.Options().RuntimeOptionsManager() 2368 value := int(req.WriteNewSeriesLimitPerShardPerSecond) 2369 set := runtimeOptsMgr.Get().SetWriteNewSeriesLimitPerShardPerSecond(value) 2370 if err := runtimeOptsMgr.Update(set); err != nil { 2371 return nil, tterrors.NewBadRequestError(err) 2372 } 2373 return s.GetWriteNewSeriesLimitPerShardPerSecond(ctx) 2374 } 2375 2376 func (s *service) 
DebugProfileStart( 2377 ctx thrift.Context, 2378 req *rpc.DebugProfileStartRequest, 2379 ) (*rpc.DebugProfileStartResult_, error) { 2380 s.state.Lock() 2381 defer s.state.Unlock() 2382 2383 _, ok := s.state.profiles[req.Name] 2384 if ok { 2385 err := fmt.Errorf("profile already exists: %s", req.Name) 2386 return nil, tterrors.NewBadRequestError(err) 2387 } 2388 2389 var ( 2390 interval time.Duration 2391 duration time.Duration 2392 debug int 2393 err error 2394 ) 2395 if v := req.Interval; v != nil { 2396 interval, err = time.ParseDuration(*v) 2397 if err != nil { 2398 return nil, tterrors.NewBadRequestError(err) 2399 } 2400 } 2401 if v := req.Duration; v != nil { 2402 duration, err = time.ParseDuration(*v) 2403 if err != nil { 2404 return nil, tterrors.NewBadRequestError(err) 2405 } 2406 } 2407 if v := req.Debug; v != nil { 2408 debug = int(*v) 2409 } 2410 2411 conditional := func() bool { 2412 if v := req.ConditionalNumGoroutinesGreaterThan; v != nil { 2413 if runtime.NumGoroutine() <= int(*v) { 2414 return false 2415 } 2416 } 2417 if v := req.ConditionalNumGoroutinesLessThan; v != nil { 2418 if runtime.NumGoroutine() >= int(*v) { 2419 return false 2420 } 2421 } 2422 if v := req.ConditionalIsOverloaded; v != nil { 2423 overloaded := s.state.db != nil && s.state.db.IsOverloaded() 2424 if *v != overloaded { 2425 return false 2426 } 2427 } 2428 2429 return true 2430 } 2431 2432 p, err := xdebug.NewContinuousFileProfile(xdebug.ContinuousFileProfileOptions{ 2433 FilePathTemplate: req.FilePathTemplate, 2434 ProfileName: req.Name, 2435 ProfileDuration: duration, 2436 ProfileDebug: debug, 2437 Conditional: conditional, 2438 Interval: interval, 2439 InstrumentOptions: s.opts.InstrumentOptions(), 2440 }) 2441 if err != nil { 2442 return nil, tterrors.NewBadRequestError(err) 2443 } 2444 2445 if err := p.Start(); err != nil { 2446 return nil, err 2447 } 2448 2449 s.state.profiles[req.Name] = p 2450 2451 return &rpc.DebugProfileStartResult_{}, nil 2452 } 2453 2454 func (s *service) DebugProfileStop( 2455 ctx thrift.Context, 2456 req *rpc.DebugProfileStopRequest, 2457 ) (*rpc.DebugProfileStopResult_, error) { 2458 s.state.Lock() 2459 defer s.state.Unlock() 2460 2461 existing, ok := s.state.profiles[req.Name] 2462 if !ok { 2463 err := fmt.Errorf("profile does not exist: %s", req.Name) 2464 return nil, tterrors.NewBadRequestError(err) 2465 } 2466 2467 if err := existing.Stop(); err != nil { 2468 return nil, err 2469 } 2470 2471 delete(s.state.profiles, req.Name) 2472 2473 return &rpc.DebugProfileStopResult_{}, nil 2474 } 2475 2476 func (s *service) DebugIndexMemorySegments( 2477 ctx thrift.Context, 2478 req *rpc.DebugIndexMemorySegmentsRequest, 2479 ) ( 2480 *rpc.DebugIndexMemorySegmentsResult_, 2481 error, 2482 ) { 2483 db, err := s.startRPCWithDB() 2484 if err != nil { 2485 return nil, err 2486 } 2487 2488 var multiErr xerrors.MultiError 2489 for _, ns := range db.Namespaces() { 2490 idx, err := ns.Index() 2491 if err != nil { 2492 return nil, err 2493 } 2494 2495 if err := idx.DebugMemorySegments(storage.DebugMemorySegmentsOptions{ 2496 OutputDirectory: req.Directory, 2497 }); err != nil { 2498 return nil, err 2499 } 2500 } 2501 2502 if err := multiErr.FinalError(); err != nil { 2503 return nil, err 2504 } 2505 2506 return &rpc.DebugIndexMemorySegmentsResult_{}, nil 2507 } 2508 2509 func (s *service) SetDatabase(db storage.Database) error { 2510 s.state.Lock() 2511 defer s.state.Unlock() 2512 2513 if s.state.db != nil { 2514 return errDatabaseHasAlreadyBeenSet 2515 } 2516 s.state.db = db 2517 return 
nil 2518 } 2519 2520 func (s *service) Database() (storage.Database, error) { 2521 s.state.RLock() 2522 defer s.state.RUnlock() 2523 2524 if s.state.db == nil { 2525 return nil, errDatabaseIsNotInitializedYet 2526 } 2527 return s.state.db, nil 2528 } 2529 2530 func (s *service) startWriteRPCWithDB() (storage.Database, error) { 2531 if s.state.maxOutstandingWriteRPCs == 0 { 2532 // No limitations on number of outstanding requests. 2533 return s.startRPCWithDB() 2534 } 2535 2536 db, dbIsInitialized, requestDoesNotExceedLimit := s.state.DBForWriteRPCWithLimit() 2537 if !dbIsInitialized { 2538 return nil, convert.ToRPCError(errDatabaseIsNotInitializedYet) 2539 } 2540 if !requestDoesNotExceedLimit { 2541 s.metrics.overloadRejected.Inc(1) 2542 return nil, convert.ToRPCError(errServerIsOverloaded) 2543 } 2544 if db.IsOverloaded() { 2545 s.metrics.overloadRejected.Inc(1) 2546 return nil, convert.ToRPCError(errServerIsOverloaded) 2547 } 2548 2549 return db, nil 2550 } 2551 2552 func (s *service) writeRPCCompleted() { 2553 if s.state.maxOutstandingWriteRPCs == 0 { 2554 // Nothing to do since we're not tracking the number outstanding RPCs. 2555 return 2556 } 2557 2558 s.state.DecNumOutstandingWriteRPCs() 2559 } 2560 2561 func (s *service) startReadRPCWithDB() (storage.Database, error) { 2562 if s.state.maxOutstandingReadRPCs == 0 { 2563 // No limitations on number of outstanding requests. 2564 return s.startRPCWithDB() 2565 } 2566 2567 db, dbIsInitialized, requestDoesNotExceedLimit := s.state.DBForReadRPCWithLimit() 2568 if !dbIsInitialized { 2569 return nil, convert.ToRPCError(errDatabaseIsNotInitializedYet) 2570 } 2571 if !requestDoesNotExceedLimit { 2572 s.metrics.overloadRejected.Inc(1) 2573 return nil, convert.ToRPCError(errServerIsOverloaded) 2574 } 2575 if db.IsOverloaded() { 2576 s.metrics.overloadRejected.Inc(1) 2577 return nil, convert.ToRPCError(errServerIsOverloaded) 2578 } 2579 2580 return db, nil 2581 } 2582 2583 func (s *service) readRPCCompleted(ctx goctx.Context) { 2584 s.metrics.rpcTotalRead.Inc(1) 2585 select { 2586 case <-ctx.Done(): 2587 s.metrics.rpcStatusCanceledRead.Inc(1) 2588 default: 2589 } 2590 2591 if s.state.maxOutstandingReadRPCs == 0 { 2592 // Nothing to do since we're not tracking the number outstanding RPCs. 
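// (A maxOutstandingReadRPCs of zero means the outstanding-read limit is
// disabled, mirroring startReadRPCWithDB and writeRPCCompleted above.)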
2593 return 2594 } 2595 2596 s.state.DecNumOutstandingReadRPCs() 2597 } 2598 2599 func (s *service) startRPCWithDB() (storage.Database, error) { 2600 db, ok := s.state.DB() 2601 if !ok { 2602 return nil, convert.ToRPCError(errDatabaseIsNotInitializedYet) 2603 } 2604 2605 if db.IsOverloaded() { 2606 s.metrics.overloadRejected.Inc(1) 2607 return nil, convert.ToRPCError(errServerIsOverloaded) 2608 } 2609 2610 return db, nil 2611 } 2612 2613 func (s *service) newID(ctx context.Context, id []byte) ident.ID { 2614 checkedBytes := s.pools.checkedBytesWrapper.Get(id) 2615 return s.pools.id.GetBinaryID(ctx, checkedBytes) 2616 } 2617 2618 func (s *service) newPooledID( 2619 ctx context.Context, 2620 id []byte, 2621 p *writeBatchPooledReq, 2622 ) ident.ID { 2623 if result, ok := p.nextPooledID(id); ok { 2624 return result 2625 } 2626 return s.newID(ctx, id) 2627 } 2628 2629 func (s *service) readEncodedResult( 2630 ctx context.Context, 2631 encoded [][]xio.BlockReader, 2632 ) ([]*rpc.Segments, *rpc.Error) { 2633 segments := s.pools.segmentsArray.Get() 2634 segments = segmentsArr(segments).grow(len(encoded)) 2635 segments = segments[:0] 2636 ctx.RegisterFinalizer(xresource.FinalizerFn(func() { 2637 s.pools.segmentsArray.Put(segments) 2638 })) 2639 2640 for _, readers := range encoded { 2641 segment, err := readEncodedResultSegment(ctx, readers) 2642 if err != nil { 2643 return nil, err 2644 } 2645 if segment == nil { 2646 continue 2647 } 2648 segments = append(segments, segment) 2649 } 2650 2651 return segments, nil 2652 } 2653 2654 func readEncodedResultSegment( 2655 ctx context.Context, 2656 readers []xio.BlockReader, 2657 ) (*rpc.Segments, *rpc.Error) { 2658 converted, err := convert.ToSegments(ctx, readers) 2659 if err != nil { 2660 return nil, convert.ToRPCError(err) 2661 } 2662 if converted.Segments == nil { 2663 return nil, nil 2664 } 2665 2666 return converted.Segments, nil 2667 } 2668 2669 func (s *service) newCloseableMetadataV2Result( 2670 res *rpc.FetchBlocksMetadataRawV2Result_, 2671 ) closeableMetadataV2Result { 2672 return closeableMetadataV2Result{s: s, result: res} 2673 } 2674 2675 type closeableMetadataV2Result struct { 2676 s *service 2677 result *rpc.FetchBlocksMetadataRawV2Result_ 2678 } 2679 2680 func (c closeableMetadataV2Result) Finalize() { 2681 for _, blockMetadata := range c.result.Elements { 2682 c.s.pools.blockMetadataV2.Put(blockMetadata) 2683 } 2684 c.s.pools.blockMetadataV2Slice.Put(c.result.Elements) 2685 } 2686 2687 type writeBatchPooledReq struct { 2688 pooledIDs []writeBatchPooledReqID 2689 pooledIDsUsed int 2690 writeReq *rpc.WriteBatchRawRequest 2691 writeV2Req *rpc.WriteBatchRawV2Request 2692 writeTaggedReq *rpc.WriteTaggedBatchRawRequest 2693 writeTaggedV2Req *rpc.WriteTaggedBatchRawV2Request 2694 2695 // We want to avoid allocating an intermediary slice of []error so we 2696 // just include all the error handling in this struct for performance 2697 // reasons since its pooled on a per-request basis anyways. This allows 2698 // us to use this object as a storage.IndexedErrorHandler and avoid allocating 2699 // []error in the storage package, as well as pool the []*rpc.WriteBatchRawError, 2700 // although the individual *rpc.WriteBatchRawError still need to be allocated 2701 // each time. 
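// The errs slice itself is reused across requests: Finalize() below truncates
// it in place, or drops it entirely once its capacity exceeds
// writeBatchPooledReqPoolMaxErrorsSliceSize.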
2702 nonRetryableErrors int 2703 retryableErrors int 2704 errs []*rpc.WriteBatchRawError 2705 2706 pool *writeBatchPooledReqPool 2707 } 2708 2709 func (r *writeBatchPooledReq) nextPooledID(idBytes []byte) (ident.ID, bool) { 2710 if r.pooledIDsUsed >= len(r.pooledIDs) { 2711 return nil, false 2712 } 2713 2714 bytes := r.pooledIDs[r.pooledIDsUsed].bytes 2715 bytes.IncRef() 2716 bytes.Reset(idBytes) 2717 2718 id := r.pooledIDs[r.pooledIDsUsed].id 2719 r.pooledIDsUsed++ 2720 2721 return id, true 2722 } 2723 2724 func (r *writeBatchPooledReq) Finalize() { 2725 // Reset the pooledIDsUsed and decrement the ref counts 2726 for i := 0; i < r.pooledIDsUsed; i++ { 2727 r.pooledIDs[i].bytes.DecRef() 2728 } 2729 r.pooledIDsUsed = 0 2730 2731 // Return any pooled thrift byte slices to the thrift pool. 2732 if r.writeReq != nil { 2733 for _, elem := range r.writeReq.Elements { 2734 apachethrift.BytesPoolPut(elem.ID) 2735 // Ownership of the annotations has been transferred to the BatchWriter 2736 // so they will get returned to the pool automatically by the commitlog once 2737 // it finishes writing them to disk via the finalization function that 2738 // gets set on the WriteBatch. 2739 } 2740 r.writeReq = nil 2741 } 2742 if r.writeV2Req != nil { 2743 for _, elem := range r.writeV2Req.Elements { 2744 apachethrift.BytesPoolPut(elem.ID) 2745 // Ownership of the annotations has been transferred to the BatchWriter 2746 // so they will get returned to the pool automatically by the commitlog once 2747 // it finishes writing them to disk via the finalization function that 2748 // gets set on the WriteBatch. 2749 } 2750 r.writeV2Req = nil 2751 } 2752 if r.writeTaggedReq != nil { 2753 for _, elem := range r.writeTaggedReq.Elements { 2754 apachethrift.BytesPoolPut(elem.ID) 2755 // Ownership of the encoded tags has been transferred to the BatchWriter 2756 // so they will get returned to the pool automatically by the commitlog once 2757 // it finishes writing them to disk via the finalization function that 2758 // gets set on the WriteBatch. 2759 2760 // See comment above about not finalizing annotations here. 2761 } 2762 r.writeTaggedReq = nil 2763 } 2764 if r.writeTaggedV2Req != nil { 2765 for _, elem := range r.writeTaggedV2Req.Elements { 2766 apachethrift.BytesPoolPut(elem.ID) 2767 // Ownership of the encoded tags has been transferred to the BatchWriter 2768 // so they will get returned to the pool automatically by the commitlog once 2769 // it finishes writing them to disk via the finalization function that 2770 // gets set on the WriteBatch. 2771 2772 // See comment above about not finalizing annotations here. 2773 } 2774 r.writeTaggedV2Req = nil 2775 } 2776 2777 r.nonRetryableErrors = 0 2778 r.retryableErrors = 0 2779 if cap(r.errs) <= writeBatchPooledReqPoolMaxErrorsSliceSize { 2780 r.errs = r.errs[:0] 2781 } else { 2782 // Slice grew too large, throw it away and let a new one be 2783 // allocated on the next append call.
2784 r.errs = nil 2785 } 2786 2787 // Return to pool 2788 r.pool.Put(r) 2789 } 2790 2791 func (r *writeBatchPooledReq) HandleError(index int, err error) { 2792 if err == nil { 2793 return 2794 } 2795 2796 if xerrors.IsInvalidParams(err) { 2797 r.nonRetryableErrors++ 2798 r.errs = append( 2799 r.errs, 2800 tterrors.NewBadRequestWriteBatchRawError(index, err)) 2801 return 2802 } 2803 2804 r.retryableErrors++ 2805 r.errs = append( 2806 r.errs, 2807 tterrors.NewWriteBatchRawError(index, err)) 2808 } 2809 2810 func (r *writeBatchPooledReq) addError(err *rpc.WriteBatchRawError) { 2811 r.errs = append(r.errs, err) 2812 } 2813 2814 func (r *writeBatchPooledReq) writeBatchRawErrors() []*rpc.WriteBatchRawError { 2815 return r.errs 2816 } 2817 2818 func (r *writeBatchPooledReq) numRetryableErrors() int { 2819 return r.retryableErrors 2820 } 2821 2822 func (r *writeBatchPooledReq) numNonRetryableErrors() int { 2823 return r.nonRetryableErrors 2824 } 2825 2826 type writeBatchPooledReqID struct { 2827 bytes checked.Bytes 2828 id ident.ID 2829 } 2830 2831 type writeBatchPooledReqPool struct { 2832 pool pool.ObjectPool 2833 } 2834 2835 func newWriteBatchPooledReqPool( 2836 size int, 2837 iopts instrument.Options, 2838 ) *writeBatchPooledReqPool { 2839 pool := pool.NewObjectPool(pool.NewObjectPoolOptions(). 2840 SetSize(size). 2841 SetInstrumentOptions(iopts.SetMetricsScope( 2842 iopts.MetricsScope().SubScope("write-batch-pooled-req-pool")))) 2843 return &writeBatchPooledReqPool{pool: pool} 2844 } 2845 2846 func (p *writeBatchPooledReqPool) Init() { 2847 p.pool.Init(func() interface{} { 2848 return &writeBatchPooledReq{pool: p} 2849 }) 2850 } 2851 2852 func (p *writeBatchPooledReqPool) Get(size int) *writeBatchPooledReq { 2853 cappedSize := size 2854 if cappedSize > client.DefaultWriteBatchSize { 2855 cappedSize = client.DefaultWriteBatchSize 2856 } 2857 // NB(r): Make pooled IDs plus an extra one for the namespace 2858 cappedSize++ 2859 2860 pooledReq := p.pool.Get().(*writeBatchPooledReq) 2861 if cappedSize > len(pooledReq.pooledIDs) { 2862 newPooledIDs := make([]writeBatchPooledReqID, 0, cappedSize) 2863 newPooledIDs = append(newPooledIDs, pooledReq.pooledIDs...) 2864 2865 for i := len(pooledReq.pooledIDs); i < len(newPooledIDs); i++ { 2866 newPooledIDs[i].bytes = checked.NewBytes(nil, nil) 2867 newPooledIDs[i].id = ident.BinaryID(newPooledIDs[i].bytes) 2868 // BinaryID(..) incs the ref, we however don't want to pretend 2869 // the bytes is owned at this point since its not being used, so we 2870 // immediately dec a ref here to avoid calling get on this ID 2871 // being a valid call 2872 newPooledIDs[i].bytes.DecRef() 2873 } 2874 2875 pooledReq.pooledIDs = newPooledIDs 2876 } 2877 2878 return pooledReq 2879 } 2880 2881 func (p *writeBatchPooledReqPool) Put(v *writeBatchPooledReq) { 2882 p.pool.Put(v) 2883 } 2884 2885 // finalizeEncodedTagsFn implements ts.FinalizeEncodedTagsFn because 2886 // apachethrift.BytesPoolPut(b) returns a bool but ts.FinalizeEncodedTagsFn 2887 // does not. 2888 func finalizeEncodedTagsFn(b []byte) { 2889 apachethrift.BytesPoolPut(b) 2890 } 2891 2892 // finalizeAnnotationFn implements ts.FinalizeAnnotationFn because 2893 // apachethrift.BytesPoolPut(b) returns a bool but ts.FinalizeAnnotationFn 2894 // does not. 
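// The boolean returned by apachethrift.BytesPoolPut is intentionally discarded by this wrapper.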
2895 func finalizeAnnotationFn(b []byte) { 2896 apachethrift.BytesPoolPut(b) 2897 } 2898 2899 func addRequestDataToContext( 2900 tctx thrift.Context, 2901 source []byte, 2902 endpoint tchannelthrift.Endpoint, 2903 ) context.Context { 2904 return addRequestDataToM3Context(tchannelthrift.Context(tctx), source, endpoint) 2905 } 2906 2907 func addRequestDataToM3Context( 2908 ctx context.Context, 2909 source []byte, 2910 endpoint tchannelthrift.Endpoint, 2911 ) context.Context { 2912 if ctx.GoContext() == nil { 2913 return ctx 2914 } 2915 2916 goCtx := tchannelthrift.NewContextWithEndpoint(ctx.GoContext(), endpoint) 2917 2918 if len(source) > 0 { 2919 goCtx = goctx.WithValue(goCtx, limits.SourceContextKey, source) 2920 } 2921 2922 ctx.SetGoContext(goCtx) 2923 2924 return ctx 2925 }
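// Illustrative only: a minimal sketch of the timestamp convention the write
// handlers above expect. Datapoint timestamps arrive normalized to their
// TimestampTimeType and are converted back via convert.ToUnit and
// xtime.FromNormalizedTime. The helper below is hypothetical (it is not part
// of this package's API) and uses only types already referenced in this file.
func exampleNormalizedDatapoint(at time.Time, value float64) *rpc.Datapoint {
	// Express the timestamp in milliseconds and declare that unit so the
	// server can recover it, roughly as the handlers above do:
	//
	//	unit, _ := convert.ToUnit(dp.TimestampTimeType) // xtime.Millisecond
	//	d, _ := unit.Value()                            // time.Millisecond
	//	t := xtime.FromNormalizedTime(dp.Timestamp, d)  // nanosecond precision restored
	return &rpc.Datapoint{
		Timestamp:         at.UnixNano() / int64(time.Millisecond),
		TimestampTimeType: rpc.TimeType_UNIX_MILLISECONDS,
		Value:             value,
	}
}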