github.com/m3db/m3@v1.5.0/src/dbnode/client/host_queue.go

// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package client

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"math"
	"sync"
	"time"

	"github.com/m3db/m3/src/dbnode/generated/thrift/rpc"
	"github.com/m3db/m3/src/dbnode/topology"
	"github.com/m3db/m3/src/x/clock"
	"github.com/m3db/m3/src/x/ident"
	"github.com/m3db/m3/src/x/pool"
	xsync "github.com/m3db/m3/src/x/sync"

	"github.com/uber-go/tally"
	"github.com/uber/tchannel-go/thrift"
)

const _defaultHostQueueOpsArraySize = 8

var (
	// ErrCallMissingContext is returned when a call is missing its required context.
	ErrCallMissingContext = errors.New("call missing context")
	// ErrCallWithoutDeadline is returned when a call context has no deadline.
	ErrCallWithoutDeadline = errors.New("call context without deadline")
)

type queue struct {
	sync.WaitGroup
	sync.RWMutex

	opts                                         Options
	nowFn                                        clock.NowFn
	host                                         topology.Host
	connPool                                     connectionPool
	writeBatchRawRequestPool                     writeBatchRawRequestPool
	writeBatchRawV2RequestPool                   writeBatchRawV2RequestPool
	writeBatchRawRequestElementArrayPool         writeBatchRawRequestElementArrayPool
	writeBatchRawV2RequestElementArrayPool       writeBatchRawV2RequestElementArrayPool
	writeTaggedBatchRawRequestPool               writeTaggedBatchRawRequestPool
	writeTaggedBatchRawV2RequestPool             writeTaggedBatchRawV2RequestPool
	writeTaggedBatchRawRequestElementArrayPool   writeTaggedBatchRawRequestElementArrayPool
	writeTaggedBatchRawV2RequestElementArrayPool writeTaggedBatchRawV2RequestElementArrayPool
	fetchBatchRawV2RequestPool                   fetchBatchRawV2RequestPool
	fetchBatchRawV2RequestElementArrayPool       fetchBatchRawV2RequestElementArrayPool
	workerPool                                   xsync.PooledWorkerPool
	size                                         int
	ops                                          []op
	opsSumSize                                   int
	opsLastRotatedAt                             time.Time
	opsArrayPool                                 *opArrayPool
	drainIn                                      chan []op
	writeOpBatchSize                             tally.Histogram
	fetchOpBatchSize                             tally.Histogram
	status                                       status
	serverSupportsV2APIs                         bool
}

func newHostQueue(
	host topology.Host,
	hostQueueOpts hostQueueOpts,
) (hostQueue, error) {
	var (
		opts  = hostQueueOpts.opts
		iOpts = opts.InstrumentOptions()
		scope = iOpts.MetricsScope().SubScope("hostqueue")
	)
	iOpts = iOpts.SetMetricsScope(scope)
	opts = opts.SetInstrumentOptions(iOpts.SetMetricsScope(scope))

	writeOpBatchSizeBuckets, err := tally.ExponentialValueBuckets(1, 2, 15)
	if err != nil {
		return nil, err
	}
	writeOpBatchSizeBuckets = append(tally.ValueBuckets{0}, writeOpBatchSizeBuckets...)

	fetchOpBatchSizeBuckets, err := tally.ExponentialValueBuckets(1, 2, 15)
	if err != nil {
		return nil, err
	}
	fetchOpBatchSizeBuckets = append(tally.ValueBuckets{0}, fetchOpBatchSizeBuckets...)

	newHostQueuePooledWorker := opts.HostQueueNewPooledWorkerFn()
	workerPool, err := newHostQueuePooledWorker(xsync.NewPooledWorkerOptions{
		InstrumentOptions: iOpts,
	})
	if err != nil {
		return nil, err
	}
	workerPool.Init()

	var (
		opsArrayLen     = _defaultHostQueueOpsArraySize
		opArrayPoolSize = opts.HostQueueOpsArrayPoolSize()
		opArrayPoolOpts = pool.NewObjectPoolOptions().
			SetInstrumentOptions(
				opts.InstrumentOptions().SetMetricsScope(scope.SubScope("op-array-pool")),
			).
			SetSize(int(opArrayPoolSize)).
			SetDynamic(opArrayPoolSize.IsDynamic())
	)

	if !opArrayPoolSize.IsDynamic() {
		// For static pools, keep the channel buffer size the same as the pool
		// size to preserve backwards compatibility.
		opsArrayLen = int(opArrayPoolSize)
	}

	opArrayPoolElemCapacity := int(math.Max(float64(opts.HostQueueOpsFlushSize()), float64(opts.WriteBatchSize())))
	opArrayPool := newOpArrayPool(opArrayPoolOpts, opArrayPoolElemCapacity)
	opArrayPool.Init()

	return &queue{
		opts:                                   opts,
		nowFn:                                  opts.ClockOptions().NowFn(),
		host:                                   host,
		connPool:                               newConnectionPool(host, opts),
		writeBatchRawRequestPool:               hostQueueOpts.writeBatchRawRequestPool,
		writeBatchRawV2RequestPool:             hostQueueOpts.writeBatchRawV2RequestPool,
		writeBatchRawRequestElementArrayPool:   hostQueueOpts.writeBatchRawRequestElementArrayPool,
		writeBatchRawV2RequestElementArrayPool: hostQueueOpts.writeBatchRawV2RequestElementArrayPool,
		writeTaggedBatchRawRequestPool:         hostQueueOpts.writeTaggedBatchRawRequestPool,
		writeTaggedBatchRawV2RequestPool:       hostQueueOpts.writeTaggedBatchRawV2RequestPool,
		writeTaggedBatchRawRequestElementArrayPool:   hostQueueOpts.writeTaggedBatchRawRequestElementArrayPool,
		writeTaggedBatchRawV2RequestElementArrayPool: hostQueueOpts.writeTaggedBatchRawV2RequestElementArrayPool,
		fetchBatchRawV2RequestPool:                   hostQueueOpts.fetchBatchRawV2RequestPool,
		fetchBatchRawV2RequestElementArrayPool:       hostQueueOpts.fetchBatchRawV2RequestElementArrayPool,
		workerPool:           workerPool,
		size:                 opts.HostQueueOpsFlushSize(),
		ops:                  opArrayPool.Get(),
		opsArrayPool:         opArrayPool,
		writeOpBatchSize:     scope.Histogram("write-op-batch-size", writeOpBatchSizeBuckets),
		fetchOpBatchSize:     scope.Histogram("fetch-op-batch-size", fetchOpBatchSizeBuckets),
		drainIn:              make(chan []op, opsArrayLen),
		serverSupportsV2APIs: opts.UseV2BatchAPIs(),
	}, nil
}

func (q *queue) Open() {
	q.Lock()
	defer q.Unlock()

	if q.status != statusNotOpen {
		return
	}

	q.status = statusOpen

	// Open the connection pool.
	q.connPool.Open()

	// Continually drain the queue until closed.
	go q.drain()

	flushInterval := q.opts.HostQueueOpsFlushInterval()
	if flushInterval > 0 {
		// Continually flush the queue at the given interval if set.
		go q.flushEvery(flushInterval)
	}
}

func (q *queue) flushEvery(interval time.Duration) {
	// sleepForOverride is used to change the next sleep based on the last ops rotation.
	var sleepForOverride time.Duration
	for {
		sleepFor := interval
		if sleepForOverride > 0 {
			sleepFor = sleepForOverride
			sleepForOverride = 0
		}

		time.Sleep(sleepFor)

		q.RLock()
		if q.status != statusOpen {
			q.RUnlock()
			return
		}
		lastRotateAt := q.opsLastRotatedAt
		q.RUnlock()

		sinceLastRotate := q.nowFn().Sub(lastRotateAt)
		if sinceLastRotate < interval {
			// Rotated recently already, sleep until we would next consider flushing.
			sleepForOverride = interval - sinceLastRotate
			continue
		}

		q.Lock()
		if q.status != statusOpen {
			q.Unlock()
			return
		}
		needsDrain := q.rotateOpsWithLock()
		// Need to hold the lock while writing to the drainIn
		// channel to ensure it has not been closed.
		if len(needsDrain) != 0 {
			q.drainIn <- needsDrain
		}
		q.Unlock()
	}
}

func (q *queue) rotateOpsWithLock() []op {
	if q.opsSumSize == 0 {
		// No need to rotate as the queue is empty.
		return nil
	}

	needsDrain := q.ops

	// Reset ops.
	q.ops = q.opsArrayPool.Get()
	q.opsSumSize = 0
	q.opsLastRotatedAt = q.nowFn()

	return needsDrain
}
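// Illustrative note (not part of the original source): with a flush interval of,
// say, 100ms, if flushEvery wakes up and finds the ops were last rotated 30ms ago
// (for example because Enqueue hit the flush size and already rotated), it does
// not rotate again; it sets sleepForOverride to 70ms so the next wakeup lands
// roughly one interval after the most recent rotation.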
func (q *queue) drain() {
	var (
		currV2WriteReq *rpc.WriteBatchRawV2Request
		currV2WriteOps []op

		currV2WriteTaggedReq *rpc.WriteTaggedBatchRawV2Request
		currV2WriteTaggedOps []op

		currWriteOpsByNamespace       namespaceWriteBatchOpsSlice
		currTaggedWriteOpsByNamespace namespaceWriteTaggedBatchOpsSlice

		currV2FetchBatchRawReq *rpc.FetchBatchRawV2Request
		currV2FetchBatchRawOps []op
	)

	for ops := range q.drainIn {
		opsLen := len(ops)
		for i := 0; i < opsLen; i++ {
			switch v := ops[i].(type) {
			case *writeOperation:
				if q.serverSupportsV2APIs {
					currV2WriteReq, currV2WriteOps = q.drainWriteOpV2(v, currV2WriteReq, currV2WriteOps, ops[i])
				} else {
					currWriteOpsByNamespace = q.drainWriteOpV1(v, currWriteOpsByNamespace, ops[i])
				}
			case *writeTaggedOperation:
				if q.serverSupportsV2APIs {
					currV2WriteTaggedReq, currV2WriteTaggedOps = q.drainTaggedWriteOpV2(v, currV2WriteTaggedReq, currV2WriteTaggedOps, ops[i])
				} else {
					currTaggedWriteOpsByNamespace = q.drainTaggedWriteOpV1(v, currTaggedWriteOpsByNamespace, ops[i])
				}
			case *fetchBatchOp:
				if q.serverSupportsV2APIs {
					currV2FetchBatchRawReq, currV2FetchBatchRawOps = q.drainFetchBatchRawV2Op(v, currV2FetchBatchRawReq, currV2FetchBatchRawOps, ops[i])
				} else {
					q.asyncFetch(v)
				}
			case *truncateOp:
				q.asyncTruncate(v)
			default:
				completionFn := ops[i].CompletionFn()
				completionFn(nil, errQueueUnknownOperation(q.host.ID()))
			}
		}

		// If any outstanding write ops, async write.
		for i, writeOps := range currWriteOpsByNamespace {
			if len(writeOps.ops) > 0 {
				q.asyncWrite(writeOps.namespace, writeOps.ops,
					writeOps.elems)
			}
			// Zero the element.
			currWriteOpsByNamespace[i] = namespaceWriteBatchOps{}
		}
		// Reset the slice.
		currWriteOpsByNamespace = currWriteOpsByNamespace[:0]
		if currV2WriteReq != nil {
			q.asyncWriteV2(currV2WriteOps, currV2WriteReq)
			currV2WriteReq = nil
			currV2WriteOps = nil
		}

		// If any outstanding tagged write ops, async write.
		for i, writeOps := range currTaggedWriteOpsByNamespace {
			if len(writeOps.ops) > 0 {
				q.asyncTaggedWrite(writeOps.namespace, writeOps.ops,
					writeOps.elems)
			}
			// Zero the element.
			currTaggedWriteOpsByNamespace[i] = namespaceWriteTaggedBatchOps{}
		}
		// If any outstanding fetches, fetch.
		if currV2FetchBatchRawReq != nil {
			q.asyncFetchV2(currV2FetchBatchRawOps, currV2FetchBatchRawReq)
			currV2FetchBatchRawOps = nil
			currV2FetchBatchRawReq = nil
		}

		// Reset the slice.
		currTaggedWriteOpsByNamespace = currTaggedWriteOpsByNamespace[:0]
		if currV2WriteTaggedReq != nil {
			q.asyncTaggedWriteV2(currV2WriteTaggedOps, currV2WriteTaggedReq)
			currV2WriteTaggedReq = nil
			currV2WriteTaggedOps = nil
		}

		if ops != nil {
			q.opsArrayPool.Put(ops)
		}
	}

	// Close the connection pool after all requests are done.
	q.Wait()
	q.connPool.Close()
}

func (q *queue) drainWriteOpV1(
	v *writeOperation,
	currWriteOpsByNamespace namespaceWriteBatchOpsSlice,
	op op,
) namespaceWriteBatchOpsSlice {
	namespace := v.namespace
	idx := currWriteOpsByNamespace.indexOf(namespace)
	if idx == -1 {
		value := namespaceWriteBatchOps{
			namespace:                            namespace,
			opsArrayPool:                         q.opsArrayPool,
			writeBatchRawRequestElementArrayPool: q.writeBatchRawRequestElementArrayPool,
		}
		idx = len(currWriteOpsByNamespace)
		currWriteOpsByNamespace = append(currWriteOpsByNamespace, value)
	}

	currWriteOpsByNamespace.appendAt(idx, op, &v.request)

	if currWriteOpsByNamespace.lenAt(idx) == q.opts.WriteBatchSize() {
		// Reached the write batch limit, write async and reset.
		q.asyncWrite(namespace, currWriteOpsByNamespace[idx].ops,
			currWriteOpsByNamespace[idx].elems)
		currWriteOpsByNamespace.resetAt(idx)
	}

	return currWriteOpsByNamespace
}

func (q *queue) drainTaggedWriteOpV1(
	v *writeTaggedOperation,
	currTaggedWriteOpsByNamespace namespaceWriteTaggedBatchOpsSlice,
	op op,
) namespaceWriteTaggedBatchOpsSlice {
	namespace := v.namespace
	idx := currTaggedWriteOpsByNamespace.indexOf(namespace)
	if idx == -1 {
		value := namespaceWriteTaggedBatchOps{
			namespace:    namespace,
			opsArrayPool: q.opsArrayPool,
			writeTaggedBatchRawRequestElementArrayPool: q.writeTaggedBatchRawRequestElementArrayPool,
		}
		idx = len(currTaggedWriteOpsByNamespace)
		currTaggedWriteOpsByNamespace = append(currTaggedWriteOpsByNamespace, value)
	}

	currTaggedWriteOpsByNamespace.appendAt(idx, op, &v.request)

	if currTaggedWriteOpsByNamespace.lenAt(idx) == q.opts.WriteBatchSize() {
		// Reached the write batch limit, write async and reset.
		q.asyncTaggedWrite(namespace, currTaggedWriteOpsByNamespace[idx].ops,
			currTaggedWriteOpsByNamespace[idx].elems)
		currTaggedWriteOpsByNamespace.resetAt(idx)
	}

	return currTaggedWriteOpsByNamespace
}

func (q *queue) drainWriteOpV2(
	v *writeOperation,
	currV2WriteReq *rpc.WriteBatchRawV2Request,
	currV2WriteOps []op,
	op op,
) (*rpc.WriteBatchRawV2Request, []op) {
	namespace := v.namespace
	if currV2WriteReq == nil {
		currV2WriteReq = q.writeBatchRawV2RequestPool.Get()
		currV2WriteReq.Elements = q.writeBatchRawV2RequestElementArrayPool.Get()
	}

	nsIdx := -1
	for i, ns := range currV2WriteReq.NameSpaces {
		if bytes.Equal(namespace.Bytes(), ns) {
			nsIdx = i
			break
		}
	}
	if nsIdx == -1 {
		currV2WriteReq.NameSpaces = append(currV2WriteReq.NameSpaces, namespace.Bytes())
		nsIdx = len(currV2WriteReq.NameSpaces) - 1
	}

	// Copy the request because operations are shared across multiple host queues so mutating
	// them directly is racy.
	// TODO(rartoul): Consider adding a pool for this.
	requestCopy := v.requestV2
	requestCopy.NameSpace = int64(nsIdx)
	currV2WriteReq.Elements = append(currV2WriteReq.Elements, &requestCopy)
	currV2WriteOps = append(currV2WriteOps, op)
	if len(currV2WriteReq.Elements) == q.opts.WriteBatchSize() {
		// Reached the write batch limit, write async and reset.
		q.asyncWriteV2(currV2WriteOps, currV2WriteReq)
		currV2WriteReq = nil
		currV2WriteOps = nil
	}

	return currV2WriteReq, currV2WriteOps
}

func (q *queue) drainTaggedWriteOpV2(
	v *writeTaggedOperation,
	currV2WriteTaggedReq *rpc.WriteTaggedBatchRawV2Request,
	currV2WriteTaggedOps []op,
	op op,
) (*rpc.WriteTaggedBatchRawV2Request, []op) {
	namespace := v.namespace
	if currV2WriteTaggedReq == nil {
		currV2WriteTaggedReq = q.writeTaggedBatchRawV2RequestPool.Get()
		currV2WriteTaggedReq.Elements = q.writeTaggedBatchRawV2RequestElementArrayPool.Get()
	}

	nsIdx := -1
	for i, ns := range currV2WriteTaggedReq.NameSpaces {
		if bytes.Equal(namespace.Bytes(), ns) {
			nsIdx = i
			break
		}
	}
	if nsIdx == -1 {
		currV2WriteTaggedReq.NameSpaces = append(currV2WriteTaggedReq.NameSpaces, namespace.Bytes())
		nsIdx = len(currV2WriteTaggedReq.NameSpaces) - 1
	}

	// Copy the request because operations are shared across multiple host queues so mutating
	// them directly is racy.
	// TODO(rartoul): Consider adding a pool for this.
	requestCopy := v.requestV2
	requestCopy.NameSpace = int64(nsIdx)
	currV2WriteTaggedReq.Elements = append(currV2WriteTaggedReq.Elements, &requestCopy)
	currV2WriteTaggedOps = append(currV2WriteTaggedOps, op)
	if len(currV2WriteTaggedReq.Elements) == q.opts.WriteBatchSize() {
		// Reached the write batch limit, write async and reset.
		q.asyncTaggedWriteV2(currV2WriteTaggedOps, currV2WriteTaggedReq)
		currV2WriteTaggedReq = nil
		currV2WriteTaggedOps = nil
	}

	return currV2WriteTaggedReq, currV2WriteTaggedOps
}

func (q *queue) drainFetchBatchRawV2Op(
	v *fetchBatchOp,
	currV2FetchBatchRawReq *rpc.FetchBatchRawV2Request,
	currV2FetchBatchRawOps []op,
	op op,
) (*rpc.FetchBatchRawV2Request, []op) {
	namespace := v.request.NameSpace
	if currV2FetchBatchRawReq == nil {
		currV2FetchBatchRawReq = q.fetchBatchRawV2RequestPool.Get()
		currV2FetchBatchRawReq.Elements = q.fetchBatchRawV2RequestElementArrayPool.Get()
	}

	nsIdx := -1
	for i, ns := range currV2FetchBatchRawReq.NameSpaces {
		if bytes.Equal(namespace, ns) {
			nsIdx = i
			break
		}
	}
	if nsIdx == -1 {
		currV2FetchBatchRawReq.NameSpaces = append(currV2FetchBatchRawReq.NameSpaces, namespace)
		nsIdx = len(currV2FetchBatchRawReq.NameSpaces) - 1
	}
	for i := range v.requestV2Elements {
		// Each host queue gets its own fetchBatchOp so mutating the NameSpace field here is safe.
		v.requestV2Elements[i].NameSpace = int64(nsIdx)
		currV2FetchBatchRawReq.Elements = append(currV2FetchBatchRawReq.Elements, &v.requestV2Elements[i])
	}
	currV2FetchBatchRawOps = append(currV2FetchBatchRawOps, op)
	// This logic means that in practice we may sometimes exceed the fetch batch size by a factor
	// of two, but that's ok since it does not need to be exact.
	if len(currV2FetchBatchRawReq.Elements) >= q.opts.FetchBatchSize() {
		q.asyncFetchV2(currV2FetchBatchRawOps, currV2FetchBatchRawReq)
		currV2FetchBatchRawReq = nil
		currV2FetchBatchRawOps = nil
	}

	return currV2FetchBatchRawReq, currV2FetchBatchRawOps
}
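// Illustrative note (not part of the original source): the V2 batch requests
// deduplicate namespaces. For example, if the drained ops touch hypothetical
// namespaces "metrics" and "metrics_long", the request carries
// NameSpaces = [[]byte("metrics"), []byte("metrics_long")] once, and each
// element only stores the index (0 or 1) into that list in its NameSpace field.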
func (q *queue) asyncTaggedWrite(
	namespace ident.ID,
	ops []op,
	elems []*rpc.WriteTaggedBatchRawRequestElement,
) {
	q.writeOpBatchSize.RecordValue(float64(len(elems)))
	q.Add(1)

	q.workerPool.Go(func() {
		req := q.writeTaggedBatchRawRequestPool.Get()
		req.NameSpace = namespace.Bytes()
		req.Elements = elems

		// NB(r): Defer is slow in the hot path unfortunately.
		cleanup := func() {
			q.writeTaggedBatchRawRequestElementArrayPool.Put(elems)
			q.writeTaggedBatchRawRequestPool.Put(req)
			q.opsArrayPool.Put(ops)
			q.Done()
		}

		// NB(bl): The host is passed to writeState to determine the state of the
		// shard on the node we're writing to.

		client, _, err := q.connPool.NextClient()
		if err != nil {
			// No client available.
			callAllCompletionFns(ops, q.host, err)
			cleanup()
			return
		}

		ctx, _ := thrift.NewContext(q.opts.WriteRequestTimeout())
		err = client.WriteTaggedBatchRaw(ctx, req)
		if err == nil {
			// All succeeded.
			callAllCompletionFns(ops, q.host, nil)
			cleanup()
			return
		}

		if batchErrs, ok := err.(*rpc.WriteBatchRawErrors); ok {
			// Callback all writes with errors.
			hasErr := make(map[int]struct{})
			for _, batchErr := range batchErrs.Errors {
				op := ops[batchErr.Index]
				op.CompletionFn()(q.host, batchErr.Err)
				hasErr[int(batchErr.Index)] = struct{}{}
			}
			// Callback all writes with no errors.
			for i := range ops {
				if _, ok := hasErr[i]; !ok {
					// No error.
					ops[i].CompletionFn()(q.host, nil)
				}
			}
			cleanup()
			return
		}

		// Entire batch failed.
		callAllCompletionFns(ops, q.host, err)
		cleanup()
	})
}

func (q *queue) asyncTaggedWriteV2(
	ops []op,
	req *rpc.WriteTaggedBatchRawV2Request,
) {
	q.writeOpBatchSize.RecordValue(float64(len(req.Elements)))
	q.Add(1)

	q.workerPool.Go(func() {
		// NB(r): Defer is slow in the hot path unfortunately.
		cleanup := func() {
			q.writeTaggedBatchRawV2RequestElementArrayPool.Put(req.Elements)
			q.writeTaggedBatchRawV2RequestPool.Put(req)
			q.opsArrayPool.Put(ops)
			q.Done()
		}

		// NB(bl): The host is passed to writeState to determine the state of the
		// shard on the node we're writing to.
		client, _, err := q.connPool.NextClient()
		if err != nil {
			// No client available.
			callAllCompletionFns(ops, q.host, err)
			cleanup()
			return
		}

		ctx, _ := thrift.NewContext(q.opts.WriteRequestTimeout())
		err = client.WriteTaggedBatchRawV2(ctx, req)
		if err == nil {
			// All succeeded.
			callAllCompletionFns(ops, q.host, nil)
			cleanup()
			return
		}

		if batchErrs, ok := err.(*rpc.WriteBatchRawErrors); ok {
			// Callback all writes with errors.
			hasErr := make(map[int]struct{})
			for _, batchErr := range batchErrs.Errors {
				op := ops[batchErr.Index]
				op.CompletionFn()(q.host, batchErr.Err)
				hasErr[int(batchErr.Index)] = struct{}{}
			}
			// Callback all writes with no errors.
			for i := range ops {
				if _, ok := hasErr[i]; !ok {
					// No error.
					ops[i].CompletionFn()(q.host, nil)
				}
			}
			cleanup()
			return
		}

		// Entire batch failed.
		callAllCompletionFns(ops, q.host, err)
		cleanup()
	})
}
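// Illustrative note (not part of the original source): when the server returns
// *rpc.WriteBatchRawErrors, only the failed elements are listed. For example, a
// batch of 128 writes where only element 5 failed yields a single entry with
// Index=5; that op's completion fn is called with the error, and the remaining
// 127 ops are completed with a nil error.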
func (q *queue) asyncWrite(
	namespace ident.ID,
	ops []op,
	elems []*rpc.WriteBatchRawRequestElement,
) {
	q.writeOpBatchSize.RecordValue(float64(len(elems)))
	q.Add(1)
	q.workerPool.Go(func() {
		req := q.writeBatchRawRequestPool.Get()
		req.NameSpace = namespace.Bytes()
		req.Elements = elems

		// NB(r): Defer is slow in the hot path unfortunately.
		cleanup := func() {
			q.writeBatchRawRequestElementArrayPool.Put(elems)
			q.writeBatchRawRequestPool.Put(req)
			q.opsArrayPool.Put(ops)
			q.Done()
		}

		// NB(bl): The host is passed to writeState to determine the state of the
		// shard on the node we're writing to.

		client, _, err := q.connPool.NextClient()
		if err != nil {
			// No client available.
			callAllCompletionFns(ops, q.host, err)
			cleanup()
			return
		}

		ctx, _ := thrift.NewContext(q.opts.WriteRequestTimeout())
		err = client.WriteBatchRaw(ctx, req)
		if err == nil {
			// All succeeded.
			callAllCompletionFns(ops, q.host, nil)
			cleanup()
			return
		}

		if batchErrs, ok := err.(*rpc.WriteBatchRawErrors); ok {
			// Callback all writes with errors.
			hasErr := make(map[int]struct{})
			for _, batchErr := range batchErrs.Errors {
				op := ops[batchErr.Index]
				op.CompletionFn()(q.host, batchErr.Err)
				hasErr[int(batchErr.Index)] = struct{}{}
			}
			// Callback all writes with no errors.
			for i := range ops {
				if _, ok := hasErr[i]; !ok {
					// No error.
					ops[i].CompletionFn()(q.host, nil)
				}
			}
			cleanup()
			return
		}

		// Entire batch failed.
		callAllCompletionFns(ops, q.host, err)
		cleanup()
	})
}

func (q *queue) asyncWriteV2(
	ops []op,
	req *rpc.WriteBatchRawV2Request,
) {
	q.writeOpBatchSize.RecordValue(float64(len(req.Elements)))
	q.Add(1)
	q.workerPool.Go(func() {
		// NB(r): Defer is slow in the hot path unfortunately.
		cleanup := func() {
			q.writeBatchRawV2RequestElementArrayPool.Put(req.Elements)
			q.writeBatchRawV2RequestPool.Put(req)
			q.opsArrayPool.Put(ops)
			q.Done()
		}

		// NB(bl): The host is passed to writeState to determine the state of the
		// shard on the node we're writing to.
		client, _, err := q.connPool.NextClient()
		if err != nil {
			// No client available.
			callAllCompletionFns(ops, q.host, err)
			cleanup()
			return
		}

		ctx, _ := thrift.NewContext(q.opts.WriteRequestTimeout())
		err = client.WriteBatchRawV2(ctx, req)
		if err == nil {
			// All succeeded.
			callAllCompletionFns(ops, q.host, nil)
			cleanup()
			return
		}

		if batchErrs, ok := err.(*rpc.WriteBatchRawErrors); ok {
			// Callback all writes with errors.
			hasErr := make(map[int]struct{})
			for _, batchErr := range batchErrs.Errors {
				op := ops[batchErr.Index]
				op.CompletionFn()(q.host, batchErr.Err)
				hasErr[int(batchErr.Index)] = struct{}{}
			}
			// Callback all writes with no errors.
			for i := range ops {
				if _, ok := hasErr[i]; !ok {
					// No error.
					ops[i].CompletionFn()(q.host, nil)
				}
			}
			cleanup()
			return
		}

		// Entire batch failed.
		callAllCompletionFns(ops, q.host, err)
		cleanup()
	})
}

func (q *queue) asyncFetch(op *fetchBatchOp) {
	q.fetchOpBatchSize.RecordValue(float64(len(op.request.Ids)))
	q.Add(1)
	q.workerPool.Go(func() {
		// NB(r): Defer is slow in the hot path unfortunately.
		cleanup := func() {
			op.DecRef()
			op.Finalize()
			q.Done()
		}

		client, _, err := q.connPool.NextClient()
		if err != nil {
			// No client available.
			op.completeAll(nil, err)
			cleanup()
			return
		}

		ctx, _ := thrift.NewContext(q.opts.FetchRequestTimeout())
		result, err := client.FetchBatchRaw(ctx, &op.request)
		if err != nil {
			op.completeAll(nil, err)
			cleanup()
			return
		}

		resultLen := len(result.Elements)
		opLen := op.Size()
		for i := 0; i < opLen; i++ {
			if !(i < resultLen) {
				// No results for this entry, in practice this should never occur.
				op.complete(i, nil, errQueueFetchNoResponse(q.host.ID()))
				continue
			}
			if result.Elements[i].Err != nil {
				op.complete(i, nil, result.Elements[i].Err)
				continue
			}
			op.complete(i, result.Elements[i].Segments, nil)
		}
		cleanup()
	})
}

func (q *queue) asyncFetchV2(
	ops []op,
	currV2FetchBatchRawReq *rpc.FetchBatchRawV2Request,
) {
	q.fetchOpBatchSize.RecordValue(float64(len(currV2FetchBatchRawReq.Elements)))
	q.Add(1)
	q.workerPool.Go(func() {
		// NB(r): Defer is slow in the hot path unfortunately.
		cleanup := func() {
			q.fetchBatchRawV2RequestElementArrayPool.Put(currV2FetchBatchRawReq.Elements)
			q.fetchBatchRawV2RequestPool.Put(currV2FetchBatchRawReq)
			for _, op := range ops {
				fetchOp := op.(*fetchBatchOp)
				fetchOp.DecRef()
				fetchOp.Finalize()
			}
			q.Done()
		}

		client, _, err := q.connPool.NextClient()
		if err != nil {
			// No client available.
			callAllCompletionFns(ops, nil, err)
			cleanup()
			return
		}

		ctx, _ := thrift.NewContext(q.opts.FetchRequestTimeout())
		result, err := client.FetchBatchRawV2(ctx, currV2FetchBatchRawReq)
		if err != nil {
			callAllCompletionFns(ops, nil, err)
			cleanup()
			return
		}

		resultIdx := -1
		for _, op := range ops {
			fetchOp := op.(*fetchBatchOp)
			for j := 0; j < fetchOp.Size(); j++ {
				resultIdx++
				if resultIdx >= len(result.Elements) {
					// No results for this entry, in practice this should never occur.
					fetchOp.complete(j, nil, errQueueFetchNoResponse(q.host.ID()))
					continue
				}

				if result.Elements[resultIdx].Err != nil {
					fetchOp.complete(j, nil, result.Elements[resultIdx].Err)
					continue
				}
				fetchOp.complete(j, result.Elements[resultIdx].Segments, nil)
			}
		}
		cleanup()
	})
}

func (q *queue) asyncFetchTagged(op *fetchTaggedOp) {
	// Note: No worker pool is required for fetch tagged queries; they do not
	// benefit from goroutine re-use the same way the write code path does,
	// which frequently runs into stack splitting due to how deep the stacks
	// in the write code path are.
	// Context on stack splitting (a lot of other material out there too):
	// https://medium.com/a-journey-with-go/go-how-does-the-goroutine-stack-size-evolve-447fc02085e5
	q.Add(1)

	// NB(r): Need to perform any completion function in an async
	// goroutine since the caller may hold the lock while enqueuing the op,
	// which is required to access the completion fn with op.CompletionFn().
	go func() {
		defer func() {
			op.decRef()
			q.Done()
		}()

		// All fetch tagged calls are required to provide a context with a deadline.
		ctx, err := q.mustWrapAndCheckContext(op.context, "fetchTagged")
		if err != nil {
			op.CompletionFn()(fetchTaggedResultAccumulatorOpts{host: q.host}, err)
			return
		}

		client, _, err := q.connPool.NextClient()
		if err != nil {
			// No client available.
			op.CompletionFn()(fetchTaggedResultAccumulatorOpts{host: q.host}, err)
			return
		}

		result, err := client.FetchTagged(ctx, &op.request)
		if err != nil {
			op.CompletionFn()(fetchTaggedResultAccumulatorOpts{host: q.host}, err)
			return
		}

		op.CompletionFn()(fetchTaggedResultAccumulatorOpts{
			host:     q.host,
			response: result,
		}, err)
	}()
}

func (q *queue) asyncAggregate(op *aggregateOp) {
	// Note: No worker pool is required for aggregate queries; they do not
	// benefit from goroutine re-use the same way the write code path does,
	// which frequently runs into stack splitting due to how deep the stacks
	// in the write code path are.
	// Context on stack splitting (a lot of other material out there too):
	// https://medium.com/a-journey-with-go/go-how-does-the-goroutine-stack-size-evolve-447fc02085e5
	q.Add(1)

	// NB(r): Need to perform any completion function in an async
	// goroutine since the caller may hold the lock while enqueuing the op,
	// which is required to access the completion fn with op.CompletionFn().
	go func() {
		defer func() {
			op.decRef()
			q.Done()
		}()

		// All aggregate calls are required to provide a context with a deadline.
		ctx, err := q.mustWrapAndCheckContext(op.context, "aggregate")
		if err != nil {
			op.CompletionFn()(aggregateResultAccumulatorOpts{host: q.host}, err)
			return
		}

		client, _, err := q.connPool.NextClient()
		if err != nil {
			// No client available.
			op.CompletionFn()(aggregateResultAccumulatorOpts{host: q.host}, err)
			return
		}

		result, err := client.AggregateRaw(ctx, &op.request)
		if err != nil {
			op.CompletionFn()(aggregateResultAccumulatorOpts{host: q.host}, err)
			return
		}

		op.CompletionFn()(aggregateResultAccumulatorOpts{
			host:     q.host,
			response: result,
		}, err)
	}()
}

func (q *queue) asyncTruncate(op *truncateOp) {
	q.Add(1)

	q.workerPool.Go(func() {
		cleanup := q.Done

		client, _, err := q.connPool.NextClient()
		if err != nil {
			// No client available.
			op.completionFn(nil, err)
			cleanup()
			return
		}

		ctx, _ := thrift.NewContext(q.opts.TruncateRequestTimeout())
		if res, err := client.Truncate(ctx, &op.request); err != nil {
			op.completionFn(nil, err)
		} else {
			op.completionFn(res, nil)
		}

		cleanup()
	})
}

func (q *queue) mustWrapAndCheckContext(
	callingContext context.Context,
	method string,
) (thrift.Context, error) {
	if callingContext == nil {
		return nil, fmt.Errorf(
			"%w in host queue: method=%s", ErrCallMissingContext, method)
	}

	// Use the original context for the call.
	_, ok := callingContext.Deadline()
	if !ok {
		return nil, fmt.Errorf(
			"%w in host queue: method=%s", ErrCallWithoutDeadline, method)
	}

	// Create the thrift context.
	newThriftContext := q.opts.ThriftContextFn()
	return newThriftContext(callingContext), nil
}
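// Illustrative note (not part of the original source): callers of the
// fetchTagged and aggregate paths must attach a deadline to the context they
// put on the op, for example:
//
//	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
//	defer cancel()
//
// A nil context fails with ErrCallMissingContext and a context without a
// deadline fails with ErrCallWithoutDeadline before any RPC is made.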
func (q *queue) Len() int {
	q.RLock()
	v := q.opsSumSize
	q.RUnlock()
	return v
}

func (q *queue) validateOpenWithLock() error {
	if q.status != statusOpen {
		return errQueueNotOpen(q.host.ID())
	}
	return nil
}

func (q *queue) Enqueue(o op) error {
	switch sOp := o.(type) {
	case *fetchTaggedOp:
		// Need to take ownership if it's a fetch tagged op.
		sOp.incRef()
		// No queueing for a fetchTagged op.
		q.RLock()
		if err := q.validateOpenWithLock(); err != nil {
			q.RUnlock()
			return err
		}
		q.asyncFetchTagged(sOp)
		q.RUnlock()
		return nil
	case *aggregateOp:
		// Need to take ownership if it's an aggregate op.
		sOp.incRef()
		// No queueing for an aggregate op.
		q.RLock()
		if err := q.validateOpenWithLock(); err != nil {
			q.RUnlock()
			return err
		}
		q.asyncAggregate(sOp)
		q.RUnlock()
		return nil
	case *fetchBatchOp:
		// Need to take ownership if it's a fetch batch op.
		sOp.IncRef()
	}

	var needsDrain []op
	q.Lock()
	if err := q.validateOpenWithLock(); err != nil {
		q.Unlock()
		return err
	}
	q.ops = append(q.ops, o)
	q.opsSumSize += o.Size()
	// If the queue is full, flush.
	if q.opsSumSize >= q.size {
		needsDrain = q.rotateOpsWithLock()
	}
	// Need to hold the lock while writing to the drainIn
	// channel to ensure it has not been closed.
	if len(needsDrain) != 0 {
		q.drainIn <- needsDrain
	}
	q.Unlock()
	return nil
}
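// Illustrative note (not part of the original source): most ops (writes, fetch
// batches, truncates) accumulate in q.ops until their summed Size() reaches
// HostQueueOpsFlushSize, at which point Enqueue rotates them onto the drainIn
// channel; otherwise the background flushEvery goroutine picks them up after
// HostQueueOpsFlushInterval (when a flush interval is configured). fetchTagged
// and aggregate ops bypass the queue entirely and are dispatched immediately.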
func (q *queue) Host() topology.Host {
	return q.host
}

func (q *queue) ConnectionCount() int {
	return q.connPool.ConnectionCount()
}

func (q *queue) ConnectionPool() connectionPool {
	return q.connPool
}

func (q *queue) BorrowConnection(fn WithConnectionFn) error {
	q.RLock()
	if q.status != statusOpen {
		q.RUnlock()
		return errQueueNotOpen(q.host.ID())
	}
	// Add an outstanding operation to avoid the connection pool being closed.
	q.Add(1)
	defer q.Done()
	q.RUnlock()

	conn, ch, err := q.connPool.NextClient()
	if err != nil {
		return err
	}

	fn(conn, ch)
	return nil
}

func (q *queue) Close() {
	q.Lock()
	if q.status != statusOpen {
		q.Unlock()
		return
	}
	q.status = statusClosed

	// Need to hold the lock while writing to the drainIn
	// channel to ensure it has not been closed.
	needsDrain := q.rotateOpsWithLock()
	if len(needsDrain) != 0 {
		q.drainIn <- needsDrain
	}

	// Close the drainIn channel while holding the lock so writers can
	// consistently tell whether the channel is open by checking the status.
	close(q.drainIn)
	q.Unlock()
}

// errors

func errQueueNotOpen(hostID string) error {
	return fmt.Errorf("host operation queue not open for host: %s", hostID)
}

func errQueueUnknownOperation(hostID string) error {
	return fmt.Errorf("host operation queue received unknown operation for host: %s", hostID)
}

func errQueueFetchNoResponse(hostID string) error {
	return fmt.Errorf("host operation queue did not receive response for given fetch for host: %s", hostID)
}

// ops container types

type namespaceWriteBatchOps struct {
	namespace                            ident.ID
	opsArrayPool                         *opArrayPool
	writeBatchRawRequestElementArrayPool writeBatchRawRequestElementArrayPool
	ops                                  []op
	elems                                []*rpc.WriteBatchRawRequestElement
}

type namespaceWriteBatchOpsSlice []namespaceWriteBatchOps

func (s namespaceWriteBatchOpsSlice) indexOf(
	namespace ident.ID,
) int {
	idx := -1
	for i := range s {
		if s[i].namespace.Equal(namespace) {
			return i
		}
	}
	return idx
}

func (s namespaceWriteBatchOpsSlice) appendAt(
	index int,
	op op,
	elem *rpc.WriteBatchRawRequestElement,
) {
	if s[index].ops == nil {
		s[index].ops = s[index].opsArrayPool.Get()
	}
	if s[index].elems == nil {
		s[index].elems = s[index].writeBatchRawRequestElementArrayPool.Get()
	}
	s[index].ops = append(s[index].ops, op)
	s[index].elems = append(s[index].elems, elem)
}

func (s namespaceWriteBatchOpsSlice) lenAt(
	index int,
) int {
	return len(s[index].ops)
}

func (s namespaceWriteBatchOpsSlice) resetAt(
	index int,
) {
	s[index].ops = nil
	s[index].elems = nil
}

// TODO: use genny to make namespaceWriteBatchOps and namespaceWriteTaggedBatchOps
// share code (https://github.com/m3db/m3/src/dbnode/issues/531)
type namespaceWriteTaggedBatchOps struct {
	namespace                                  ident.ID
	opsArrayPool                               *opArrayPool
	writeTaggedBatchRawRequestElementArrayPool writeTaggedBatchRawRequestElementArrayPool
	ops                                        []op
	elems                                      []*rpc.WriteTaggedBatchRawRequestElement
}

type namespaceWriteTaggedBatchOpsSlice []namespaceWriteTaggedBatchOps

func (s namespaceWriteTaggedBatchOpsSlice) indexOf(
	namespace ident.ID,
) int {
	idx := -1
	for i := range s {
		if s[i].namespace.Equal(namespace) {
			return i
		}
	}
	return idx
}

func (s namespaceWriteTaggedBatchOpsSlice) appendAt(
	index int,
	op op,
	elem *rpc.WriteTaggedBatchRawRequestElement,
) {
	if s[index].ops == nil {
		s[index].ops = s[index].opsArrayPool.Get()
	}
	if s[index].elems == nil {
		s[index].elems = s[index].writeTaggedBatchRawRequestElementArrayPool.Get()
	}
	s[index].ops = append(s[index].ops, op)
	s[index].elems = append(s[index].elems, elem)
}

func (s namespaceWriteTaggedBatchOpsSlice) lenAt(
	index int,
) int {
	return len(s[index].ops)
}

func (s namespaceWriteTaggedBatchOpsSlice) resetAt(
	index int,
) {
	s[index].ops = nil
	s[index].elems = nil
}