// Copyright (c) 2018 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package m3

import (
	"bytes"
	"context"
	goerrors "errors"
	"fmt"
	"sync"
	"time"

	coordmodel "github.com/m3db/m3/src/cmd/services/m3coordinator/model"
	"github.com/m3db/m3/src/dbnode/client"
	"github.com/m3db/m3/src/dbnode/storage/index"
	"github.com/m3db/m3/src/query/block"
	"github.com/m3db/m3/src/query/errors"
	"github.com/m3db/m3/src/query/generated/proto/prompb"
	"github.com/m3db/m3/src/query/models"
	"github.com/m3db/m3/src/query/storage"
	"github.com/m3db/m3/src/query/storage/m3/consolidators"
	"github.com/m3db/m3/src/query/storage/m3/storagemetadata"
	"github.com/m3db/m3/src/query/tracepoint"
	"github.com/m3db/m3/src/query/ts"
	xcontext "github.com/m3db/m3/src/x/context"
	xerrors "github.com/m3db/m3/src/x/errors"
	"github.com/m3db/m3/src/x/ident"
	"github.com/m3db/m3/src/x/instrument"
	xtime "github.com/m3db/m3/src/x/time"

	"github.com/opentracing/opentracing-go/log"
	"github.com/prometheus/common/model"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
)

const (
	minWriteWaitTimeout = time.Second
)

var (
	// The default name for the name tag in Prometheus metrics.
	promDefaultName = []byte(model.MetricNameLabel)
	// The prefix for reserved labels, e.g. __name__.
	reservedLabelPrefix = []byte(model.ReservedLabelPrefix)
	// The name for the rollup tag defined by the coordinator model.
	rollupTagName = []byte(coordmodel.RollupTagName)
	// The value for the rollup tag defined by the coordinator model.
	rollupTagValue = []byte(coordmodel.RollupTagValue)

	errUnaggregatedAndAggregatedDisabled = goerrors.New("fetch options has both" +
		" aggregated and unaggregated namespace lookup disabled")
	errNoNamespacesConfigured             = goerrors.New("no namespaces configured")
	errUnaggregatedNamespaceUninitialized = goerrors.New(
		"unaggregated namespace is not yet initialized")
)

type m3storage struct {
	clusters Clusters
	opts     Options
	nowFn    func() time.Time
	logger   *zap.Logger
}

// NewStorage creates a new local m3storage instance.
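//
// A minimal usage sketch (the surrounding setup is illustrative only; it
// assumes Clusters and Options values constructed elsewhere):
//
//	store, err := NewStorage(clusters, opts, instrument.NewOptions())
//	if err != nil {
//		// handle invalid options
//	}
//	defer store.Close()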
func NewStorage(
	clusters Clusters,
	opts Options,
	instrumentOpts instrument.Options,
) (Storage, error) {
	if err := opts.Validate(); err != nil {
		return nil, err
	}

	return &m3storage{
		clusters: clusters,
		opts:     opts,
		nowFn:    time.Now,
		logger:   instrumentOpts.Logger(),
	}, nil
}

func (s *m3storage) QueryStorageMetadataAttributes(
	_ context.Context,
	queryStart, queryEnd time.Time,
	opts *storage.FetchOptions,
) ([]storagemetadata.Attributes, error) {
	now := xtime.ToUnixNano(s.nowFn())
	_, namespaces, err := resolveClusterNamespacesForQuery(now,
		xtime.ToUnixNano(queryStart),
		xtime.ToUnixNano(queryEnd),
		s.clusters,
		opts.FanoutOptions,
		opts.RestrictQueryOptions,
		opts.RelatedQueryOptions)
	if err != nil {
		return nil, err
	}

	results := make([]storagemetadata.Attributes, 0, len(namespaces))
	for _, ns := range namespaces {
		results = append(results, ns.Options().Attributes())
	}
	return results, nil
}

func (s *m3storage) ErrorBehavior() storage.ErrorBehavior {
	return storage.BehaviorFail
}

func (s *m3storage) Name() string {
	return "local_store"
}

// findReservedLabel finds a reserved label target (one that begins with the
// reservedLabelPrefix) in a sorted slice of labels, returning its value, or
// nil if the target is not present.
func findReservedLabel(labels []prompb.Label, target []byte) []byte {
	// The target should always contain the reservedLabelPrefix.
	// If it doesn't, then we won't be able to find it within
	// the reserved labels by definition.
	if !bytes.HasPrefix(target, reservedLabelPrefix) {
		return nil
	}

	foundReservedLabels := false
	for idx := 0; idx < len(labels); idx++ {
		label := labels[idx]
		if !bytes.HasPrefix(label.Name, reservedLabelPrefix) {
			if foundReservedLabels {
				// We previously found reserved labels, and now that we've iterated
				// past the end of the section that contains them, we know the target
				// doesn't exist.
				return nil
			}
			// We haven't found reserved labels yet, so keep going.
			continue
		}

		// At this point we know that the current label contains the reservedLabelPrefix.
		foundReservedLabels = true
		if bytes.Equal(label.Name, target) {
			return label.Value
		}
	}

	return nil
}
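
// To illustrate the scan above with hypothetical, sorted input: for labels
// [__name__=requests __rollup__=true job=api], findReservedLabel(labels,
// promDefaultName) returns "requests", and a lookup for a missing reserved
// name returns nil as soon as the loop steps past the reserved-prefix
// section into "job".
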
func calculateMetadataByName(result *prompb.QueryResult, metadata *block.ResultMetadata) {
	for _, series := range result.Timeseries {
		if series == nil {
			continue
		}

		name := findReservedLabel(series.Labels, promDefaultName)
		rollup := findReservedLabel(series.Labels, rollupTagName)
		if bytes.Equal(rollup, rollupTagValue) {
			metadata.ByName(name).Aggregated++
		} else {
			metadata.ByName(name).Unaggregated++
		}
	}
}

func (s *m3storage) FetchProm(
	ctx context.Context,
	query *storage.FetchQuery,
	options *storage.FetchOptions,
) (storage.PromResult, error) {
	queryOptions, err := storage.FetchOptionsToM3Options(options, query)
	if err != nil {
		return storage.PromResult{}, err
	}

	accumulator, _, err := s.fetchCompressed(ctx, query, options, queryOptions)
	if err != nil {
		return storage.PromResult{}, err
	}

	defer accumulator.Close()
	result, attrs, err := accumulator.FinalResultWithAttrs()
	if err != nil {
		return storage.PromResult{}, err
	}

	resolutions := make([]time.Duration, 0, len(attrs))
	for _, attr := range attrs {
		resolutions = append(resolutions, attr.Resolution)
	}

	result.Metadata.Resolutions = resolutions
	fetchResult, err := storage.SeriesIteratorsToPromResult(
		ctx,
		result,
		s.opts.ReadWorkerPool(),
		s.opts.TagOptions(),
		s.opts.PromConvertOptions(),
		options,
	)
	if err != nil {
		return storage.PromResult{}, err
	}

	if options != nil && options.MaxMetricMetadataStats > 0 {
		calculateMetadataByName(fetchResult.PromResult, &fetchResult.Metadata)
	}

	return fetchResult, nil
}

// FetchResultToBlockResult converts an encoded SeriesIterator fetch result
// into blocks.
func FetchResultToBlockResult(
	result consolidators.SeriesFetchResult,
	query *storage.FetchQuery,
	options *storage.FetchOptions,
	opts Options,
) (block.Result, error) {
	// If using multiblock, update options to reflect this.
	if options.BlockType == models.TypeMultiBlock {
		opts = opts.SetSplitSeriesByBlock(true)
	}

	start := query.Start
	bounds := models.Bounds{
		Start:    xtime.ToUnixNano(start),
		Duration: query.End.Sub(start),
		StepSize: query.Interval,
	}

	blocks, err := ConvertM3DBSeriesIterators(
		result,
		bounds,
		opts,
	)
	if err != nil {
		return block.Result{
			Metadata: block.NewResultMetadata(),
		}, err
	}

	return block.Result{
		Blocks:   blocks,
		Metadata: result.Metadata,
	}, nil
}
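
// As a concrete (hypothetical) illustration of the bounds computed above: a
// query from 10:00 to 11:00 with a 15s step yields
// models.Bounds{Start: 10:00, Duration: 1h, StepSize: 15s}, i.e. 240 steps
// across the result.
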
func (s *m3storage) FetchBlocks(
	ctx context.Context,
	query *storage.FetchQuery,
	options *storage.FetchOptions,
) (block.Result, error) {
	// Override options with the currently specified lookback duration.
	opts := s.opts.SetLookbackDuration(
		options.LookbackDurationOrDefault(s.opts.LookbackDuration()))

	result, _, err := s.FetchCompressedResult(ctx, query, options)
	if err != nil {
		return block.Result{
			Metadata: block.NewResultMetadata(),
		}, err
	}

	return FetchResultToBlockResult(result, query, options, opts)
}

func (s *m3storage) FetchCompressed(
	ctx context.Context,
	query *storage.FetchQuery,
	options *storage.FetchOptions,
) (consolidators.MultiFetchResult, error) {
	queryOptions, err := storage.FetchOptionsToM3Options(options, query)
	if err != nil {
		return nil, err
	}

	accumulator, _, err := s.fetchCompressed(ctx, query, options, queryOptions)
	return accumulator, err
}

func (s *m3storage) FetchCompressedResult(
	ctx context.Context,
	query *storage.FetchQuery,
	options *storage.FetchOptions,
) (consolidators.SeriesFetchResult, Cleanup, error) {
	queryOptions, err := storage.FetchOptionsToM3Options(options, query)
	if err != nil {
		return consolidators.SeriesFetchResult{
			Metadata: block.NewResultMetadata(),
		}, noop, err
	}

	accumulator, m3query, err := s.fetchCompressed(ctx, query, options, queryOptions)
	if err != nil {
		return consolidators.SeriesFetchResult{
			Metadata: block.NewResultMetadata(),
		}, noop, err
	}

	result, attrs, err := accumulator.FinalResultWithAttrs()
	if err != nil {
		accumulator.Close()
		return result, noop, err
	}

	if processor := s.opts.SeriesIteratorProcessor(); processor != nil {
		_, span, sampled := xcontext.StartSampledTraceSpan(ctx,
			tracepoint.FetchCompressedInspectSeries)
		iters := result.SeriesIterators()
		if err := processor.InspectSeries(ctx, m3query, queryOptions, iters); err != nil {
			s.logger.Error("error inspecting series", zap.Error(err))
		}
		if sampled {
			span.LogFields(
				log.String("query", query.Raw),
				log.String("start", query.Start.String()),
				log.String("end", query.End.String()),
				log.String("interval", query.Interval.String()),
			)
		}
		span.Finish()
	}

	resolutions := make([]time.Duration, 0, len(attrs))
	for _, attr := range attrs {
		resolutions = append(resolutions, attr.Resolution)
	}

	result.Metadata.Resolutions = resolutions
	return result, accumulator.Close, nil
}
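
// Callers of FetchCompressedResult must invoke the returned Cleanup once
// finished with the result. A sketch of the expected call pattern
// (illustrative names, not part of this file):
//
//	res, cleanup, err := store.FetchCompressedResult(ctx, query, opts)
//	if err != nil {
//		return err
//	}
//	defer cleanup()
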
// fetchCompressed fetches compressed series, returning a MultiFetchResult
// accumulator.
func (s *m3storage) fetchCompressed(
	ctx context.Context,
	query *storage.FetchQuery,
	options *storage.FetchOptions,
	queryOptions index.QueryOptions,
) (consolidators.MultiFetchResult, index.Query, error) {
	if err := options.BlockType.Validate(); err != nil {
		// This is an invariant error; should not be able to get here.
		return nil, index.Query{}, instrument.InvariantErrorf("invalid block type on "+
			"fetch, got: %v with error %v", options.BlockType, err)
	}

	// Check if the query was interrupted.
	select {
	case <-ctx.Done():
		return nil, index.Query{}, ctx.Err()
	default:
	}

	m3query, err := storage.FetchQueryToM3Query(query, options)
	if err != nil {
		return nil, index.Query{}, err
	}

	var (
		queryStart = queryOptions.StartInclusive
		queryEnd   = queryOptions.EndExclusive
	)

	// NB(r): Since we don't use a single index we fan out to each
	// cluster that can completely fulfill this range and then prefer the
	// highest resolution (most fine-grained) results.
	// This needs to be optimized, however this is a start.
	fanout, namespaces, err := resolveClusterNamespacesForQuery(
		xtime.ToUnixNano(s.nowFn()),
		queryStart,
		queryEnd,
		s.clusters,
		options.FanoutOptions,
		options.RestrictQueryOptions,
		options.RelatedQueryOptions,
	)
	if err != nil {
		return nil, index.Query{}, err
	}

	if s.logger.Core().Enabled(zapcore.DebugLevel) {
		for _, n := range namespaces {
			// NB(r): Need to perform log on inner loop, cannot reuse a
			// checked entry returned from logger.Check(...).
			// Will see: "Unsafe CheckedEntry re-use near Entry ..." otherwise.
			debugLog := s.logger.Check(zapcore.DebugLevel,
				"query resolved cluster namespace, will use most granular per result")
			if debugLog == nil {
				continue
			}

			debugLog.Write(
				zap.String("query", query.Raw),
				zap.String("m3query", m3query.String()),
				zap.Time("start", queryStart.ToTime()),
				zap.Time("narrowing.start", n.narrowing.start.ToTime()),
				zap.Time("end", queryEnd.ToTime()),
				zap.Time("narrowing.end", n.narrowing.end.ToTime()),
				zap.String("fanoutType", fanout.String()),
				zap.String("namespace", n.NamespaceID().String()),
				zap.String("type", n.Options().Attributes().MetricsType.String()),
				zap.String("retention", n.Options().Attributes().Retention.String()),
				zap.String("resolution", n.Options().Attributes().Resolution.String()),
				zap.Bool("remote", options.Remote))
		}
	}

	var wg sync.WaitGroup
	if len(namespaces) == 0 {
		return nil, index.Query{}, errNoNamespacesConfigured
	}

	matchOpts := s.opts.SeriesConsolidationMatchOptions()
	tagOpts := s.opts.TagOptions()
	limitOpts := consolidators.LimitOptions{
		Limit: options.SeriesLimit,
		// Piggyback on the new InstanceMultiple option to enable checking
		// RequireExhaustive. This preserves the existing buggy behavior of the
		// coordinators not requiring exhaustive. Once InstanceMultiple is
		// enabled by default, this can be removed.
		RequireExhaustive: queryOptions.InstanceMultiple > 0 && options.RequireExhaustive,
	}
	result := consolidators.NewMultiFetchResult(fanout, matchOpts, tagOpts, limitOpts)
	for _, namespace := range namespaces {
		namespace := namespace // Capture var

		wg.Add(1)
		go func() {
			defer wg.Done()
			_, span, sampled := xcontext.StartSampledTraceSpan(ctx,
				tracepoint.FetchCompressedFetchTagged)
			defer span.Finish()

			session := namespace.Session()
			namespaceID := namespace.NamespaceID()
			narrowedQueryOpts := narrowQueryOpts(queryOptions, namespace)
			iters, metadata, err := session.FetchTagged(ctx, namespaceID, m3query, narrowedQueryOpts)
			if err == nil && sampled {
				span.LogFields(
					log.String("namespace", namespaceID.String()),
					log.Int("series", iters.Len()),
					log.Bool("exhaustive", metadata.Exhaustive),
					log.Int("responses", metadata.Responses),
					log.Int("estimateTotalBytes", metadata.EstimateTotalBytes),
				)
			}

			blockMeta := block.NewResultMetadata()
			blockMeta.AddNamespace(namespaceID.String())
			blockMeta.FetchedResponses = metadata.Responses
			blockMeta.FetchedBytesEstimate = metadata.EstimateTotalBytes
			blockMeta.Exhaustive = metadata.Exhaustive
			blockMeta.WaitedIndex = metadata.WaitedIndex
			blockMeta.WaitedSeriesRead = metadata.WaitedSeriesRead
			// Add this namespace's result to the accumulator; any fetch error
			// travels with the result rather than failing fast here.
			result.Add(consolidators.MultiFetchResults{
				SeriesIterators: iters,
				Metadata:        blockMeta,
				Attrs:           namespace.Options().Attributes(),
				Err:             err,
			})
		}()
	}

	wg.Wait()

	// Check if the query was interrupted.
	select {
	case <-ctx.Done():
		return nil, index.Query{}, ctx.Err()
	default:
	}

	return result, m3query, err
}
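
// To illustrate the fan-out performed by fetchCompressed (hypothetical
// namespace configuration): with an unaggregated namespace retaining 48h and
// an aggregated namespace at 5m:720h, a query over the last 30d can only be
// fulfilled by the aggregated namespace, while a query over the last hour
// fans out to both and the accumulator keeps the most granular series per
// result.
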
func (s *m3storage) SearchSeries(
	ctx context.Context,
	query *storage.FetchQuery,
	options *storage.FetchOptions,
) (*storage.SearchResults, error) {
	tagResult, cleanup, err := s.SearchCompressed(ctx, query, options)
	defer cleanup()
	if err != nil {
		return nil, err
	}

	metrics := make(models.Metrics, 0, len(tagResult.Tags))
	for _, result := range tagResult.Tags {
		m, err := storage.FromM3IdentToMetric(result.ID,
			result.Iter, s.opts.TagOptions())
		if err != nil {
			return nil, err
		}

		metrics = append(metrics, m)
	}

	return &storage.SearchResults{
		Metrics:  metrics,
		Metadata: tagResult.Metadata,
	}, nil
}

// CompleteTagsCompressed has the same behavior as CompleteTags.
func (s *m3storage) CompleteTagsCompressed(
	ctx context.Context,
	query *storage.CompleteTagsQuery,
	options *storage.FetchOptions,
) (*consolidators.CompleteTagsResult, error) {
	return s.CompleteTags(ctx, query, options)
}
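
// CompleteTags below serves both completion modes: with query.CompleteNameOnly
// set, only tag names are accumulated by the result builder; otherwise names
// and their observed values are merged across namespaces into entries like
// (illustrative data only):
//
//	consolidators.CompletedTag{
//		Name:   []byte("job"),
//		Values: [][]byte{[]byte("api"), []byte("worker")},
//	}
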
func (s *m3storage) CompleteTags(
	ctx context.Context,
	query *storage.CompleteTagsQuery,
	options *storage.FetchOptions,
) (*consolidators.CompleteTagsResult, error) {
	// Check if the query was interrupted.
	select {
	case <-ctx.Done():
		return nil, ctx.Err()
	default:
	}

	fetchQuery := &storage.FetchQuery{
		TagMatchers: query.TagMatchers,
	}

	m3query, err := storage.FetchQueryToM3Query(fetchQuery, options)
	if err != nil {
		return nil, err
	}

	aggOpts, err := storage.FetchOptionsToAggregateOptions(options, query)
	if err != nil {
		return nil, err
	}

	var (
		queryStart      = aggOpts.StartInclusive
		queryEnd        = aggOpts.EndExclusive
		nameOnly        = query.CompleteNameOnly
		tagOpts         = s.opts.TagOptions()
		accumulatedTags = consolidators.NewCompleteTagsResultBuilder(nameOnly, tagOpts)
		multiErr        syncMultiErrs
		wg              sync.WaitGroup
	)

	debugLog := s.logger.Check(zapcore.DebugLevel,
		"completing tags")
	if debugLog != nil {
		filters := make([]string, len(query.FilterNameTags))
		for i, t := range query.FilterNameTags {
			filters[i] = string(t)
		}

		debugLog.Write(zap.Bool("nameOnly", nameOnly),
			zap.Strings("filterNames", filters),
			zap.String("matchers", query.TagMatchers.String()),
			zap.String("m3query", m3query.String()),
			zap.Time("start", queryStart.ToTime()),
			zap.Time("end", queryEnd.ToTime()),
			zap.Bool("remote", options.Remote),
		)
	}

	// NB(r): Since we don't use a single index we fan out to each
	// cluster that can completely fulfill this range and then prefer the
	// highest resolution (most fine-grained) results.
	// This needs to be optimized, however this is a start.
	_, namespaces, err := resolveClusterNamespacesForQuery(xtime.ToUnixNano(s.nowFn()),
		queryStart,
		queryEnd,
		s.clusters,
		options.FanoutOptions,
		options.RestrictQueryOptions,
		nil)
	if err != nil {
		return nil, err
	}

	var mu sync.Mutex
	aggIterators := make([]client.AggregatedTagsIterator, 0, len(namespaces))
	defer func() {
		mu.Lock()
		for _, it := range aggIterators {
			it.Finalize()
		}
		mu.Unlock()
	}()

	wg.Add(len(namespaces))
	for _, namespace := range namespaces {
		namespace := namespace // Capture var
		go func() {
			_, span, sampled := xcontext.StartSampledTraceSpan(ctx, tracepoint.CompleteTagsAggregate)
			defer func() {
				span.Finish()
				wg.Done()
			}()

			session := namespace.Session()
			namespaceID := namespace.NamespaceID()
			narrowedAggOpts := narrowAggOpts(aggOpts, namespace)
			aggTagIter, metadata, err := session.Aggregate(ctx, namespaceID, m3query, narrowedAggOpts)
			if err != nil {
				multiErr.add(err)
				return
			}

			if sampled {
				span.LogFields(
					log.String("namespace", namespaceID.String()),
					log.Int("results", aggTagIter.Remaining()),
					log.Bool("exhaustive", metadata.Exhaustive),
					log.Int("responses", metadata.Responses),
					log.Int("estimateTotalBytes", metadata.EstimateTotalBytes),
				)
			}

			mu.Lock()
			aggIterators = append(aggIterators, aggTagIter)
			mu.Unlock()

			completedTags := make([]consolidators.CompletedTag, 0, aggTagIter.Remaining())
			for aggTagIter.Next() {
				name, values := aggTagIter.Current()
				tagValues := make([][]byte, 0, values.Remaining())
				for values.Next() {
					tagValues = append(tagValues, values.Current().Bytes())
				}

				if err := values.Err(); err != nil {
					multiErr.add(err)
					return
				}

				completedTags = append(completedTags, consolidators.CompletedTag{
					Name:   name.Bytes(),
					Values: tagValues,
				})
			}
			if err := aggTagIter.Err(); err != nil {
				multiErr.add(err)
				return
			}

			blockMeta := block.NewResultMetadata()
			blockMeta.AddNamespace(namespaceID.String())
			blockMeta.FetchedResponses = metadata.Responses
			blockMeta.FetchedBytesEstimate = metadata.EstimateTotalBytes
			blockMeta.Exhaustive = metadata.Exhaustive
			blockMeta.WaitedIndex = metadata.WaitedIndex
			blockMeta.WaitedSeriesRead = metadata.WaitedSeriesRead
			result := &consolidators.CompleteTagsResult{
				CompleteNameOnly: query.CompleteNameOnly,
				CompletedTags:    completedTags,
				Metadata:         blockMeta,
			}

			if err := accumulatedTags.Add(result); err != nil {
				multiErr.add(err)
			}
		}()
	}

	wg.Wait()
	if err := multiErr.lastError(); err != nil {
		return nil, err
	}

	built := accumulatedTags.Build()
	return &built, nil
}
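
// SearchCompressed below follows the same contract as FetchCompressedResult:
// the returned Cleanup (here, result.Close) must be invoked once the
// TagResult is no longer needed, as SearchSeries above does with
// defer cleanup().
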
func (s *m3storage) SearchCompressed(
	ctx context.Context,
	query *storage.FetchQuery,
	options *storage.FetchOptions,
) (consolidators.TagResult, Cleanup, error) {
	tagResult := consolidators.TagResult{
		Metadata: block.NewResultMetadata(),
	}

	// Check if the query was interrupted.
	select {
	case <-ctx.Done():
		return tagResult, noop, ctx.Err()
	default:
	}

	m3query, err := storage.FetchQueryToM3Query(query, options)
	if err != nil {
		return tagResult, noop, err
	}

	m3opts, err := storage.FetchOptionsToM3Options(options, query)
	if err != nil {
		return tagResult, noop, err
	}

	var (
		queryStart = m3opts.StartInclusive
		queryEnd   = m3opts.EndExclusive
		result     = consolidators.NewMultiFetchTagsResult(s.opts.TagOptions())
		wg         sync.WaitGroup
	)

	// NB(r): Since we don't use a single index we fan out to each
	// cluster that can completely fulfill this range and then prefer the
	// highest resolution (most fine-grained) results.
	// This needs to be optimized, however this is a start.
	_, namespaces, err := resolveClusterNamespacesForQuery(xtime.ToUnixNano(s.nowFn()),
		queryStart,
		queryEnd,
		s.clusters,
		options.FanoutOptions,
		options.RestrictQueryOptions,
		nil)
	if err != nil {
		return tagResult, noop, err
	}

	debugLog := s.logger.Check(zapcore.DebugLevel,
		"searching")
	if debugLog != nil {
		debugLog.Write(zap.String("query", query.Raw),
			zap.String("m3_query", m3query.String()),
			zap.Time("start", queryStart.ToTime()),
			zap.Time("end", queryEnd.ToTime()),
			zap.Bool("remote", options.Remote),
		)
	}

	wg.Add(len(namespaces))
	for _, namespace := range namespaces {
		namespace := namespace // Capture var
		go func() {
			_, span, sampled := xcontext.StartSampledTraceSpan(ctx,
				tracepoint.SearchCompressedFetchTaggedIDs)
			defer span.Finish()

			session := namespace.Session()
			namespaceID := namespace.NamespaceID()
			narrowedM3Opts := narrowQueryOpts(m3opts, namespace)
			iter, metadata, err := session.FetchTaggedIDs(ctx, namespaceID, m3query, narrowedM3Opts)
			if err == nil && sampled {
				span.LogFields(
					log.String("namespace", namespaceID.String()),
					log.Int("series", iter.Remaining()),
					log.Bool("exhaustive", metadata.Exhaustive),
					log.Int("responses", metadata.Responses),
					log.Int("estimateTotalBytes", metadata.EstimateTotalBytes),
				)
			}

			blockMeta := block.NewResultMetadata()
			blockMeta.AddNamespace(namespaceID.String())
			blockMeta.FetchedResponses = metadata.Responses
			blockMeta.FetchedBytesEstimate = metadata.EstimateTotalBytes
			blockMeta.Exhaustive = metadata.Exhaustive
			blockMeta.WaitedIndex = metadata.WaitedIndex
			blockMeta.WaitedSeriesRead = metadata.WaitedSeriesRead
			result.Add(iter, blockMeta, err)
			wg.Done()
		}()
	}

	wg.Wait()

	tagResult, err = result.FinalResult()
	return tagResult, result.Close, err
}
func (s *m3storage) Write(
	ctx context.Context,
	query *storage.WriteQuery,
) error {
	if query == nil {
		return errors.ErrNilWriteQuery
	}

	var (
		// TODO: Pool this once an ident pool is set up. We will have
		// to stop calling NoFinalize() below if we do that.
		tags       = query.Tags()
		datapoints = query.Datapoints()
		idBuf      = tags.ID()
		id         = ident.BytesID(idBuf)
		err        error
		namespace  ClusterNamespace
		exists     bool
	)

	attributes := query.Attributes()
	switch attributes.MetricsType {
	case storagemetadata.UnaggregatedMetricsType:
		namespace, exists = s.clusters.UnaggregatedClusterNamespace()
		if !exists {
			err = errUnaggregatedNamespaceUninitialized
		}
	case storagemetadata.AggregatedMetricsType:
		attrs := RetentionResolution{
			Retention:  attributes.Retention,
			Resolution: attributes.Resolution,
		}
		namespace, exists = s.clusters.AggregatedClusterNamespace(attrs)
		if !exists {
			err = fmt.Errorf("no configured cluster namespace for: retention=%s,"+
				" resolution=%s", attrs.Retention.String(), attrs.Resolution.String())
			break
		}
		if namespace.Options().ReadOnly() {
			err = fmt.Errorf(
				"cannot write to read only namespace %s (%s:%s)",
				namespace.NamespaceID(), attrs.Resolution.String(), attrs.Retention.String())
		}
	default:
		metricsType := attributes.MetricsType
		err = fmt.Errorf("invalid write request metrics type: %s (%d)",
			metricsType.String(), uint(metricsType))
	}
	if err != nil {
		return err
	}

	// Set id to NoFinalize to avoid cloning it in write operations.
	id.NoFinalize()

	if s.opts.RateLimiter().Limit(ctx, namespace, datapoints, tags.Tags) {
		return xerrors.NewResourceExhaustedError(goerrors.New("rate limit exceeded"))
	}

	tags.Tags, err = s.opts.TagsTransform()(ctx, namespace, tags.Tags)
	if err != nil {
		return err
	}
	tagIterator := storage.TagsToIdentTagIterator(tags)

	if len(datapoints) == 1 {
		// Special case a single datapoint because it is common and we
		// can avoid the overhead of a waitgroup, goroutine, multierr,
		// iterator duplication etc.
		return s.writeSingle(query, datapoints[0], id, tagIterator, namespace)
	}
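
	// A quick illustration of the per-datapoint worker timeout computed
	// below (hypothetical numbers): with 500ms left before the context
	// deadline, the wait stays at the minWriteWaitTimeout floor of 1s; with
	// 5s left, it grows to the full 5s remaining.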
	var (
		wg       sync.WaitGroup
		multiErr syncMultiErrs
	)

	for _, datapoint := range datapoints {
		tagIter := tagIterator.Duplicate()
		// capture var
		datapoint := datapoint
		wg.Add(1)

		var (
			now                      = time.Now()
			deadline, deadlineExists = ctx.Deadline()
			timeout                  = minWriteWaitTimeout
		)
		if deadlineExists {
			if remain := deadline.Sub(now); remain >= timeout {
				timeout = remain
			}
		}
		spawned := s.opts.WriteWorkerPool().GoWithTimeout(func() {
			if err := s.writeSingle(query, datapoint, id, tagIter, namespace); err != nil {
				multiErr.add(err)
			}

			tagIter.Close()
			wg.Done()
		}, timeout)
		if !spawned {
			multiErr.add(fmt.Errorf("timeout exceeded waiting: %v", timeout))
		}
	}

	wg.Wait()
	return multiErr.lastError()
}

func (s *m3storage) Type() storage.Type {
	return storage.TypeLocalDC
}

func (s *m3storage) Close() error {
	return nil
}

func (s *m3storage) writeSingle(
	query *storage.WriteQuery,
	datapoint ts.Datapoint,
	identID ident.ID,
	iterator ident.TagIterator,
	namespace ClusterNamespace,
) error {
	namespaceID := namespace.NamespaceID()
	session := namespace.Session()
	return session.WriteTagged(namespaceID, identID, iterator,
		datapoint.Timestamp, datapoint.Value, query.Unit(), query.Annotation())
}

func narrowQueryOpts(o index.QueryOptions, namespace resolvedNamespace) index.QueryOptions {
	narrowed := o
	if !namespace.narrowing.start.IsZero() && namespace.narrowing.start.After(o.StartInclusive) {
		narrowed.StartInclusive = namespace.narrowing.start
	}
	if !namespace.narrowing.end.IsZero() && namespace.narrowing.end.Before(o.EndExclusive) {
		narrowed.EndExclusive = namespace.narrowing.end
	}

	return narrowed
}

func narrowAggOpts(o index.AggregationOptions, namespace resolvedNamespace) index.AggregationOptions {
	narrowed := o
	narrowed.QueryOptions = narrowQueryOpts(o.QueryOptions, namespace)

	return narrowed
}
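
// An illustration of the narrowing rules above (hypothetical times): if a
// namespace resolves with narrowing.start = 12:00 while the query's
// StartInclusive is 11:00, narrowQueryOpts advances StartInclusive to 12:00;
// a zero narrowing bound leaves the corresponding option untouched, so an
// un-narrowed namespace sees the query range unchanged.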