github.com/m3db/m3@v1.5.0/src/query/functions/temporal/base.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package temporal
    22  
    23  import (
    24  	"context"
    25  	"fmt"
    26  	"runtime"
    27  	"sync"
    28  	"time"
    29  
    30  	"github.com/m3db/m3/src/query/block"
    31  	"github.com/m3db/m3/src/query/executor/transform"
    32  	"github.com/m3db/m3/src/query/models"
    33  	"github.com/m3db/m3/src/query/parser"
    34  	"github.com/m3db/m3/src/query/tracepoint"
    35  	"github.com/m3db/m3/src/query/ts"
    36  	xcontext "github.com/m3db/m3/src/x/context"
    37  	xerrors "github.com/m3db/m3/src/x/errors"
    38  	xtime "github.com/m3db/m3/src/x/time"
    39  
    40  	opentracing "github.com/opentracing/opentracing-go"
    41  )
    42  
// emptyOp is the zero-value baseOp.
// NOTE(review): presumably returned alongside an error by the op constructors
// elsewhere in this package — confirm against callers.
var emptyOp = baseOp{}
    44  
// iterationBounds describes the time window evaluated for a single step. It is
// handed to processor.process together with the datapoints selected for that
// window.
type iterationBounds struct {
	start xtime.UnixNano // lower timestamp of the window.
	end   xtime.UnixNano // upper timestamp of the window.
}
    49  
// makeProcessor is a factory for processor values. The node calls initialize
// once per series batch when processing batches concurrently, and once per
// block when processing series one by one.
type makeProcessor interface {
	// initialize initializes the processor. It receives the duration of the
	// operation and the transform options the node was created with.
	initialize(duration time.Duration, opts transform.Options) processor
}
    55  
// processor is implemented by the underlying transforms.
type processor interface {
	// process computes the output value for a single step. valueBuffer holds
	// the datapoints selected for the step's window and is empty when no valid
	// candidate points were found; iterationBounds carries the window's start
	// and end timestamps.
	process(valueBuffer ts.Datapoints, iterationBounds iterationBounds) float64
}
    60  
// baseOp stores required properties for logical operations.
type baseOp struct {
	operatorType string        // reported by OpType; also used as the tracing span name.
	duration     time.Duration // length of the window each step aggregates over.
	processorFn  makeProcessor // factory for the processors that compute step values.
}
    67  
    68  func newBaseOp(
    69  	duration time.Duration,
    70  	operatorType string,
    71  	processorFn makeProcessor,
    72  ) (baseOp, error) {
    73  	return baseOp{
    74  		operatorType: operatorType,
    75  		processorFn:  processorFn,
    76  		duration:     duration,
    77  	}, nil
    78  }
    79  
// OpType returns the operator type this op was created with.
func (o baseOp) OpType() string {
	return o.operatorType
}
    83  
    84  func (o baseOp) String() string {
    85  	return fmt.Sprintf("type: %s, duration: %v", o.OpType(), o.duration)
    86  }
    87  
    88  // Node creates an execution node.
    89  func (o baseOp) Node(
    90  	controller *transform.Controller,
    91  	opts transform.Options,
    92  ) transform.OpNode {
    93  	return &baseNode{
    94  		controller:    controller,
    95  		op:            o,
    96  		makeProcessor: o.processorFn,
    97  		transformOpts: opts,
    98  	}
    99  }
   100  
// baseNode is an execution node. Its Process method applies the op to an
// incoming block and hands the built result block to the controller.
type baseNode struct {
	// controller uses an interface here so we can mock it out in tests.
	// TODO: use an exported interface everywhere instead of *transform.Controller.
	// https://github.com/m3db/m3/issues/1430
	controller    controller
	op            baseOp            // the operation this node was created from.
	makeProcessor makeProcessor     // creates the processors that compute step values.
	transformOpts transform.Options // passed to makeProcessor.initialize.
}
   111  
   112  func (c *baseNode) Process(
   113  	queryCtx *models.QueryContext,
   114  	id parser.NodeID,
   115  	b block.Block,
   116  ) error {
   117  	sp, ctx := opentracing.StartSpanFromContext(queryCtx.Ctx, c.op.OpType())
   118  	defer sp.Finish()
   119  
   120  	resultMeta := b.Meta().ResultMetadata
   121  	resultMeta.VerifyTemporalRange(c.op.duration)
   122  
   123  	meta := b.Meta()
   124  	bounds := meta.Bounds
   125  	if bounds.Duration == 0 {
   126  		return fmt.Errorf("bound duration cannot be 0, bounds: %v", bounds)
   127  	}
   128  
   129  	m := blockMeta{
   130  		end:         bounds.Start,
   131  		queryCtx:    queryCtx,
   132  		aggDuration: xtime.UnixNano(c.op.duration),
   133  		stepSize:    xtime.UnixNano(bounds.StepSize),
   134  		steps:       bounds.Steps(),
   135  		resultMeta:  resultMeta,
   136  	}
   137  
   138  	concurrency := runtime.GOMAXPROCS(0)
   139  	var builder block.Builder
   140  	batches, err := b.MultiSeriesIter(concurrency)
   141  	if err != nil {
   142  		// NB: If the unconsolidated block does not support multi series iteration,
   143  		// fallback to processing series one by one.
   144  		builder, err = c.singleProcess(ctx, b, m)
   145  	} else {
   146  		builder, err = c.batchProcess(ctx, b, batches, m)
   147  	}
   148  
   149  	if err != nil {
   150  		return err
   151  	}
   152  
   153  	// NB: safe to close the block here.
   154  	if err := b.Close(); err != nil {
   155  		return err
   156  	}
   157  
   158  	bl := builder.Build()
   159  	defer bl.Close()
   160  	return c.controller.Process(queryCtx, bl)
   161  }
   162  
// blockMeta carries the per-block values that Process computes once and passes
// to batchProcess, parallelProcess and singleProcess.
type blockMeta struct {
	end         xtime.UnixNano       // end of the first step's window; set from the block bounds' Start.
	aggDuration xtime.UnixNano       // the op's duration; a window starts at its end minus aggDuration.
	stepSize    xtime.UnixNano       // amount both window bounds advance per step.
	queryCtx    *models.QueryContext // passed to the controller when creating the block builder.
	steps       int                  // number of steps computed per series.
	resultMeta  block.ResultMetadata // written onto the metadata of the built block.
}
   171  
   172  func (c *baseNode) batchProcess(
   173  	ctx context.Context,
   174  	b block.Block,
   175  	iterBatches []block.SeriesIterBatch,
   176  	m blockMeta,
   177  ) (block.Builder, error) {
   178  	var (
   179  		mu       sync.Mutex
   180  		wg       sync.WaitGroup
   181  		multiErr xerrors.MultiError
   182  		idx      int
   183  	)
   184  
   185  	meta := b.Meta()
   186  	meta.ResultMetadata = m.resultMeta
   187  	builder, err := c.controller.BlockBuilder(m.queryCtx, meta, nil)
   188  	if err != nil {
   189  		return nil, err
   190  	}
   191  
   192  	err = builder.AddCols(m.steps)
   193  	if err != nil {
   194  		return nil, err
   195  	}
   196  
   197  	numSeries := 0
   198  	for _, b := range iterBatches {
   199  		numSeries += b.Size
   200  	}
   201  
   202  	builder.PopulateColumns(numSeries)
   203  	for _, batch := range iterBatches {
   204  		wg.Add(1)
   205  		// capture loop variables
   206  		loopIndex := idx
   207  		batch := batch
   208  		idx = idx + batch.Size
   209  		p := c.makeProcessor.initialize(c.op.duration, c.transformOpts)
   210  		go func() {
   211  			err := parallelProcess(ctx, c.op.OpType(), loopIndex, batch.Iter, builder, m, p, &mu)
   212  			if err != nil {
   213  				mu.Lock()
   214  				// NB: this no-ops if the error is nil.
   215  				multiErr = multiErr.Add(err)
   216  				mu.Unlock()
   217  			}
   218  
   219  			wg.Done()
   220  		}()
   221  	}
   222  
   223  	wg.Wait()
   224  	return builder, multiErr.FinalError()
   225  }
   226  
   227  func parallelProcess(
   228  	ctx context.Context,
   229  	opType string,
   230  	idx int,
   231  	iter block.SeriesIter,
   232  	builder block.Builder,
   233  	blockMeta blockMeta,
   234  	processor processor,
   235  	mu *sync.Mutex,
   236  ) error {
   237  	var (
   238  		start          = time.Now()
   239  		decodeDuration time.Duration
   240  	)
   241  	defer func() {
   242  		if decodeDuration == 0 {
   243  			return // Do not record this span if instrumentation is not turned on.
   244  		}
   245  
   246  		// Simulate as if we did all the decoding up front so we can visualize
   247  		// how much decoding takes relative to the entire processing of the function.
   248  		_, sp, _ := xcontext.StartSampledTraceSpan(ctx,
   249  			tracepoint.TemporalDecodeParallel, opentracing.StartTime(start))
   250  		sp.FinishWithOptions(opentracing.FinishOptions{
   251  			FinishTime: start.Add(decodeDuration),
   252  		})
   253  	}()
   254  
   255  	values := make([]float64, 0, blockMeta.steps)
   256  	metas := iter.SeriesMeta()
   257  	for i := 0; iter.Next(); i++ {
   258  		if i >= len(metas) {
   259  			return fmt.Errorf("invalid series meta index: %d, max %d", i, len(metas))
   260  		}
   261  
   262  		var (
   263  			newVal float64
   264  			init   = 0
   265  			end    = blockMeta.end
   266  			start  = end - blockMeta.aggDuration
   267  			step   = blockMeta.stepSize
   268  
   269  			series     = iter.Current()
   270  			datapoints = series.Datapoints()
   271  			stats      = series.Stats()
   272  			seriesMeta = metas[i]
   273  		)
   274  
   275  		if stats.Enabled {
   276  			decodeDuration += stats.DecodeDuration
   277  		}
   278  
   279  		// The last_over_time function acts like offset;
   280  		// thus, it should keep the metric name.
   281  		// For all other functions,
   282  		// rename series to exclude their __name__ tag as part of function processing.
   283  		if opType != LastType {
   284  			seriesMeta.Tags = seriesMeta.Tags.WithoutName()
   285  			seriesMeta.Name = seriesMeta.Tags.ID()
   286  		}
   287  
   288  		values = values[:0]
   289  		for i := 0; i < blockMeta.steps; i++ {
   290  			iterBounds := iterationBounds{
   291  				start: start,
   292  				end:   end,
   293  			}
   294  
   295  			l, r, b := getIndices(datapoints, start, end, init)
   296  			if !b {
   297  				newVal = processor.process(ts.Datapoints{}, iterBounds)
   298  			} else {
   299  				init = l
   300  				newVal = processor.process(datapoints[l:r], iterBounds)
   301  			}
   302  
   303  			values = append(values, newVal)
   304  			start += step
   305  			end += step
   306  		}
   307  
   308  		mu.Lock()
   309  
   310  		// NB: this sets the values internally, so no need to worry about keeping
   311  		// a reference to underlying `values`.
   312  		err := builder.SetRow(idx, values, seriesMeta)
   313  		mu.Unlock()
   314  		idx++
   315  		if err != nil {
   316  			return err
   317  		}
   318  	}
   319  
   320  	return iter.Err()
   321  }
   322  
   323  func (c *baseNode) singleProcess(
   324  	ctx context.Context,
   325  	b block.Block,
   326  	m blockMeta,
   327  ) (block.Builder, error) {
   328  	var (
   329  		start          = time.Now()
   330  		decodeDuration time.Duration
   331  	)
   332  
   333  	defer func() {
   334  		if decodeDuration == 0 {
   335  			return // Do not record this span if instrumentation is not turned on.
   336  		}
   337  		// Simulate as if we did all the decoding up front so we can visualize
   338  		// how much decoding takes relative to the entire processing of the function.
   339  		_, sp, _ := xcontext.StartSampledTraceSpan(ctx,
   340  			tracepoint.TemporalDecodeSingle, opentracing.StartTime(start))
   341  		sp.FinishWithOptions(opentracing.FinishOptions{
   342  			FinishTime: start.Add(decodeDuration),
   343  		})
   344  	}()
   345  
   346  	seriesIter, err := b.SeriesIter()
   347  	if err != nil {
   348  		return nil, err
   349  	}
   350  
   351  	// The last_over_time function acts like offset;
   352  	// thus, it should keep the metric name.
   353  	// For all other functions,
   354  	// rename series to exclude their __name__ tag as part of function processing.
   355  	var resultSeriesMeta []block.SeriesMeta
   356  	if c.op.OpType() != LastType {
   357  		resultSeriesMeta = make([]block.SeriesMeta, 0, len(seriesIter.SeriesMeta()))
   358  		for _, m := range seriesIter.SeriesMeta() {
   359  			tags := m.Tags.WithoutName()
   360  			resultSeriesMeta = append(resultSeriesMeta, block.SeriesMeta{
   361  				Name: tags.ID(),
   362  				Tags: tags,
   363  			})
   364  		}
   365  	} else {
   366  		resultSeriesMeta = seriesIter.SeriesMeta()
   367  	}
   368  
   369  	meta := b.Meta()
   370  	meta.ResultMetadata = m.resultMeta
   371  	builder, err := c.controller.BlockBuilder(m.queryCtx, meta, resultSeriesMeta)
   372  	if err != nil {
   373  		return nil, err
   374  	}
   375  
   376  	err = builder.AddCols(m.steps)
   377  	if err != nil {
   378  		return nil, err
   379  	}
   380  
   381  	p := c.makeProcessor.initialize(c.op.duration, c.transformOpts)
   382  	for seriesIter.Next() {
   383  		var (
   384  			newVal float64
   385  			init   = 0
   386  			end    = m.end
   387  			start  = end - m.aggDuration
   388  			step   = m.stepSize
   389  
   390  			series     = seriesIter.Current()
   391  			datapoints = series.Datapoints()
   392  			stats      = series.Stats()
   393  		)
   394  
   395  		if stats.Enabled {
   396  			decodeDuration += stats.DecodeDuration
   397  		}
   398  
   399  		for i := 0; i < m.steps; i++ {
   400  			iterBounds := iterationBounds{
   401  				start: start,
   402  				end:   end,
   403  			}
   404  
   405  			l, r, b := getIndices(datapoints, start, end, init)
   406  			if !b {
   407  				newVal = p.process(ts.Datapoints{}, iterBounds)
   408  			} else {
   409  				init = l
   410  				newVal = p.process(datapoints[l:r], iterBounds)
   411  			}
   412  
   413  			if err := builder.AppendValue(i, newVal); err != nil {
   414  				return nil, err
   415  			}
   416  
   417  			start += step
   418  			end += step
   419  		}
   420  	}
   421  
   422  	return builder, seriesIter.Err()
   423  }
   424  
   425  // getIndices returns the index of the points on the left and the right of the
   426  // datapoint list given a starting index, as well as a boolean indicating if
   427  // the returned indices are valid.
   428  //
   429  // NB: return values from getIndices should be used as subslice indices rather
   430  // than direct index accesses, as that may cause panics when reaching the end of
   431  // the datapoint list.
   432  func getIndices(
   433  	dps []ts.Datapoint,
   434  	lBound xtime.UnixNano,
   435  	rBound xtime.UnixNano,
   436  	init int,
   437  ) (int, int, bool) {
   438  	if init >= len(dps) || init < 0 {
   439  		return -1, -1, false
   440  	}
   441  
   442  	var (
   443  		l, r      = init, -1
   444  		leftBound = false
   445  	)
   446  
   447  	for i, dp := range dps[init:] {
   448  		ts := dp.Timestamp
   449  		if !leftBound {
   450  			// Trying to set left bound.
   451  			if ts < lBound {
   452  				// data point before 0.
   453  				continue
   454  			}
   455  
   456  			leftBound = true
   457  			l = i
   458  		}
   459  
   460  		if ts <= rBound {
   461  			continue
   462  		}
   463  
   464  		r = i
   465  		break
   466  	}
   467  
   468  	if r == -1 {
   469  		r = len(dps)
   470  	} else {
   471  		r = r + init
   472  	}
   473  
   474  	if leftBound {
   475  		l = l + init
   476  	} else {
   477  		// if left bound was not found, there are no valid candidate points here.
   478  		return l, r, false
   479  	}
   480  
   481  	return l, r, true
   482  }