github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/cmd/services/m3comparator/main/parser/series_load.go (about)

     1  // Copyright (c) 2020 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package parser
    22  
    23  import (
    24  	"encoding/json"
    25  	"fmt"
    26  	"io"
    27  	"io/ioutil"
    28  	"sync"
    29  	"time"
    30  
    31  	"github.com/m3db/m3/src/dbnode/encoding"
    32  	"github.com/m3db/m3/src/dbnode/ts"
    33  	"github.com/m3db/m3/src/dbnode/x/xio"
    34  	"github.com/m3db/m3/src/query/models"
    35  	"github.com/m3db/m3/src/x/ident"
    36  	"github.com/m3db/m3/src/x/instrument"
    37  	xtime "github.com/m3db/m3/src/x/time"
    38  
    39  	"go.uber.org/zap"
    40  )
    41  
// Options are options for series parsing.
type Options struct {
	// EncoderPool supplies encoders used to encode loaded datapoints.
	EncoderPool encoding.EncoderPool
	// IteratorPools supplies the multi-reader iterator pool used when
	// building series iterators.
	IteratorPools encoding.IteratorPools
	// TagOptions provides tag conventions (e.g. the metric name tag key).
	TagOptions models.TagOptions
	// InstrumentOptions provides the logger.
	InstrumentOptions instrument.Options
	// Size is the initial capacity hint for the loaded-series map
	// (defaults to 10 when zero).
	Size int
}
    50  
// nameIDSeriesMap maps a metric name to the set of series loaded
// under that name.
type nameIDSeriesMap map[string]idSeriesMap

// idSeriesMap holds all series for a single metric name, keyed by
// series ID, along with the shared start/end range for the set.
type idSeriesMap struct {
	start  time.Time
	end    time.Time
	series map[string][]Series
}
    58  
// seriesReader is the SeriesReader implementation; the embedded
// RWMutex guards nameIDSeriesMap across Load/SeriesIterators/Clear.
type seriesReader struct {
	iterOpts        Options
	nameIDSeriesMap nameIDSeriesMap
	sync.RWMutex
}
    64  
// SeriesReader reads SeriesIterators from a generic io.Reader.
type SeriesReader interface {
	// SeriesIterators returns iterators for the series loaded under the
	// given metric name; an empty name selects all loaded series.
	SeriesIterators(name string) (encoding.SeriesIterators, error)
	// Load reads and indexes a JSON-encoded list of series.
	Load(reader io.Reader) error
	// Clear removes all loaded series.
	Clear()
}
    71  
    72  // NewSeriesReader creates a new SeriesReader that reads entries as
    73  // a slice of Series.
    74  func NewSeriesReader(opts Options) SeriesReader {
    75  	size := 10
    76  	if opts.Size != 0 {
    77  		size = opts.Size
    78  	}
    79  
    80  	return &seriesReader{
    81  		iterOpts:        opts,
    82  		nameIDSeriesMap: make(nameIDSeriesMap, size),
    83  	}
    84  }
    85  
// SeriesIterators returns a SeriesIterators for the series loaded under the
// given metric name. An empty name returns iterators over all loaded data;
// a name with no loaded series returns (nil, nil) with no error.
func (l *seriesReader) SeriesIterators(name string) (encoding.SeriesIterators, error) {
	l.RLock()
	defer l.RUnlock()

	var seriesMaps []idSeriesMap
	logger := l.iterOpts.InstrumentOptions.Logger()
	if name == "" {
		// return all preloaded data
		seriesMaps = make([]idSeriesMap, 0, len(l.nameIDSeriesMap))
		for _, series := range l.nameIDSeriesMap {
			seriesMaps = append(seriesMaps, series)
		}
	} else {
		seriesMap, found := l.nameIDSeriesMap[name]
		if !found || len(seriesMap.series) == 0 {
			return nil, nil
		}

		seriesMaps = append(seriesMaps, seriesMap)
	}

	iters := make([]encoding.SeriesIterator, 0, len(seriesMaps))
	for _, seriesMap := range seriesMaps {
		for _, seriesPerID := range seriesMap.series {
			for _, series := range seriesPerID {
				// Encode all of this series' datapoints into a single block.
				encoder := l.iterOpts.EncoderPool.Get()
				dps := series.Datapoints
				startTime := time.Time{}
				if len(dps) > 0 {
					// Block start is the first datapoint truncated to the hour.
					startTime = dps[0].Timestamp.Truncate(time.Hour)
				}

				encoder.Reset(xtime.ToUnixNano(startTime), len(dps), nil)
				for _, dp := range dps {
					err := encoder.Encode(ts.Datapoint{
						Value:          float64(dp.Value),
						TimestampNanos: xtime.ToUnixNano(dp.Timestamp),
					}, xtime.Nanosecond, nil)

					if err != nil {
						// Release the encoder before bailing out.
						encoder.Close()
						logger.Error("error encoding datapoints", zap.Error(err))
						return nil, err
					}
				}

				// Wrap the encoded segment as one block reader covering the
				// series' full [Start, End) range.
				readers := [][]xio.BlockReader{{{
					SegmentReader: xio.NewSegmentReader(encoder.Discard()),
					Start:         xtime.ToUnixNano(series.Start),
					BlockSize:     series.End.Sub(series.Start),
				}}}

				multiReader := encoding.NewMultiReaderIterator(
					iterAlloc,
					l.iterOpts.IteratorPools.MultiReaderIterator(),
				)

				sliceOfSlicesIter := xio.NewReaderSliceOfSlicesFromBlockReadersIterator(readers)
				multiReader.ResetSliceOfSlices(sliceOfSlicesIter, nil)

				// Build the per-series iterator with its tag iterator and ID.
				tagIter, id := buildTagIteratorAndID(series.Tags, l.iterOpts.TagOptions)
				iter := encoding.NewSeriesIterator(
					encoding.SeriesIteratorOptions{
						ID:             id,
						Namespace:      ident.StringID("ns"),
						Tags:           tagIter,
						StartInclusive: xtime.ToUnixNano(series.Start),
						EndExclusive:   xtime.ToUnixNano(series.End),
						Replicas: []encoding.MultiReaderIterator{
							multiReader,
						},
					}, nil)

				iters = append(iters, iter)
			}
		}
	}

	return encoding.NewSeriesIterators(iters), nil
}
   166  
   167  func calculateSeriesRange(seriesList []Series) (time.Time, time.Time) {
   168  	// NB: keep consistent start/end for the entire ingested set.
   169  	//
   170  	// Try taking from set start/end; infer from first/last endpoint otherwise.
   171  	start, end := time.Time{}, time.Time{}
   172  	for _, series := range seriesList {
   173  		if start.IsZero() || series.Start.Before(start) {
   174  			start = series.Start
   175  		}
   176  
   177  		if end.IsZero() || series.End.Before(start) {
   178  			end = series.End
   179  		}
   180  	}
   181  
   182  	if !start.IsZero() && !end.IsZero() {
   183  		return start, end
   184  	}
   185  
   186  	for _, series := range seriesList {
   187  		dps := series.Datapoints
   188  		if len(dps) == 0 {
   189  			continue
   190  		}
   191  
   192  		first, last := dps[0].Timestamp, dps[len(dps)-1].Timestamp
   193  		if start.IsZero() || first.Before(start) {
   194  			start = first
   195  		}
   196  
   197  		if end.IsZero() || last.Before(start) {
   198  			end = last
   199  		}
   200  	}
   201  
   202  	return start, end
   203  }
   204  
   205  func (l *seriesReader) Load(reader io.Reader) error {
   206  	l.Lock()
   207  	defer l.Unlock()
   208  
   209  	buf, err := ioutil.ReadAll(reader)
   210  	logger := l.iterOpts.InstrumentOptions.Logger()
   211  	if err != nil {
   212  		logger.Error("could not read body", zap.Error(err))
   213  		return err
   214  	}
   215  
   216  	seriesList := make([]Series, 0, 10)
   217  	if err := json.Unmarshal(buf, &seriesList); err != nil {
   218  		logger.Error("could not unmarshal queries", zap.Error(err))
   219  		return err
   220  	}
   221  
   222  	// NB: keep consistent start/end for the entire ingested set.
   223  	start, end := calculateSeriesRange(seriesList)
   224  	nameKey := string(l.iterOpts.TagOptions.MetricName())
   225  	nameMap := make(nameIDSeriesMap, len(seriesList))
   226  	for _, series := range seriesList {
   227  		names := series.Tags.Get(nameKey)
   228  		if len(names) != 1 || len(series.Datapoints) == 0 {
   229  			if len(names) > 1 {
   230  				err := fmt.Errorf("series has duplicate __name__ tags: %v", names)
   231  				logger.Error("bad __name__ variable", zap.Error(err))
   232  				return err
   233  			}
   234  
   235  			continue
   236  		}
   237  
   238  		name := names[0]
   239  		seriesMap, found := nameMap[name]
   240  		if !found {
   241  			seriesMap = idSeriesMap{
   242  				series: make(map[string][]Series, len(seriesList)),
   243  			}
   244  		}
   245  
   246  		id := series.IDOrGenID()
   247  		seriesList, found := seriesMap.series[id]
   248  		if !found {
   249  			seriesList = make([]Series, 0, 1)
   250  		} else {
   251  			logger.Info("duplicate tag set in loaded series",
   252  				zap.Int("count", len(seriesList)),
   253  				zap.String("id", id))
   254  		}
   255  
   256  		seriesList = append(seriesList, series)
   257  		seriesMap.series[id] = seriesList
   258  		logger.Info("setting series",
   259  			zap.String("name", name), zap.String("id", id))
   260  
   261  		series.Start = start
   262  		series.End = end
   263  		nameMap[name] = seriesMap
   264  	}
   265  
   266  	for k, v := range nameMap {
   267  		// NB: overwrite existing series.
   268  		l.nameIDSeriesMap[k] = v
   269  	}
   270  
   271  	return nil
   272  }
   273  
   274  func (l *seriesReader) Clear() {
   275  	l.Lock()
   276  	for k := range l.nameIDSeriesMap {
   277  		delete(l.nameIDSeriesMap, k)
   278  	}
   279  
   280  	l.Unlock()
   281  }