github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/index/convert/convert.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  // Package convert contains various conversions.
    22  package convert
    23  
    24  import (
    25  	"bytes"
    26  	"errors"
    27  	"fmt"
    28  	"unicode/utf8"
    29  
    30  	"github.com/m3db/m3/src/dbnode/ts"
    31  	"github.com/m3db/m3/src/m3ninx/doc"
    32  	"github.com/m3db/m3/src/query/graphite/graphite"
    33  	"github.com/m3db/m3/src/x/ident"
    34  	"github.com/m3db/m3/src/x/pool"
    35  	"github.com/m3db/m3/src/x/serialize"
    36  )
    37  
const (
	// NB: this assumes that series ID has a format:
	//   {tag1="value1",tag2="value2",...}
	//
	// Thus firstTagBytesPosition points to the 't' immediately after curly brace '{'
	firstTagBytesPosition int = 1
	// distanceBetweenTagNameAndValue corresponds to '="' in series ID that separates tag name from
	// its value
	distanceBetweenTagNameAndValue int = 2
	// distanceBetweenTagValueAndNextName corresponds to '",' in series ID that separates
	// tag's value from the following tag name
	distanceBetweenTagValueAndNextName int = 2
)
    51  
var (
	// ReservedFieldNameID is the field name used to index the ID in the
	// m3ninx subsystem.
	ReservedFieldNameID = doc.IDReservedFieldName

	// ErrUsingReservedFieldName is the error returned when a metric
	// cannot be parsed due to using a reserved field name.
	ErrUsingReservedFieldName = errors.New(
		"unable to parse metric using reserved field name: " +
			string(ReservedFieldNameID))

	// errInvalidResultMissingID is returned by ToSeries when the document
	// has an empty ID.
	errInvalidResultMissingID = errors.New(
		"corrupt data, unable to extract id")
)
    66  
    67  // Validate returns a bool indicating whether the document is valid.
    68  func Validate(d doc.Metadata) error {
    69  	if !utf8.Valid(d.ID) {
    70  		return fmt.Errorf("document has invalid non-UTF8 ID: id=%v, id_hex=%x",
    71  			d.ID, d.ID)
    72  	}
    73  
    74  	for _, f := range d.Fields {
    75  		if !utf8.Valid(f.Name) {
    76  			return fmt.Errorf("document has invalid non-UTF8 field name: name=%v, name_hex=%x",
    77  				f.Name, f.Name)
    78  		}
    79  
    80  		if bytes.Equal(f.Name, ReservedFieldNameID) {
    81  			return ErrUsingReservedFieldName
    82  		}
    83  
    84  		if !utf8.Valid(f.Value) {
    85  			return fmt.Errorf("document has invalid non-UTF8 field value: value=%v, value_hex=%x",
    86  				f.Value, f.Value)
    87  		}
    88  	}
    89  
    90  	return nil
    91  }
    92  
    93  // ValidateSeries will validate a series for use with m3ninx.
    94  func ValidateSeries(id ident.ID, tags ident.Tags) error {
    95  	if idBytes := id.Bytes(); !utf8.Valid(idBytes) {
    96  		return fmt.Errorf("series has invalid non-UTF8 ID: id=%s, id_hex=%x",
    97  			idBytes, idBytes)
    98  	}
    99  	for _, tag := range tags.Values() {
   100  		if err := ValidateSeriesTag(tag); err != nil {
   101  			return err
   102  		}
   103  	}
   104  	return nil
   105  }
   106  
   107  // ValidateSeriesTag validates a series tag for use with m3ninx.
   108  func ValidateSeriesTag(tag ident.Tag) error {
   109  	tagName := tag.Name.Bytes()
   110  	tagValue := tag.Value.Bytes()
   111  	if bytes.Equal(ReservedFieldNameID, tagName) {
   112  		return ErrUsingReservedFieldName
   113  	}
   114  	if !utf8.Valid(tagName) {
   115  		return fmt.Errorf("series contains invalid non-UTF8 field name: "+
   116  			"field=%s, field_hex=%v", tagName, tagName)
   117  	}
   118  	if !utf8.Valid(tagValue) {
   119  		return fmt.Errorf("series contains invalid non-UTF8 field value: "+
   120  			"field=%s, field_value=%s, field_value_hex=%x",
   121  			tagName, tagValue, tagValue)
   122  	}
   123  	return nil
   124  }
   125  
   126  // FromSeriesIDAndTags converts the provided series id+tags into a document.
   127  func FromSeriesIDAndTags(id ident.ID, tags ident.Tags) (doc.Metadata, error) {
   128  	var (
   129  		clonedID      = clone(id.Bytes())
   130  		fields        = make([]doc.Field, 0, len(tags.Values()))
   131  		expectedStart = firstTagBytesPosition
   132  	)
   133  	for _, tag := range tags.Values() {
   134  		nameBytes, valueBytes := tag.Name.Bytes(), tag.Value.Bytes()
   135  
   136  		var clonedName, clonedValue []byte
   137  		clonedName, expectedStart = findSliceOrClone(clonedID, nameBytes, expectedStart,
   138  			distanceBetweenTagNameAndValue)
   139  		clonedValue, expectedStart = findSliceOrClone(clonedID, valueBytes, expectedStart,
   140  			distanceBetweenTagValueAndNextName)
   141  
   142  		fields = append(fields, doc.Field{
   143  			Name:  clonedName,
   144  			Value: clonedValue,
   145  		})
   146  	}
   147  
   148  	d := doc.Metadata{
   149  		ID:     clonedID,
   150  		Fields: fields,
   151  	}
   152  	if err := Validate(d); err != nil {
   153  		return doc.Metadata{}, err
   154  	}
   155  	return d, nil
   156  }
   157  
   158  // FromSeriesIDAndTagIter converts the provided series id+tags into a document.
   159  func FromSeriesIDAndTagIter(id ident.ID, tags ident.TagIterator) (doc.Metadata, error) {
   160  	var (
   161  		clonedID      = clone(id.Bytes())
   162  		fields        = make([]doc.Field, 0, tags.Remaining())
   163  		expectedStart = firstTagBytesPosition
   164  	)
   165  	for tags.Next() {
   166  		tag := tags.Current()
   167  		nameBytes, valueBytes := tag.Name.Bytes(), tag.Value.Bytes()
   168  
   169  		var clonedName, clonedValue []byte
   170  		clonedName, expectedStart = findSliceOrClone(clonedID, nameBytes, expectedStart,
   171  			distanceBetweenTagNameAndValue)
   172  		clonedValue, expectedStart = findSliceOrClone(clonedID, valueBytes, expectedStart,
   173  			distanceBetweenTagValueAndNextName)
   174  
   175  		fields = append(fields, doc.Field{
   176  			Name:  clonedName,
   177  			Value: clonedValue,
   178  		})
   179  	}
   180  	if err := tags.Err(); err != nil {
   181  		return doc.Metadata{}, err
   182  	}
   183  
   184  	d := doc.Metadata{
   185  		ID:     clonedID,
   186  		Fields: fields,
   187  	}
   188  	if err := Validate(d); err != nil {
   189  		return doc.Metadata{}, err
   190  	}
   191  	return d, nil
   192  }
   193  
   194  // FromSeriesIDAndEncodedTags converts the provided series id and encoded tags into a doc.Metadata.
   195  func FromSeriesIDAndEncodedTags(id ident.BytesID, encodedTags ts.EncodedTags) (doc.Metadata, error) {
   196  	var (
   197  		byteOrder = serialize.ByteOrder
   198  		total     = len(encodedTags)
   199  	)
   200  	if total == 0 {
   201  		// No tags set for this series
   202  		return doc.Metadata{
   203  			ID:     clone(id.Bytes()),
   204  			Fields: nil,
   205  		}, nil
   206  	}
   207  
   208  	if total < 4 {
   209  		return doc.Metadata{}, fmt.Errorf("encoded tags too short: size=%d, need=%d", total, 4)
   210  	}
   211  
   212  	header := byteOrder.Uint16(encodedTags[:2])
   213  	encodedTags = encodedTags[2:]
   214  	if header != serialize.HeaderMagicNumber {
   215  		return doc.Metadata{}, serialize.ErrIncorrectHeader
   216  	}
   217  
   218  	length := int(byteOrder.Uint16(encodedTags[:2]))
   219  	encodedTags = encodedTags[2:]
   220  
   221  	var (
   222  		clonedID      = clone(id.Bytes())
   223  		fields        = make([]doc.Field, 0, length)
   224  		expectedStart = firstTagBytesPosition
   225  	)
   226  
   227  	for i := 0; i < length; i++ {
   228  		if len(encodedTags) < 2 {
   229  			return doc.Metadata{}, fmt.Errorf("missing size for tag name: index=%d", i)
   230  		}
   231  		numBytesName := int(byteOrder.Uint16(encodedTags[:2]))
   232  		if numBytesName == 0 {
   233  			return doc.Metadata{}, serialize.ErrEmptyTagNameLiteral
   234  		}
   235  		encodedTags = encodedTags[2:]
   236  
   237  		bytesName := encodedTags[:numBytesName]
   238  		encodedTags = encodedTags[numBytesName:]
   239  
   240  		if len(encodedTags) < 2 {
   241  			return doc.Metadata{}, fmt.Errorf("missing size for tag value: index=%d", i)
   242  		}
   243  
   244  		numBytesValue := int(byteOrder.Uint16(encodedTags[:2]))
   245  		encodedTags = encodedTags[2:]
   246  
   247  		bytesValue := encodedTags[:numBytesValue]
   248  		encodedTags = encodedTags[numBytesValue:]
   249  
   250  		var clonedName, clonedValue []byte
   251  		clonedName, expectedStart = findSliceOrClone(clonedID, bytesName, expectedStart,
   252  			distanceBetweenTagNameAndValue)
   253  		clonedValue, expectedStart = findSliceOrClone(clonedID, bytesValue, expectedStart,
   254  			distanceBetweenTagValueAndNextName)
   255  
   256  		fields = append(fields, doc.Field{
   257  			Name:  clonedName,
   258  			Value: clonedValue,
   259  		})
   260  	}
   261  
   262  	d := doc.Metadata{
   263  		ID:     clonedID,
   264  		Fields: fields,
   265  	}
   266  	if err := Validate(d); err != nil {
   267  		return doc.Metadata{}, err
   268  	}
   269  	return d, nil
   270  }
   271  
   272  func findSliceOrClone(id, tag []byte, expectedStart, nextPositionDistance int) ([]byte, int) { //nolint:unparam
   273  	n := len(tag)
   274  	expectedEnd := expectedStart + n
   275  	if expectedStart != -1 && expectedEnd <= len(id) &&
   276  		bytes.Equal(id[expectedStart:expectedEnd], tag) {
   277  		return id[expectedStart:expectedEnd], expectedEnd + nextPositionDistance
   278  	} else if idx := bytes.Index(id, tag); idx != -1 {
   279  		return id[idx : idx+n], expectedEnd + nextPositionDistance
   280  	} else {
   281  		return clone(tag), -1
   282  	}
   283  }
   284  
   285  // TagsFromTagsIter returns an ident.Tags from a TagIterator. It also tries
   286  // to re-use bytes from the seriesID if they're also present in the tags
   287  // instead of re-allocating them. This requires that the ident.Tags that is
   288  // returned will have the same (or shorter) life time as the seriesID,
   289  // otherwise the operation is unsafe.
   290  func TagsFromTagsIter(
   291  	seriesID ident.ID,
   292  	iter ident.TagIterator,
   293  	idPool ident.Pool,
   294  ) (ident.Tags, error) {
   295  	var tags ident.Tags
   296  	if idPool != nil {
   297  		tags = idPool.Tags()
   298  	} else {
   299  		tagSlice := make([]ident.Tag, 0, iter.Len())
   300  		tags = ident.NewTags(tagSlice...)
   301  	}
   302  
   303  	seriesIDBytes := ident.BytesID(seriesID.Bytes())
   304  	for iter.Next() {
   305  		curr := iter.Current()
   306  
   307  		var (
   308  			nameBytes, valueBytes = curr.Name.Bytes(), curr.Value.Bytes()
   309  			tag                   ident.Tag
   310  			idRef                 bool
   311  		)
   312  		if idx := bytes.Index(seriesIDBytes, nameBytes); idx != -1 {
   313  			tag.Name = seriesIDBytes[idx : idx+len(nameBytes)]
   314  			idRef = true
   315  		} else {
   316  			if idPool != nil {
   317  				// NB(r): Fast path for if a graphite tag name to save
   318  				// a lot of space is to reuse a preallocated tag name.
   319  				if idx, ok := graphite.TagIndex(nameBytes); ok {
   320  					tag.Name = graphite.TagNameID(idx)
   321  				} else {
   322  					tag.Name = idPool.Clone(curr.Name)
   323  				}
   324  			} else {
   325  				copiedBytes := append([]byte(nil), curr.Name.Bytes()...)
   326  				tag.Name = ident.BytesID(copiedBytes)
   327  			}
   328  		}
   329  		if idx := bytes.Index(seriesIDBytes, valueBytes); idx != -1 {
   330  			tag.Value = seriesIDBytes[idx : idx+len(valueBytes)]
   331  			idRef = true
   332  		} else {
   333  			if idPool != nil {
   334  				tag.Value = idPool.Clone(curr.Value)
   335  			} else {
   336  				copiedBytes := append([]byte(nil), curr.Value.Bytes()...)
   337  				tag.Value = ident.BytesID(copiedBytes)
   338  			}
   339  		}
   340  
   341  		if idRef {
   342  			tag.NoFinalize() // Taken ref, cannot finalize this.
   343  		}
   344  
   345  		tags.Append(tag)
   346  	}
   347  
   348  	if err := iter.Err(); err != nil {
   349  		return ident.Tags{}, err
   350  	}
   351  	return tags, nil
   352  }
   353  
   354  // NB(prateek): we take an independent copy of the bytes underlying
   355  // any ids provided, as we need to maintain the lifecycle of the indexed
   356  // bytes separately from the rest of the storage subsystem.
   357  func clone(original []byte) []byte {
   358  	clone := make([]byte, len(original))
   359  	copy(clone, original)
   360  	return clone
   361  }
   362  
// Opts are the pools required for conversions.
//
// IdentPool is used by wrapBytes to build the returned ID (BinaryID) and by
// tagIter.Duplicate to clone the current tag. CheckedBytesPool supplies the
// checked byte buffers that wrapBytes appends the wrapped bytes to. When
// NoClone is true, wrapBytes uses neither pool and returns an ident.BytesID
// over the provided bytes directly.
type Opts struct {
	IdentPool        ident.Pool
	CheckedBytesPool pool.CheckedBytesPool
	NoClone          bool
}
   369  
   370  // wrapBytes wraps the provided bytes into an ident.ID backed by pooled types,
   371  // such that calling Finalize() on the returned type returns the resources to
   372  // the pools.
   373  func (o Opts) wrapBytes(b []byte) ident.ID {
   374  	if o.NoClone {
   375  		return ident.BytesID(b)
   376  	}
   377  	cb := o.CheckedBytesPool.Get(len(b))
   378  	cb.IncRef()
   379  	cb.AppendAll(b)
   380  	id := o.IdentPool.BinaryID(cb)
   381  	// release held reference so now the only reference to the bytes is owned by `id`
   382  	cb.DecRef()
   383  	return id
   384  }
   385  
   386  // ToSeries converts the provided doc to metric id+tags.
   387  func ToSeries(d doc.Metadata, opts Opts) (ident.ID, ident.TagIterator, error) {
   388  	if len(d.ID) == 0 {
   389  		return nil, nil, errInvalidResultMissingID
   390  	}
   391  	return opts.wrapBytes(d.ID), ToSeriesTags(d, opts), nil
   392  }
   393  
   394  // ToSeriesTags converts the provided doc to metric tags.
   395  func ToSeriesTags(d doc.Metadata, opts Opts) ident.TagIterator {
   396  	return newTagIter(d, opts)
   397  }
   398  
// tagIter exposes an ident.TagIterator interface over a doc.Metadata.
//
// docFields are the document fields being iterated. currentIdx is the index
// of the field backing currentTag; it starts at -1 and equals len(docFields)
// once iteration has run past the last field. err records the error that
// stopped iteration (a field using the reserved ID field name), done is set
// once Next has returned false or Close was called, and opts controls how
// field bytes are wrapped into IDs.
type tagIter struct {
	docFields doc.Fields

	err        error
	done       bool
	currentIdx int
	currentTag ident.Tag

	opts Opts
}
   410  
   411  // NB: force tagIter to implement the ident.TagIterator interface.
   412  var _ ident.TagIterator = &tagIter{}
   413  
   414  func newTagIter(d doc.Metadata, opts Opts) ident.TagIterator {
   415  	return &tagIter{
   416  		docFields:  d.Fields,
   417  		currentIdx: -1,
   418  		opts:       opts,
   419  	}
   420  }
   421  
   422  func (t *tagIter) Next() bool {
   423  	if t.err != nil || t.done {
   424  		return false
   425  	}
   426  	hasNext := t.parseNext()
   427  	if !hasNext {
   428  		t.done = true
   429  	}
   430  	return hasNext
   431  }
   432  
   433  func (t *tagIter) parseNext() (hasNext bool) {
   434  	t.releaseCurrent()
   435  	t.currentIdx++
   436  	// early terminate if we know there's no more fields
   437  	if t.currentIdx >= len(t.docFields) {
   438  		return false
   439  	}
   440  	// if there are fields, we have to ensure the next field
   441  	// is not using the reserved ID fieldname
   442  	next := t.docFields[t.currentIdx]
   443  	if bytes.Equal(ReservedFieldNameID, next.Name) {
   444  		t.err = ErrUsingReservedFieldName
   445  		return false
   446  	}
   447  	// otherwise, we're good.
   448  	t.currentTag = ident.Tag{
   449  		Name:  t.opts.wrapBytes(next.Name),
   450  		Value: t.opts.wrapBytes(next.Value),
   451  	}
   452  	return true
   453  }
   454  
   455  func (t *tagIter) releaseCurrent() {
   456  	if t.currentTag.Name != nil {
   457  		t.currentTag.Name.Finalize()
   458  		t.currentTag.Name = nil
   459  	}
   460  	if t.currentTag.Value != nil {
   461  		t.currentTag.Value.Finalize()
   462  		t.currentTag.Value = nil
   463  	}
   464  }
   465  
// Current returns the tag most recently produced by Next. Its name and value
// are finalized and cleared by the next call to Next, Rewind or Close.
func (t *tagIter) Current() ident.Tag {
	return t.currentTag
}
   469  
   470  func (t *tagIter) CurrentIndex() int {
   471  	if t.currentIdx >= 0 {
   472  		return t.currentIdx
   473  	}
   474  	return 0
   475  }
   476  
// Err returns the error that stopped iteration, or nil if none was recorded.
func (t *tagIter) Err() error {
	return t.err
}
   480  
   481  func (t *tagIter) Close() {
   482  	t.releaseCurrent()
   483  	t.done = true
   484  }
   485  
// Len returns the total number of document fields, regardless of how many
// have already been iterated.
func (t *tagIter) Len() int {
	return len(t.docFields)
}
   489  
   490  func (t *tagIter) Remaining() int {
   491  	l := len(t.docFields) - (t.currentIdx + 1)
   492  	return l
   493  }
   494  
   495  func (t *tagIter) Duplicate() ident.TagIterator {
   496  	var dupe = *t
   497  	if t.currentTag.Name != nil {
   498  		dupe.currentTag = t.opts.IdentPool.CloneTag(t.currentTag)
   499  	}
   500  	return &dupe
   501  }
   502  
   503  func (t *tagIter) Rewind() {
   504  	t.releaseCurrent()
   505  	t.currentIdx = -1
   506  	t.done = false
   507  }