github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/x/serialize/encoder.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package serialize
    22  
    23  import (
    24  	"bytes"
    25  	"encoding/binary"
    26  	"errors"
    27  	"fmt"
    28  
    29  	"github.com/m3db/m3/src/x/checked"
    30  	xerrors "github.com/m3db/m3/src/x/errors"
    31  	"github.com/m3db/m3/src/x/ident"
    32  )
    33  
    34  /*
    35   * Serialization scheme to combat Thrift's allocation hell.
    36   *
    37   * Given Tags (i.e. key-value pairs), this scheme provides a bijective
    38   * serialization between Tags and []byte (Tags <--> []byte).
    39   *
    40   * Consider example, Tags: {"abc": "defg", "x": "foo"}
    41   * this translates to:
    42   * []byte(
    43   *    MAGIC_MARKER + NUMBER_TAGS
    44   *                 + LENGTH([]byte("abc"))  + []byte("abc")
    45   *                 + LENGTH([]byte("defg")) + []byte("defg")
    46   *                 + LENGTH([]byte("x"))    + []byte("x")
    47   *                 + LENGTH([]byte("foo"))  + []byte("foo")
    48   * )
    49   *
    50   * Where MAGIC_MARKER/NUMBER_TAGS/LENGTH are maximum 2 bytes.
    51   */
    52  
    53  var (
    54  	// ByteOrder is the byte order used for encoding tags into a byte sequence.
    55  	ByteOrder        binary.ByteOrder = binary.LittleEndian
    56  	headerMagicBytes                  = make([]byte, 2)
    57  )
    58  
    59  func init() {
    60  	encodeUInt16(HeaderMagicNumber, headerMagicBytes)
    61  }
    62  
    63  var (
    64  	errTagEncoderInUse   = errors.New("encoder already in use")
    65  	errTagLiteralTooLong = xerrors.NewInvalidParamsError(errors.New("literal is too long"))
    66  	// ErrEmptyTagNameLiteral is an error when encoded tag name is empty.
    67  	ErrEmptyTagNameLiteral = xerrors.NewInvalidParamsError(errors.New("tag name cannot be empty"))
    68  )
    69  
    70  type newCheckedBytesFn func([]byte, checked.BytesOptions) checked.Bytes
    71  
    72  var defaultNewCheckedBytesFn = checked.NewBytes
    73  
    74  type encoder struct {
    75  	buf               *bytes.Buffer
    76  	checkedBytes      checked.Bytes
    77  	staticBuffer      [2]byte
    78  	staticBufferSlice []byte
    79  
    80  	opts TagEncoderOptions
    81  	pool TagEncoderPool
    82  }
    83  
    84  func newTagEncoder(
    85  	newFn newCheckedBytesFn,
    86  	opts TagEncoderOptions,
    87  	pool TagEncoderPool,
    88  ) TagEncoder {
    89  	b := make([]byte, 0, opts.InitialCapacity())
    90  	cb := newFn(nil, nil)
    91  	e := &encoder{
    92  		buf:          bytes.NewBuffer(b),
    93  		checkedBytes: cb,
    94  		opts:         opts,
    95  		pool:         pool,
    96  	}
    97  	e.staticBufferSlice = e.staticBuffer[:]
    98  	return e
    99  }
   100  
   101  func (e *encoder) Encode(tags ident.TagIterator) error {
   102  	if e.checkedBytes.NumRef() > 0 {
   103  		return errTagEncoderInUse
   104  	}
   105  
   106  	tags.Rewind()
   107  	defer tags.Rewind()
   108  
   109  	numTags := tags.Remaining()
   110  	max := int(e.opts.TagSerializationLimits().MaxNumberTags())
   111  	if numTags > max {
   112  		return fmt.Errorf("too many tags to encode (%d), limit is: %d", numTags, max)
   113  	}
   114  
   115  	if _, err := e.buf.Write(headerMagicBytes); err != nil {
   116  		e.buf.Reset()
   117  		return err
   118  	}
   119  
   120  	if _, err := e.buf.Write(e.encodeUInt16(uint16(numTags))); err != nil {
   121  		e.buf.Reset()
   122  		return err
   123  	}
   124  
   125  	for tags.Next() {
   126  		tag := tags.Current()
   127  		if err := e.encodeTag(tag); err != nil {
   128  			e.buf.Reset()
   129  			return err
   130  		}
   131  	}
   132  
   133  	if err := tags.Err(); err != nil {
   134  		e.buf.Reset()
   135  		return err
   136  	}
   137  
   138  	e.checkedBytes.IncRef()
   139  	e.checkedBytes.Reset(e.buf.Bytes())
   140  
   141  	return nil
   142  }
   143  
   144  func (e *encoder) Data() (checked.Bytes, bool) {
   145  	if e.checkedBytes.NumRef() == 0 {
   146  		return nil, false
   147  	}
   148  	return e.checkedBytes, true
   149  }
   150  
   151  func (e *encoder) Reset() {
   152  	if e.checkedBytes.NumRef() == 0 {
   153  		return
   154  	}
   155  	e.buf.Reset()
   156  	e.checkedBytes.Reset(nil)
   157  	e.checkedBytes.DecRef()
   158  }
   159  
   160  func (e *encoder) Finalize() {
   161  	e.Reset()
   162  	p := e.pool
   163  	if p == nil {
   164  		return
   165  	}
   166  	p.Put(e)
   167  }
   168  
   169  func (e *encoder) encodeTag(t ident.Tag) error {
   170  	if len(t.Name.Bytes()) == 0 {
   171  		return ErrEmptyTagNameLiteral
   172  	}
   173  
   174  	if err := e.encodeID(t.Name); err != nil {
   175  		return err
   176  	}
   177  
   178  	return e.encodeID(t.Value)
   179  }
   180  
   181  func (e *encoder) encodeID(i ident.ID) error {
   182  	d := i.Bytes()
   183  
   184  	max := int(e.opts.TagSerializationLimits().MaxTagLiteralLength())
   185  	if len(d) > max {
   186  		return errTagLiteralTooLong
   187  	}
   188  
   189  	ld := uint16(len(d))
   190  	if _, err := e.buf.Write(e.encodeUInt16(ld)); err != nil {
   191  		return err
   192  	}
   193  
   194  	if _, err := e.buf.Write(d); err != nil {
   195  		return err
   196  	}
   197  
   198  	return nil
   199  }
   200  
   201  func (e *encoder) encodeUInt16(v uint16) []byte {
   202  	// NB(r): Use static buffer on the struct for encoding, otherwise if it's
   203  	// statically defined inline in the function it will escape to heap.
   204  	dest := e.staticBufferSlice[:2]
   205  	return encodeUInt16(v, dest)
   206  }
   207  
   208  func encodeUInt16(v uint16, dest []byte) []byte {
   209  	ByteOrder.PutUint16(dest, v)
   210  	return dest
   211  }
   212  
   213  func decodeUInt16(b []byte) uint16 {
   214  	return ByteOrder.Uint16(b)
   215  }