github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/encoding/scheme.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package encoding
    22  
    23  import (
    24  	"github.com/m3db/m3/src/x/checked"
    25  	xtime "github.com/m3db/m3/src/x/time"
    26  )
    27  
    28  const (
    29  	// special markers
    30  	defaultEndOfStreamMarker Marker = iota
    31  	defaultAnnotationMarker
    32  	defaultTimeUnitMarker
    33  
    34  	// marker encoding information
    35  	defaultMarkerOpcode        = 0x100
    36  	defaultNumMarkerOpcodeBits = 9
    37  	defaultNumMarkerValueBits  = 2
    38  )
    39  
    40  var (
    41  	// default time encoding schemes
    42  	defaultZeroBucket             = NewTimeBucket(0x0, 1, 0)
    43  	defaultNumValueBitsForBuckets = []int{7, 9, 12}
    44  
    45  	// TODO(xichen): set more reasonable defaults once we have more knowledge
    46  	// of the use cases for time units other than seconds.
    47  	defaultTimeEncodingSchemes = map[xtime.Unit]TimeEncodingScheme{
    48  		xtime.Second:      NewTimeEncodingScheme(defaultNumValueBitsForBuckets, 32),
    49  		xtime.Millisecond: NewTimeEncodingScheme(defaultNumValueBitsForBuckets, 32),
    50  		xtime.Microsecond: NewTimeEncodingScheme(defaultNumValueBitsForBuckets, 64),
    51  		xtime.Nanosecond:  NewTimeEncodingScheme(defaultNumValueBitsForBuckets, 64),
    52  	}
    53  
    54  	// default marker encoding scheme
    55  	defaultMarkerEncodingScheme = NewMarkerEncodingScheme(
    56  		defaultMarkerOpcode,
    57  		defaultNumMarkerOpcodeBits,
    58  		defaultNumMarkerValueBits,
    59  		defaultEndOfStreamMarker,
    60  		defaultAnnotationMarker,
    61  		defaultTimeUnitMarker,
    62  	)
    63  )
    64  
    65  // TimeBucket represents a bucket for encoding time values.
    66  type TimeBucket struct {
    67  	min           int64
    68  	max           int64
    69  	opcode        uint64
    70  	numOpcodeBits int
    71  	numValueBits  int
    72  }
    73  
    74  // NewTimeBucket creates a new time bucket.
    75  func NewTimeBucket(opcode uint64, numOpcodeBits, numValueBits int) TimeBucket {
    76  	return TimeBucket{
    77  		opcode:        opcode,
    78  		numOpcodeBits: numOpcodeBits,
    79  		numValueBits:  numValueBits,
    80  		min:           -(1 << uint(numValueBits-1)),
    81  		max:           (1 << uint(numValueBits-1)) - 1,
    82  	}
    83  }
    84  
    85  // Opcode is the opcode prefix used to encode all time values in this range.
    86  func (tb *TimeBucket) Opcode() uint64 { return tb.opcode }
    87  
    88  // NumOpcodeBits is the number of bits used to write the opcode.
    89  func (tb *TimeBucket) NumOpcodeBits() int { return tb.numOpcodeBits }
    90  
    91  // Min is the minimum time value accepted in this range.
    92  func (tb *TimeBucket) Min() int64 { return tb.min }
    93  
    94  // Max is the maximum time value accepted in this range.
    95  func (tb *TimeBucket) Max() int64 { return tb.max }
    96  
    97  // NumValueBits is the number of bits used to write the time value.
    98  func (tb *TimeBucket) NumValueBits() int { return tb.numValueBits }
    99  
// TimeEncodingScheme captures information related to time encoding: the
// bucket used for zero time values, the ordered regular buckets, and the
// default bucket that catches every value the regular buckets do not cover.
type TimeEncodingScheme struct {
	// zeroBucket is the bucket for encoding zero time values.
	zeroBucket    TimeBucket
	// buckets are the regular buckets, in the order they were constructed.
	buckets       []TimeBucket
	// defaultBucket is the catch-all bucket for all remaining values.
	defaultBucket TimeBucket
}
   106  
   107  // NewTimeEncodingSchemes converts the unit-to-scheme mapping
   108  // to the underlying TimeEncodingSchemes used for lookups.
   109  func NewTimeEncodingSchemes(schemes map[xtime.Unit]TimeEncodingScheme) TimeEncodingSchemes {
   110  	encodingSchemes := make(TimeEncodingSchemes, xtime.UnitCount())
   111  	for k, v := range schemes {
   112  		if !k.IsValid() {
   113  			continue
   114  		}
   115  
   116  		encodingSchemes[k] = v
   117  	}
   118  
   119  	return encodingSchemes
   120  }
   121  
   122  // NewTimeEncodingScheme creates a new time encoding scheme.
   123  // NB(xichen): numValueBitsForBuckets should be ordered by value
   124  // in ascending order (smallest value first).
   125  func NewTimeEncodingScheme(numValueBitsForBuckets []int, numValueBitsForDefault int) TimeEncodingScheme {
   126  	numBuckets := len(numValueBitsForBuckets)
   127  	buckets := make([]TimeBucket, 0, numBuckets)
   128  	numOpcodeBits := 1
   129  	opcode := uint64(0)
   130  	i := 0
   131  	for i < numBuckets {
   132  		opcode = uint64(1<<uint(i+1)) | opcode
   133  		buckets = append(buckets, NewTimeBucket(opcode, numOpcodeBits+1, numValueBitsForBuckets[i]))
   134  		i++
   135  		numOpcodeBits++
   136  	}
   137  	defaultBucket := NewTimeBucket(opcode|0x1, numOpcodeBits, numValueBitsForDefault)
   138  
   139  	return TimeEncodingScheme{
   140  		zeroBucket:    defaultZeroBucket,
   141  		buckets:       buckets,
   142  		defaultBucket: defaultBucket,
   143  	}
   144  }
   145  
   146  // ZeroBucket is time bucket for encoding zero time values.
   147  func (tes *TimeEncodingScheme) ZeroBucket() *TimeBucket { return &tes.zeroBucket }
   148  
   149  // Buckets are the ordered time buckets used to encode non-zero, non-default time values.
   150  func (tes *TimeEncodingScheme) Buckets() []TimeBucket { return tes.buckets }
   151  
   152  // DefaultBucket is the time bucket for catching all other time values not included in the regular buckets.
   153  func (tes *TimeEncodingScheme) DefaultBucket() *TimeBucket { return &tes.defaultBucket }
   154  
   155  // TimeEncodingSchemes defines the time encoding schemes for different time units.
   156  type TimeEncodingSchemes []TimeEncodingScheme
   157  
   158  // SchemeForUnit returns the corresponding TimeEncodingScheme for the provided unit.
   159  // Returns false if the unit does not match a scheme or is invalid.
   160  func (s TimeEncodingSchemes) SchemeForUnit(u xtime.Unit) (*TimeEncodingScheme, bool) {
   161  	if !u.IsValid() || int(u) >= len(s) {
   162  		return nil, false
   163  	}
   164  	return &s[u], true
   165  }
   166  
// Marker represents the markers, i.e. the values identifying a special symbol
// in the stream such as the end of stream, an annotation, or a time unit.
type Marker byte
   169  
// MarkerEncodingScheme captures the information related to marker encoding:
// the opcode that introduces a marker, the bit widths of the opcode and of
// the marker value, the three special marker values, and a table of
// precomputed stream tails.
type MarkerEncodingScheme struct {
	// opcode is written before every marker value.
	opcode        uint64
	// numOpcodeBits is the number of bits used to write opcode.
	numOpcodeBits int
	// numValueBits is the number of bits used to write the marker value.
	numValueBits  int
	// endOfStream, annotation and timeUnit are the special marker values.
	endOfStream   Marker
	annotation    Marker
	timeUnit      Marker
	// tails is filled by NewMarkerEncodingScheme; it is indexed first by the
	// value of a stream's last byte and then by bit position minus one.
	tails         [256][8]checked.Bytes
}
   180  
   181  // NewMarkerEncodingScheme returns new marker encoding.
   182  func NewMarkerEncodingScheme(
   183  	opcode uint64,
   184  	numOpcodeBits int,
   185  	numValueBits int,
   186  	endOfStream Marker,
   187  	annotation Marker,
   188  	timeUnit Marker,
   189  ) *MarkerEncodingScheme {
   190  	scheme := &MarkerEncodingScheme{
   191  		opcode:        opcode,
   192  		numOpcodeBits: numOpcodeBits,
   193  		numValueBits:  numValueBits,
   194  		endOfStream:   endOfStream,
   195  		annotation:    annotation,
   196  		timeUnit:      timeUnit,
   197  	}
   198  	// NB(r): we precompute all possible tail streams dependent on last byte
   199  	// so we never have to pool or allocate tails for each stream when we
   200  	// want to take a snapshot of the current stream returned by the `Stream` method.
   201  	for i := range scheme.tails {
   202  		for j := range scheme.tails[i] {
   203  			pos := j + 1
   204  			tmp := NewOStream(checked.NewBytes(nil, nil), false, nil)
   205  			tmp.WriteBits(uint64(i)>>uint(8-pos), pos)
   206  			WriteSpecialMarker(tmp, scheme, endOfStream)
   207  			rawBytes, _ := tmp.RawBytes()
   208  			tail := checked.NewBytes(rawBytes, nil)
   209  			scheme.tails[i][j] = tail
   210  		}
   211  	}
   212  	return scheme
   213  }
   214  
   215  // WriteSpecialMarker writes the marker that marks the start of a special symbol,
   216  // e.g., the eos marker, the annotation marker, or the time unit marker.
   217  func WriteSpecialMarker(os OStream, scheme *MarkerEncodingScheme, marker Marker) {
   218  	os.WriteBits(scheme.Opcode(), scheme.NumOpcodeBits())
   219  	os.WriteBits(uint64(marker), scheme.NumValueBits())
   220  }
   221  
   222  // Opcode returns the marker opcode.
   223  func (mes *MarkerEncodingScheme) Opcode() uint64 { return mes.opcode }
   224  
   225  // NumOpcodeBits returns the number of bits used for the opcode.
   226  func (mes *MarkerEncodingScheme) NumOpcodeBits() int { return mes.numOpcodeBits }
   227  
   228  // NumValueBits returns the number of bits used for the marker value.
   229  func (mes *MarkerEncodingScheme) NumValueBits() int { return mes.numValueBits }
   230  
   231  // EndOfStream returns the end of stream marker.
   232  func (mes *MarkerEncodingScheme) EndOfStream() Marker { return mes.endOfStream }
   233  
   234  // Annotation returns the annotation marker.
   235  func (mes *MarkerEncodingScheme) Annotation() Marker { return mes.annotation }
   236  
   237  // TimeUnit returns the time unit marker.
   238  func (mes *MarkerEncodingScheme) TimeUnit() Marker { return mes.timeUnit }
   239  
   240  // Tail will return the tail portion of a stream including the relevant bits
   241  // in the last byte along with the end of stream marker.
   242  func (mes *MarkerEncodingScheme) Tail(b byte, pos int) checked.Bytes { return mes.tails[int(b)][pos-1] }