github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/persist/fs/msgpack/encoder.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE
    20  
    21  package msgpack
    22  
    23  import (
    24  	"bytes"
    25  
    26  	"github.com/m3db/m3/src/dbnode/digest"
    27  	"github.com/m3db/m3/src/dbnode/persist/schema"
    28  
    29  	"gopkg.in/vmihailenco/msgpack.v2"
    30  )
    31  
    32  type encodeVersionFn func(value int)
    33  type encodeNumObjectFieldsForFn func(value objectType)
    34  type encodeVarintFn func(value int64)
    35  type encodeVarUintFn func(value uint64)
    36  type encodeFloat64Fn func(value float64)
    37  type encodeBytesFn func(value []byte)
    38  type encodeArrayLenFn func(value int)
    39  
// Encoder encodes data in msgpack format for persistence.
//
// Output accumulates in an internal buffer that is read with Bytes and
// cleared with Reset.
type Encoder struct {
	// buf receives everything written by enc.
	buf *bytes.Buffer
	// enc is the underlying msgpack encoder writing into buf.
	enc *msgpack.Encoder
	// err records the error returned by a primitive encode call. While it is
	// non-nil the default primitives and the exported Encode* methods do no
	// further work; Reset clears it.
	err error

	// Indirections to the primitive encode operations, assigned in newEncoder
	// to the corresponding methods on this Encoder.
	encodeVersionFn            encodeVersionFn
	encodeNumObjectFieldsForFn encodeNumObjectFieldsForFn
	encodeVarintFn             encodeVarintFn
	encodeVarUintFn            encodeVarUintFn
	encodeFloat64Fn            encodeFloat64Fn
	encodeBytesFn              encodeBytesFn
	encodeArrayLenFn           encodeArrayLenFn

	// legacy selects which index info / index entry layout the encoder writes.
	legacy LegacyEncodingOptions
}
    56  
    57  // LegacyEncodingIndexInfoVersion is the encoding/decoding version to use when processing index info files
    58  type LegacyEncodingIndexInfoVersion int
    59  
    60  const (
    61  	LegacyEncodingIndexVersionCurrent                                = LegacyEncodingIndexVersionV5
    62  	LegacyEncodingIndexVersionV1      LegacyEncodingIndexInfoVersion = iota
    63  	LegacyEncodingIndexVersionV2
    64  	LegacyEncodingIndexVersionV3
    65  	LegacyEncodingIndexVersionV4
    66  	LegacyEncodingIndexVersionV5
    67  )
    68  
    69  // LegacyEncodingIndexEntryVersion is the encoding/decoding version to use when processing index entries
    70  type LegacyEncodingIndexEntryVersion int
    71  
    72  const (
    73  	LegacyEncodingIndexEntryVersionCurrent                                 = LegacyEncodingIndexEntryVersionV3
    74  	LegacyEncodingIndexEntryVersionV1      LegacyEncodingIndexEntryVersion = iota
    75  	LegacyEncodingIndexEntryVersionV2
    76  	LegacyEncodingIndexEntryVersionV3
    77  )
    78  
// LegacyEncodingOptions allows you to specify the version to use when encoding/decoding
// index info and index files.
type LegacyEncodingOptions struct {
	// EncodeLegacyIndexInfoVersion is the index info version used when encoding.
	EncodeLegacyIndexInfoVersion LegacyEncodingIndexInfoVersion
	// DecodeLegacyIndexInfoVersion is the index info version used when decoding.
	DecodeLegacyIndexInfoVersion LegacyEncodingIndexInfoVersion

	// EncodeLegacyIndexEntryVersion is the index entry version used when encoding.
	EncodeLegacyIndexEntryVersion LegacyEncodingIndexEntryVersion
	// DecodeLegacyIndexEntryVersion is the index entry version used when decoding.
	DecodeLegacyIndexEntryVersion LegacyEncodingIndexEntryVersion
}
    88  
    89  // DefaultLegacyEncodingOptions are the default options to use with msgpack.Encoder and msgpack.Decoder.
    90  var DefaultLegacyEncodingOptions = LegacyEncodingOptions{
    91  	EncodeLegacyIndexInfoVersion: LegacyEncodingIndexVersionCurrent,
    92  	DecodeLegacyIndexInfoVersion: LegacyEncodingIndexVersionCurrent,
    93  
    94  	EncodeLegacyIndexEntryVersion: LegacyEncodingIndexEntryVersionCurrent,
    95  	DecodeLegacyIndexEntryVersion: LegacyEncodingIndexEntryVersionCurrent,
    96  }
    97  
    98  // NewEncoder creates a new encoder.
    99  func NewEncoder() *Encoder {
   100  	return newEncoder(DefaultLegacyEncodingOptions)
   101  }
   102  
   103  // NewEncoderWithOptions creates a new encoder with the specified legacy options.
   104  func NewEncoderWithOptions(legacy LegacyEncodingOptions) *Encoder {
   105  	return newEncoder(legacy)
   106  }
   107  
   108  func newEncoder(legacy LegacyEncodingOptions) *Encoder {
   109  	buf := bytes.NewBuffer(nil)
   110  	enc := &Encoder{
   111  		buf: buf,
   112  		enc: msgpack.NewEncoder(buf),
   113  	}
   114  
   115  	enc.encodeVersionFn = enc.encodeVersion
   116  	enc.encodeNumObjectFieldsForFn = enc.encodeNumObjectFieldsFor
   117  	enc.encodeVarintFn = enc.encodeVarint
   118  	enc.encodeVarUintFn = enc.encodeVarUint
   119  	enc.encodeFloat64Fn = enc.encodeFloat64
   120  	enc.encodeBytesFn = enc.encodeBytes
   121  	enc.encodeArrayLenFn = enc.encodeArrayLen
   122  
   123  	// Used primarily for testing however legitimate production uses exist (e.g. addition of IndexEntryChecksum in
   124  	// IndexEntryV3)
   125  	enc.legacy = legacy
   126  
   127  	return enc
   128  }
   129  
   130  // Reset resets the buffer.
   131  func (enc *Encoder) Reset() {
   132  	enc.buf.Truncate(0)
   133  	enc.err = nil
   134  }
   135  
   136  // Bytes returns the encoded bytes.
   137  func (enc *Encoder) Bytes() []byte { return enc.buf.Bytes() }
   138  
   139  // EncodeIndexInfo encodes index info.
   140  func (enc *Encoder) EncodeIndexInfo(info schema.IndexInfo) error {
   141  	if enc.err != nil {
   142  		return enc.err
   143  	}
   144  	enc.encodeRootObject(indexInfoVersion, indexInfoType)
   145  	switch enc.legacy.EncodeLegacyIndexInfoVersion {
   146  	case LegacyEncodingIndexVersionV1:
   147  		enc.encodeIndexInfoV1(info)
   148  	case LegacyEncodingIndexVersionV2:
   149  		enc.encodeIndexInfoV2(info)
   150  	case LegacyEncodingIndexVersionV3:
   151  		enc.encodeIndexInfoV3(info)
   152  	case LegacyEncodingIndexVersionV4:
   153  		enc.encodeIndexInfoV4(info)
   154  	default:
   155  		enc.encodeIndexInfoV5(info)
   156  	}
   157  	return enc.err
   158  }
   159  
   160  // EncodeIndexEntry encodes index entry.
   161  func (enc *Encoder) EncodeIndexEntry(entry schema.IndexEntry) error {
   162  	if enc.err != nil {
   163  		return enc.err
   164  	}
   165  
   166  	// There's no guarantee EncodeIndexEntry is called with an empty buffer so ensure
   167  	// only checksumming the bits we care about.
   168  	checksumStart := enc.buf.Len()
   169  
   170  	enc.encodeRootObject(indexEntryVersion, indexEntryType)
   171  	switch enc.legacy.EncodeLegacyIndexEntryVersion {
   172  	case LegacyEncodingIndexEntryVersionV1:
   173  		enc.encodeIndexEntryV1(entry)
   174  	case LegacyEncodingIndexEntryVersionV2:
   175  		enc.encodeIndexEntryV2(entry)
   176  	default:
   177  		enc.encodeIndexEntryV3(entry, checksumStart)
   178  	}
   179  	return enc.err
   180  }
   181  
   182  // EncodeIndexSummary encodes index summary.
   183  func (enc *Encoder) EncodeIndexSummary(summary schema.IndexSummary) error {
   184  	if enc.err != nil {
   185  		return enc.err
   186  	}
   187  	enc.encodeRootObject(indexSummaryVersion, indexSummaryType)
   188  	enc.encodeIndexSummary(summary)
   189  	return enc.err
   190  }
   191  
   192  // EncodeLogInfo encodes commit log info.
   193  func (enc *Encoder) EncodeLogInfo(info schema.LogInfo) error {
   194  	if enc.err != nil {
   195  		return enc.err
   196  	}
   197  	enc.encodeRootObject(logInfoVersion, logInfoType)
   198  	enc.encodeLogInfo(info)
   199  	return enc.err
   200  }
   201  
   202  // EncodeLogEntry encodes commit log entry.
   203  func (enc *Encoder) EncodeLogEntry(entry schema.LogEntry) error {
   204  	if enc.err != nil {
   205  		return enc.err
   206  	}
   207  	enc.encodeRootObject(logEntryVersion, logEntryType)
   208  	enc.encodeLogEntry(entry)
   209  	return enc.err
   210  }
   211  
   212  // EncodeLogMetadata encodes commit log metadata
   213  func (enc *Encoder) EncodeLogMetadata(entry schema.LogMetadata) error {
   214  	if enc.err != nil {
   215  		return enc.err
   216  	}
   217  	enc.encodeRootObject(logMetadataVersion, logMetadataType)
   218  	enc.encodeLogMetadata(entry)
   219  	return enc.err
   220  }
   221  
// encodeIndexInfoV1 writes the V1 index info layout: an array header of 6
// followed by the V1 fields in the order below.
//
// We only keep this method around for the sake of testing
// backwards-compatibility.
func (enc *Encoder) encodeIndexInfoV1(info schema.IndexInfo) {
	// Manually encode num fields for testing purposes.
	enc.encodeArrayLenFn(6) // V1 had 6 fields.
	enc.encodeVarintFn(info.BlockStart)
	enc.encodeVarintFn(info.BlockSize)
	enc.encodeVarintFn(info.Entries)
	enc.encodeVarintFn(info.MajorVersion)
	enc.encodeIndexSummariesInfo(info.Summaries)
	enc.encodeIndexBloomFilterInfo(info.BloomFilter)
}
   234  
// encodeIndexInfoV2 writes the V2 index info layout: an array header of 8
// followed by the six leading fields plus SnapshotTime and FileType.
//
// We only keep this method around for the sake of testing
// backwards-compatibility.
func (enc *Encoder) encodeIndexInfoV2(info schema.IndexInfo) {
	// Manually encode num fields for testing purposes.
	enc.encodeArrayLenFn(8) // V2 had 8 fields.
	enc.encodeVarintFn(info.BlockStart)
	enc.encodeVarintFn(info.BlockSize)
	enc.encodeVarintFn(info.Entries)
	enc.encodeVarintFn(info.MajorVersion)
	enc.encodeIndexSummariesInfo(info.Summaries)
	enc.encodeIndexBloomFilterInfo(info.BloomFilter)
	enc.encodeVarintFn(info.SnapshotTime)
	enc.encodeVarintFn(int64(info.FileType))
}
   249  
// encodeIndexInfoV3 writes the V3 index info layout: an array header of 9
// followed by the eight leading fields plus SnapshotID.
//
// We only keep this method around for the sake of testing
// backwards-compatibility.
func (enc *Encoder) encodeIndexInfoV3(info schema.IndexInfo) {
	// Manually encode num fields for testing purposes.
	enc.encodeArrayLenFn(9) // V3 had 9 fields.
	enc.encodeVarintFn(info.BlockStart)
	enc.encodeVarintFn(info.BlockSize)
	enc.encodeVarintFn(info.Entries)
	enc.encodeVarintFn(info.MajorVersion)
	enc.encodeIndexSummariesInfo(info.Summaries)
	enc.encodeIndexBloomFilterInfo(info.BloomFilter)
	enc.encodeVarintFn(info.SnapshotTime)
	enc.encodeVarintFn(int64(info.FileType))
	enc.encodeBytesFn(info.SnapshotID)
}
   265  
// encodeIndexInfoV4 writes the V4 index info layout: an array header of 10
// followed by the nine leading fields plus VolumeIndex.
func (enc *Encoder) encodeIndexInfoV4(info schema.IndexInfo) {
	enc.encodeArrayLenFn(10) // V4 had 10 fields.
	enc.encodeVarintFn(info.BlockStart)
	enc.encodeVarintFn(info.BlockSize)
	enc.encodeVarintFn(info.Entries)
	enc.encodeVarintFn(info.MajorVersion)
	enc.encodeIndexSummariesInfo(info.Summaries)
	enc.encodeIndexBloomFilterInfo(info.BloomFilter)
	enc.encodeVarintFn(info.SnapshotTime)
	enc.encodeVarintFn(int64(info.FileType))
	enc.encodeBytesFn(info.SnapshotID)
	enc.encodeVarintFn(int64(info.VolumeIndex))
}
   279  
// encodeIndexInfoV5 writes the V5 index info layout. Unlike the older layouts
// the field count is not hard-coded here; it is emitted by
// encodeNumObjectFieldsForFn for indexInfoType. MinorVersion is written last.
func (enc *Encoder) encodeIndexInfoV5(info schema.IndexInfo) {
	enc.encodeNumObjectFieldsForFn(indexInfoType)
	enc.encodeVarintFn(info.BlockStart)
	enc.encodeVarintFn(info.BlockSize)
	enc.encodeVarintFn(info.Entries)
	enc.encodeVarintFn(info.MajorVersion)
	enc.encodeIndexSummariesInfo(info.Summaries)
	enc.encodeIndexBloomFilterInfo(info.BloomFilter)
	enc.encodeVarintFn(info.SnapshotTime)
	enc.encodeVarintFn(int64(info.FileType))
	enc.encodeBytesFn(info.SnapshotID)
	enc.encodeVarintFn(int64(info.VolumeIndex))
	enc.encodeVarintFn(info.MinorVersion)
}
   294  
// encodeIndexSummariesInfo writes the nested summaries info object: its field
// count header followed by the Summaries value.
func (enc *Encoder) encodeIndexSummariesInfo(info schema.IndexSummariesInfo) {
	enc.encodeNumObjectFieldsForFn(indexSummariesInfoType)
	enc.encodeVarintFn(info.Summaries)
}
   299  
// encodeIndexBloomFilterInfo writes the nested bloom filter info object: its
// field count header followed by NumElementsM and NumHashesK.
func (enc *Encoder) encodeIndexBloomFilterInfo(info schema.IndexBloomFilterInfo) {
	enc.encodeNumObjectFieldsForFn(indexBloomFilterInfoType)
	enc.encodeVarintFn(info.NumElementsM)
	enc.encodeVarintFn(info.NumHashesK)
}
   305  
// encodeIndexEntryV1 writes the V1 index entry layout: an array header of 5
// followed by Index, ID, Size, Offset and DataChecksum.
//
// We only keep this method around for the sake of testing
// backwards-compatibility.
func (enc *Encoder) encodeIndexEntryV1(entry schema.IndexEntry) {
	// Manually encode num fields for testing purposes.
	enc.encodeArrayLenFn(5) // V1 had 5 fields.
	enc.encodeVarintFn(entry.Index)
	enc.encodeBytesFn(entry.ID)
	enc.encodeVarintFn(entry.Size)
	enc.encodeVarintFn(entry.Offset)
	enc.encodeVarintFn(entry.DataChecksum)
}
   317  
// encodeIndexEntryV2 writes the V2 index entry layout: an array header of 6
// followed by the five leading fields plus EncodedTags.
func (enc *Encoder) encodeIndexEntryV2(entry schema.IndexEntry) {
	enc.encodeArrayLenFn(6) // V2 had 6 fields.
	enc.encodeVarintFn(entry.Index)
	enc.encodeBytesFn(entry.ID)
	enc.encodeVarintFn(entry.Size)
	enc.encodeVarintFn(entry.Offset)
	enc.encodeVarintFn(entry.DataChecksum)
	enc.encodeBytesFn(entry.EncodedTags)
}
   327  
// encodeIndexEntryV3 writes the V3 index entry layout: the field count header
// for indexEntryType, the six leading fields, and finally a checksum.
//
// checksumStart is the buffer offset at which the checksummed region begins;
// EncodeIndexEntry passes the buffer length captured before the root object
// header was written.
func (enc *Encoder) encodeIndexEntryV3(entry schema.IndexEntry, checksumStart int) {
	enc.encodeNumObjectFieldsForFn(indexEntryType)
	enc.encodeVarintFn(entry.Index)
	enc.encodeBytesFn(entry.ID)
	enc.encodeVarintFn(entry.Size)
	enc.encodeVarintFn(entry.Offset)
	enc.encodeVarintFn(entry.DataChecksum)
	enc.encodeBytesFn(entry.EncodedTags)

	// The checksum covers everything in the buffer from checksumStart up to
	// this point, so it must be computed before the checksum field itself is
	// appended.
	checksum := digest.Checksum(enc.Bytes()[checksumStart:])
	enc.encodeVarintFn(int64(checksum))
}
   340  
// encodeIndexSummary writes the index summary object: its field count header
// followed by Index, ID and IndexEntryOffset.
func (enc *Encoder) encodeIndexSummary(summary schema.IndexSummary) {
	enc.encodeNumObjectFieldsForFn(indexSummaryType)
	enc.encodeVarintFn(summary.Index)
	enc.encodeBytesFn(summary.ID)
	enc.encodeVarintFn(summary.IndexEntryOffset)
}
   347  
// encodeLogInfo writes the commit log info object: its field count header,
// the two deprecated fields, then Index.
func (enc *Encoder) encodeLogInfo(info schema.LogInfo) {
	enc.encodeNumObjectFieldsForFn(logInfoType)

	// Deprecated, have to encode anyways for backwards compatibility, but we ignore the values.
	// TODO(V1): Remove when we make backwards incompatible changes with an upgrade to V1.
	enc.encodeVarintFn(info.DeprecatedDoNotUseStart)
	enc.encodeVarintFn(info.DeprecatedDoNotUseDuration)

	enc.encodeVarintFn(info.Index)
}
   358  
// encodeLogEntry writes the commit log entry object: its field count header
// followed by Index, Create, Metadata, Timestamp, Value, Unit and Annotation.
func (enc *Encoder) encodeLogEntry(entry schema.LogEntry) {
	enc.encodeNumObjectFieldsForFn(logEntryType)
	// Encode the index first because the commitlog reader needs this information first
	// to distribute the rest of the decoding to a group of workers.
	enc.encodeVarUintFn(entry.Index)
	enc.encodeVarintFn(entry.Create)
	enc.encodeBytesFn(entry.Metadata)
	enc.encodeVarintFn(entry.Timestamp)
	enc.encodeFloat64Fn(entry.Value)
	enc.encodeVarUintFn(uint64(entry.Unit))
	enc.encodeBytesFn(entry.Annotation)
}
   371  
// encodeLogMetadata writes the commit log metadata object: its field count
// header followed by ID, Namespace, Shard and EncodedTags.
func (enc *Encoder) encodeLogMetadata(metadata schema.LogMetadata) {
	enc.encodeNumObjectFieldsForFn(logMetadataType)
	enc.encodeBytesFn(metadata.ID)
	enc.encodeBytesFn(metadata.Namespace)
	enc.encodeVarUintFn(uint64(metadata.Shard))
	enc.encodeBytesFn(metadata.EncodedTags)
}
   379  
// encodeRootObject writes the header that precedes every top-level object:
// the version, the field count for rootObjectType, and the type of the object
// that follows.
func (enc *Encoder) encodeRootObject(version int, objType objectType) {
	enc.encodeVersionFn(version)
	enc.encodeNumObjectFieldsForFn(rootObjectType)
	enc.encodeObjectType(objType)
}
   385  
   386  func (enc *Encoder) encodeVersion(version int) {
   387  	enc.encodeVarintFn(int64(version))
   388  }
   389  
   390  func (enc *Encoder) encodeNumObjectFieldsFor(objType objectType) {
   391  	_, curr := numFieldsForType(objType)
   392  	enc.encodeArrayLenFn(curr)
   393  }
   394  
   395  func (enc *Encoder) encodeObjectType(objType objectType) {
   396  	enc.encodeVarintFn(int64(objType))
   397  }
   398  
   399  func (enc *Encoder) encodeVarint(value int64) {
   400  	if enc.err != nil {
   401  		return
   402  	}
   403  	enc.err = enc.enc.EncodeInt64(value)
   404  }
   405  
   406  func (enc *Encoder) encodeVarUint(value uint64) {
   407  	if enc.err != nil {
   408  		return
   409  	}
   410  	enc.err = enc.enc.EncodeUint64(value)
   411  }
   412  
   413  func (enc *Encoder) encodeFloat64(value float64) {
   414  	if enc.err != nil {
   415  		return
   416  	}
   417  	enc.err = enc.enc.EncodeFloat64(value)
   418  }
   419  
   420  func (enc *Encoder) encodeBytes(value []byte) {
   421  	if enc.err != nil {
   422  		return
   423  	}
   424  	enc.err = enc.enc.EncodeBytes(value)
   425  }
   426  
   427  func (enc *Encoder) encodeArrayLen(value int) {
   428  	if enc.err != nil {
   429  		return
   430  	}
   431  	enc.err = enc.enc.EncodeArrayLen(value)
   432  }