github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/persist/fs/msgpack/decoder_fast.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE
    20  
    21  package msgpack
    22  
    23  import (
    24  	"fmt"
    25  	"math"
    26  
    27  	"github.com/m3db/m3/src/dbnode/persist/schema"
    28  
    29  	"gopkg.in/vmihailenco/msgpack.v2/codes"
    30  )
    31  
// Names of the decode helpers in this file. They are passed as the funcName
// argument of notEnoughBytesError so that a truncation error identifies the
// decoding step that ran out of input.
//
// Note that decodeUIntFuncName's value is spelled "decodeUInt" even though the
// function it labels is named decodeUint; the string is part of the emitted
// error text.
const (
	decodeLogEntryFuncName    = "decodeLogEntry"
	decodeLogMetadataFuncName = "decodeLogMetadata"
	decodeIntFuncName         = "decodeInt"
	decodeUIntFuncName        = "decodeUInt"
	decodeFloat64FuncName     = "decodeFloat64"
	decodeBytesLenFuncName    = "decodeBytesLen"
	decodeBytesFuncName       = "decodeBytes"
	// Only referenced by decodeArrayLen, which is itself marked unused.
	// nolint: unused
	decodeArrayLenFuncName = "decodeArrayLen"
)
    43  
    44  // DecodeLogEntryFast decodes a commit log entry with no buffering and using optimized helper
    45  // functions that bypass the msgpack decoding library by manually inlining the equivalent code.
    46  //
    47  // The reason we had to bypass the msgpack decoding library is that during perf testing we found that
    48  // this function was spending most of its time setting up stack frames for function calls. While
    49  // the overhead of a function call in Golang is small, when every helper function does nothing more
    50  // than read a few bytes from an in-memory array the function call overhead begins to dominate,
    51  // especially when each call to this function results in dozens of such helper function calls.
    52  //
    53  // Manually inlining the msgpack decoding results in a lot of code duplication for this one path, but
    54  // we pay the price because this codepath is one of the primary bottlenecks influencing how fast we
    55  // can bootstrap M3DB from the commitlog. As a result, almost any performance gains that can be had in
    56  // this function are worth it.
    57  //
    58  // Before modifying this function, please run the BenchmarkLogEntryDecodeFast benchmark.
    59  //
    60  // Also note that there are extensive prop tests for this function in the encoder_decoder_prop_test.go
    61  // file which verify its correctness, as well as its resilience to arbitrary data corruption and truncation.
    62  func DecodeLogEntryFast(b []byte) (schema.LogEntry, error) {
    63  	var (
    64  		empty  schema.LogEntry
    65  		schema schema.LogEntry
    66  	)
    67  
    68  	if len(b) < len(logEntryHeader) {
    69  		return schema, notEnoughBytesError(
    70  			decodeLogEntryFuncName, len(logEntryHeader), len(b))
    71  	}
    72  	b = b[len(logEntryHeader):]
    73  
    74  	var err error
    75  	schema.Index, b, err = decodeUint(b)
    76  	if err != nil {
    77  		return empty, err
    78  	}
    79  
    80  	schema.Create, b, err = decodeInt(b)
    81  	if err != nil {
    82  		return empty, err
    83  	}
    84  
    85  	schema.Metadata, b, err = decodeBytes(b)
    86  	if err != nil {
    87  		return empty, err
    88  	}
    89  
    90  	schema.Timestamp, b, err = decodeInt(b)
    91  	if err != nil {
    92  		return empty, err
    93  	}
    94  
    95  	schema.Value, b, err = decodeFloat64(b)
    96  	if err != nil {
    97  		return empty, err
    98  	}
    99  
   100  	unit, b, err := decodeUint(b)
   101  	if err != nil {
   102  		return empty, err
   103  	}
   104  	schema.Unit = uint32(unit)
   105  
   106  	schema.Annotation, b, err = decodeBytes(b)
   107  	if err != nil {
   108  		return empty, err
   109  	}
   110  
   111  	return schema, err
   112  }
   113  
   114  // DecodeLogMetadataFast is the same as DecodeLogEntryFast except for the metadata
   115  // entries instead of the data entries.
   116  func DecodeLogMetadataFast(b []byte) (schema.LogMetadata, error) {
   117  	var (
   118  		empty    schema.LogMetadata
   119  		metadata schema.LogMetadata
   120  	)
   121  
   122  	if len(b) < len(logMetadataHeader) {
   123  		return metadata, notEnoughBytesError(
   124  			decodeLogMetadataFuncName, len(logMetadataHeader), len(b))
   125  	}
   126  	b = b[len(logMetadataHeader):]
   127  
   128  	id, b, err := decodeBytes(b)
   129  	if err != nil {
   130  		return empty, err
   131  	}
   132  	metadata.ID = id
   133  
   134  	metadata.Namespace, b, err = decodeBytes(b)
   135  	if err != nil {
   136  		return empty, err
   137  	}
   138  
   139  	shard, b, err := decodeUint(b)
   140  	if err != nil {
   141  		return empty, err
   142  	}
   143  	metadata.Shard = uint32(shard)
   144  
   145  	metadata.EncodedTags, b, err = decodeBytes(b)
   146  	if err != nil {
   147  		return empty, err
   148  	}
   149  
   150  	return metadata, nil
   151  }
   152  
   153  // decodeArrayLen not currently used, but may be needed in future if commit
   154  // log entries ever includes array values.
   155  // nolint: unused
   156  func decodeArrayLen(b []byte) (int, []byte, error) {
   157  	if len(b) < 1 {
   158  		return 0, nil, notEnoughBytesError(decodeArrayLenFuncName, 1, len(b))
   159  	}
   160  
   161  	c := b[0]
   162  	if c == codes.Nil {
   163  		return -1, b[1:], nil
   164  	}
   165  
   166  	if len(b) < 2 {
   167  		return 0, nil, notEnoughBytesError(decodeArrayLenFuncName, 1, len(b))
   168  	}
   169  	if c >= codes.FixedArrayLow && c <= codes.FixedArrayHigh {
   170  		return int(c & codes.FixedArrayMask), b[1:], nil
   171  	}
   172  
   173  	v, b, err := decodeInt(b)
   174  	return int(v), b, err
   175  }
   176  
   177  func decodeInt(b []byte) (int64, []byte, error) {
   178  	if len(b) < 1 {
   179  		return 0, nil, notEnoughBytesError(decodeIntFuncName, 1, len(b))
   180  	}
   181  
   182  	c := b[0]
   183  	b = b[1:]
   184  
   185  	if c == codes.Nil {
   186  		return 0, b, nil
   187  	}
   188  
   189  	if codes.IsFixedNum(c) {
   190  		return int64(int8(c)), b, nil
   191  	}
   192  
   193  	switch c {
   194  	case codes.Uint8:
   195  		if len(b) < 1 {
   196  			return 0, nil, notEnoughBytesError(decodeIntFuncName, 1, len(b))
   197  		}
   198  
   199  		return int64(b[0]), b[1:], nil
   200  	case codes.Int8:
   201  		if len(b) < 1 {
   202  			return 0, nil, notEnoughBytesError(decodeIntFuncName, 1, len(b))
   203  		}
   204  
   205  		return int64(int8(b[0])), b[1:], nil
   206  	case codes.Uint16:
   207  		if len(b) < 2 {
   208  			return 0, nil, notEnoughBytesError(decodeIntFuncName, 2, len(b))
   209  		}
   210  
   211  		return int64((uint16(b[0]) << 8) | uint16(b[1])), b[2:], nil
   212  	case codes.Int16:
   213  		if len(b) < 2 {
   214  			return 0, nil, notEnoughBytesError(decodeIntFuncName, 2, len(b))
   215  		}
   216  
   217  		return int64(int16((uint16(b[0]) << 8) | uint16(b[1]))), b[2:], nil
   218  	case codes.Uint32:
   219  		if len(b) < 4 {
   220  			return 0, nil, notEnoughBytesError(decodeIntFuncName, 4, len(b))
   221  		}
   222  
   223  		return int64((uint32(b[0]) << 24) |
   224  			(uint32(b[1]) << 16) |
   225  			(uint32(b[2]) << 8) |
   226  			uint32(b[3])), b[4:], nil
   227  	case codes.Int32:
   228  		if len(b) < 4 {
   229  			return 0, nil, notEnoughBytesError(decodeIntFuncName, 4, len(b))
   230  		}
   231  
   232  		return int64(int32((uint32(b[0]) << 24) |
   233  			(uint32(b[1]) << 16) |
   234  			(uint32(b[2]) << 8) |
   235  			uint32(b[3]))), b[4:], nil
   236  	case codes.Uint64, codes.Int64:
   237  		if len(b) < 8 {
   238  			return 0, nil, notEnoughBytesError(decodeIntFuncName, 8, len(b))
   239  		}
   240  
   241  		return int64((uint64(b[0]) << 56) |
   242  			(uint64(b[1]) << 48) |
   243  			(uint64(b[2]) << 40) |
   244  			(uint64(b[3]) << 32) |
   245  			(uint64(b[4]) << 24) |
   246  			(uint64(b[5]) << 16) |
   247  			(uint64(b[6]) << 8) |
   248  			uint64(b[7])), b[8:], nil
   249  	default:
   250  		return 0, nil, fmt.Errorf("error decoding int: invalid code: %d", c)
   251  	}
   252  }
   253  
   254  func decodeUint(b []byte) (uint64, []byte, error) {
   255  	if len(b) < 1 {
   256  		return 0, nil, notEnoughBytesError(decodeUIntFuncName, 1, len(b))
   257  	}
   258  
   259  	c := b[0]
   260  	b = b[1:]
   261  
   262  	if c == codes.Nil {
   263  		return 0, b, nil
   264  	}
   265  
   266  	if codes.IsFixedNum(c) {
   267  		return uint64(int8(c)), b, nil
   268  	}
   269  
   270  	switch c {
   271  	case codes.Uint8:
   272  		if len(b) < 1 {
   273  			return 0, nil, notEnoughBytesError(decodeUIntFuncName, 1, len(b))
   274  		}
   275  
   276  		return uint64(b[0]), b[1:], nil
   277  	case codes.Int8:
   278  		if len(b) < 1 {
   279  			return 0, nil, notEnoughBytesError(decodeUIntFuncName, 1, len(b))
   280  		}
   281  
   282  		return uint64(int8(b[0])), b[1:], nil
   283  	case codes.Uint16:
   284  		if len(b) < 2 {
   285  			return 0, nil, notEnoughBytesError(decodeUIntFuncName, 2, len(b))
   286  		}
   287  
   288  		return uint64((uint16(b[0]) << 8) | uint16(b[1])), b[2:], nil
   289  	case codes.Int16:
   290  		if len(b) < 2 {
   291  			return 0, nil, notEnoughBytesError(decodeUIntFuncName, 2, len(b))
   292  		}
   293  
   294  		return uint64(int16((uint16(b[0]) << 8) | uint16(b[1]))), b[2:], nil
   295  	case codes.Uint32:
   296  		if len(b) < 4 {
   297  			return 0, nil, notEnoughBytesError(decodeUIntFuncName, 4, len(b))
   298  		}
   299  
   300  		return uint64((uint32(b[0]) << 24) |
   301  			(uint32(b[1]) << 16) |
   302  			(uint32(b[2]) << 8) |
   303  			uint32(b[3])), b[4:], nil
   304  	case codes.Int32:
   305  		if len(b) < 4 {
   306  			return 0, nil, notEnoughBytesError(decodeUIntFuncName, 4, len(b))
   307  		}
   308  
   309  		return uint64(int32((uint32(b[0]) << 24) |
   310  			(uint32(b[1]) << 16) |
   311  			(uint32(b[2]) << 8) |
   312  			uint32(b[3]))), b[4:], nil
   313  	case codes.Uint64, codes.Int64:
   314  		if len(b) < 8 {
   315  			return 0, nil, notEnoughBytesError(decodeUIntFuncName, 8, len(b))
   316  		}
   317  
   318  		return (uint64(b[0]) << 56) |
   319  			(uint64(b[1]) << 48) |
   320  			(uint64(b[2]) << 40) |
   321  			(uint64(b[3]) << 32) |
   322  			(uint64(b[4]) << 24) |
   323  			(uint64(b[5]) << 16) |
   324  			(uint64(b[6]) << 8) |
   325  			uint64(b[7]), b[8:], nil
   326  	default:
   327  		return 0, nil, fmt.Errorf("error decoding uint: invalid code: %d", c)
   328  	}
   329  }
   330  
   331  func decodeFloat64(b []byte) (float64, []byte, error) {
   332  	if len(b) < 5 {
   333  		return 0, nil, notEnoughBytesError(decodeFloat64FuncName, 5, len(b))
   334  	}
   335  
   336  	c := b[0]
   337  	b = b[1:]
   338  
   339  	if c == codes.Float {
   340  		i := (uint32(b[0]) << 24) |
   341  			(uint32(b[1]) << 16) |
   342  			(uint32(b[2]) << 8) |
   343  			uint32(b[3])
   344  		return float64(math.Float32frombits(i)), b[4:], nil
   345  	}
   346  
   347  	if len(b) < 8 {
   348  		return 0, nil, notEnoughBytesError(decodeFloat64FuncName, 8, len(b))
   349  	}
   350  
   351  	if c == codes.Double {
   352  		i := (uint64(b[0]) << 56) |
   353  			(uint64(b[1]) << 48) |
   354  			(uint64(b[2]) << 40) |
   355  			(uint64(b[3]) << 32) |
   356  			(uint64(b[4]) << 24) |
   357  			(uint64(b[5]) << 16) |
   358  			(uint64(b[6]) << 8) |
   359  			uint64(b[7])
   360  		return math.Float64frombits(i), b[8:], nil
   361  	}
   362  
   363  	return 0, b, fmt.Errorf("error decoding float64: invalid code: %d", c)
   364  }
   365  
   366  func decodeBytesLen(b []byte) (int, []byte, error) {
   367  	if len(b) < 1 {
   368  		return 0, nil, notEnoughBytesError(decodeBytesLenFuncName, 1, len(b))
   369  	}
   370  
   371  	c := b[0]
   372  	b = b[1:]
   373  
   374  	if c == codes.Nil {
   375  		return -1, b, nil
   376  	} else if codes.IsFixedString(c) {
   377  		return int(c & codes.FixedStrMask), b, nil
   378  	}
   379  
   380  	switch c {
   381  	case codes.Str8, codes.Bin8:
   382  		if len(b) < 1 {
   383  			return 0, nil, notEnoughBytesError(decodeBytesLenFuncName, 1, len(b))
   384  		}
   385  
   386  		return int(b[0]), b[1:], nil
   387  	case codes.Str16, codes.Bin16:
   388  		if len(b) < 2 {
   389  			return 0, nil, notEnoughBytesError(decodeBytesLenFuncName, 2, len(b))
   390  		}
   391  
   392  		return int((uint16(b[0]) << 8) | uint16(b[1])), b[2:], nil
   393  	case codes.Str32, codes.Bin32:
   394  		if len(b) < 4 {
   395  			return 0, nil, notEnoughBytesError(decodeBytesLenFuncName, 4, len(b))
   396  		}
   397  
   398  		return int(int32((uint32(b[0]) << 24) |
   399  			(uint32(b[1]) << 16) |
   400  			(uint32(b[2]) << 8) |
   401  			uint32(b[3]))), b[4:], nil
   402  	}
   403  	return -1, nil, fmt.Errorf("error decoding bytes len: invalid code: %d", c)
   404  }
   405  
   406  func decodeBytes(b []byte) ([]byte, []byte, error) {
   407  	bytesLen, b, err := decodeBytesLen(b)
   408  	if err != nil {
   409  		return nil, nil, err
   410  	}
   411  
   412  	if bytesLen == -1 {
   413  		return nil, b, nil
   414  	}
   415  
   416  	// Smaller than zero check to handle corrupt data
   417  	if len(b) < bytesLen || bytesLen < 0 {
   418  		return nil, nil, notEnoughBytesError(decodeBytesFuncName, bytesLen, len(b))
   419  	}
   420  
   421  	return b[:bytesLen], b[bytesLen:], nil
   422  }
   423  
   424  func notEnoughBytesError(funcName string, expected, actual int) error {
   425  	return fmt.Errorf(
   426  		"not enough bytes for msgpack decode in %s, expected %d but had %d",
   427  		funcName, expected, actual)
   428  }