github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/persist/fs/msgpack/decoder_fast.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE 20 21 package msgpack 22 23 import ( 24 "fmt" 25 "math" 26 27 "github.com/m3db/m3/src/dbnode/persist/schema" 28 29 "gopkg.in/vmihailenco/msgpack.v2/codes" 30 ) 31 32 const ( 33 decodeLogEntryFuncName = "decodeLogEntry" 34 decodeLogMetadataFuncName = "decodeLogMetadata" 35 decodeIntFuncName = "decodeInt" 36 decodeUIntFuncName = "decodeUInt" 37 decodeFloat64FuncName = "decodeFloat64" 38 decodeBytesLenFuncName = "decodeBytesLen" 39 decodeBytesFuncName = "decodeBytes" 40 // nolint: unused 41 decodeArrayLenFuncName = "decodeArrayLen" 42 ) 43 44 // DecodeLogEntryFast decodes a commit log entry with no buffering and using optimized helper 45 // functions that bypass the msgpack decoding library by manually inlining the equivalent code. 46 // 47 // The reason we had to bypass the msgpack decoding library is that during perf testing we found that 48 // this function was spending most of its time setting up stack frames for function calls. While 49 // the overhead of a function call in Golang is small, when every helper function does nothing more 50 // than read a few bytes from an in-memory array the function call overhead begins to dominate, 51 // especially when each call to this function results in dozens of such helper function calls. 52 // 53 // Manually inlining the msgpack decoding results in a lot of code duplication for this one path, but 54 // we pay the price because this codepath is one of the primary bottlenecks influencing how fast we 55 // can bootstrap M3DB from the commitlog. As a result, almost any performance gains that can be had in 56 // this function are worth it. 57 // 58 // Before modifying this function, please run the BenchmarkLogEntryDecodeFast benchmark. 59 // 60 // Also note that there are extensive prop tests for this function in the encoder_decoder_prop_test.go 61 // file which verify its correctness, as well as its resilience to arbitrary data corruption and truncation. 62 func DecodeLogEntryFast(b []byte) (schema.LogEntry, error) { 63 var ( 64 empty schema.LogEntry 65 schema schema.LogEntry 66 ) 67 68 if len(b) < len(logEntryHeader) { 69 return schema, notEnoughBytesError( 70 decodeLogEntryFuncName, len(logEntryHeader), len(b)) 71 } 72 b = b[len(logEntryHeader):] 73 74 var err error 75 schema.Index, b, err = decodeUint(b) 76 if err != nil { 77 return empty, err 78 } 79 80 schema.Create, b, err = decodeInt(b) 81 if err != nil { 82 return empty, err 83 } 84 85 schema.Metadata, b, err = decodeBytes(b) 86 if err != nil { 87 return empty, err 88 } 89 90 schema.Timestamp, b, err = decodeInt(b) 91 if err != nil { 92 return empty, err 93 } 94 95 schema.Value, b, err = decodeFloat64(b) 96 if err != nil { 97 return empty, err 98 } 99 100 unit, b, err := decodeUint(b) 101 if err != nil { 102 return empty, err 103 } 104 schema.Unit = uint32(unit) 105 106 schema.Annotation, b, err = decodeBytes(b) 107 if err != nil { 108 return empty, err 109 } 110 111 return schema, err 112 } 113 114 // DecodeLogMetadataFast is the same as DecodeLogEntryFast except for the metadata 115 // entries instead of the data entries. 116 func DecodeLogMetadataFast(b []byte) (schema.LogMetadata, error) { 117 var ( 118 empty schema.LogMetadata 119 metadata schema.LogMetadata 120 ) 121 122 if len(b) < len(logMetadataHeader) { 123 return metadata, notEnoughBytesError( 124 decodeLogMetadataFuncName, len(logMetadataHeader), len(b)) 125 } 126 b = b[len(logMetadataHeader):] 127 128 id, b, err := decodeBytes(b) 129 if err != nil { 130 return empty, err 131 } 132 metadata.ID = id 133 134 metadata.Namespace, b, err = decodeBytes(b) 135 if err != nil { 136 return empty, err 137 } 138 139 shard, b, err := decodeUint(b) 140 if err != nil { 141 return empty, err 142 } 143 metadata.Shard = uint32(shard) 144 145 metadata.EncodedTags, b, err = decodeBytes(b) 146 if err != nil { 147 return empty, err 148 } 149 150 return metadata, nil 151 } 152 153 // decodeArrayLen not currently used, but may be needed in future if commit 154 // log entries ever includes array values. 155 // nolint: unused 156 func decodeArrayLen(b []byte) (int, []byte, error) { 157 if len(b) < 1 { 158 return 0, nil, notEnoughBytesError(decodeArrayLenFuncName, 1, len(b)) 159 } 160 161 c := b[0] 162 if c == codes.Nil { 163 return -1, b[1:], nil 164 } 165 166 if len(b) < 2 { 167 return 0, nil, notEnoughBytesError(decodeArrayLenFuncName, 1, len(b)) 168 } 169 if c >= codes.FixedArrayLow && c <= codes.FixedArrayHigh { 170 return int(c & codes.FixedArrayMask), b[1:], nil 171 } 172 173 v, b, err := decodeInt(b) 174 return int(v), b, err 175 } 176 177 func decodeInt(b []byte) (int64, []byte, error) { 178 if len(b) < 1 { 179 return 0, nil, notEnoughBytesError(decodeIntFuncName, 1, len(b)) 180 } 181 182 c := b[0] 183 b = b[1:] 184 185 if c == codes.Nil { 186 return 0, b, nil 187 } 188 189 if codes.IsFixedNum(c) { 190 return int64(int8(c)), b, nil 191 } 192 193 switch c { 194 case codes.Uint8: 195 if len(b) < 1 { 196 return 0, nil, notEnoughBytesError(decodeIntFuncName, 1, len(b)) 197 } 198 199 return int64(b[0]), b[1:], nil 200 case codes.Int8: 201 if len(b) < 1 { 202 return 0, nil, notEnoughBytesError(decodeIntFuncName, 1, len(b)) 203 } 204 205 return int64(int8(b[0])), b[1:], nil 206 case codes.Uint16: 207 if len(b) < 2 { 208 return 0, nil, notEnoughBytesError(decodeIntFuncName, 2, len(b)) 209 } 210 211 return int64((uint16(b[0]) << 8) | uint16(b[1])), b[2:], nil 212 case codes.Int16: 213 if len(b) < 2 { 214 return 0, nil, notEnoughBytesError(decodeIntFuncName, 2, len(b)) 215 } 216 217 return int64(int16((uint16(b[0]) << 8) | uint16(b[1]))), b[2:], nil 218 case codes.Uint32: 219 if len(b) < 4 { 220 return 0, nil, notEnoughBytesError(decodeIntFuncName, 4, len(b)) 221 } 222 223 return int64((uint32(b[0]) << 24) | 224 (uint32(b[1]) << 16) | 225 (uint32(b[2]) << 8) | 226 uint32(b[3])), b[4:], nil 227 case codes.Int32: 228 if len(b) < 4 { 229 return 0, nil, notEnoughBytesError(decodeIntFuncName, 4, len(b)) 230 } 231 232 return int64(int32((uint32(b[0]) << 24) | 233 (uint32(b[1]) << 16) | 234 (uint32(b[2]) << 8) | 235 uint32(b[3]))), b[4:], nil 236 case codes.Uint64, codes.Int64: 237 if len(b) < 8 { 238 return 0, nil, notEnoughBytesError(decodeIntFuncName, 8, len(b)) 239 } 240 241 return int64((uint64(b[0]) << 56) | 242 (uint64(b[1]) << 48) | 243 (uint64(b[2]) << 40) | 244 (uint64(b[3]) << 32) | 245 (uint64(b[4]) << 24) | 246 (uint64(b[5]) << 16) | 247 (uint64(b[6]) << 8) | 248 uint64(b[7])), b[8:], nil 249 default: 250 return 0, nil, fmt.Errorf("error decoding int: invalid code: %d", c) 251 } 252 } 253 254 func decodeUint(b []byte) (uint64, []byte, error) { 255 if len(b) < 1 { 256 return 0, nil, notEnoughBytesError(decodeUIntFuncName, 1, len(b)) 257 } 258 259 c := b[0] 260 b = b[1:] 261 262 if c == codes.Nil { 263 return 0, b, nil 264 } 265 266 if codes.IsFixedNum(c) { 267 return uint64(int8(c)), b, nil 268 } 269 270 switch c { 271 case codes.Uint8: 272 if len(b) < 1 { 273 return 0, nil, notEnoughBytesError(decodeUIntFuncName, 1, len(b)) 274 } 275 276 return uint64(b[0]), b[1:], nil 277 case codes.Int8: 278 if len(b) < 1 { 279 return 0, nil, notEnoughBytesError(decodeUIntFuncName, 1, len(b)) 280 } 281 282 return uint64(int8(b[0])), b[1:], nil 283 case codes.Uint16: 284 if len(b) < 2 { 285 return 0, nil, notEnoughBytesError(decodeUIntFuncName, 2, len(b)) 286 } 287 288 return uint64((uint16(b[0]) << 8) | uint16(b[1])), b[2:], nil 289 case codes.Int16: 290 if len(b) < 2 { 291 return 0, nil, notEnoughBytesError(decodeUIntFuncName, 2, len(b)) 292 } 293 294 return uint64(int16((uint16(b[0]) << 8) | uint16(b[1]))), b[2:], nil 295 case codes.Uint32: 296 if len(b) < 4 { 297 return 0, nil, notEnoughBytesError(decodeUIntFuncName, 4, len(b)) 298 } 299 300 return uint64((uint32(b[0]) << 24) | 301 (uint32(b[1]) << 16) | 302 (uint32(b[2]) << 8) | 303 uint32(b[3])), b[4:], nil 304 case codes.Int32: 305 if len(b) < 4 { 306 return 0, nil, notEnoughBytesError(decodeUIntFuncName, 4, len(b)) 307 } 308 309 return uint64(int32((uint32(b[0]) << 24) | 310 (uint32(b[1]) << 16) | 311 (uint32(b[2]) << 8) | 312 uint32(b[3]))), b[4:], nil 313 case codes.Uint64, codes.Int64: 314 if len(b) < 8 { 315 return 0, nil, notEnoughBytesError(decodeUIntFuncName, 8, len(b)) 316 } 317 318 return (uint64(b[0]) << 56) | 319 (uint64(b[1]) << 48) | 320 (uint64(b[2]) << 40) | 321 (uint64(b[3]) << 32) | 322 (uint64(b[4]) << 24) | 323 (uint64(b[5]) << 16) | 324 (uint64(b[6]) << 8) | 325 uint64(b[7]), b[8:], nil 326 default: 327 return 0, nil, fmt.Errorf("error decoding uint: invalid code: %d", c) 328 } 329 } 330 331 func decodeFloat64(b []byte) (float64, []byte, error) { 332 if len(b) < 5 { 333 return 0, nil, notEnoughBytesError(decodeFloat64FuncName, 5, len(b)) 334 } 335 336 c := b[0] 337 b = b[1:] 338 339 if c == codes.Float { 340 i := (uint32(b[0]) << 24) | 341 (uint32(b[1]) << 16) | 342 (uint32(b[2]) << 8) | 343 uint32(b[3]) 344 return float64(math.Float32frombits(i)), b[4:], nil 345 } 346 347 if len(b) < 8 { 348 return 0, nil, notEnoughBytesError(decodeFloat64FuncName, 8, len(b)) 349 } 350 351 if c == codes.Double { 352 i := (uint64(b[0]) << 56) | 353 (uint64(b[1]) << 48) | 354 (uint64(b[2]) << 40) | 355 (uint64(b[3]) << 32) | 356 (uint64(b[4]) << 24) | 357 (uint64(b[5]) << 16) | 358 (uint64(b[6]) << 8) | 359 uint64(b[7]) 360 return math.Float64frombits(i), b[8:], nil 361 } 362 363 return 0, b, fmt.Errorf("error decoding float64: invalid code: %d", c) 364 } 365 366 func decodeBytesLen(b []byte) (int, []byte, error) { 367 if len(b) < 1 { 368 return 0, nil, notEnoughBytesError(decodeBytesLenFuncName, 1, len(b)) 369 } 370 371 c := b[0] 372 b = b[1:] 373 374 if c == codes.Nil { 375 return -1, b, nil 376 } else if codes.IsFixedString(c) { 377 return int(c & codes.FixedStrMask), b, nil 378 } 379 380 switch c { 381 case codes.Str8, codes.Bin8: 382 if len(b) < 1 { 383 return 0, nil, notEnoughBytesError(decodeBytesLenFuncName, 1, len(b)) 384 } 385 386 return int(b[0]), b[1:], nil 387 case codes.Str16, codes.Bin16: 388 if len(b) < 2 { 389 return 0, nil, notEnoughBytesError(decodeBytesLenFuncName, 2, len(b)) 390 } 391 392 return int((uint16(b[0]) << 8) | uint16(b[1])), b[2:], nil 393 case codes.Str32, codes.Bin32: 394 if len(b) < 4 { 395 return 0, nil, notEnoughBytesError(decodeBytesLenFuncName, 4, len(b)) 396 } 397 398 return int(int32((uint32(b[0]) << 24) | 399 (uint32(b[1]) << 16) | 400 (uint32(b[2]) << 8) | 401 uint32(b[3]))), b[4:], nil 402 } 403 return -1, nil, fmt.Errorf("error decoding bytes len: invalid code: %d", c) 404 } 405 406 func decodeBytes(b []byte) ([]byte, []byte, error) { 407 bytesLen, b, err := decodeBytesLen(b) 408 if err != nil { 409 return nil, nil, err 410 } 411 412 if bytesLen == -1 { 413 return nil, b, nil 414 } 415 416 // Smaller than zero check to handle corrupt data 417 if len(b) < bytesLen || bytesLen < 0 { 418 return nil, nil, notEnoughBytesError(decodeBytesFuncName, bytesLen, len(b)) 419 } 420 421 return b[:bytesLen], b[bytesLen:], nil 422 } 423 424 func notEnoughBytesError(funcName string, expected, actual int) error { 425 return fmt.Errorf( 426 "not enough bytes for msgpack decode in %s, expected %d but had %d", 427 funcName, expected, actual) 428 }