github.com/datastax/go-cassandra-native-protocol@v0.0.0-20220706104457-5e8aad05cf90/primitive/vint.go (about)

     1  // Copyright 2021 DataStax
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package primitive
    16  
    17  import (
    18  	"fmt"
    19  	"io"
    20  	"math/bits"
    21  )
    22  
    23  // [unsigned vint] and [vint] are protocol-level structures.
    24  // They were first introduced in DSE protocols v1 and v2, then introduced in OSS protocol v5.
    25  // Since they are declared in section 3 of protocol specs, they are handled in the primitive package.
    26  // However, they are currently only used for encoding and decoding the CQL duration type, also introduced in the
    27  // same versions above.
    28  
    29  // [unsigned vint]
    30  // An unsigned variable length integer. A vint is encoded with the most significant byte (MSB) first.
    31  // The most significant byte will contain the information about how many extra bytes need to be read
    32  // as well as the most significant bits of the integer.
     33  // The number of extra bytes to read is encoded as the number of leading 1 bits of the first byte.
    34  // For example, if we need to read 2 more bytes the first byte will start with 110
    35  // (e.g. 256 000 will be encoded on 3 bytes as [110]00011 11101000 00000000)
    36  // If the encoded integer is 8 bytes long, the vint will be encoded on 9 bytes and the first
    37  // byte will be: 11111111.
    38  
    39  // Implementation note:
    40  // the binary package has the functions: PutVarint, PutUvarint, ReadVarint and ReadUvarint. The encoding scheme
    41  // used by these functions is similar to the one used here, but unfortunately Cassandra vints are big-endian,
    42  // while varints, in the functions above, are encoded in little-endian order.
    43  
    44  func ReadUnsignedVint(source io.Reader) (val uint64, read int, err error) {
    45  	var head [1]byte
    46  	read, err = io.ReadFull(source, head[:])
    47  	if err == nil {
    48  		firstByte := head[0]
    49  		if firstByte&0x80 == 0 {
    50  			val = uint64(firstByte)
    51  		} else {
    52  			remainingBytes := bits.LeadingZeros32(uint32(^firstByte)) - 24
    53  			tail := make([]byte, remainingBytes)
    54  			var n int
    55  			n, err = io.ReadFull(source, tail)
    56  			read += n
    57  			if err == nil {
    58  				val = uint64(firstByte & (0xff >> uint(remainingBytes)))
    59  				for i := 0; i < remainingBytes; i++ {
    60  					val <<= 8
    61  					val |= uint64(tail[i] & 0xff)
    62  				}
    63  			}
    64  		}
    65  	}
    66  	if err != nil {
    67  		err = fmt.Errorf("cannot read [unsigned vint]: %w", err)
    68  	}
    69  	return
    70  }
    71  
    72  func WriteUnsignedVint(v uint64, dest io.Writer) (written int, err error) {
    73  	magnitude := bits.LeadingZeros64(v)
    74  	numBytes := (639 - magnitude*9) >> 6
    75  	// It can be 1 or 0 is v ==0
    76  	if numBytes <= 1 {
    77  		written, err = dest.Write([]byte{byte(v)})
    78  	} else {
    79  		extraBytes := numBytes - 1
    80  		var buf = make([]byte, numBytes)
    81  		for i := extraBytes; i >= 0; i-- {
    82  			buf[i] = byte(v)
    83  			v >>= 8
    84  		}
    85  		buf[0] |= byte(^(0xff >> uint(extraBytes)))
    86  		written, err = dest.Write(buf)
    87  	}
    88  	if err != nil {
    89  		err = fmt.Errorf("cannot write [unsigned vint]: %w", err)
    90  	}
    91  	return
    92  }
    93  
    94  func LengthOfUnsignedVint(v uint64) int {
    95  	magnitude := bits.LeadingZeros64(v)
    96  	numBytes := (639 - magnitude*9) >> 6
    97  	// It can be 1 or 0 is v ==0
    98  	if numBytes <= 1 {
    99  		return 1
   100  	}
   101  	return numBytes
   102  }
   103  
   104  // [vint]
   105  // A signed variable length integer. This is encoded using zig-zag encoding and then sent
   106  // like an [unsigned vint]. Zig-zag encoding converts numbers as follows:
   107  // 0 = 0, -1 = 1, 1 = 2, -2 = 3, 2 = 4, -3 = 5, 3 = 6 and so forth.
   108  // The purpose is to send small negative values as small unsigned values, so that we save bytes on the wire.
   109  // To encode a value n use "(n >> 31) ^ (n << 1)" for 32 bit values, and "(n >> 63) ^ (n << 1)"
   110  // for 64 bit values where "^" is the xor operation, "<<" is the left shift operation and ">>" is
   111  // the arithmetic right shift operation (highest-order bit is replicated).
   112  // Decode with "(n >> 1) ^ -(n & 1)".
   113  
   114  func ReadVint(source io.Reader) (val int64, read int, err error) {
   115  	var unsigned uint64
   116  	unsigned, read, err = ReadUnsignedVint(source)
   117  	if err != nil {
   118  		err = fmt.Errorf("cannot read [vint]: %w", err)
   119  	} else {
   120  		val = decodeZigZag(unsigned)
   121  	}
   122  	return
   123  }
   124  
   125  func WriteVint(v int64, dest io.Writer) (written int, err error) {
   126  	written, err = WriteUnsignedVint(encodeZigZag(v), dest)
   127  	if err != nil {
   128  		err = fmt.Errorf("cannot write [vint]: %w", err)
   129  	}
   130  	return
   131  }
   132  
   133  func LengthOfVint(v int64) int {
   134  	return LengthOfUnsignedVint(encodeZigZag(v))
   135  }
   136  
   137  func decodeZigZag(n uint64) int64 {
   138  	return int64((n >> 1) ^ -(n & 1))
   139  }
   140  
   141  func encodeZigZag(n int64) uint64 {
   142  	return uint64((n >> 63) ^ (n << 1))
   143  }