go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/logdog/common/storage/bigtable/rowKey.go (about)

     1  // Copyright 2015 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bigtable
    16  
    17  import (
    18  	"bytes"
    19  	"crypto/sha256"
    20  	"encoding/base64"
    21  	"encoding/hex"
    22  	"errors"
    23  	"strings"
    24  	"sync"
    25  
    26  	"go.chromium.org/luci/common/data/cmpbin"
    27  )
    28  
    29  // rowKeyBufferPool stores a pool of allocated Buffer instances for reuse when
    30  // constructing row keys.
    31  var (
    32  	// errMalformedRowKey is an error that is returned if the row key in the
    33  	// tables does not comform to our row key structure.
    34  	errMalformedRowKey = errors.New("bigtable: malformed row key")
    35  
    36  	// encodedPrefixSize is the size in bytes of the encoded row key prefix. All
    37  	// rows from the same stream path share this prefix.
    38  	encodedPrefixSize = base64.URLEncoding.EncodedLen(sha256.Size)
    39  	// maxEncodedKeySize is the maximum size in bytes of a full row key.
    40  	maxEncodedKeySize = encodedPrefixSize + (2 * (len("~") + hex.EncodedLen(cmpbin.MaxIntLen64)))
    41  
    42  	rowKeyBufferPool = sync.Pool{
    43  		New: func() any {
    44  			return &rowKeyBuffers{}
    45  		},
    46  	}
    47  )
    48  
    49  type rowKeyBuffers struct {
    50  	// binBuf is a Buffer to write binary data for encoding.
    51  	binBuf bytes.Buffer
    52  	// key is where the encoded key get built.
    53  	key []byte
    54  	// size is the current number of bytes used in "key".
    55  	size int
    56  }
    57  
    58  func withRowKeyBuffers(f func(rkb *rowKeyBuffers)) {
    59  	rkb := rowKeyBufferPool.Get().(*rowKeyBuffers)
    60  	defer rowKeyBufferPool.Put(rkb)
    61  
    62  	rkb.reset()
    63  	f(rkb)
    64  }
    65  
    66  func (rkb *rowKeyBuffers) reset() {
    67  	if rkb.key == nil {
    68  		rkb.key = make([]byte, maxEncodedKeySize)
    69  	}
    70  	rkb.size = 0
    71  }
    72  
    73  func (rkb *rowKeyBuffers) appendPathPrefix(pathHash []byte) {
    74  	base64.URLEncoding.Encode(rkb.remaining(), pathHash)
    75  	rkb.size += base64.URLEncoding.EncodedLen(len(pathHash))
    76  }
    77  
    78  func (rkb *rowKeyBuffers) appendInt64(i int64) {
    79  	// Encode index to "cmpbin".
    80  	rkb.binBuf.Reset()
    81  	cmpbin.WriteInt(&rkb.binBuf, i)
    82  
    83  	rkb.size += hex.Encode(rkb.remaining(), rkb.binBuf.Bytes())
    84  }
    85  
    86  func (rkb *rowKeyBuffers) appendBytes(d []byte) {
    87  	rkb.size += copy(rkb.remaining(), d)
    88  }
    89  
    90  func (rkb *rowKeyBuffers) remaining() []byte {
    91  	return rkb.key[rkb.size:]
    92  }
    93  
    94  func (rkb *rowKeyBuffers) value() string {
    95  	return string(rkb.key[:rkb.size])
    96  }
    97  
    98  // rowKey is a BigTable row key.
    99  //
   100  // The row key is formed from a Path and its Index. The goal:
   101  // - Rows with the same path should be clustered.
   102  // - Rows with the same path should be sorted according to index.
   103  //
   104  // The row key index is the index of the LAST entry in the row. Therefore, a
   105  // row for a given row key will span log indexes [index-count+1..index].
   106  //
   107  // Since BigTable rows must be valid UTF8, and since paths are effectively
   108  // unbounded, the row key will be formed by composing:
   109  //
   110  // [ base64(sha256(path)) ] + '~' + [ hex(cmpbin(index)) ] + '~' +
   111  // [hex(cmpbin(count)]
   112  type rowKey struct {
   113  	pathHash []byte
   114  	index    int64
   115  	count    int64
   116  }
   117  
   118  // newRowKey generates the row key matching a given entry path and index.
   119  func newRowKey(project, path string, index, count int64) *rowKey {
   120  	h := sha256.New()
   121  
   122  	_, _ = h.Write([]byte(project))
   123  	_, _ = h.Write([]byte("/"))
   124  	_, _ = h.Write([]byte(path))
   125  	return &rowKey{
   126  		pathHash: h.Sum(nil),
   127  		index:    index,
   128  		count:    count,
   129  	}
   130  }
   131  
   132  // decodeRowKey decodes an encoded row key into its structural components.
   133  func decodeRowKey(v string) (*rowKey, error) {
   134  	keyParts := strings.SplitN(v, "~", 3)
   135  	if len(keyParts) != 3 {
   136  		return nil, errMalformedRowKey
   137  	}
   138  
   139  	hashEnc, idxEnc, countEnc := keyParts[0], keyParts[1], keyParts[2]
   140  	if base64.URLEncoding.DecodedLen(len(hashEnc)) < sha256.Size {
   141  		return nil, errMalformedRowKey
   142  	}
   143  
   144  	// Decode encoded project/path hash.
   145  	var err error
   146  	rk := rowKey{}
   147  	rk.pathHash, err = base64.URLEncoding.DecodeString(hashEnc)
   148  	if err != nil {
   149  		return nil, errMalformedRowKey
   150  	}
   151  
   152  	// Decode index.
   153  	rk.index, err = readHexInt64(idxEnc)
   154  	if err != nil {
   155  		return nil, err
   156  	}
   157  
   158  	// If a count is available, decode that as well.
   159  	rk.count, err = readHexInt64(countEnc)
   160  	if err != nil {
   161  		return nil, err
   162  	}
   163  
   164  	return &rk, nil
   165  }
   166  
   167  func (rk *rowKey) String() string {
   168  	return rk.encode()
   169  }
   170  
   171  // newRowKey instantiates a new rowKey from its components.
   172  func (rk *rowKey) encode() (v string) {
   173  	// Write the final key to "key": (base64(HASH)~hex(INDEX))
   174  	withRowKeyBuffers(func(rkb *rowKeyBuffers) {
   175  		rkb.appendPathPrefix(rk.pathHash)
   176  		rkb.appendBytes([]byte("~"))
   177  		rkb.appendInt64(rk.index)
   178  		rkb.appendBytes([]byte("~"))
   179  		rkb.appendInt64(rk.count)
   180  		v = rkb.value()
   181  	})
   182  	return
   183  }
   184  
   185  // prefix returns the encoded path prefix for the row key, which is the hash of
   186  // that row's project/path.
   187  func (rk *rowKey) pathPrefix() (v string) {
   188  	withRowKeyBuffers(func(rkb *rowKeyBuffers) {
   189  		rkb.appendPathPrefix(rk.pathHash)
   190  		rkb.appendBytes([]byte("~"))
   191  		v = rkb.value()
   192  	})
   193  	return
   194  }
   195  
   196  // pathPrefixUpperBound returns the path prefix that is higher than any path
   197  // allowed in the row key space.
   198  //
   199  // This is accomplished by appending a "~" character to the path prefix,
   200  // creating something like this:
   201  //
   202  //	prefix~~
   203  //
   204  // The "prefix~" is shared with all keys in "rk", but the extra "~" is larger
   205  // than any hex-encoded row index, so this key will always be larger.
   206  func (rk *rowKey) pathPrefixUpperBound() (v string) {
   207  	withRowKeyBuffers(func(rkb *rowKeyBuffers) {
   208  		rkb.appendPathPrefix(rk.pathHash)
   209  		rkb.appendBytes([]byte("~~"))
   210  		v = rkb.value()
   211  	})
   212  	return
   213  }
   214  
   215  // firstIndex returns the first log entry index represented by this row key.
   216  func (rk *rowKey) firstIndex() int64 { return rk.index - rk.count + 1 }
   217  
   218  // sharesPrefixWith tests if the "path" component of the row key "rk" matches
   219  // the "path" component of "o".
   220  func (rk *rowKey) sharesPathWith(o *rowKey) bool {
   221  	return bytes.Equal(rk.pathHash, o.pathHash)
   222  }
   223  
   224  func readHexInt64(v string) (int64, error) {
   225  	d, err := hex.DecodeString(v)
   226  	if err != nil {
   227  		return 0, errMalformedRowKey
   228  	}
   229  
   230  	dr := bytes.NewReader(d)
   231  	value, _, err := cmpbin.ReadInt(dr)
   232  	if err != nil {
   233  		return 0, errMalformedRowKey
   234  	}
   235  
   236  	// There should be no more data.
   237  	if dr.Len() > 0 {
   238  		return 0, errMalformedRowKey
   239  	}
   240  
   241  	return value, nil
   242  }