go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/logdog/common/storage/bigtable/rowKey.go (about) 1 // Copyright 2015 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bigtable 16 17 import ( 18 "bytes" 19 "crypto/sha256" 20 "encoding/base64" 21 "encoding/hex" 22 "errors" 23 "strings" 24 "sync" 25 26 "go.chromium.org/luci/common/data/cmpbin" 27 ) 28 29 // rowKeyBufferPool stores a pool of allocated Buffer instances for reuse when 30 // constructing row keys. 31 var ( 32 // errMalformedRowKey is an error that is returned if the row key in the 33 // tables does not comform to our row key structure. 34 errMalformedRowKey = errors.New("bigtable: malformed row key") 35 36 // encodedPrefixSize is the size in bytes of the encoded row key prefix. All 37 // rows from the same stream path share this prefix. 38 encodedPrefixSize = base64.URLEncoding.EncodedLen(sha256.Size) 39 // maxEncodedKeySize is the maximum size in bytes of a full row key. 40 maxEncodedKeySize = encodedPrefixSize + (2 * (len("~") + hex.EncodedLen(cmpbin.MaxIntLen64))) 41 42 rowKeyBufferPool = sync.Pool{ 43 New: func() any { 44 return &rowKeyBuffers{} 45 }, 46 } 47 ) 48 49 type rowKeyBuffers struct { 50 // binBuf is a Buffer to write binary data for encoding. 51 binBuf bytes.Buffer 52 // key is where the encoded key get built. 53 key []byte 54 // size is the current number of bytes used in "key". 55 size int 56 } 57 58 func withRowKeyBuffers(f func(rkb *rowKeyBuffers)) { 59 rkb := rowKeyBufferPool.Get().(*rowKeyBuffers) 60 defer rowKeyBufferPool.Put(rkb) 61 62 rkb.reset() 63 f(rkb) 64 } 65 66 func (rkb *rowKeyBuffers) reset() { 67 if rkb.key == nil { 68 rkb.key = make([]byte, maxEncodedKeySize) 69 } 70 rkb.size = 0 71 } 72 73 func (rkb *rowKeyBuffers) appendPathPrefix(pathHash []byte) { 74 base64.URLEncoding.Encode(rkb.remaining(), pathHash) 75 rkb.size += base64.URLEncoding.EncodedLen(len(pathHash)) 76 } 77 78 func (rkb *rowKeyBuffers) appendInt64(i int64) { 79 // Encode index to "cmpbin". 80 rkb.binBuf.Reset() 81 cmpbin.WriteInt(&rkb.binBuf, i) 82 83 rkb.size += hex.Encode(rkb.remaining(), rkb.binBuf.Bytes()) 84 } 85 86 func (rkb *rowKeyBuffers) appendBytes(d []byte) { 87 rkb.size += copy(rkb.remaining(), d) 88 } 89 90 func (rkb *rowKeyBuffers) remaining() []byte { 91 return rkb.key[rkb.size:] 92 } 93 94 func (rkb *rowKeyBuffers) value() string { 95 return string(rkb.key[:rkb.size]) 96 } 97 98 // rowKey is a BigTable row key. 99 // 100 // The row key is formed from a Path and its Index. The goal: 101 // - Rows with the same path should be clustered. 102 // - Rows with the same path should be sorted according to index. 103 // 104 // The row key index is the index of the LAST entry in the row. Therefore, a 105 // row for a given row key will span log indexes [index-count+1..index]. 106 // 107 // Since BigTable rows must be valid UTF8, and since paths are effectively 108 // unbounded, the row key will be formed by composing: 109 // 110 // [ base64(sha256(path)) ] + '~' + [ hex(cmpbin(index)) ] + '~' + 111 // [hex(cmpbin(count)] 112 type rowKey struct { 113 pathHash []byte 114 index int64 115 count int64 116 } 117 118 // newRowKey generates the row key matching a given entry path and index. 119 func newRowKey(project, path string, index, count int64) *rowKey { 120 h := sha256.New() 121 122 _, _ = h.Write([]byte(project)) 123 _, _ = h.Write([]byte("/")) 124 _, _ = h.Write([]byte(path)) 125 return &rowKey{ 126 pathHash: h.Sum(nil), 127 index: index, 128 count: count, 129 } 130 } 131 132 // decodeRowKey decodes an encoded row key into its structural components. 133 func decodeRowKey(v string) (*rowKey, error) { 134 keyParts := strings.SplitN(v, "~", 3) 135 if len(keyParts) != 3 { 136 return nil, errMalformedRowKey 137 } 138 139 hashEnc, idxEnc, countEnc := keyParts[0], keyParts[1], keyParts[2] 140 if base64.URLEncoding.DecodedLen(len(hashEnc)) < sha256.Size { 141 return nil, errMalformedRowKey 142 } 143 144 // Decode encoded project/path hash. 145 var err error 146 rk := rowKey{} 147 rk.pathHash, err = base64.URLEncoding.DecodeString(hashEnc) 148 if err != nil { 149 return nil, errMalformedRowKey 150 } 151 152 // Decode index. 153 rk.index, err = readHexInt64(idxEnc) 154 if err != nil { 155 return nil, err 156 } 157 158 // If a count is available, decode that as well. 159 rk.count, err = readHexInt64(countEnc) 160 if err != nil { 161 return nil, err 162 } 163 164 return &rk, nil 165 } 166 167 func (rk *rowKey) String() string { 168 return rk.encode() 169 } 170 171 // newRowKey instantiates a new rowKey from its components. 172 func (rk *rowKey) encode() (v string) { 173 // Write the final key to "key": (base64(HASH)~hex(INDEX)) 174 withRowKeyBuffers(func(rkb *rowKeyBuffers) { 175 rkb.appendPathPrefix(rk.pathHash) 176 rkb.appendBytes([]byte("~")) 177 rkb.appendInt64(rk.index) 178 rkb.appendBytes([]byte("~")) 179 rkb.appendInt64(rk.count) 180 v = rkb.value() 181 }) 182 return 183 } 184 185 // prefix returns the encoded path prefix for the row key, which is the hash of 186 // that row's project/path. 187 func (rk *rowKey) pathPrefix() (v string) { 188 withRowKeyBuffers(func(rkb *rowKeyBuffers) { 189 rkb.appendPathPrefix(rk.pathHash) 190 rkb.appendBytes([]byte("~")) 191 v = rkb.value() 192 }) 193 return 194 } 195 196 // pathPrefixUpperBound returns the path prefix that is higher than any path 197 // allowed in the row key space. 198 // 199 // This is accomplished by appending a "~" character to the path prefix, 200 // creating something like this: 201 // 202 // prefix~~ 203 // 204 // The "prefix~" is shared with all keys in "rk", but the extra "~" is larger 205 // than any hex-encoded row index, so this key will always be larger. 206 func (rk *rowKey) pathPrefixUpperBound() (v string) { 207 withRowKeyBuffers(func(rkb *rowKeyBuffers) { 208 rkb.appendPathPrefix(rk.pathHash) 209 rkb.appendBytes([]byte("~~")) 210 v = rkb.value() 211 }) 212 return 213 } 214 215 // firstIndex returns the first log entry index represented by this row key. 216 func (rk *rowKey) firstIndex() int64 { return rk.index - rk.count + 1 } 217 218 // sharesPrefixWith tests if the "path" component of the row key "rk" matches 219 // the "path" component of "o". 220 func (rk *rowKey) sharesPathWith(o *rowKey) bool { 221 return bytes.Equal(rk.pathHash, o.pathHash) 222 } 223 224 func readHexInt64(v string) (int64, error) { 225 d, err := hex.DecodeString(v) 226 if err != nil { 227 return 0, errMalformedRowKey 228 } 229 230 dr := bytes.NewReader(d) 231 value, _, err := cmpbin.ReadInt(dr) 232 if err != nil { 233 return 0, errMalformedRowKey 234 } 235 236 // There should be no more data. 237 if dr.Len() > 0 { 238 return 0, errMalformedRowKey 239 } 240 241 return value, nil 242 }