go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/logdog/common/storage/bigtable/storage.go (about) 1 // Copyright 2015 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bigtable 16 17 import ( 18 "bytes" 19 "context" 20 "errors" 21 "fmt" 22 23 "go.chromium.org/luci/common/data/recordio" 24 log "go.chromium.org/luci/common/logging" 25 "go.chromium.org/luci/logdog/common/storage" 26 "go.chromium.org/luci/logdog/common/types" 27 28 "cloud.google.com/go/bigtable" 29 "google.golang.org/grpc/metadata" 30 ) 31 32 var ( 33 // StorageScopes is the set of OAuth scopes needed to use the storage 34 // functionality. 35 StorageScopes = []string{ 36 bigtable.Scope, 37 } 38 39 // StorageReadOnlyScopes is the set of OAuth scopes needed to use the storage 40 // functionality. 41 StorageReadOnlyScopes = []string{ 42 bigtable.ReadonlyScope, 43 } 44 ) 45 46 var ( 47 // errStop is an internal sentinel error used to indicate "stop iteration" 48 // to btTable.getLogData iterator. 49 errStop = errors.New("bigtable: stop iteration") 50 ) 51 52 // Storage is a BigTable storage configuration client. 53 type Storage struct { 54 // Client, if not nil, is the BigTable client to use for BigTable accesses. 55 Client *bigtable.Client 56 57 // LogTable is the name of the BigTable table to use for logs. 58 LogTable string 59 60 // Cache, if not nil, will be used to cache data. 61 Cache storage.Cache 62 63 // testBTInterface, if not nil, is the BigTable interface to use. This is 64 // useful for testing. If nil, this will default to the production instance. 65 testBTInterface btIface 66 } 67 68 func (s *Storage) getIface() btIface { 69 if s.testBTInterface != nil { 70 return s.testBTInterface 71 } 72 return prodBTIface{s} 73 } 74 75 // Close implements storage.Storage. 76 func (s *Storage) Close() {} 77 78 // Put implements storage.Storage. 79 func (s *Storage) Put(c context.Context, r storage.PutRequest) error { 80 c = prepareContext(c) 81 82 iface := s.getIface() 83 rw := rowWriter{ 84 threshold: iface.getMaxRowSize(), 85 } 86 87 for len(r.Values) > 0 { 88 // Add the next entry to the writer. 89 if appended := rw.append(r.Values[0]); !appended { 90 // We have failed to append our maximum BigTable row size. Flush any 91 // currently-buffered row data and try again with an empty buffer. 92 count, err := rw.flush(c, iface, r.Index, r.Project, r.Path) 93 if err != nil { 94 return err 95 } 96 97 if count == 0 { 98 // Nothing was buffered, but we still couldn't append an entry. The 99 // current entry is too large by itself, so we must fail. 100 return fmt.Errorf("single row entry exceeds maximum size (%d > %d)", len(r.Values[0]), rw.threshold) 101 } 102 103 r.Index += types.MessageIndex(count) 104 continue 105 } 106 107 // We successfully appended this entry, so advance. 108 r.Values = r.Values[1:] 109 } 110 111 // Flush any buffered rows. 112 if _, err := rw.flush(c, iface, r.Index, r.Project, r.Path); err != nil { 113 return err 114 } 115 return nil 116 } 117 118 // Expunge implements storage.Storage. 119 func (s *Storage) Expunge(c context.Context, r storage.ExpungeRequest) error { 120 return s.getIface().dropRowRange( 121 prepareContext(c), newRowKey(string(r.Project), string(r.Path), 0, 0)) 122 } 123 124 // Get implements storage.Storage. 125 func (s *Storage) Get(c context.Context, r storage.GetRequest, cb storage.GetCallback) error { 126 c = prepareContext(c) 127 128 startKey := newRowKey(string(r.Project), string(r.Path), int64(r.Index), 0) 129 c = log.SetFields(c, log.Fields{ 130 "project": r.Project, 131 "path": r.Path, 132 "index": r.Index, 133 "limit": r.Limit, 134 "startRowKey": startKey, 135 "keysOnly": r.KeysOnly, 136 }) 137 138 // If we issue a query and get back a legacy row, it will have no count 139 // associated with it. We will fast-exit 140 141 limit := r.Limit 142 err := s.getIface().getLogData(c, startKey, r.Limit, r.KeysOnly, func(rk *rowKey, data []byte) error { 143 // Does this key match our requested log stream? If not, we've moved past 144 // this stream's records and must stop iteration. 145 if !rk.sharesPathWith(startKey) { 146 return errStop 147 } 148 149 // Calculate the start index of the contiguous row. Since we index the row 150 // on the LAST entry in the row, count backwards to get the index of the 151 // first entry. 152 startIndex := rk.firstIndex() 153 if startIndex < 0 { 154 return storage.ErrBadData 155 } 156 157 // Split our data into records. Leave the records slice nil if we're doing 158 // a keys-only get. 159 var records [][]byte 160 if !r.KeysOnly { 161 var err error 162 if records, err = recordio.Split(data); err != nil { 163 return storage.ErrBadData 164 } 165 166 if rk.count != int64(len(records)) { 167 log.Fields{ 168 "count": rk.count, 169 "recordCount": len(records), 170 }.Errorf(c, "Record count doesn't match declared count.") 171 return storage.ErrBadData 172 } 173 } 174 175 // If we are indexed somewhere within this entry's records, discard any 176 // records before our index. 177 if discard := int64(r.Index) - startIndex; discard > 0 { 178 if discard > rk.count { 179 // This should never happen unless there is corrupt or conflicting data. 180 return nil 181 } 182 startIndex += discard 183 if !r.KeysOnly { 184 records = records[discard:] 185 } 186 } 187 188 for index := startIndex; index <= rk.index; index++ { 189 // If we're not doing keys-only, consume the row. 190 var row []byte 191 if !r.KeysOnly { 192 row, records = records[0], records[1:] 193 } 194 195 if !cb(storage.MakeEntry(row, types.MessageIndex(index))) { 196 return errStop 197 } 198 r.Index = types.MessageIndex(index + 1) 199 200 // Artificially apply limit within our row records. 201 if limit > 0 { 202 limit-- 203 if limit == 0 { 204 return errStop 205 } 206 } 207 } 208 return nil 209 }) 210 211 switch err { 212 case nil, errStop: 213 return nil 214 215 default: 216 log.WithError(err).Errorf(c, "Failed to retrieve row range.") 217 return err 218 } 219 } 220 221 // Tail implements storage.Storage. 222 func (s *Storage) Tail(c context.Context, project string, path types.StreamPath) (*storage.Entry, error) { 223 c = prepareContext(c) 224 c = log.SetFields(c, log.Fields{ 225 "project": project, 226 "path": path, 227 }) 228 iface := s.getIface() 229 230 // Load the "last tail index" from cache. If we have no cache, start at 0. 231 var startIdx int64 232 if s.Cache != nil { 233 startIdx = getLastTailIndex(c, s.Cache, project, path) 234 } 235 236 // Iterate through all log keys in the stream. Record the latest contiguous 237 // one. 238 var ( 239 rk = newRowKey(project, string(path), startIdx, 0) 240 latest *rowKey 241 nextIndex = startIdx 242 ) 243 err := iface.getLogData(c, rk, 0, true, func(rk *rowKey, data []byte) error { 244 // If this record is non-contiguous, we're done iterating. 245 if rk.firstIndex() != nextIndex { 246 return errStop 247 } 248 249 latest, nextIndex = rk, rk.index+1 250 return nil 251 }) 252 if err != nil && err != errStop { 253 log.Fields{ 254 log.ErrorKey: err, 255 "table": s.LogTable, 256 }.Errorf(c, "Failed to scan for tail.") 257 } 258 259 if latest == nil { 260 // No rows for the specified stream. 261 return nil, storage.ErrDoesNotExist 262 } 263 264 // Update our cache if the tail index has changed. 265 if s.Cache != nil && startIdx != latest.index { 266 // We cache the first index in the row so that subsequent cached fetches 267 // have the correct "startIdx" expectations. 268 putLastTailIndex(c, s.Cache, project, path, latest.firstIndex()) 269 } 270 271 // Fetch the latest row's data. 272 var d []byte 273 err = iface.getLogData(c, latest, 1, false, func(rk *rowKey, data []byte) error { 274 records, err := recordio.Split(data) 275 if err != nil || len(records) == 0 { 276 return storage.ErrBadData 277 } 278 d = records[len(records)-1] 279 return errStop 280 }) 281 if err != nil && err != errStop { 282 log.Fields{ 283 log.ErrorKey: err, 284 "table": s.LogTable, 285 }.Errorf(c, "Failed to retrieve tail row.") 286 } 287 288 return storage.MakeEntry(d, types.MessageIndex(latest.index)), nil 289 } 290 291 // rowWriter facilitates writing several consecutive data values to a single 292 // BigTable row. 293 type rowWriter struct { 294 // buf is the current set of buffered data. 295 buf bytes.Buffer 296 297 // count is the number of rows in the writer. 298 count int 299 300 // threshold is the maximum number of bytes that we can write. 301 threshold int 302 } 303 304 func (w *rowWriter) append(d []byte) (appended bool) { 305 origSize := w.buf.Len() 306 defer func() { 307 // Restore our previous buffer state if we are reporting the write as 308 // failed. 309 if !appended { 310 w.buf.Truncate(origSize) 311 } 312 }() 313 314 // Serialize the next entry as a recordio blob. 315 if _, err := recordio.WriteFrame(&w.buf, d); err != nil { 316 return 317 } 318 319 // If we have exceeded our threshold, report a failure. 320 appended = (w.buf.Len() <= w.threshold) 321 if appended { 322 w.count++ 323 } 324 return 325 } 326 327 func (w *rowWriter) flush(c context.Context, iface btIface, index types.MessageIndex, 328 project string, path types.StreamPath) (int, error) { 329 330 flushCount := w.count 331 if flushCount == 0 { 332 return 0, nil 333 } 334 335 // Write the current set of buffered rows to the table. Index on the LAST 336 // row index. 337 lastIndex := int64(index) + int64(flushCount) - 1 338 rk := newRowKey(string(project), string(path), lastIndex, int64(w.count)) 339 340 if err := iface.putLogData(c, rk, w.buf.Bytes()); err != nil { 341 return 0, err 342 } 343 344 // Reset our buffer state. 345 w.buf.Reset() 346 w.count = 0 347 return flushCount, nil 348 } 349 350 func prepareContext(c context.Context) context.Context { 351 // Explicitly clear gRPC metadata from the Context. It is forwarded to 352 // delegate calls by default, and standard request metadata can break BigTable 353 // calls. 354 return metadata.NewOutgoingContext(c, nil) 355 }