github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/chunk/client/gcp/bigtable_index_client.go

package gcp

import (
	"bytes"
	"context"
	"encoding/binary"
	"encoding/hex"
	"flag"
	"fmt"
	"strings"
	"time"

	"cloud.google.com/go/bigtable"
	"github.com/go-kit/log"
	"github.com/grafana/dskit/grpcclient"
	ot "github.com/opentracing/opentracing-go"
	"github.com/pkg/errors"

	"github.com/grafana/loki/pkg/storage/chunk/client/util"
	"github.com/grafana/loki/pkg/storage/config"
	"github.com/grafana/loki/pkg/storage/stores/series/index"
	"github.com/grafana/loki/pkg/util/math"
	"github.com/grafana/loki/pkg/util/spanlogger"
)

const (
	columnFamily = "f"
	columnPrefix = columnFamily + ":"
	column       = "c"
	separator    = "\000"
	maxRowReads  = 100
)

// Config for a StorageClient
type Config struct {
	Project  string `yaml:"project"`
	Instance string `yaml:"instance"`

	GRPCClientConfig grpcclient.Config `yaml:"grpc_client_config"`

	ColumnKey      bool `yaml:"-"`
	DistributeKeys bool `yaml:"-"`

	TableCacheEnabled    bool          `yaml:"table_cache_enabled"`
	TableCacheExpiration time.Duration `yaml:"table_cache_expiration"`
}

// RegisterFlags adds the flags required to configure this to the given FlagSet.
func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
	f.StringVar(&cfg.Project, "bigtable.project", "", "Bigtable project ID.")
	f.StringVar(&cfg.Instance, "bigtable.instance", "", "Bigtable instance ID. Please refer to https://cloud.google.com/docs/authentication/production for more information about how to configure authentication.")
	f.BoolVar(&cfg.TableCacheEnabled, "bigtable.table-cache.enabled", true, "If enabled, once a table's info is fetched, it is cached.")
	f.DurationVar(&cfg.TableCacheExpiration, "bigtable.table-cache.expiration", 30*time.Minute, "Duration to cache tables before checking again.")

	// This overrides our default from TLS disabled to TLS enabled.
	cfg.GRPCClientConfig.TLSEnabled = true
	cfg.GRPCClientConfig.RegisterFlagsWithPrefix("bigtable", f)
}

func (cfg *Config) Validate(log log.Logger) error {
	return cfg.GRPCClientConfig.Validate(log)
}

// storageClientColumnKey implements chunk.storageClient for GCP.
type storageClientColumnKey struct {
	cfg       Config
	schemaCfg config.SchemaConfig
	client    *bigtable.Client
	keysFn    keysFn
}

// storageClientV1 implements chunk.storageClient for GCP.
type storageClientV1 struct {
	storageClientColumnKey
}

// NewStorageClientV1 returns a new v1 StorageClient.
func NewStorageClientV1(ctx context.Context, cfg Config, schemaCfg config.SchemaConfig) (index.Client, error) {
	dialOpts, err := cfg.GRPCClientConfig.DialOption(bigtableInstrumentation())
	if err != nil {
		return nil, err
	}
	client, err := bigtable.NewClient(ctx, cfg.Project, cfg.Instance, toOptions(dialOpts)...)
	if err != nil {
		return nil, err
	}
	return newStorageClientV1(cfg, schemaCfg, client), nil
}

func newStorageClientV1(cfg Config, schemaCfg config.SchemaConfig, client *bigtable.Client) *storageClientV1 {
	return &storageClientV1{
		storageClientColumnKey{
			cfg:       cfg,
			schemaCfg: schemaCfg,
			client:    client,
			keysFn: func(hashValue string, rangeValue []byte) (string, string) {
				rowKey := hashValue + separator + string(rangeValue)
				return rowKey, column
			},
		},
	}
}
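// Illustrative note on the v1 key scheme (the keys shown are hypothetical):
// a write with hashValue "fake:d18500" and rangeValue "abc" lands in the
// single row "fake:d18500\x00abc" under the fixed column "c", i.e. one index
// entry per Bigtable row.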
// NewStorageClientColumnKey returns a new v2 StorageClient.
func NewStorageClientColumnKey(ctx context.Context, cfg Config, schemaCfg config.SchemaConfig) (index.Client, error) {
	dialOpts, err := cfg.GRPCClientConfig.DialOption(bigtableInstrumentation())
	if err != nil {
		return nil, err
	}
	client, err := bigtable.NewClient(ctx, cfg.Project, cfg.Instance, toOptions(dialOpts)...)
	if err != nil {
		return nil, err
	}
	return newStorageClientColumnKey(cfg, schemaCfg, client), nil
}

func newStorageClientColumnKey(cfg Config, schemaCfg config.SchemaConfig, client *bigtable.Client) *storageClientColumnKey {
	return &storageClientColumnKey{
		cfg:       cfg,
		schemaCfg: schemaCfg,
		client:    client,
		keysFn: func(hashValue string, rangeValue []byte) (string, string) {
			// We hash the row key and prepend it back to the key for better distribution.
			// We preserve the existing key to make migrations and observability easier.
			if cfg.DistributeKeys {
				hashValue = HashPrefix(hashValue) + "-" + hashValue
			}

			return hashValue, string(rangeValue)
		},
	}
}

// HashPrefix calculates a 64-bit hash of the input string and hex-encodes
// the result, taking care to zero-pad it to a fixed width.
func HashPrefix(input string) string {
	prefix := hashAdd(hashNew(), input)
	var encodedUint64 [8]byte
	binary.LittleEndian.PutUint64(encodedUint64[:], prefix)
	var hexEncoded [16]byte
	hex.Encode(hexEncoded[:], encodedUint64[:])
	return string(hexEncoded[:])
}

func (s *storageClientColumnKey) Stop() {
	s.client.Close()
}

func (s *storageClientColumnKey) NewWriteBatch() index.WriteBatch {
	return bigtableWriteBatch{
		tables: map[string]map[string]*bigtable.Mutation{},
		keysFn: s.keysFn,
	}
}
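// Illustrative note on the distributed key layout (the key shown is
// hypothetical): with DistributeKeys enabled, a hashValue such as
// "fake:d18500" becomes "<16 hex chars>-fake:d18500", where the prefix is
// the zero-padded hex encoding of HashPrefix's 64-bit hash. The prefix
// spreads rows evenly across Bigtable tablets while the original key stays
// readable for migrations and debugging.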
// keysFn returns the row and column keys for the given hash and range keys.
type keysFn func(hashValue string, rangeValue []byte) (rowKey, columnKey string)

type bigtableWriteBatch struct {
	tables map[string]map[string]*bigtable.Mutation
	keysFn keysFn
}

func (b bigtableWriteBatch) Add(tableName, hashValue string, rangeValue []byte, value []byte) {
	b.addMutation(tableName, hashValue, rangeValue, func(mutation *bigtable.Mutation, columnKey string) {
		mutation.Set(columnFamily, columnKey, 0, value)
	})
}

func (b bigtableWriteBatch) Delete(tableName, hashValue string, rangeValue []byte) {
	b.addMutation(tableName, hashValue, rangeValue, func(mutation *bigtable.Mutation, columnKey string) {
		mutation.DeleteCellsInColumn(columnFamily, columnKey)
	})
}

func (b bigtableWriteBatch) addMutation(tableName, hashValue string, rangeValue []byte, callback func(mutation *bigtable.Mutation, columnKey string)) {
	rows, ok := b.tables[tableName]
	if !ok {
		rows = map[string]*bigtable.Mutation{}
		b.tables[tableName] = rows
	}

	rowKey, columnKey := b.keysFn(hashValue, rangeValue)
	mutation, ok := rows[rowKey]
	if !ok {
		mutation = bigtable.NewMutation()
		rows[rowKey] = mutation
	}

	callback(mutation, columnKey)
}

func (s *storageClientColumnKey) BatchWrite(ctx context.Context, batch index.WriteBatch) error {
	bigtableBatch := batch.(bigtableWriteBatch)

	for tableName, rows := range bigtableBatch.tables {
		table := s.client.Open(tableName)
		rowKeys := make([]string, 0, len(rows))
		muts := make([]*bigtable.Mutation, 0, len(rows))
		for rowKey, mut := range rows {
			rowKeys = append(rowKeys, rowKey)
			muts = append(muts, mut)
		}

		errs, err := table.ApplyBulk(ctx, rowKeys, muts)
		if err != nil {
			return err
		}
		for _, err := range errs {
			if err != nil {
				return err
			}
		}
	}

	return nil
}
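// Example write path (a minimal sketch; the table name, keys, and value are
// hypothetical):
//
//	batch := client.NewWriteBatch()
//	batch.Add("index_2637", "fake:d18500", []byte("rangeKey"), []byte("value"))
//	if err := client.BatchWrite(ctx, batch); err != nil {
//		return err
//	}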
func (s *storageClientColumnKey) QueryPages(ctx context.Context, queries []index.Query, callback index.QueryPagesCallback) error {
	sp, ctx := ot.StartSpanFromContext(ctx, "QueryPages")
	defer sp.Finish()

	// A limitation of this approach is that it only fetches whole rows, so
	// the results are filtered client-side here. The unit tests also depend
	// on this filtering.
	callback = index.QueryFilter(callback)

	type tableQuery struct {
		name    string
		queries map[string]index.Query
		rows    bigtable.RowList
	}

	tableQueries := map[string]tableQuery{}
	for _, query := range queries {
		tq, ok := tableQueries[query.TableName]
		if !ok {
			tq = tableQuery{
				name:    query.TableName,
				queries: map[string]index.Query{},
			}
		}
		hashKey, _ := s.keysFn(query.HashValue, nil)
		tq.queries[hashKey] = query
		tq.rows = append(tq.rows, hashKey)
		tableQueries[query.TableName] = tq
	}

	errs := make(chan error)
	for _, tq := range tableQueries {
		table := s.client.Open(tq.name)

		for i := 0; i < len(tq.rows); i += maxRowReads {
			page := tq.rows[i:math.Min(i+maxRowReads, len(tq.rows))]
			go func(page bigtable.RowList, tq tableQuery) {
				var processingErr error
				// Rows are returned in key order, not in the order of the row list.
				err := table.ReadRows(ctx, page, func(row bigtable.Row) bool {
					query, ok := tq.queries[row.Key()]
					if !ok {
						processingErr = errors.WithStack(fmt.Errorf("got row for unknown chunk: %s", row.Key()))
						return false
					}

					val, ok := row[columnFamily]
					if !ok {
						// This row has no cells in our column family; skip it.
						return true
					}

					return callback(query, &columnKeyBatch{
						items: val,
					})
				})

				if processingErr != nil {
					errs <- processingErr
				} else {
					errs <- err
				}
			}(page, tq)
		}
	}

	var lastErr error
	for _, tq := range tableQueries {
		for i := 0; i < len(tq.rows); i += maxRowReads {
			err := <-errs
			if err != nil {
				lastErr = err
			}
		}
	}
	return lastErr
}

// columnKeyBatch represents a batch of values read from Bigtable.
type columnKeyBatch struct {
	items []bigtable.ReadItem
}

func (c *columnKeyBatch) Iterator() index.ReadBatchIterator {
	return &columnKeyIterator{
		i:              -1,
		columnKeyBatch: c,
	}
}

type columnKeyIterator struct {
	i int
	*columnKeyBatch
}

func (c *columnKeyIterator) Next() bool {
	c.i++
	return c.i < len(c.items)
}

func (c *columnKeyIterator) RangeValue() []byte {
	return []byte(strings.TrimPrefix(c.items[c.i].Column, columnPrefix))
}

func (c *columnKeyIterator) Value() []byte {
	return c.items[c.i].Value
}

func (s *storageClientV1) QueryPages(ctx context.Context, queries []index.Query, callback index.QueryPagesCallback) error {
	return util.DoParallelQueries(ctx, s.query, queries, callback)
}

func (s *storageClientV1) query(ctx context.Context, query index.Query, callback index.QueryPagesCallback) error {
	const null = string('\xff')

	log, ctx := spanlogger.New(ctx, "QueryPages", ot.Tag{Key: "tableName", Value: query.TableName}, ot.Tag{Key: "hashValue", Value: query.HashValue})
	defer log.Finish()

	table := s.client.Open(query.TableName)

	var rowRange bigtable.RowRange

	/* Bigtable only seems to support regex matching on cell values, so we do
	   value filtering client-side for now.
	readOpts := []bigtable.ReadOption{
		bigtable.RowFilter(bigtable.FamilyFilter(columnFamily)),
	}
	if query.ValueEqual != nil {
		readOpts = append(readOpts, bigtable.RowFilter(bigtable.ValueFilter(string(query.ValueEqual))))
	}
	*/
	if len(query.RangeValuePrefix) > 0 {
		rowRange = bigtable.PrefixRange(query.HashValue + separator + string(query.RangeValuePrefix))
	} else if len(query.RangeValueStart) > 0 {
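		// Note: RangeValueStart is treated as an inclusive lower bound, and
		// bigtable.NewRange is half-open ([begin, end)), so the upper bound
		// is approximated by appending a 0xff byte ("null" above) after the
		// separator, which sorts after every range value under this hashValue.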
		rowRange = bigtable.NewRange(query.HashValue+separator+string(query.RangeValueStart), query.HashValue+separator+null)
	} else {
		rowRange = bigtable.PrefixRange(query.HashValue + separator)
	}

	err := table.ReadRows(ctx, rowRange, func(r bigtable.Row) bool {
		if query.ValueEqual == nil || bytes.Equal(r[columnFamily][0].Value, query.ValueEqual) {
			return callback(query, &rowBatch{
				row: r,
			})
		}

		return true
	})
	if err != nil {
		log.Error(err)
		return errors.WithStack(err)
	}
	return nil
}

// rowBatch represents a batch of rows read from Bigtable. As the
// bigtable interface gives us rows one-by-one, a batch always contains
// exactly one row.
type rowBatch struct {
	row bigtable.Row
}

func (b *rowBatch) Iterator() index.ReadBatchIterator {
	return &rowBatchIterator{
		rowBatch: b,
	}
}

type rowBatchIterator struct {
	consumed bool
	*rowBatch
}

func (b *rowBatchIterator) Next() bool {
	if b.consumed {
		return false
	}
	b.consumed = true
	return true
}

func (b *rowBatchIterator) RangeValue() []byte {
	// The string before the first separator is the hash key; everything
	// after it is the range value.
	parts := strings.SplitN(b.row.Key(), separator, 2)
	return []byte(parts[1])
}

func (b *rowBatchIterator) Value() []byte {
	cf, ok := b.row[columnFamily]
	if !ok || len(cf) != 1 {
		panic("bad response from bigtable")
	}
	return cf[0].Value
}
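// Example read path for the column-key client (a minimal sketch: the table
// name, hash value, and process function are hypothetical, and the batch
// interface name is assumed from index.QueryPagesCallback):
//
//	queries := []index.Query{{TableName: "index_2637", HashValue: "fake:d18500"}}
//	err := client.QueryPages(ctx, queries, func(q index.Query, batch index.ReadBatchResult) bool {
//		for it := batch.Iterator(); it.Next(); {
//			process(it.RangeValue(), it.Value()) // hypothetical consumer
//		}
//		return true // returning false stops iteration
//	})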