github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/tools/blocksconvert/scanner/bigtable_index_reader.go (about)

     1  package scanner
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"io"
     7  	"sort"
     8  	"strings"
     9  
    10  	"cloud.google.com/go/bigtable"
    11  	"github.com/go-kit/log"
    12  	"github.com/go-kit/log/level"
    13  	"github.com/pkg/errors"
    14  	"github.com/prometheus/client_golang/prometheus"
    15  	"golang.org/x/sync/errgroup"
    16  
    17  	"github.com/cortexproject/cortex/pkg/chunk"
    18  	"github.com/cortexproject/cortex/pkg/chunk/gcp"
    19  )
    20  
    21  type bigtableIndexReader struct {
    22  	log      log.Logger
    23  	project  string
    24  	instance string
    25  
    26  	rowsRead                  prometheus.Counter
    27  	parsedIndexEntries        prometheus.Counter
    28  	currentTableRanges        prometheus.Gauge
    29  	currentTableScannedRanges prometheus.Gauge
    30  }
    31  
    32  func newBigtableIndexReader(project, instance string, l log.Logger, rowsRead prometheus.Counter, parsedIndexEntries prometheus.Counter, currentTableRanges, scannedRanges prometheus.Gauge) *bigtableIndexReader {
    33  	return &bigtableIndexReader{
    34  		log:      l,
    35  		project:  project,
    36  		instance: instance,
    37  
    38  		rowsRead:                  rowsRead,
    39  		parsedIndexEntries:        parsedIndexEntries,
    40  		currentTableRanges:        currentTableRanges,
    41  		currentTableScannedRanges: scannedRanges,
    42  	}
    43  }
    44  
    45  func (r *bigtableIndexReader) IndexTableNames(ctx context.Context) ([]string, error) {
    46  	client, err := bigtable.NewAdminClient(ctx, r.project, r.instance)
    47  	if err != nil {
    48  		return nil, errors.Wrap(err, "create bigtable client failed")
    49  	}
    50  	defer closeCloser(r.log, "bigtable admin client", client)
    51  
    52  	return client.Tables(ctx)
    53  }
    54  
    55  // This reader supports both used versions of BigTable index client used by Cortex:
    56  //
    57  // 1) newStorageClientV1 ("gcp"), which sets
    58  //    - RowKey = entry.HashValue + \0 + entry.RangeValue
    59  //    - Column: "c" (in family "f")
    60  //    - Value: entry.Value
    61  //
    62  // 2) newStorageClientColumnKey ("gcp-columnkey", "bigtable", "bigtable-hashed"), which has two possibilities:
    63  //    - RowKey = entry.HashValue OR (if distribute key flag is enabled) hashPrefix(entry.HashValue) + "-" + entry.HashValue, where hashPrefix is 64-bit FNV64a hash, encoded as little-endian hex value
    64  //    - Column: entry.RangeValue (in family "f")
    65  //    - Value: entry.Value
    66  //
    67  // Index entries are returned in HashValue, RangeValue order.
    68  // Entries for the same HashValue and RangeValue are passed to the same processor.
    69  func (r *bigtableIndexReader) ReadIndexEntries(ctx context.Context, tableName string, processors []chunk.IndexEntryProcessor) error {
    70  	client, err := bigtable.NewClient(ctx, r.project, r.instance)
    71  	if err != nil {
    72  		return errors.Wrap(err, "create bigtable client failed")
    73  	}
    74  	defer closeCloser(r.log, "bigtable client", client)
    75  
    76  	var rangesCh chan bigtable.RowRange
    77  
    78  	tbl := client.Open(tableName)
    79  	if keys, err := tbl.SampleRowKeys(ctx); err == nil {
    80  		level.Info(r.log).Log("msg", "sampled row keys", "keys", strings.Join(keys, ", "))
    81  
    82  		rangesCh = make(chan bigtable.RowRange, len(keys)+1)
    83  
    84  		start := ""
    85  		for _, k := range keys {
    86  			rangesCh <- bigtable.NewRange(start, k)
    87  			start = k
    88  		}
    89  		rangesCh <- bigtable.InfiniteRange(start) // Last segment from last key, to the end.
    90  		close(rangesCh)
    91  	} else {
    92  		level.Warn(r.log).Log("msg", "failed to sample row keys", "err", err)
    93  
    94  		rangesCh = make(chan bigtable.RowRange, 1)
    95  		rangesCh <- bigtable.InfiniteRange("")
    96  		close(rangesCh)
    97  	}
    98  
    99  	r.currentTableRanges.Set(float64(len(rangesCh)))
   100  	r.currentTableScannedRanges.Set(0)
   101  
   102  	defer r.currentTableRanges.Set(0)
   103  	defer r.currentTableScannedRanges.Set(0)
   104  
   105  	g, gctx := errgroup.WithContext(ctx)
   106  
   107  	for ix := range processors {
   108  		p := processors[ix]
   109  
   110  		g.Go(func() error {
   111  			for rng := range rangesCh {
   112  				var innerErr error
   113  
   114  				level.Info(r.log).Log("msg", "reading rows", "range", rng)
   115  
   116  				err := tbl.ReadRows(gctx, rng, func(row bigtable.Row) bool {
   117  					r.rowsRead.Inc()
   118  
   119  					entries, err := parseRowKey(row, tableName)
   120  					if err != nil {
   121  						innerErr = errors.Wrapf(err, "failed to parse row: %s", row.Key())
   122  						return false
   123  					}
   124  
   125  					r.parsedIndexEntries.Add(float64(len(entries)))
   126  
   127  					for _, e := range entries {
   128  						err := p.ProcessIndexEntry(e)
   129  						if err != nil {
   130  							innerErr = errors.Wrap(err, "processor error")
   131  							return false
   132  						}
   133  					}
   134  
   135  					return true
   136  				})
   137  
   138  				if innerErr != nil {
   139  					return innerErr
   140  				}
   141  
   142  				if err != nil {
   143  					return err
   144  				}
   145  
   146  				r.currentTableScannedRanges.Inc()
   147  			}
   148  
   149  			return p.Flush()
   150  		})
   151  	}
   152  
   153  	return g.Wait()
   154  }
   155  
   156  func parseRowKey(row bigtable.Row, tableName string) ([]chunk.IndexEntry, error) {
   157  	var entries []chunk.IndexEntry
   158  
   159  	rowKey := row.Key()
   160  
   161  	rangeInRowKey := false
   162  	hashValue := row.Key()
   163  	rangeValue := ""
   164  
   165  	// Remove hashPrefix, if used. Easy to check.
   166  	if len(hashValue) > 16 && hashValue[16] == '-' && hashValue[:16] == gcp.HashPrefix(hashValue[17:]) {
   167  		hashValue = hashValue[17:]
   168  	} else if ix := strings.IndexByte(hashValue, 0); ix > 0 {
   169  		// newStorageClientV1 uses
   170  		//    - RowKey: entry.HashValue + \0 + entry.RangeValue
   171  		//    - Column: "c" (in family "f")
   172  		//    - Value: entry.Value
   173  
   174  		rangeInRowKey = true
   175  		rangeValue = hashValue[ix+1:]
   176  		hashValue = hashValue[:ix]
   177  	}
   178  
   179  	for family, columns := range row {
   180  		if family != "f" {
   181  			return nil, errors.Errorf("unknown family: %s", family)
   182  		}
   183  
   184  		for _, colVal := range columns {
   185  			if colVal.Row != rowKey {
   186  				return nil, errors.Errorf("rowkey mismatch: %q, %q", colVal.Row, rowKey)
   187  			}
   188  
   189  			if rangeInRowKey {
   190  				if colVal.Column != "f:c" {
   191  					return nil, errors.Errorf("found rangeValue in RowKey, but column is not 'f:c': %q", colVal.Column)
   192  				}
   193  				// we already have rangeValue
   194  			} else {
   195  				if !strings.HasPrefix(colVal.Column, "f:") {
   196  					return nil, errors.Errorf("invalid column prefix: %q", colVal.Column)
   197  				}
   198  				rangeValue = colVal.Column[2:] // With "f:" part removed
   199  			}
   200  
   201  			entry := chunk.IndexEntry{
   202  				TableName:  tableName,
   203  				HashValue:  hashValue,
   204  				RangeValue: []byte(rangeValue),
   205  				Value:      colVal.Value,
   206  			}
   207  
   208  			entries = append(entries, entry)
   209  		}
   210  	}
   211  
   212  	if len(entries) > 1 {
   213  		// Sort entries by RangeValue. This is done to support `newStorageClientColumnKey` version properly:
   214  		// all index entries with same hashValue are in the same row, but map iteration over columns may
   215  		// have returned them in wrong order.
   216  
   217  		sort.Sort(sortableIndexEntries(entries))
   218  	}
   219  
   220  	return entries, nil
   221  }
   222  
   223  func closeCloser(log log.Logger, closerName string, closer io.Closer) {
   224  	err := closer.Close()
   225  	if err != nil {
   226  		level.Warn(log).Log("msg", "failed to close "+closerName, "err", err)
   227  	}
   228  }
   229  
   230  type sortableIndexEntries []chunk.IndexEntry
   231  
   232  func (s sortableIndexEntries) Len() int {
   233  	return len(s)
   234  }
   235  
   236  func (s sortableIndexEntries) Less(i, j int) bool {
   237  	return bytes.Compare(s[i].RangeValue, s[j].RangeValue) < 0
   238  }
   239  
   240  func (s sortableIndexEntries) Swap(i, j int) {
   241  	s[i], s[j] = s[j], s[i]
   242  }