github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/tools/blocksconvert/scanner/bigtable_index_reader.go (about) 1 package scanner 2 3 import ( 4 "bytes" 5 "context" 6 "io" 7 "sort" 8 "strings" 9 10 "cloud.google.com/go/bigtable" 11 "github.com/go-kit/log" 12 "github.com/go-kit/log/level" 13 "github.com/pkg/errors" 14 "github.com/prometheus/client_golang/prometheus" 15 "golang.org/x/sync/errgroup" 16 17 "github.com/cortexproject/cortex/pkg/chunk" 18 "github.com/cortexproject/cortex/pkg/chunk/gcp" 19 ) 20 21 type bigtableIndexReader struct { 22 log log.Logger 23 project string 24 instance string 25 26 rowsRead prometheus.Counter 27 parsedIndexEntries prometheus.Counter 28 currentTableRanges prometheus.Gauge 29 currentTableScannedRanges prometheus.Gauge 30 } 31 32 func newBigtableIndexReader(project, instance string, l log.Logger, rowsRead prometheus.Counter, parsedIndexEntries prometheus.Counter, currentTableRanges, scannedRanges prometheus.Gauge) *bigtableIndexReader { 33 return &bigtableIndexReader{ 34 log: l, 35 project: project, 36 instance: instance, 37 38 rowsRead: rowsRead, 39 parsedIndexEntries: parsedIndexEntries, 40 currentTableRanges: currentTableRanges, 41 currentTableScannedRanges: scannedRanges, 42 } 43 } 44 45 func (r *bigtableIndexReader) IndexTableNames(ctx context.Context) ([]string, error) { 46 client, err := bigtable.NewAdminClient(ctx, r.project, r.instance) 47 if err != nil { 48 return nil, errors.Wrap(err, "create bigtable client failed") 49 } 50 defer closeCloser(r.log, "bigtable admin client", client) 51 52 return client.Tables(ctx) 53 } 54 55 // This reader supports both used versions of BigTable index client used by Cortex: 56 // 57 // 1) newStorageClientV1 ("gcp"), which sets 58 // - RowKey = entry.HashValue + \0 + entry.RangeValue 59 // - Column: "c" (in family "f") 60 // - Value: entry.Value 61 // 62 // 2) newStorageClientColumnKey ("gcp-columnkey", "bigtable", "bigtable-hashed"), which has two possibilities: 63 // - RowKey = entry.HashValue OR (if distribute key flag is enabled) hashPrefix(entry.HashValue) + "-" + entry.HashValue, where hashPrefix is 64-bit FNV64a hash, encoded as little-endian hex value 64 // - Column: entry.RangeValue (in family "f") 65 // - Value: entry.Value 66 // 67 // Index entries are returned in HashValue, RangeValue order. 68 // Entries for the same HashValue and RangeValue are passed to the same processor. 69 func (r *bigtableIndexReader) ReadIndexEntries(ctx context.Context, tableName string, processors []chunk.IndexEntryProcessor) error { 70 client, err := bigtable.NewClient(ctx, r.project, r.instance) 71 if err != nil { 72 return errors.Wrap(err, "create bigtable client failed") 73 } 74 defer closeCloser(r.log, "bigtable client", client) 75 76 var rangesCh chan bigtable.RowRange 77 78 tbl := client.Open(tableName) 79 if keys, err := tbl.SampleRowKeys(ctx); err == nil { 80 level.Info(r.log).Log("msg", "sampled row keys", "keys", strings.Join(keys, ", ")) 81 82 rangesCh = make(chan bigtable.RowRange, len(keys)+1) 83 84 start := "" 85 for _, k := range keys { 86 rangesCh <- bigtable.NewRange(start, k) 87 start = k 88 } 89 rangesCh <- bigtable.InfiniteRange(start) // Last segment from last key, to the end. 90 close(rangesCh) 91 } else { 92 level.Warn(r.log).Log("msg", "failed to sample row keys", "err", err) 93 94 rangesCh = make(chan bigtable.RowRange, 1) 95 rangesCh <- bigtable.InfiniteRange("") 96 close(rangesCh) 97 } 98 99 r.currentTableRanges.Set(float64(len(rangesCh))) 100 r.currentTableScannedRanges.Set(0) 101 102 defer r.currentTableRanges.Set(0) 103 defer r.currentTableScannedRanges.Set(0) 104 105 g, gctx := errgroup.WithContext(ctx) 106 107 for ix := range processors { 108 p := processors[ix] 109 110 g.Go(func() error { 111 for rng := range rangesCh { 112 var innerErr error 113 114 level.Info(r.log).Log("msg", "reading rows", "range", rng) 115 116 err := tbl.ReadRows(gctx, rng, func(row bigtable.Row) bool { 117 r.rowsRead.Inc() 118 119 entries, err := parseRowKey(row, tableName) 120 if err != nil { 121 innerErr = errors.Wrapf(err, "failed to parse row: %s", row.Key()) 122 return false 123 } 124 125 r.parsedIndexEntries.Add(float64(len(entries))) 126 127 for _, e := range entries { 128 err := p.ProcessIndexEntry(e) 129 if err != nil { 130 innerErr = errors.Wrap(err, "processor error") 131 return false 132 } 133 } 134 135 return true 136 }) 137 138 if innerErr != nil { 139 return innerErr 140 } 141 142 if err != nil { 143 return err 144 } 145 146 r.currentTableScannedRanges.Inc() 147 } 148 149 return p.Flush() 150 }) 151 } 152 153 return g.Wait() 154 } 155 156 func parseRowKey(row bigtable.Row, tableName string) ([]chunk.IndexEntry, error) { 157 var entries []chunk.IndexEntry 158 159 rowKey := row.Key() 160 161 rangeInRowKey := false 162 hashValue := row.Key() 163 rangeValue := "" 164 165 // Remove hashPrefix, if used. Easy to check. 166 if len(hashValue) > 16 && hashValue[16] == '-' && hashValue[:16] == gcp.HashPrefix(hashValue[17:]) { 167 hashValue = hashValue[17:] 168 } else if ix := strings.IndexByte(hashValue, 0); ix > 0 { 169 // newStorageClientV1 uses 170 // - RowKey: entry.HashValue + \0 + entry.RangeValue 171 // - Column: "c" (in family "f") 172 // - Value: entry.Value 173 174 rangeInRowKey = true 175 rangeValue = hashValue[ix+1:] 176 hashValue = hashValue[:ix] 177 } 178 179 for family, columns := range row { 180 if family != "f" { 181 return nil, errors.Errorf("unknown family: %s", family) 182 } 183 184 for _, colVal := range columns { 185 if colVal.Row != rowKey { 186 return nil, errors.Errorf("rowkey mismatch: %q, %q", colVal.Row, rowKey) 187 } 188 189 if rangeInRowKey { 190 if colVal.Column != "f:c" { 191 return nil, errors.Errorf("found rangeValue in RowKey, but column is not 'f:c': %q", colVal.Column) 192 } 193 // we already have rangeValue 194 } else { 195 if !strings.HasPrefix(colVal.Column, "f:") { 196 return nil, errors.Errorf("invalid column prefix: %q", colVal.Column) 197 } 198 rangeValue = colVal.Column[2:] // With "f:" part removed 199 } 200 201 entry := chunk.IndexEntry{ 202 TableName: tableName, 203 HashValue: hashValue, 204 RangeValue: []byte(rangeValue), 205 Value: colVal.Value, 206 } 207 208 entries = append(entries, entry) 209 } 210 } 211 212 if len(entries) > 1 { 213 // Sort entries by RangeValue. This is done to support `newStorageClientColumnKey` version properly: 214 // all index entries with same hashValue are in the same row, but map iteration over columns may 215 // have returned them in wrong order. 216 217 sort.Sort(sortableIndexEntries(entries)) 218 } 219 220 return entries, nil 221 } 222 223 func closeCloser(log log.Logger, closerName string, closer io.Closer) { 224 err := closer.Close() 225 if err != nil { 226 level.Warn(log).Log("msg", "failed to close "+closerName, "err", err) 227 } 228 } 229 230 type sortableIndexEntries []chunk.IndexEntry 231 232 func (s sortableIndexEntries) Len() int { 233 return len(s) 234 } 235 236 func (s sortableIndexEntries) Less(i, j int) bool { 237 return bytes.Compare(s[i].RangeValue, s[j].RangeValue) < 0 238 } 239 240 func (s sortableIndexEntries) Swap(i, j int) { 241 s[i], s[j] = s[j], s[i] 242 }