github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/tools/blocksconvert/scanner/cassandra_index_reader.go (about) 1 package scanner 2 3 import ( 4 "context" 5 "fmt" 6 "math" 7 "strings" 8 9 "github.com/go-kit/log" 10 "github.com/go-kit/log/level" 11 "github.com/pkg/errors" 12 "github.com/prometheus/client_golang/prometheus" 13 "golang.org/x/sync/errgroup" 14 15 "github.com/cortexproject/cortex/pkg/chunk" 16 "github.com/cortexproject/cortex/pkg/chunk/cassandra" 17 ) 18 19 /* Cassandra can easily run out of memory or timeout if we try to SELECT the 20 * entire table. Splitting into many smaller chunks help a lot. */ 21 const nbTokenRanges = 512 22 const queryPageSize = 10000 23 24 type cassandraIndexReader struct { 25 log log.Logger 26 cassandraStorageConfig cassandra.Config 27 schemaCfg chunk.SchemaConfig 28 29 rowsRead prometheus.Counter 30 parsedIndexEntries prometheus.Counter 31 currentTableRanges prometheus.Gauge 32 currentTableScannedRanges prometheus.Gauge 33 } 34 35 func newCassandraIndexReader(cfg cassandra.Config, schemaCfg chunk.SchemaConfig, l log.Logger, rowsRead prometheus.Counter, parsedIndexEntries prometheus.Counter, currentTableRanges, scannedRanges prometheus.Gauge) *cassandraIndexReader { 36 return &cassandraIndexReader{ 37 log: l, 38 cassandraStorageConfig: cfg, 39 40 rowsRead: rowsRead, 41 parsedIndexEntries: parsedIndexEntries, 42 currentTableRanges: currentTableRanges, 43 currentTableScannedRanges: scannedRanges, 44 } 45 } 46 47 func (r *cassandraIndexReader) IndexTableNames(ctx context.Context) ([]string, error) { 48 client, err := cassandra.NewTableClient(ctx, r.cassandraStorageConfig, nil) 49 if err != nil { 50 return nil, errors.Wrap(err, "create cassandra client failed") 51 } 52 53 defer client.Stop() 54 55 return client.ListTables(ctx) 56 } 57 58 type tokenRange struct { 59 start int64 60 end int64 61 } 62 63 func (r *cassandraIndexReader) ReadIndexEntries(ctx context.Context, tableName string, processors []chunk.IndexEntryProcessor) error { 64 level.Debug(r.log).Log("msg", "scanning table", "table", tableName) 65 66 client, err := cassandra.NewStorageClient(r.cassandraStorageConfig, r.schemaCfg, nil) 67 if err != nil { 68 return errors.Wrap(err, "create cassandra storage client failed") 69 } 70 71 defer client.Stop() 72 73 session := client.GetReadSession() 74 75 rangesCh := make(chan tokenRange, nbTokenRanges) 76 77 var step, n, start int64 78 79 step = int64(math.MaxUint64 / nbTokenRanges) 80 81 for n = 0; n < nbTokenRanges; n++ { 82 start = math.MinInt64 + n*step 83 end := start + step 84 85 if n == (nbTokenRanges - 1) { 86 end = math.MaxInt64 87 } 88 89 t := tokenRange{start: start, end: end} 90 rangesCh <- t 91 } 92 93 close(rangesCh) 94 95 r.currentTableRanges.Set(float64(len(rangesCh))) 96 r.currentTableScannedRanges.Set(0) 97 98 defer r.currentTableRanges.Set(0) 99 defer r.currentTableScannedRanges.Set(0) 100 101 g, gctx := errgroup.WithContext(ctx) 102 103 for ix := range processors { 104 p := processors[ix] 105 g.Go(func() error { 106 for rng := range rangesCh { 107 level.Debug(r.log).Log("msg", "reading rows", "range_start", rng.start, "range_end", rng.end, "table_name", tableName) 108 109 query := fmt.Sprintf("SELECT hash, range, value FROM %s WHERE token(hash) >= %v", tableName, rng.start) 110 111 if rng.end < math.MaxInt64 { 112 query += fmt.Sprintf(" AND token(hash) < %v", rng.end) 113 } 114 115 iter := session.Query(query).WithContext(gctx).PageSize(queryPageSize).Iter() 116 117 if len(iter.Warnings()) > 0 { 118 level.Warn(r.log).Log("msg", "warnings from cassandra", "warnings", strings.Join(iter.Warnings(), " :: ")) 119 } 120 121 scanner := iter.Scanner() 122 123 oldHash := "" 124 oldRng := "" 125 126 for scanner.Next() { 127 var hash, rng, value string 128 129 err := scanner.Scan(&hash, &rng, &value) 130 if err != nil { 131 return errors.Wrap(err, "Cassandra scan error") 132 } 133 134 r.rowsRead.Inc() 135 r.parsedIndexEntries.Inc() 136 137 entry := chunk.IndexEntry{ 138 TableName: tableName, 139 HashValue: hash, 140 RangeValue: []byte(rng), 141 Value: []byte(value), 142 } 143 144 if rng < oldRng && oldHash == hash { 145 level.Error(r.log).Log("msg", "new rng bad", "rng", rng, "old_rng", oldRng, "hash", hash, "old_hash", oldHash) 146 return fmt.Errorf("received range row in the wrong order for same hash: %v < %v", rng, oldRng) 147 } 148 149 err = p.ProcessIndexEntry(entry) 150 if err != nil { 151 return errors.Wrap(err, "processor error") 152 } 153 154 oldHash = hash 155 oldRng = rng 156 } 157 158 // This will also close the iterator. 159 err := scanner.Err() 160 if err != nil { 161 return errors.Wrap(err, "Cassandra error during scan") 162 } 163 164 r.currentTableScannedRanges.Inc() 165 } 166 167 return p.Flush() 168 }) 169 } 170 171 return g.Wait() 172 }