github.com/sequix/cortex@v1.1.6/pkg/chunk/cassandra/storage_client.go (about) 1 package cassandra 2 3 import ( 4 "context" 5 "flag" 6 "fmt" 7 "strings" 8 "time" 9 10 "github.com/gocql/gocql" 11 "github.com/pkg/errors" 12 13 "github.com/sequix/cortex/pkg/chunk" 14 "github.com/sequix/cortex/pkg/chunk/util" 15 ) 16 17 const ( 18 maxRowReads = 100 19 ) 20 21 // Config for a StorageClient 22 type Config struct { 23 Addresses string `yaml:"addresses,omitempty"` 24 Port int `yaml:"port,omitempty"` 25 Keyspace string `yaml:"keyspace,omitempty"` 26 Consistency string `yaml:"consistency,omitempty"` 27 ReplicationFactor int `yaml:"replication_factor,omitempty"` 28 DisableInitialHostLookup bool `yaml:"disable_initial_host_lookup,omitempty"` 29 SSL bool `yaml:"SSL,omitempty"` 30 HostVerification bool `yaml:"host_verification,omitempty"` 31 CAPath string `yaml:"CA_path,omitempty"` 32 Auth bool `yaml:"auth,omitempty"` 33 Username string `yaml:"username,omitempty"` 34 Password string `yaml:"password,omitempty"` 35 Timeout time.Duration `yaml:"timeout,omitempty"` 36 ConnectTimeout time.Duration `yaml:"connect_timeout,omitempty"` 37 } 38 39 // RegisterFlags adds the flags required to config this to the given FlagSet 40 func (cfg *Config) RegisterFlags(f *flag.FlagSet) { 41 f.StringVar(&cfg.Addresses, "cassandra.addresses", "", "Comma-separated hostnames or IPs of Cassandra instances.") 42 f.IntVar(&cfg.Port, "cassandra.port", 9042, "Port that Cassandra is running on") 43 f.StringVar(&cfg.Keyspace, "cassandra.keyspace", "", "Keyspace to use in Cassandra.") 44 f.StringVar(&cfg.Consistency, "cassandra.consistency", "QUORUM", "Consistency level for Cassandra.") 45 f.IntVar(&cfg.ReplicationFactor, "cassandra.replication-factor", 1, "Replication factor to use in Cassandra.") 46 f.BoolVar(&cfg.DisableInitialHostLookup, "cassandra.disable-initial-host-lookup", false, "Instruct the cassandra driver to not attempt to get host info from the system.peers table.") 47 f.BoolVar(&cfg.SSL, "cassandra.ssl", false, "Use SSL when connecting to cassandra instances.") 48 f.BoolVar(&cfg.HostVerification, "cassandra.host-verification", true, "Require SSL certificate validation.") 49 f.StringVar(&cfg.CAPath, "cassandra.ca-path", "", "Path to certificate file to verify the peer.") 50 f.BoolVar(&cfg.Auth, "cassandra.auth", false, "Enable password authentication when connecting to cassandra.") 51 f.StringVar(&cfg.Username, "cassandra.username", "", "Username to use when connecting to cassandra.") 52 f.StringVar(&cfg.Password, "cassandra.password", "", "Password to use when connecting to cassandra.") 53 f.DurationVar(&cfg.Timeout, "cassandra.timeout", 600*time.Millisecond, "Timeout when connecting to cassandra.") 54 f.DurationVar(&cfg.ConnectTimeout, "cassandra.connect-timeout", 600*time.Millisecond, "Initial connection timeout, used during initial dial to server.") 55 } 56 57 func (cfg *Config) session() (*gocql.Session, error) { 58 consistency, err := gocql.ParseConsistencyWrapper(cfg.Consistency) 59 if err != nil { 60 return nil, errors.WithStack(err) 61 } 62 63 if err := cfg.createKeyspace(); err != nil { 64 return nil, errors.WithStack(err) 65 } 66 67 cluster := gocql.NewCluster(strings.Split(cfg.Addresses, ",")...) 68 cluster.Port = cfg.Port 69 cluster.Keyspace = cfg.Keyspace 70 cluster.Consistency = consistency 71 cluster.BatchObserver = observer{} 72 cluster.QueryObserver = observer{} 73 cluster.Timeout = cfg.Timeout 74 cluster.ConnectTimeout = cfg.ConnectTimeout 75 cfg.setClusterConfig(cluster) 76 77 return cluster.CreateSession() 78 } 79 80 // apply config settings to a cassandra ClusterConfig 81 func (cfg *Config) setClusterConfig(cluster *gocql.ClusterConfig) { 82 cluster.DisableInitialHostLookup = cfg.DisableInitialHostLookup 83 84 if cfg.SSL { 85 cluster.SslOpts = &gocql.SslOptions{ 86 CaPath: cfg.CAPath, 87 EnableHostVerification: cfg.HostVerification, 88 } 89 } 90 if cfg.Auth { 91 cluster.Authenticator = gocql.PasswordAuthenticator{ 92 Username: cfg.Username, 93 Password: cfg.Password, 94 } 95 } 96 } 97 98 // createKeyspace will create the desired keyspace if it doesn't exist. 99 func (cfg *Config) createKeyspace() error { 100 cluster := gocql.NewCluster(strings.Split(cfg.Addresses, ",")...) 101 cluster.Port = cfg.Port 102 cluster.Keyspace = "system" 103 cluster.Timeout = 20 * time.Second 104 cluster.ConnectTimeout = 20 * time.Second 105 106 cfg.setClusterConfig(cluster) 107 108 session, err := cluster.CreateSession() 109 if err != nil { 110 return errors.WithStack(err) 111 } 112 defer session.Close() 113 114 err = session.Query(fmt.Sprintf( 115 `CREATE KEYSPACE IF NOT EXISTS %s 116 WITH replication = { 117 'class' : 'SimpleStrategy', 118 'replication_factor' : %d 119 }`, 120 cfg.Keyspace, cfg.ReplicationFactor)).Exec() 121 return errors.WithStack(err) 122 } 123 124 // StorageClient implements chunk.IndexClient and chunk.ObjectClient for Cassandra. 125 type StorageClient struct { 126 cfg Config 127 schemaCfg chunk.SchemaConfig 128 session *gocql.Session 129 } 130 131 // NewStorageClient returns a new StorageClient. 132 func NewStorageClient(cfg Config, schemaCfg chunk.SchemaConfig) (*StorageClient, error) { 133 session, err := cfg.session() 134 if err != nil { 135 return nil, errors.WithStack(err) 136 } 137 138 client := &StorageClient{ 139 cfg: cfg, 140 schemaCfg: schemaCfg, 141 session: session, 142 } 143 return client, nil 144 } 145 146 // Stop implement chunk.IndexClient. 147 func (s *StorageClient) Stop() { 148 s.session.Close() 149 } 150 151 // Cassandra batching isn't really useful in this case, its more to do multiple 152 // atomic writes. Therefore we just do a bunch of writes in parallel. 153 type writeBatch struct { 154 entries []chunk.IndexEntry 155 } 156 157 // NewWriteBatch implement chunk.IndexClient. 158 func (s *StorageClient) NewWriteBatch() chunk.WriteBatch { 159 return &writeBatch{} 160 } 161 162 func (b *writeBatch) Add(tableName, hashValue string, rangeValue []byte, value []byte) { 163 b.entries = append(b.entries, chunk.IndexEntry{ 164 TableName: tableName, 165 HashValue: hashValue, 166 RangeValue: rangeValue, 167 Value: value, 168 }) 169 } 170 171 // BatchWrite implement chunk.IndexClient. 172 func (s *StorageClient) BatchWrite(ctx context.Context, batch chunk.WriteBatch) error { 173 b := batch.(*writeBatch) 174 175 for _, entry := range b.entries { 176 err := s.session.Query(fmt.Sprintf("INSERT INTO %s (hash, range, value) VALUES (?, ?, ?)", 177 entry.TableName), entry.HashValue, entry.RangeValue, entry.Value).WithContext(ctx).Exec() 178 if err != nil { 179 return errors.WithStack(err) 180 } 181 } 182 183 return nil 184 } 185 186 // QueryPages implement chunk.IndexClient. 187 func (s *StorageClient) QueryPages(ctx context.Context, queries []chunk.IndexQuery, callback func(chunk.IndexQuery, chunk.ReadBatch) bool) error { 188 return util.DoParallelQueries(ctx, s.query, queries, callback) 189 } 190 191 func (s *StorageClient) query(ctx context.Context, query chunk.IndexQuery, callback func(result chunk.ReadBatch) (shouldContinue bool)) error { 192 var q *gocql.Query 193 194 switch { 195 case len(query.RangeValuePrefix) > 0 && query.ValueEqual == nil: 196 q = s.session.Query(fmt.Sprintf("SELECT range, value FROM %s WHERE hash = ? AND range >= ? AND range < ?", 197 query.TableName), query.HashValue, query.RangeValuePrefix, append(query.RangeValuePrefix, '\xff')) 198 199 case len(query.RangeValuePrefix) > 0 && query.ValueEqual != nil: 200 q = s.session.Query(fmt.Sprintf("SELECT range, value FROM %s WHERE hash = ? AND range >= ? AND range < ? AND value = ? ALLOW FILTERING", 201 query.TableName), query.HashValue, query.RangeValuePrefix, append(query.RangeValuePrefix, '\xff'), query.ValueEqual) 202 203 case len(query.RangeValueStart) > 0 && query.ValueEqual == nil: 204 q = s.session.Query(fmt.Sprintf("SELECT range, value FROM %s WHERE hash = ? AND range >= ?", 205 query.TableName), query.HashValue, query.RangeValueStart) 206 207 case len(query.RangeValueStart) > 0 && query.ValueEqual != nil: 208 q = s.session.Query(fmt.Sprintf("SELECT range, value FROM %s WHERE hash = ? AND range >= ? AND value = ? ALLOW FILTERING", 209 query.TableName), query.HashValue, query.RangeValueStart, query.ValueEqual) 210 211 case query.ValueEqual == nil: 212 q = s.session.Query(fmt.Sprintf("SELECT range, value FROM %s WHERE hash = ?", 213 query.TableName), query.HashValue) 214 215 case query.ValueEqual != nil: 216 q = s.session.Query(fmt.Sprintf("SELECT range, value FROM %s WHERE hash = ? value = ? ALLOW FILTERING", 217 query.TableName), query.HashValue, query.ValueEqual) 218 } 219 220 iter := q.WithContext(ctx).Iter() 221 defer iter.Close() 222 scanner := iter.Scanner() 223 for scanner.Next() { 224 b := &readBatch{} 225 if err := scanner.Scan(&b.rangeValue, &b.value); err != nil { 226 return errors.WithStack(err) 227 } 228 if !callback(b) { 229 return nil 230 } 231 } 232 return errors.WithStack(scanner.Err()) 233 } 234 235 // readBatch represents a batch of rows read from Cassandra. 236 type readBatch struct { 237 consumed bool 238 rangeValue []byte 239 value []byte 240 } 241 242 func (r *readBatch) Iterator() chunk.ReadBatchIterator { 243 return &readBatchIter{ 244 readBatch: r, 245 } 246 } 247 248 type readBatchIter struct { 249 consumed bool 250 *readBatch 251 } 252 253 func (b *readBatchIter) Next() bool { 254 if b.consumed { 255 return false 256 } 257 b.consumed = true 258 return true 259 } 260 261 func (b *readBatchIter) RangeValue() []byte { 262 return b.rangeValue 263 } 264 265 func (b *readBatchIter) Value() []byte { 266 return b.value 267 } 268 269 // PutChunks implements chunk.ObjectClient. 270 func (s *StorageClient) PutChunks(ctx context.Context, chunks []chunk.Chunk) error { 271 for i := range chunks { 272 buf, err := chunks[i].Encoded() 273 if err != nil { 274 return errors.WithStack(err) 275 } 276 key := chunks[i].ExternalKey() 277 tableName, err := s.schemaCfg.ChunkTableFor(chunks[i].From) 278 if err != nil { 279 return err 280 } 281 282 // Must provide a range key, even though its not useds - hence 0x00. 283 q := s.session.Query(fmt.Sprintf("INSERT INTO %s (hash, range, value) VALUES (?, 0x00, ?)", 284 tableName), key, buf) 285 if err := q.WithContext(ctx).Exec(); err != nil { 286 return errors.WithStack(err) 287 } 288 } 289 290 return nil 291 } 292 293 // GetChunks implements chunk.ObjectClient. 294 func (s *StorageClient) GetChunks(ctx context.Context, input []chunk.Chunk) ([]chunk.Chunk, error) { 295 return util.GetParallelChunks(ctx, input, s.getChunk) 296 } 297 298 func (s *StorageClient) getChunk(ctx context.Context, decodeContext *chunk.DecodeContext, input chunk.Chunk) (chunk.Chunk, error) { 299 tableName, err := s.schemaCfg.ChunkTableFor(input.From) 300 if err != nil { 301 return input, err 302 } 303 304 var buf []byte 305 if err := s.session.Query(fmt.Sprintf("SELECT value FROM %s WHERE hash = ?", tableName), input.ExternalKey()). 306 WithContext(ctx).Scan(&buf); err != nil { 307 return input, errors.WithStack(err) 308 } 309 err = input.Decode(decodeContext, buf) 310 return input, err 311 }