github.com/sequix/cortex@v1.1.6/pkg/chunk/cassandra/storage_client.go (about)

     1  package cassandra
     2  
     3  import (
     4  	"context"
     5  	"flag"
     6  	"fmt"
     7  	"strings"
     8  	"time"
     9  
    10  	"github.com/gocql/gocql"
    11  	"github.com/pkg/errors"
    12  
    13  	"github.com/sequix/cortex/pkg/chunk"
    14  	"github.com/sequix/cortex/pkg/chunk/util"
    15  )
    16  
    17  const (
    18  	maxRowReads = 100
    19  )
    20  
    21  // Config for a StorageClient
    22  type Config struct {
    23  	Addresses                string        `yaml:"addresses,omitempty"`
    24  	Port                     int           `yaml:"port,omitempty"`
    25  	Keyspace                 string        `yaml:"keyspace,omitempty"`
    26  	Consistency              string        `yaml:"consistency,omitempty"`
    27  	ReplicationFactor        int           `yaml:"replication_factor,omitempty"`
    28  	DisableInitialHostLookup bool          `yaml:"disable_initial_host_lookup,omitempty"`
    29  	SSL                      bool          `yaml:"SSL,omitempty"`
    30  	HostVerification         bool          `yaml:"host_verification,omitempty"`
    31  	CAPath                   string        `yaml:"CA_path,omitempty"`
    32  	Auth                     bool          `yaml:"auth,omitempty"`
    33  	Username                 string        `yaml:"username,omitempty"`
    34  	Password                 string        `yaml:"password,omitempty"`
    35  	Timeout                  time.Duration `yaml:"timeout,omitempty"`
    36  	ConnectTimeout           time.Duration `yaml:"connect_timeout,omitempty"`
    37  }
    38  
    39  // RegisterFlags adds the flags required to config this to the given FlagSet
    40  func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
    41  	f.StringVar(&cfg.Addresses, "cassandra.addresses", "", "Comma-separated hostnames or IPs of Cassandra instances.")
    42  	f.IntVar(&cfg.Port, "cassandra.port", 9042, "Port that Cassandra is running on")
    43  	f.StringVar(&cfg.Keyspace, "cassandra.keyspace", "", "Keyspace to use in Cassandra.")
    44  	f.StringVar(&cfg.Consistency, "cassandra.consistency", "QUORUM", "Consistency level for Cassandra.")
    45  	f.IntVar(&cfg.ReplicationFactor, "cassandra.replication-factor", 1, "Replication factor to use in Cassandra.")
    46  	f.BoolVar(&cfg.DisableInitialHostLookup, "cassandra.disable-initial-host-lookup", false, "Instruct the cassandra driver to not attempt to get host info from the system.peers table.")
    47  	f.BoolVar(&cfg.SSL, "cassandra.ssl", false, "Use SSL when connecting to cassandra instances.")
    48  	f.BoolVar(&cfg.HostVerification, "cassandra.host-verification", true, "Require SSL certificate validation.")
    49  	f.StringVar(&cfg.CAPath, "cassandra.ca-path", "", "Path to certificate file to verify the peer.")
    50  	f.BoolVar(&cfg.Auth, "cassandra.auth", false, "Enable password authentication when connecting to cassandra.")
    51  	f.StringVar(&cfg.Username, "cassandra.username", "", "Username to use when connecting to cassandra.")
    52  	f.StringVar(&cfg.Password, "cassandra.password", "", "Password to use when connecting to cassandra.")
    53  	f.DurationVar(&cfg.Timeout, "cassandra.timeout", 600*time.Millisecond, "Timeout when connecting to cassandra.")
    54  	f.DurationVar(&cfg.ConnectTimeout, "cassandra.connect-timeout", 600*time.Millisecond, "Initial connection timeout, used during initial dial to server.")
    55  }
    56  
    57  func (cfg *Config) session() (*gocql.Session, error) {
    58  	consistency, err := gocql.ParseConsistencyWrapper(cfg.Consistency)
    59  	if err != nil {
    60  		return nil, errors.WithStack(err)
    61  	}
    62  
    63  	if err := cfg.createKeyspace(); err != nil {
    64  		return nil, errors.WithStack(err)
    65  	}
    66  
    67  	cluster := gocql.NewCluster(strings.Split(cfg.Addresses, ",")...)
    68  	cluster.Port = cfg.Port
    69  	cluster.Keyspace = cfg.Keyspace
    70  	cluster.Consistency = consistency
    71  	cluster.BatchObserver = observer{}
    72  	cluster.QueryObserver = observer{}
    73  	cluster.Timeout = cfg.Timeout
    74  	cluster.ConnectTimeout = cfg.ConnectTimeout
    75  	cfg.setClusterConfig(cluster)
    76  
    77  	return cluster.CreateSession()
    78  }
    79  
    80  // apply config settings to a cassandra ClusterConfig
    81  func (cfg *Config) setClusterConfig(cluster *gocql.ClusterConfig) {
    82  	cluster.DisableInitialHostLookup = cfg.DisableInitialHostLookup
    83  
    84  	if cfg.SSL {
    85  		cluster.SslOpts = &gocql.SslOptions{
    86  			CaPath:                 cfg.CAPath,
    87  			EnableHostVerification: cfg.HostVerification,
    88  		}
    89  	}
    90  	if cfg.Auth {
    91  		cluster.Authenticator = gocql.PasswordAuthenticator{
    92  			Username: cfg.Username,
    93  			Password: cfg.Password,
    94  		}
    95  	}
    96  }
    97  
    98  // createKeyspace will create the desired keyspace if it doesn't exist.
    99  func (cfg *Config) createKeyspace() error {
   100  	cluster := gocql.NewCluster(strings.Split(cfg.Addresses, ",")...)
   101  	cluster.Port = cfg.Port
   102  	cluster.Keyspace = "system"
   103  	cluster.Timeout = 20 * time.Second
   104  	cluster.ConnectTimeout = 20 * time.Second
   105  
   106  	cfg.setClusterConfig(cluster)
   107  
   108  	session, err := cluster.CreateSession()
   109  	if err != nil {
   110  		return errors.WithStack(err)
   111  	}
   112  	defer session.Close()
   113  
   114  	err = session.Query(fmt.Sprintf(
   115  		`CREATE KEYSPACE IF NOT EXISTS %s
   116  		 WITH replication = {
   117  			 'class' : 'SimpleStrategy',
   118  			 'replication_factor' : %d
   119  		 }`,
   120  		cfg.Keyspace, cfg.ReplicationFactor)).Exec()
   121  	return errors.WithStack(err)
   122  }
   123  
   124  // StorageClient implements chunk.IndexClient and chunk.ObjectClient for Cassandra.
   125  type StorageClient struct {
   126  	cfg       Config
   127  	schemaCfg chunk.SchemaConfig
   128  	session   *gocql.Session
   129  }
   130  
   131  // NewStorageClient returns a new StorageClient.
   132  func NewStorageClient(cfg Config, schemaCfg chunk.SchemaConfig) (*StorageClient, error) {
   133  	session, err := cfg.session()
   134  	if err != nil {
   135  		return nil, errors.WithStack(err)
   136  	}
   137  
   138  	client := &StorageClient{
   139  		cfg:       cfg,
   140  		schemaCfg: schemaCfg,
   141  		session:   session,
   142  	}
   143  	return client, nil
   144  }
   145  
   146  // Stop implement chunk.IndexClient.
   147  func (s *StorageClient) Stop() {
   148  	s.session.Close()
   149  }
   150  
   151  // Cassandra batching isn't really useful in this case, its more to do multiple
   152  // atomic writes.  Therefore we just do a bunch of writes in parallel.
   153  type writeBatch struct {
   154  	entries []chunk.IndexEntry
   155  }
   156  
   157  // NewWriteBatch implement chunk.IndexClient.
   158  func (s *StorageClient) NewWriteBatch() chunk.WriteBatch {
   159  	return &writeBatch{}
   160  }
   161  
   162  func (b *writeBatch) Add(tableName, hashValue string, rangeValue []byte, value []byte) {
   163  	b.entries = append(b.entries, chunk.IndexEntry{
   164  		TableName:  tableName,
   165  		HashValue:  hashValue,
   166  		RangeValue: rangeValue,
   167  		Value:      value,
   168  	})
   169  }
   170  
   171  // BatchWrite implement chunk.IndexClient.
   172  func (s *StorageClient) BatchWrite(ctx context.Context, batch chunk.WriteBatch) error {
   173  	b := batch.(*writeBatch)
   174  
   175  	for _, entry := range b.entries {
   176  		err := s.session.Query(fmt.Sprintf("INSERT INTO %s (hash, range, value) VALUES (?, ?, ?)",
   177  			entry.TableName), entry.HashValue, entry.RangeValue, entry.Value).WithContext(ctx).Exec()
   178  		if err != nil {
   179  			return errors.WithStack(err)
   180  		}
   181  	}
   182  
   183  	return nil
   184  }
   185  
   186  // QueryPages implement chunk.IndexClient.
   187  func (s *StorageClient) QueryPages(ctx context.Context, queries []chunk.IndexQuery, callback func(chunk.IndexQuery, chunk.ReadBatch) bool) error {
   188  	return util.DoParallelQueries(ctx, s.query, queries, callback)
   189  }
   190  
   191  func (s *StorageClient) query(ctx context.Context, query chunk.IndexQuery, callback func(result chunk.ReadBatch) (shouldContinue bool)) error {
   192  	var q *gocql.Query
   193  
   194  	switch {
   195  	case len(query.RangeValuePrefix) > 0 && query.ValueEqual == nil:
   196  		q = s.session.Query(fmt.Sprintf("SELECT range, value FROM %s WHERE hash = ? AND range >= ? AND range < ?",
   197  			query.TableName), query.HashValue, query.RangeValuePrefix, append(query.RangeValuePrefix, '\xff'))
   198  
   199  	case len(query.RangeValuePrefix) > 0 && query.ValueEqual != nil:
   200  		q = s.session.Query(fmt.Sprintf("SELECT range, value FROM %s WHERE hash = ? AND range >= ? AND range < ? AND value = ? ALLOW FILTERING",
   201  			query.TableName), query.HashValue, query.RangeValuePrefix, append(query.RangeValuePrefix, '\xff'), query.ValueEqual)
   202  
   203  	case len(query.RangeValueStart) > 0 && query.ValueEqual == nil:
   204  		q = s.session.Query(fmt.Sprintf("SELECT range, value FROM %s WHERE hash = ? AND range >= ?",
   205  			query.TableName), query.HashValue, query.RangeValueStart)
   206  
   207  	case len(query.RangeValueStart) > 0 && query.ValueEqual != nil:
   208  		q = s.session.Query(fmt.Sprintf("SELECT range, value FROM %s WHERE hash = ? AND range >= ? AND value = ? ALLOW FILTERING",
   209  			query.TableName), query.HashValue, query.RangeValueStart, query.ValueEqual)
   210  
   211  	case query.ValueEqual == nil:
   212  		q = s.session.Query(fmt.Sprintf("SELECT range, value FROM %s WHERE hash = ?",
   213  			query.TableName), query.HashValue)
   214  
   215  	case query.ValueEqual != nil:
   216  		q = s.session.Query(fmt.Sprintf("SELECT range, value FROM %s WHERE hash = ? value = ? ALLOW FILTERING",
   217  			query.TableName), query.HashValue, query.ValueEqual)
   218  	}
   219  
   220  	iter := q.WithContext(ctx).Iter()
   221  	defer iter.Close()
   222  	scanner := iter.Scanner()
   223  	for scanner.Next() {
   224  		b := &readBatch{}
   225  		if err := scanner.Scan(&b.rangeValue, &b.value); err != nil {
   226  			return errors.WithStack(err)
   227  		}
   228  		if !callback(b) {
   229  			return nil
   230  		}
   231  	}
   232  	return errors.WithStack(scanner.Err())
   233  }
   234  
   235  // readBatch represents a batch of rows read from Cassandra.
   236  type readBatch struct {
   237  	consumed   bool
   238  	rangeValue []byte
   239  	value      []byte
   240  }
   241  
   242  func (r *readBatch) Iterator() chunk.ReadBatchIterator {
   243  	return &readBatchIter{
   244  		readBatch: r,
   245  	}
   246  }
   247  
   248  type readBatchIter struct {
   249  	consumed bool
   250  	*readBatch
   251  }
   252  
   253  func (b *readBatchIter) Next() bool {
   254  	if b.consumed {
   255  		return false
   256  	}
   257  	b.consumed = true
   258  	return true
   259  }
   260  
   261  func (b *readBatchIter) RangeValue() []byte {
   262  	return b.rangeValue
   263  }
   264  
   265  func (b *readBatchIter) Value() []byte {
   266  	return b.value
   267  }
   268  
   269  // PutChunks implements chunk.ObjectClient.
   270  func (s *StorageClient) PutChunks(ctx context.Context, chunks []chunk.Chunk) error {
   271  	for i := range chunks {
   272  		buf, err := chunks[i].Encoded()
   273  		if err != nil {
   274  			return errors.WithStack(err)
   275  		}
   276  		key := chunks[i].ExternalKey()
   277  		tableName, err := s.schemaCfg.ChunkTableFor(chunks[i].From)
   278  		if err != nil {
   279  			return err
   280  		}
   281  
   282  		// Must provide a range key, even though its not useds - hence 0x00.
   283  		q := s.session.Query(fmt.Sprintf("INSERT INTO %s (hash, range, value) VALUES (?, 0x00, ?)",
   284  			tableName), key, buf)
   285  		if err := q.WithContext(ctx).Exec(); err != nil {
   286  			return errors.WithStack(err)
   287  		}
   288  	}
   289  
   290  	return nil
   291  }
   292  
   293  // GetChunks implements chunk.ObjectClient.
   294  func (s *StorageClient) GetChunks(ctx context.Context, input []chunk.Chunk) ([]chunk.Chunk, error) {
   295  	return util.GetParallelChunks(ctx, input, s.getChunk)
   296  }
   297  
   298  func (s *StorageClient) getChunk(ctx context.Context, decodeContext *chunk.DecodeContext, input chunk.Chunk) (chunk.Chunk, error) {
   299  	tableName, err := s.schemaCfg.ChunkTableFor(input.From)
   300  	if err != nil {
   301  		return input, err
   302  	}
   303  
   304  	var buf []byte
   305  	if err := s.session.Query(fmt.Sprintf("SELECT value FROM %s WHERE hash = ?", tableName), input.ExternalKey()).
   306  		WithContext(ctx).Scan(&buf); err != nil {
   307  		return input, errors.WithStack(err)
   308  	}
   309  	err = input.Decode(decodeContext, buf)
   310  	return input, err
   311  }