github.com/crowdsecurity/crowdsec@v1.6.1/pkg/acquisition/modules/kinesis/kinesis.go (about)

     1  package kinesisacquisition
     2  
import (
	"bytes"
	"compress/gzip"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"strings"
	"time"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/arn"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/kinesis"
	"github.com/prometheus/client_golang/prometheus"
	log "github.com/sirupsen/logrus"
	"gopkg.in/tomb.v2"
	"gopkg.in/yaml.v2"

	"github.com/crowdsecurity/go-cs-lib/trace"

	"github.com/crowdsecurity/crowdsec/pkg/acquisition/configuration"
	"github.com/crowdsecurity/crowdsec/pkg/types"
)
    26  
// KinesisConfiguration is the YAML configuration of the kinesis datasource.
// Two acquisition modes exist: plain GetRecords polling on stream_name, or
// enhanced fan-out (RegisterStreamConsumer + SubscribeToShard) on stream_arn.
// stream_name and stream_arn are mutually exclusive (see UnmarshalConfig).
type KinesisConfiguration struct {
	configuration.DataSourceCommonCfg `yaml:",inline"`
	StreamName                        string  `yaml:"stream_name"`           // mandatory when use_enhanced_fanout is false
	StreamARN                         string  `yaml:"stream_arn"`            // mandatory when use_enhanced_fanout is true
	UseEnhancedFanOut                 bool    `yaml:"use_enhanced_fanout"`   //Use RegisterStreamConsumer and SubscribeToShard instead of GetRecords
	AwsProfile                        *string `yaml:"aws_profile"`           // optional shared-credentials profile
	AwsRegion                         string  `yaml:"aws_region"`            // optional region override
	AwsEndpoint                       string  `yaml:"aws_endpoint"`          // optional endpoint override (e.g. localstack)
	ConsumerName                      string  `yaml:"consumer_name"`         // mandatory when use_enhanced_fanout is true
	FromSubscription                  bool    `yaml:"from_subscription"`     // records are gzipped CloudWatch subscription payloads
	MaxRetries                        int     `yaml:"max_retries"`           // retry budget for consumer (de)registration polling; defaults to 10
}
    39  
// KinesisSource reads log events from an AWS Kinesis stream, either by
// polling shards with GetRecords or through an enhanced fan-out consumer.
type KinesisSource struct {
	metricsLevel    int // metrics verbosity (configuration.METRICS_*)
	Config          KinesisConfiguration
	logger          *log.Entry
	kClient         *kinesis.Kinesis // AWS kinesis API client
	shardReaderTomb *tomb.Tomb       // supervises the per-shard reader goroutines
}
    47  
// CloudWatchSubscriptionRecord is the JSON envelope produced by a CloudWatch
// Logs subscription filter; each kinesis record holds one such (gzipped)
// envelope when from_subscription is enabled.
type CloudWatchSubscriptionRecord struct {
	MessageType         string                           `json:"messageType"`
	Owner               string                           `json:"owner"`
	LogGroup            string                           `json:"logGroup"`
	LogStream           string                           `json:"logStream"`
	SubscriptionFilters []string                         `json:"subscriptionFilters"`
	LogEvents           []CloudwatchSubscriptionLogEvent `json:"logEvents"`
}
    56  
// CloudwatchSubscriptionLogEvent is a single log line inside a CloudWatch
// subscription envelope. Timestamp is in milliseconds since the epoch
// (as emitted by CloudWatch) — only Message is consumed downstream here.
type CloudwatchSubscriptionLogEvent struct {
	ID        string `json:"id"`
	Message   string `json:"message"`
	Timestamp int64  `json:"timestamp"`
}
    62  
// linesRead counts events read per stream; the "stream" label is the stream
// name or, in enhanced fan-out mode, the stream ARN.
var linesRead = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Name: "cs_kinesis_stream_hits_total",
		Help: "Number of event read per stream.",
	},
	[]string{"stream"},
)
    70  
// linesReadShards counts events read per individual shard of a stream.
var linesReadShards = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Name: "cs_kinesis_shards_hits_total",
		Help: "Number of event read per shards.",
	},
	[]string{"stream", "shard"},
)
    78  
// GetUuid returns the unique identifier of this datasource instance.
func (k *KinesisSource) GetUuid() string {
	return k.Config.UniqueId
}
    82  
    83  func (k *KinesisSource) newClient() error {
    84  	var sess *session.Session
    85  
    86  	if k.Config.AwsProfile != nil {
    87  		sess = session.Must(session.NewSessionWithOptions(session.Options{
    88  			SharedConfigState: session.SharedConfigEnable,
    89  			Profile:           *k.Config.AwsProfile,
    90  		}))
    91  	} else {
    92  		sess = session.Must(session.NewSessionWithOptions(session.Options{
    93  			SharedConfigState: session.SharedConfigEnable,
    94  		}))
    95  	}
    96  
    97  	if sess == nil {
    98  		return fmt.Errorf("failed to create aws session")
    99  	}
   100  	config := aws.NewConfig()
   101  	if k.Config.AwsRegion != "" {
   102  		config = config.WithRegion(k.Config.AwsRegion)
   103  	}
   104  	if k.Config.AwsEndpoint != "" {
   105  		config = config.WithEndpoint(k.Config.AwsEndpoint)
   106  	}
   107  	k.kClient = kinesis.New(sess, config)
   108  	if k.kClient == nil {
   109  		return fmt.Errorf("failed to create kinesis client")
   110  	}
   111  	return nil
   112  }
   113  
   114  func (k *KinesisSource) GetMetrics() []prometheus.Collector {
   115  	return []prometheus.Collector{linesRead, linesReadShards}
   116  
   117  }
   118  func (k *KinesisSource) GetAggregMetrics() []prometheus.Collector {
   119  	return []prometheus.Collector{linesRead, linesReadShards}
   120  }
   121  
   122  func (k *KinesisSource) UnmarshalConfig(yamlConfig []byte) error {
   123  	k.Config = KinesisConfiguration{}
   124  
   125  	err := yaml.UnmarshalStrict(yamlConfig, &k.Config)
   126  	if err != nil {
   127  		return fmt.Errorf("Cannot parse kinesis datasource configuration: %w", err)
   128  	}
   129  
   130  	if k.Config.Mode == "" {
   131  		k.Config.Mode = configuration.TAIL_MODE
   132  	}
   133  
   134  	if k.Config.StreamName == "" && !k.Config.UseEnhancedFanOut {
   135  		return fmt.Errorf("stream_name is mandatory when use_enhanced_fanout is false")
   136  	}
   137  	if k.Config.StreamARN == "" && k.Config.UseEnhancedFanOut {
   138  		return fmt.Errorf("stream_arn is mandatory when use_enhanced_fanout is true")
   139  	}
   140  	if k.Config.ConsumerName == "" && k.Config.UseEnhancedFanOut {
   141  		return fmt.Errorf("consumer_name is mandatory when use_enhanced_fanout is true")
   142  	}
   143  	if k.Config.StreamARN != "" && k.Config.StreamName != "" {
   144  		return fmt.Errorf("stream_arn and stream_name are mutually exclusive")
   145  	}
   146  	if k.Config.MaxRetries <= 0 {
   147  		k.Config.MaxRetries = 10
   148  	}
   149  
   150  	return nil
   151  }
   152  
   153  func (k *KinesisSource) Configure(yamlConfig []byte, logger *log.Entry, MetricsLevel int) error {
   154  	k.logger = logger
   155  	k.metricsLevel = MetricsLevel
   156  
   157  	err := k.UnmarshalConfig(yamlConfig)
   158  	if err != nil {
   159  		return err
   160  	}
   161  
   162  	err = k.newClient()
   163  	if err != nil {
   164  		return fmt.Errorf("cannot create kinesis client: %w", err)
   165  	}
   166  
   167  	k.shardReaderTomb = &tomb.Tomb{}
   168  	return nil
   169  }
   170  
   171  func (k *KinesisSource) ConfigureByDSN(string, map[string]string, *log.Entry, string) error {
   172  	return fmt.Errorf("kinesis datasource does not support command-line acquisition")
   173  }
   174  
// GetMode returns the configured acquisition mode (tail or cat).
func (k *KinesisSource) GetMode() string {
	return k.Config.Mode
}
   178  
// GetName returns the datasource module name.
func (k *KinesisSource) GetName() string {
	return "kinesis"
}
   182  
   183  func (k *KinesisSource) OneShotAcquisition(out chan types.Event, t *tomb.Tomb) error {
   184  	return fmt.Errorf("kinesis datasource does not support one-shot acquisition")
   185  }
   186  
   187  func (k *KinesisSource) decodeFromSubscription(record []byte) ([]CloudwatchSubscriptionLogEvent, error) {
   188  	b := bytes.NewBuffer(record)
   189  	r, err := gzip.NewReader(b)
   190  
   191  	if err != nil {
   192  		k.logger.Error(err)
   193  		return nil, err
   194  	}
   195  	decompressed, err := io.ReadAll(r)
   196  	if err != nil {
   197  		k.logger.Error(err)
   198  		return nil, err
   199  	}
   200  	var subscriptionRecord CloudWatchSubscriptionRecord
   201  	err = json.Unmarshal(decompressed, &subscriptionRecord)
   202  	if err != nil {
   203  		k.logger.Error(err)
   204  		return nil, err
   205  	}
   206  	return subscriptionRecord.LogEvents, nil
   207  }
   208  
   209  func (k *KinesisSource) WaitForConsumerDeregistration(consumerName string, streamARN string) error {
   210  	maxTries := k.Config.MaxRetries
   211  	for i := 0; i < maxTries; i++ {
   212  		_, err := k.kClient.DescribeStreamConsumer(&kinesis.DescribeStreamConsumerInput{
   213  			ConsumerName: aws.String(consumerName),
   214  			StreamARN:    aws.String(streamARN),
   215  		})
   216  		if err != nil {
   217  			switch err.(type) {
   218  			case *kinesis.ResourceNotFoundException:
   219  				return nil
   220  			default:
   221  				k.logger.Errorf("Error while waiting for consumer deregistration: %s", err)
   222  				return fmt.Errorf("cannot describe stream consumer: %w", err)
   223  			}
   224  		}
   225  		time.Sleep(time.Millisecond * 200 * time.Duration(i+1))
   226  	}
   227  	return fmt.Errorf("consumer %s is not deregistered after %d tries", consumerName, maxTries)
   228  }
   229  
   230  func (k *KinesisSource) DeregisterConsumer() error {
   231  	k.logger.Debugf("Deregistering consumer %s if it exists", k.Config.ConsumerName)
   232  	_, err := k.kClient.DeregisterStreamConsumer(&kinesis.DeregisterStreamConsumerInput{
   233  		ConsumerName: aws.String(k.Config.ConsumerName),
   234  		StreamARN:    aws.String(k.Config.StreamARN),
   235  	})
   236  	if err != nil {
   237  		switch err.(type) {
   238  		case *kinesis.ResourceNotFoundException:
   239  		default:
   240  			return fmt.Errorf("cannot deregister stream consumer: %w", err)
   241  		}
   242  	}
   243  	err = k.WaitForConsumerDeregistration(k.Config.ConsumerName, k.Config.StreamARN)
   244  	if err != nil {
   245  		return fmt.Errorf("cannot wait for consumer deregistration: %w", err)
   246  	}
   247  	return nil
   248  }
   249  
   250  func (k *KinesisSource) WaitForConsumerRegistration(consumerARN string) error {
   251  	maxTries := k.Config.MaxRetries
   252  	for i := 0; i < maxTries; i++ {
   253  		describeOutput, err := k.kClient.DescribeStreamConsumer(&kinesis.DescribeStreamConsumerInput{
   254  			ConsumerARN: aws.String(consumerARN),
   255  		})
   256  		if err != nil {
   257  			return fmt.Errorf("cannot describe stream consumer: %w", err)
   258  		}
   259  		if *describeOutput.ConsumerDescription.ConsumerStatus == "ACTIVE" {
   260  			k.logger.Debugf("Consumer %s is active", consumerARN)
   261  			return nil
   262  		}
   263  		time.Sleep(time.Millisecond * 200 * time.Duration(i+1))
   264  		k.logger.Debugf("Waiting for consumer registration %d", i)
   265  	}
   266  	return fmt.Errorf("consumer %s is not active after %d tries", consumerARN, maxTries)
   267  }
   268  
   269  func (k *KinesisSource) RegisterConsumer() (*kinesis.RegisterStreamConsumerOutput, error) {
   270  	k.logger.Debugf("Registering consumer %s", k.Config.ConsumerName)
   271  	streamConsumer, err := k.kClient.RegisterStreamConsumer(&kinesis.RegisterStreamConsumerInput{
   272  		ConsumerName: aws.String(k.Config.ConsumerName),
   273  		StreamARN:    aws.String(k.Config.StreamARN),
   274  	})
   275  	if err != nil {
   276  		return nil, fmt.Errorf("cannot register stream consumer: %w", err)
   277  	}
   278  	err = k.WaitForConsumerRegistration(*streamConsumer.Consumer.ConsumerARN)
   279  	if err != nil {
   280  		return nil, fmt.Errorf("timeout while waiting for consumer to be active: %w", err)
   281  	}
   282  	return streamConsumer, nil
   283  }
   284  
   285  func (k *KinesisSource) ParseAndPushRecords(records []*kinesis.Record, out chan types.Event, logger *log.Entry, shardId string) {
   286  	for _, record := range records {
   287  		if k.Config.StreamARN != "" {
   288  			if k.metricsLevel != configuration.METRICS_NONE {
   289  				linesReadShards.With(prometheus.Labels{"stream": k.Config.StreamARN, "shard": shardId}).Inc()
   290  				linesRead.With(prometheus.Labels{"stream": k.Config.StreamARN}).Inc()
   291  			}
   292  		} else {
   293  			if k.metricsLevel != configuration.METRICS_NONE {
   294  				linesReadShards.With(prometheus.Labels{"stream": k.Config.StreamName, "shard": shardId}).Inc()
   295  				linesRead.With(prometheus.Labels{"stream": k.Config.StreamName}).Inc()
   296  			}
   297  		}
   298  		var data []CloudwatchSubscriptionLogEvent
   299  		var err error
   300  		if k.Config.FromSubscription {
   301  			//The AWS docs says that the data is base64 encoded
   302  			//but apparently GetRecords decodes it for us ?
   303  			data, err = k.decodeFromSubscription(record.Data)
   304  			if err != nil {
   305  				logger.Errorf("Cannot decode data: %s", err)
   306  				continue
   307  			}
   308  		} else {
   309  			data = []CloudwatchSubscriptionLogEvent{{Message: string(record.Data)}}
   310  		}
   311  		for _, event := range data {
   312  			logger.Tracef("got record %s", event.Message)
   313  			l := types.Line{}
   314  			l.Raw = event.Message
   315  			l.Labels = k.Config.Labels
   316  			l.Time = time.Now().UTC()
   317  			l.Process = true
   318  			l.Module = k.GetName()
   319  			if k.Config.StreamARN != "" {
   320  				l.Src = k.Config.StreamARN
   321  			} else {
   322  				l.Src = k.Config.StreamName
   323  			}
   324  			var evt types.Event
   325  			if !k.Config.UseTimeMachine {
   326  				evt = types.Event{Line: l, Process: true, Type: types.LOG, ExpectMode: types.LIVE}
   327  			} else {
   328  				evt = types.Event{Line: l, Process: true, Type: types.LOG, ExpectMode: types.TIMEMACHINE}
   329  			}
   330  			out <- evt
   331  		}
   332  	}
   333  }
   334  
   335  func (k *KinesisSource) ReadFromSubscription(reader kinesis.SubscribeToShardEventStreamReader, out chan types.Event, shardId string, streamName string) error {
   336  	logger := k.logger.WithFields(log.Fields{"shard_id": shardId})
   337  	//ghetto sync, kinesis allows to subscribe to a closed shard, which will make the goroutine exit immediately
   338  	//and we won't be able to start a new one if this is the first one started by the tomb
   339  	//TODO: look into parent shards to see if a shard is closed before starting to read it ?
   340  	time.Sleep(time.Second)
   341  	for {
   342  		select {
   343  		case <-k.shardReaderTomb.Dying():
   344  			logger.Infof("Subscribed shard reader is dying")
   345  			err := reader.Close()
   346  			if err != nil {
   347  				return fmt.Errorf("cannot close kinesis subscribed shard reader: %w", err)
   348  			}
   349  			return nil
   350  		case event, ok := <-reader.Events():
   351  			if !ok {
   352  				logger.Infof("Event chan has been closed")
   353  				return nil
   354  			}
   355  			switch event := event.(type) {
   356  			case *kinesis.SubscribeToShardEvent:
   357  				k.ParseAndPushRecords(event.Records, out, logger, shardId)
   358  			case *kinesis.SubscribeToShardEventStreamUnknownEvent:
   359  				logger.Infof("got an unknown event, what to do ?")
   360  			}
   361  		}
   362  	}
   363  }
   364  
   365  func (k *KinesisSource) SubscribeToShards(arn arn.ARN, streamConsumer *kinesis.RegisterStreamConsumerOutput, out chan types.Event) error {
   366  	shards, err := k.kClient.ListShards(&kinesis.ListShardsInput{
   367  		StreamName: aws.String(arn.Resource[7:]),
   368  	})
   369  	if err != nil {
   370  		return fmt.Errorf("cannot list shards for enhanced_read: %w", err)
   371  	}
   372  
   373  	for _, shard := range shards.Shards {
   374  		shardId := *shard.ShardId
   375  		r, err := k.kClient.SubscribeToShard(&kinesis.SubscribeToShardInput{
   376  			ShardId:          aws.String(shardId),
   377  			StartingPosition: &kinesis.StartingPosition{Type: aws.String(kinesis.ShardIteratorTypeLatest)},
   378  			ConsumerARN:      streamConsumer.Consumer.ConsumerARN,
   379  		})
   380  		if err != nil {
   381  			return fmt.Errorf("cannot subscribe to shard: %w", err)
   382  		}
   383  		k.shardReaderTomb.Go(func() error {
   384  			return k.ReadFromSubscription(r.GetEventStream().Reader, out, shardId, arn.Resource[7:])
   385  		})
   386  	}
   387  	return nil
   388  }
   389  
// EnhancedRead is the enhanced fan-out acquisition loop: it re-creates the
// stream consumer, subscribes to every shard, and re-subscribes whenever all
// shard readers exit without error (resharding event or periodic resubscribe).
// It returns when the parent tomb dies (after deregistering the consumer) or
// when a shard reader fails.
func (k *KinesisSource) EnhancedRead(out chan types.Event, t *tomb.Tomb) error {
	parsedARN, err := arn.Parse(k.Config.StreamARN)
	if err != nil {
		return fmt.Errorf("cannot parse stream ARN: %w", err)
	}
	if !strings.HasPrefix(parsedARN.Resource, "stream/") {
		return fmt.Errorf("resource part of stream ARN %s does not start with stream/", k.Config.StreamARN)
	}

	// parsedARN.Resource is "stream/<name>"; strip the prefix for the log field.
	k.logger = k.logger.WithFields(log.Fields{"stream": parsedARN.Resource[7:]})
	k.logger.Info("starting kinesis acquisition with enhanced fan-out")
	// Remove any stale consumer with our name before registering a fresh one.
	err = k.DeregisterConsumer()
	if err != nil {
		return fmt.Errorf("cannot deregister consumer: %w", err)
	}

	streamConsumer, err := k.RegisterConsumer()
	if err != nil {
		return fmt.Errorf("cannot register consumer: %w", err)
	}

	for {
		// Fresh tomb on every round: a dead tomb cannot spawn new goroutines.
		k.shardReaderTomb = &tomb.Tomb{}

		err = k.SubscribeToShards(parsedARN, streamConsumer, out)
		if err != nil {
			return fmt.Errorf("cannot subscribe to shards: %w", err)
		}
		select {
		case <-t.Dying():
			k.logger.Infof("Kinesis source is dying")
			k.shardReaderTomb.Kill(nil)
			_ = k.shardReaderTomb.Wait() //we don't care about the error as we kill the tomb ourselves
			// Clean up our consumer registration on shutdown.
			err = k.DeregisterConsumer()
			if err != nil {
				return fmt.Errorf("cannot deregister consumer: %w", err)
			}
			return nil
		case <-k.shardReaderTomb.Dying():
			k.logger.Debugf("Kinesis subscribed shard reader is dying")
			if k.shardReaderTomb.Err() != nil {
				return k.shardReaderTomb.Err()
			}
			//All goroutines have exited without error, so a resharding event, start again
			k.logger.Debugf("All reader goroutines have exited, resharding event or periodic resubscribe")
			continue
		}
	}
}
   439  
   440  func (k *KinesisSource) ReadFromShard(out chan types.Event, shardId string) error {
   441  	logger := k.logger.WithFields(log.Fields{"shard": shardId})
   442  	logger.Debugf("Starting to read shard")
   443  	sharIt, err := k.kClient.GetShardIterator(&kinesis.GetShardIteratorInput{ShardId: aws.String(shardId),
   444  		StreamName:        &k.Config.StreamName,
   445  		ShardIteratorType: aws.String(kinesis.ShardIteratorTypeLatest)})
   446  	if err != nil {
   447  		logger.Errorf("Cannot get shard iterator: %s", err)
   448  		return fmt.Errorf("cannot get shard iterator: %w", err)
   449  	}
   450  	it := sharIt.ShardIterator
   451  	//AWS recommends to wait for a second between calls to GetRecords for a given shard
   452  	ticker := time.NewTicker(time.Second)
   453  	for {
   454  		select {
   455  		case <-ticker.C:
   456  			records, err := k.kClient.GetRecords(&kinesis.GetRecordsInput{ShardIterator: it})
   457  			it = records.NextShardIterator
   458  			if err != nil {
   459  				switch err.(type) {
   460  				case *kinesis.ProvisionedThroughputExceededException:
   461  					logger.Warn("Provisioned throughput exceeded")
   462  					//TODO: implement exponential backoff
   463  					continue
   464  				case *kinesis.ExpiredIteratorException:
   465  					logger.Warn("Expired iterator")
   466  					continue
   467  				default:
   468  					logger.Error("Cannot get records")
   469  					return fmt.Errorf("cannot get records: %w", err)
   470  				}
   471  			}
   472  			k.ParseAndPushRecords(records.Records, out, logger, shardId)
   473  
   474  			if it == nil {
   475  				logger.Warnf("Shard has been closed")
   476  				return nil
   477  			}
   478  		case <-k.shardReaderTomb.Dying():
   479  			logger.Infof("shardReaderTomb is dying, exiting ReadFromShard")
   480  			ticker.Stop()
   481  			return nil
   482  		}
   483  	}
   484  }
   485  
// ReadFromStream is the polling (non enhanced fan-out) acquisition loop: it
// lists the shards of the configured stream, starts one GetRecords reader per
// shard, and restarts the whole set when every reader exits cleanly (which
// signals a resharding event). It returns when the parent tomb dies or a
// shard reader fails.
func (k *KinesisSource) ReadFromStream(out chan types.Event, t *tomb.Tomb) error {
	k.logger = k.logger.WithFields(log.Fields{"stream": k.Config.StreamName})
	k.logger.Info("starting kinesis acquisition from shards")
	for {
		shards, err := k.kClient.ListShards(&kinesis.ListShardsInput{
			StreamName: aws.String(k.Config.StreamName),
		})
		if err != nil {
			return fmt.Errorf("cannot list shards: %w", err)
		}
		// Fresh tomb on every round: a dead tomb cannot spawn new goroutines.
		k.shardReaderTomb = &tomb.Tomb{}
		for _, shard := range shards.Shards {
			shardId := *shard.ShardId
			k.shardReaderTomb.Go(func() error {
				defer trace.CatchPanic("crowdsec/acquis/kinesis/streaming/shard")
				return k.ReadFromShard(out, shardId)
			})
		}
		select {
		case <-t.Dying():
			k.logger.Info("kinesis source is dying")
			k.shardReaderTomb.Kill(nil)
			_ = k.shardReaderTomb.Wait() //we don't care about the error as we kill the tomb ourselves
			return nil
		case <-k.shardReaderTomb.Dying():
			reason := k.shardReaderTomb.Err()
			if reason != nil {
				k.logger.Errorf("Unexpected error from shard reader : %s", reason)
				return reason
			}
			k.logger.Infof("All shards have been closed, probably a resharding event, restarting acquisition")
			continue
		}
	}
}
   521  
   522  func (k *KinesisSource) StreamingAcquisition(out chan types.Event, t *tomb.Tomb) error {
   523  	t.Go(func() error {
   524  		defer trace.CatchPanic("crowdsec/acquis/kinesis/streaming")
   525  		if k.Config.UseEnhancedFanOut {
   526  			return k.EnhancedRead(out, t)
   527  		} else {
   528  			return k.ReadFromStream(out, t)
   529  		}
   530  	})
   531  	return nil
   532  }
   533  
// CanRun always succeeds: the kinesis datasource has no runtime prerequisites.
func (k *KinesisSource) CanRun() error {
	return nil
}
   537  
// Dump exposes the datasource itself for debugging/introspection.
func (k *KinesisSource) Dump() interface{} {
	return k
}