github.com/portworx/docker@v1.12.1/daemon/logger/awslogs/cloudwatchlogs.go

// Package awslogs provides the logdriver for forwarding container logs to Amazon CloudWatch Logs
package awslogs

import (
	"errors"
	"fmt"
	"os"
	"runtime"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/Sirupsen/logrus"
	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/awserr"
	"github.com/aws/aws-sdk-go/aws/ec2metadata"
	"github.com/aws/aws-sdk-go/aws/request"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/cloudwatchlogs"
	"github.com/docker/docker/daemon/logger"
	"github.com/docker/docker/dockerversion"
)

const (
	name                  = "awslogs"
	regionKey             = "awslogs-region"
	regionEnvKey          = "AWS_REGION"
	logGroupKey           = "awslogs-group"
	logStreamKey          = "awslogs-stream"
	batchPublishFrequency = 5 * time.Second

	// See: http://docs.aws.amazon.com/AmazonCloudWatchLogs/latest/APIReference/API_PutLogEvents.html
	perEventBytes          = 26
	maximumBytesPerPut     = 1048576
	maximumLogEventsPerPut = 10000

	// See: http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_limits.html
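	// maximumBytesPerEvent leaves room for the fixed per-event overhead
	// within the documented 262144-byte event size limit.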
	maximumBytesPerEvent = 262144 - perEventBytes

	resourceAlreadyExistsCode = "ResourceAlreadyExistsException"
	dataAlreadyAcceptedCode   = "DataAlreadyAcceptedException"
	invalidSequenceTokenCode  = "InvalidSequenceTokenException"

	userAgentHeader = "User-Agent"
)

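// logStream is the awslogs implementation of logger.Logger.  Messages are
// queued on the messages channel and batched by collectBatch; lock guards
// closed so that Log and Close do not race on the channel.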
type logStream struct {
	logStreamName string
	logGroupName  string
	client        api
	messages      chan *logger.Message
	lock          sync.RWMutex
	closed        bool
	sequenceToken *string
}

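// api is the subset of the Amazon CloudWatch Logs client used by the driver.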
type api interface {
	CreateLogStream(*cloudwatchlogs.CreateLogStreamInput) (*cloudwatchlogs.CreateLogStreamOutput, error)
	PutLogEvents(*cloudwatchlogs.PutLogEventsInput) (*cloudwatchlogs.PutLogEventsOutput, error)
}

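// regionFinder is the subset of the EC2 Instance Metadata client used for
// region detection.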
type regionFinder interface {
	Region() (string, error)
}

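// byTimestamp implements sort.Interface over InputLogEvents so that batches
// can be sorted into timestamp order before submission.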
type byTimestamp []*cloudwatchlogs.InputLogEvent

// init registers the awslogs driver
func init() {
	if err := logger.RegisterLogDriver(name, New); err != nil {
		logrus.Fatal(err)
	}
	if err := logger.RegisterLogOptValidator(name, ValidateLogOpt); err != nil {
		logrus.Fatal(err)
	}
}

// New creates an awslogs logger using the configuration passed in on the
// context.  Supported context configuration variables are awslogs-region,
// awslogs-group, and awslogs-stream.  When available, configuration is
// also taken from environment variables AWS_REGION, AWS_ACCESS_KEY_ID,
// AWS_SECRET_ACCESS_KEY, the shared credentials file (~/.aws/credentials), and
// the EC2 Instance Metadata Service.
func New(ctx logger.Context) (logger.Logger, error) {
	logGroupName := ctx.Config[logGroupKey]
	logStreamName := ctx.ContainerID
	if ctx.Config[logStreamKey] != "" {
		logStreamName = ctx.Config[logStreamKey]
	}
	client, err := newAWSLogsClient(ctx)
	if err != nil {
		return nil, err
	}
	containerStream := &logStream{
		logStreamName: logStreamName,
		logGroupName:  logGroupName,
		client:        client,
		messages:      make(chan *logger.Message, 4096),
	}
	err = containerStream.create()
	if err != nil {
		return nil, err
	}
	go containerStream.collectBatch()

	return containerStream, nil
}

// newRegionFinder is a variable such that the implementation
// can be swapped out for unit tests.
var newRegionFinder = func() regionFinder {
	return ec2metadata.New(session.New())
}

// newAWSLogsClient creates the service client for Amazon CloudWatch Logs.
// Customizations to the default client from the SDK include a Docker-specific
// User-Agent string and automatic region detection using the EC2 Instance
// Metadata Service when region is otherwise unspecified.
func newAWSLogsClient(ctx logger.Context) (api, error) {
	var region *string
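	// Resolution order: the awslogs-region log option overrides the
	// AWS_REGION environment variable, and the EC2 Instance Metadata
	// Service is the final fallback.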
	if os.Getenv(regionEnvKey) != "" {
		region = aws.String(os.Getenv(regionEnvKey))
	}
	if ctx.Config[regionKey] != "" {
		region = aws.String(ctx.Config[regionKey])
	}
	if region == nil || *region == "" {
		logrus.Info("Trying to get region from EC2 Metadata")
		ec2MetadataClient := newRegionFinder()
		r, err := ec2MetadataClient.Region()
		if err != nil {
			logrus.WithFields(logrus.Fields{
				"error": err,
			}).Error("Could not get region from EC2 metadata, environment, or log option")
			return nil, errors.New("Cannot determine region for awslogs driver")
		}
		region = &r
	}
	logrus.WithFields(logrus.Fields{
		"region": *region,
	}).Debug("Created awslogs client")

	client := cloudwatchlogs.New(session.New(), aws.NewConfig().WithRegion(*region))

	client.Handlers.Build.PushBackNamed(request.NamedHandler{
		Name: "DockerUserAgentHandler",
		Fn: func(r *request.Request) {
			currentAgent := r.HTTPRequest.Header.Get(userAgentHeader)
			r.HTTPRequest.Header.Set(userAgentHeader,
				fmt.Sprintf("Docker %s (%s) %s",
					dockerversion.Version, runtime.GOOS, currentAgent))
		},
	})
	return client, nil
}

// Name returns the name of the awslogs logging driver
func (l *logStream) Name() string {
	return name
}

// Log submits messages for logging by an instance of the awslogs logging driver
func (l *logStream) Log(msg *logger.Message) error {
	l.lock.RLock()
	defer l.lock.RUnlock()
	if !l.closed {
		l.messages <- msg
	}
	return nil
}

// Close closes the instance of the awslogs logging driver
func (l *logStream) Close() error {
	l.lock.Lock()
	defer l.lock.Unlock()
	if !l.closed {
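		// Closing the channel signals collectBatch to flush the final
		// batch and return.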
		close(l.messages)
	}
	l.closed = true
	return nil
}

// create creates a log stream for the instance of the awslogs logging driver
func (l *logStream) create() error {
	input := &cloudwatchlogs.CreateLogStreamInput{
		LogGroupName:  aws.String(l.logGroupName),
		LogStreamName: aws.String(l.logStreamName),
	}

	_, err := l.client.CreateLogStream(input)

	if err != nil {
		if awsErr, ok := err.(awserr.Error); ok {
			fields := logrus.Fields{
				"errorCode":     awsErr.Code(),
				"message":       awsErr.Message(),
				"origError":     awsErr.OrigErr(),
				"logGroupName":  l.logGroupName,
				"logStreamName": l.logStreamName,
			}
			if awsErr.Code() == resourceAlreadyExistsCode {
				// Allow creation to succeed
				logrus.WithFields(fields).Info("Log stream already exists")
				return nil
			}
			logrus.WithFields(fields).Error("Failed to create log stream")
		}
	}
	return err
}

// newTicker is used for time-based batching.  newTicker is a variable such
// that the implementation can be swapped out for unit tests.
var newTicker = func(freq time.Duration) *time.Ticker {
	return time.NewTicker(freq)
}

// collectBatch executes as a goroutine to perform batching of log events for
// submission to the log stream.  Batching is performed on time- and size-
// bases.  Time-based batching occurs at a 5 second interval (defined in the
// batchPublishFrequency const).  Size-based batching is performed on the
// maximum number of events per batch (defined in maximumLogEventsPerPut) and
// the maximum number of total bytes in a batch (defined in
// maximumBytesPerPut).  Log messages are split by the maximum bytes per event
// (defined in maximumBytesPerEvent).  There is a fixed per-event byte overhead
// (defined in perEventBytes) which is accounted for in split- and batch-
// calculations.
func (l *logStream) collectBatch() {
	timer := newTicker(batchPublishFrequency)
	// Stop the ticker when collectBatch returns so it does not leak after
	// the stream is closed.
	defer timer.Stop()
	var events []*cloudwatchlogs.InputLogEvent
	bytes := 0
	for {
		select {
		case <-timer.C:
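			// Time-based flush: publish whatever has accumulated even
			// if the batch is not yet full.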
			l.publishBatch(events)
			events = events[:0]
			bytes = 0
		case msg, more := <-l.messages:
			if !more {
				l.publishBatch(events)
				return
			}
			unprocessedLine := msg.Line
			for len(unprocessedLine) > 0 {
				// Split line length so it does not exceed the maximum
				lineBytes := len(unprocessedLine)
				if lineBytes > maximumBytesPerEvent {
					lineBytes = maximumBytesPerEvent
				}
				line := unprocessedLine[:lineBytes]
				unprocessedLine = unprocessedLine[lineBytes:]
				if (len(events) >= maximumLogEventsPerPut) || (bytes+lineBytes+perEventBytes > maximumBytesPerPut) {
					// Publish an existing batch if it's already over the maximum number of events or if adding this
					// event would push it over the maximum number of total bytes.
					l.publishBatch(events)
					events = events[:0]
					bytes = 0
				}
				events = append(events, &cloudwatchlogs.InputLogEvent{
					Message:   aws.String(string(line)),
					Timestamp: aws.Int64(msg.Timestamp.UnixNano() / int64(time.Millisecond)),
				})
				bytes += (lineBytes + perEventBytes)
			}
		}
	}
}

// publishBatch calls PutLogEvents for a given set of InputLogEvents,
// accounting for sequencing requirements (each request must reference the
// sequence token returned by the previous request).
func (l *logStream) publishBatch(events []*cloudwatchlogs.InputLogEvent) {
	if len(events) == 0 {
		return
	}

	sort.Sort(byTimestamp(events))

	nextSequenceToken, err := l.putLogEvents(events, l.sequenceToken)

	if err != nil {
		if awsErr, ok := err.(awserr.Error); ok {
			if awsErr.Code() == dataAlreadyAcceptedCode {
				// already submitted, just grab the correct sequence token
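				// The expected sequence token is taken from the last
				// space-delimited field of the error message.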
				parts := strings.Split(awsErr.Message(), " ")
				nextSequenceToken = &parts[len(parts)-1]
				logrus.WithFields(logrus.Fields{
					"errorCode":     awsErr.Code(),
					"message":       awsErr.Message(),
					"logGroupName":  l.logGroupName,
					"logStreamName": l.logStreamName,
				}).Info("Data already accepted, ignoring error")
				err = nil
			} else if awsErr.Code() == invalidSequenceTokenCode {
				// sequence code is bad, grab the correct one and retry
				parts := strings.Split(awsErr.Message(), " ")
				token := parts[len(parts)-1]
				nextSequenceToken, err = l.putLogEvents(events, &token)
			}
		}
	}
	if err != nil {
		logrus.Error(err)
	} else {
		l.sequenceToken = nextSequenceToken
	}
}

// putLogEvents wraps the PutLogEvents API
func (l *logStream) putLogEvents(events []*cloudwatchlogs.InputLogEvent, sequenceToken *string) (*string, error) {
	input := &cloudwatchlogs.PutLogEventsInput{
		LogEvents:     events,
		SequenceToken: sequenceToken,
		LogGroupName:  aws.String(l.logGroupName),
		LogStreamName: aws.String(l.logStreamName),
	}
	resp, err := l.client.PutLogEvents(input)
	if err != nil {
		if awsErr, ok := err.(awserr.Error); ok {
			logrus.WithFields(logrus.Fields{
				"errorCode":     awsErr.Code(),
				"message":       awsErr.Message(),
				"origError":     awsErr.OrigErr(),
				"logGroupName":  l.logGroupName,
				"logStreamName": l.logStreamName,
			}).Error("Failed to put log events")
		}
		return nil, err
	}
	return resp.NextSequenceToken, nil
}

// ValidateLogOpt looks for awslogs-specific log options awslogs-region,
// awslogs-group, and awslogs-stream
func ValidateLogOpt(cfg map[string]string) error {
	for key := range cfg {
		switch key {
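		// Empty cases accept the recognized keys; any other key is
		// rejected by the default case.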
		case logGroupKey:
		case logStreamKey:
		case regionKey:
		default:
			return fmt.Errorf("unknown log opt '%s' for %s log driver", key, name)
		}
	}
	if cfg[logGroupKey] == "" {
		return fmt.Errorf("must specify a value for log opt '%s'", logGroupKey)
	}
	return nil
}

// Len returns the length of a byTimestamp slice.  Len is required by the
// sort.Interface interface.
func (slice byTimestamp) Len() int {
	return len(slice)
}

// Less compares two values in a byTimestamp slice by Timestamp.  Less is
// required by the sort.Interface interface.
func (slice byTimestamp) Less(i, j int) bool {
	iTimestamp, jTimestamp := int64(0), int64(0)
	if slice != nil && slice[i].Timestamp != nil {
		iTimestamp = *slice[i].Timestamp
	}
	if slice != nil && slice[j].Timestamp != nil {
		jTimestamp = *slice[j].Timestamp
	}
	return iTimestamp < jTimestamp
}

// Swap swaps two values in a byTimestamp slice with each other.  Swap is
// required by the sort.Interface interface.
func (slice byTimestamp) Swap(i, j int) {
	slice[i], slice[j] = slice[j], slice[i]
}