github.com/vieux/docker@v0.6.3-0.20161004191708-e097c2a938c7/daemon/logger/awslogs/cloudwatchlogs.go (about)

     1  // Package awslogs provides the logdriver for forwarding container logs to Amazon CloudWatch Logs
     2  package awslogs
     3  
     4  import (
     5  	"errors"
     6  	"fmt"
     7  	"os"
     8  	"runtime"
     9  	"sort"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/Sirupsen/logrus"
    15  	"github.com/aws/aws-sdk-go/aws"
    16  	"github.com/aws/aws-sdk-go/aws/awserr"
    17  	"github.com/aws/aws-sdk-go/aws/ec2metadata"
    18  	"github.com/aws/aws-sdk-go/aws/request"
    19  	"github.com/aws/aws-sdk-go/aws/session"
    20  	"github.com/aws/aws-sdk-go/service/cloudwatchlogs"
    21  	"github.com/docker/docker/daemon/logger"
    22  	"github.com/docker/docker/dockerversion"
    23  )
    24  
    25  const (
    26  	name                  = "awslogs"
    27  	regionKey             = "awslogs-region"
    28  	regionEnvKey          = "AWS_REGION"
    29  	logGroupKey           = "awslogs-group"
    30  	logStreamKey          = "awslogs-stream"
    31  	batchPublishFrequency = 5 * time.Second
    32  
    33  	// See: http://docs.aws.amazon.com/AmazonCloudWatchLogs/latest/APIReference/API_PutLogEvents.html
    34  	perEventBytes          = 26
    35  	maximumBytesPerPut     = 1048576
    36  	maximumLogEventsPerPut = 10000
    37  
    38  	// See: http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/cloudwatch_limits.html
    39  	maximumBytesPerEvent = 262144 - perEventBytes
    40  
    41  	resourceAlreadyExistsCode = "ResourceAlreadyExistsException"
    42  	dataAlreadyAcceptedCode   = "DataAlreadyAcceptedException"
    43  	invalidSequenceTokenCode  = "InvalidSequenceTokenException"
    44  
    45  	userAgentHeader = "User-Agent"
    46  )
    47  
    48  type logStream struct {
    49  	logStreamName string
    50  	logGroupName  string
    51  	client        api
    52  	messages      chan *logger.Message
    53  	lock          sync.RWMutex
    54  	closed        bool
    55  	sequenceToken *string
    56  }
    57  
    58  type api interface {
    59  	CreateLogStream(*cloudwatchlogs.CreateLogStreamInput) (*cloudwatchlogs.CreateLogStreamOutput, error)
    60  	PutLogEvents(*cloudwatchlogs.PutLogEventsInput) (*cloudwatchlogs.PutLogEventsOutput, error)
    61  }
    62  
    63  type regionFinder interface {
    64  	Region() (string, error)
    65  }
    66  
    67  type wrappedEvent struct {
    68  	inputLogEvent *cloudwatchlogs.InputLogEvent
    69  	insertOrder   int
    70  }
    71  type byTimestamp []wrappedEvent
    72  
    73  // init registers the awslogs driver
    74  func init() {
    75  	if err := logger.RegisterLogDriver(name, New); err != nil {
    76  		logrus.Fatal(err)
    77  	}
    78  	if err := logger.RegisterLogOptValidator(name, ValidateLogOpt); err != nil {
    79  		logrus.Fatal(err)
    80  	}
    81  }
    82  
    83  // New creates an awslogs logger using the configuration passed in on the
    84  // context.  Supported context configuration variables are awslogs-region,
    85  // awslogs-group, and awslogs-stream.  When available, configuration is
    86  // also taken from environment variables AWS_REGION, AWS_ACCESS_KEY_ID,
    87  // AWS_SECRET_ACCESS_KEY, the shared credentials file (~/.aws/credentials), and
    88  // the EC2 Instance Metadata Service.
    89  func New(ctx logger.Context) (logger.Logger, error) {
    90  	logGroupName := ctx.Config[logGroupKey]
    91  	logStreamName := ctx.ContainerID
    92  	if ctx.Config[logStreamKey] != "" {
    93  		logStreamName = ctx.Config[logStreamKey]
    94  	}
    95  	client, err := newAWSLogsClient(ctx)
    96  	if err != nil {
    97  		return nil, err
    98  	}
    99  	containerStream := &logStream{
   100  		logStreamName: logStreamName,
   101  		logGroupName:  logGroupName,
   102  		client:        client,
   103  		messages:      make(chan *logger.Message, 4096),
   104  	}
   105  	err = containerStream.create()
   106  	if err != nil {
   107  		return nil, err
   108  	}
   109  	go containerStream.collectBatch()
   110  
   111  	return containerStream, nil
   112  }
   113  
   114  // newRegionFinder is a variable such that the implementation
   115  // can be swapped out for unit tests.
   116  var newRegionFinder = func() regionFinder {
   117  	return ec2metadata.New(session.New())
   118  }
   119  
   120  // newAWSLogsClient creates the service client for Amazon CloudWatch Logs.
   121  // Customizations to the default client from the SDK include a Docker-specific
   122  // User-Agent string and automatic region detection using the EC2 Instance
   123  // Metadata Service when region is otherwise unspecified.
   124  func newAWSLogsClient(ctx logger.Context) (api, error) {
   125  	var region *string
   126  	if os.Getenv(regionEnvKey) != "" {
   127  		region = aws.String(os.Getenv(regionEnvKey))
   128  	}
   129  	if ctx.Config[regionKey] != "" {
   130  		region = aws.String(ctx.Config[regionKey])
   131  	}
   132  	if region == nil || *region == "" {
   133  		logrus.Info("Trying to get region from EC2 Metadata")
   134  		ec2MetadataClient := newRegionFinder()
   135  		r, err := ec2MetadataClient.Region()
   136  		if err != nil {
   137  			logrus.WithFields(logrus.Fields{
   138  				"error": err,
   139  			}).Error("Could not get region from EC2 metadata, environment, or log option")
   140  			return nil, errors.New("Cannot determine region for awslogs driver")
   141  		}
   142  		region = &r
   143  	}
   144  	logrus.WithFields(logrus.Fields{
   145  		"region": *region,
   146  	}).Debug("Created awslogs client")
   147  
   148  	client := cloudwatchlogs.New(session.New(), aws.NewConfig().WithRegion(*region))
   149  
   150  	client.Handlers.Build.PushBackNamed(request.NamedHandler{
   151  		Name: "DockerUserAgentHandler",
   152  		Fn: func(r *request.Request) {
   153  			currentAgent := r.HTTPRequest.Header.Get(userAgentHeader)
   154  			r.HTTPRequest.Header.Set(userAgentHeader,
   155  				fmt.Sprintf("Docker %s (%s) %s",
   156  					dockerversion.Version, runtime.GOOS, currentAgent))
   157  		},
   158  	})
   159  	return client, nil
   160  }
   161  
   162  // Name returns the name of the awslogs logging driver
   163  func (l *logStream) Name() string {
   164  	return name
   165  }
   166  
   167  // Log submits messages for logging by an instance of the awslogs logging driver
   168  func (l *logStream) Log(msg *logger.Message) error {
   169  	l.lock.RLock()
   170  	defer l.lock.RUnlock()
   171  	if !l.closed {
   172  		// buffer up the data, making sure to copy the Line data
   173  		l.messages <- logger.CopyMessage(msg)
   174  	}
   175  	return nil
   176  }
   177  
   178  // Close closes the instance of the awslogs logging driver
   179  func (l *logStream) Close() error {
   180  	l.lock.Lock()
   181  	defer l.lock.Unlock()
   182  	if !l.closed {
   183  		close(l.messages)
   184  	}
   185  	l.closed = true
   186  	return nil
   187  }
   188  
   189  // create creates a log stream for the instance of the awslogs logging driver
   190  func (l *logStream) create() error {
   191  	input := &cloudwatchlogs.CreateLogStreamInput{
   192  		LogGroupName:  aws.String(l.logGroupName),
   193  		LogStreamName: aws.String(l.logStreamName),
   194  	}
   195  
   196  	_, err := l.client.CreateLogStream(input)
   197  
   198  	if err != nil {
   199  		if awsErr, ok := err.(awserr.Error); ok {
   200  			fields := logrus.Fields{
   201  				"errorCode":     awsErr.Code(),
   202  				"message":       awsErr.Message(),
   203  				"origError":     awsErr.OrigErr(),
   204  				"logGroupName":  l.logGroupName,
   205  				"logStreamName": l.logStreamName,
   206  			}
   207  			if awsErr.Code() == resourceAlreadyExistsCode {
   208  				// Allow creation to succeed
   209  				logrus.WithFields(fields).Info("Log stream already exists")
   210  				return nil
   211  			}
   212  			logrus.WithFields(fields).Error("Failed to create log stream")
   213  		}
   214  	}
   215  	return err
   216  }
   217  
   218  // newTicker is used for time-based batching.  newTicker is a variable such
   219  // that the implementation can be swapped out for unit tests.
   220  var newTicker = func(freq time.Duration) *time.Ticker {
   221  	return time.NewTicker(freq)
   222  }
   223  
   224  // collectBatch executes as a goroutine to perform batching of log events for
   225  // submission to the log stream.  Batching is performed on time- and size-
   226  // bases.  Time-based batching occurs at a 5 second interval (defined in the
   227  // batchPublishFrequency const).  Size-based batching is performed on the
   228  // maximum number of events per batch (defined in maximumLogEventsPerPut) and
   229  // the maximum number of total bytes in a batch (defined in
   230  // maximumBytesPerPut).  Log messages are split by the maximum bytes per event
   231  // (defined in maximumBytesPerEvent).  There is a fixed per-event byte overhead
   232  // (defined in perEventBytes) which is accounted for in split- and batch-
   233  // calculations.
   234  func (l *logStream) collectBatch() {
   235  	timer := newTicker(batchPublishFrequency)
   236  	var events []wrappedEvent
   237  	bytes := 0
   238  	for {
   239  		select {
   240  		case <-timer.C:
   241  			l.publishBatch(events)
   242  			events = events[:0]
   243  			bytes = 0
   244  		case msg, more := <-l.messages:
   245  			if !more {
   246  				l.publishBatch(events)
   247  				return
   248  			}
   249  			unprocessedLine := msg.Line
   250  			for len(unprocessedLine) > 0 {
   251  				// Split line length so it does not exceed the maximum
   252  				lineBytes := len(unprocessedLine)
   253  				if lineBytes > maximumBytesPerEvent {
   254  					lineBytes = maximumBytesPerEvent
   255  				}
   256  				line := unprocessedLine[:lineBytes]
   257  				unprocessedLine = unprocessedLine[lineBytes:]
   258  				if (len(events) >= maximumLogEventsPerPut) || (bytes+lineBytes+perEventBytes > maximumBytesPerPut) {
   259  					// Publish an existing batch if it's already over the maximum number of events or if adding this
   260  					// event would push it over the maximum number of total bytes.
   261  					l.publishBatch(events)
   262  					events = events[:0]
   263  					bytes = 0
   264  				}
   265  				events = append(events, wrappedEvent{
   266  					inputLogEvent: &cloudwatchlogs.InputLogEvent{
   267  						Message:   aws.String(string(line)),
   268  						Timestamp: aws.Int64(msg.Timestamp.UnixNano() / int64(time.Millisecond)),
   269  					},
   270  					insertOrder: len(events),
   271  				})
   272  				bytes += (lineBytes + perEventBytes)
   273  			}
   274  		}
   275  	}
   276  }
   277  
   278  // publishBatch calls PutLogEvents for a given set of InputLogEvents,
   279  // accounting for sequencing requirements (each request must reference the
   280  // sequence token returned by the previous request).
   281  func (l *logStream) publishBatch(events []wrappedEvent) {
   282  	if len(events) == 0 {
   283  		return
   284  	}
   285  
   286  	// events in a batch must be sorted by timestamp
   287  	// see http://docs.aws.amazon.com/AmazonCloudWatchLogs/latest/APIReference/API_PutLogEvents.html
   288  	sort.Sort(byTimestamp(events))
   289  	cwEvents := unwrapEvents(events)
   290  
   291  	nextSequenceToken, err := l.putLogEvents(cwEvents, l.sequenceToken)
   292  
   293  	if err != nil {
   294  		if awsErr, ok := err.(awserr.Error); ok {
   295  			if awsErr.Code() == dataAlreadyAcceptedCode {
   296  				// already submitted, just grab the correct sequence token
   297  				parts := strings.Split(awsErr.Message(), " ")
   298  				nextSequenceToken = &parts[len(parts)-1]
   299  				logrus.WithFields(logrus.Fields{
   300  					"errorCode":     awsErr.Code(),
   301  					"message":       awsErr.Message(),
   302  					"logGroupName":  l.logGroupName,
   303  					"logStreamName": l.logStreamName,
   304  				}).Info("Data already accepted, ignoring error")
   305  				err = nil
   306  			} else if awsErr.Code() == invalidSequenceTokenCode {
   307  				// sequence code is bad, grab the correct one and retry
   308  				parts := strings.Split(awsErr.Message(), " ")
   309  				token := parts[len(parts)-1]
   310  				nextSequenceToken, err = l.putLogEvents(cwEvents, &token)
   311  			}
   312  		}
   313  	}
   314  	if err != nil {
   315  		logrus.Error(err)
   316  	} else {
   317  		l.sequenceToken = nextSequenceToken
   318  	}
   319  }
   320  
   321  // putLogEvents wraps the PutLogEvents API
   322  func (l *logStream) putLogEvents(events []*cloudwatchlogs.InputLogEvent, sequenceToken *string) (*string, error) {
   323  	input := &cloudwatchlogs.PutLogEventsInput{
   324  		LogEvents:     events,
   325  		SequenceToken: sequenceToken,
   326  		LogGroupName:  aws.String(l.logGroupName),
   327  		LogStreamName: aws.String(l.logStreamName),
   328  	}
   329  	resp, err := l.client.PutLogEvents(input)
   330  	if err != nil {
   331  		if awsErr, ok := err.(awserr.Error); ok {
   332  			logrus.WithFields(logrus.Fields{
   333  				"errorCode":     awsErr.Code(),
   334  				"message":       awsErr.Message(),
   335  				"origError":     awsErr.OrigErr(),
   336  				"logGroupName":  l.logGroupName,
   337  				"logStreamName": l.logStreamName,
   338  			}).Error("Failed to put log events")
   339  		}
   340  		return nil, err
   341  	}
   342  	return resp.NextSequenceToken, nil
   343  }
   344  
   345  // ValidateLogOpt looks for awslogs-specific log options awslogs-region,
   346  // awslogs-group, and awslogs-stream
   347  func ValidateLogOpt(cfg map[string]string) error {
   348  	for key := range cfg {
   349  		switch key {
   350  		case logGroupKey:
   351  		case logStreamKey:
   352  		case regionKey:
   353  		default:
   354  			return fmt.Errorf("unknown log opt '%s' for %s log driver", key, name)
   355  		}
   356  	}
   357  	if cfg[logGroupKey] == "" {
   358  		return fmt.Errorf("must specify a value for log opt '%s'", logGroupKey)
   359  	}
   360  	return nil
   361  }
   362  
   363  // Len returns the length of a byTimestamp slice.  Len is required by the
   364  // sort.Interface interface.
   365  func (slice byTimestamp) Len() int {
   366  	return len(slice)
   367  }
   368  
   369  // Less compares two values in a byTimestamp slice by Timestamp.  Less is
   370  // required by the sort.Interface interface.
   371  func (slice byTimestamp) Less(i, j int) bool {
   372  	iTimestamp, jTimestamp := int64(0), int64(0)
   373  	if slice != nil && slice[i].inputLogEvent.Timestamp != nil {
   374  		iTimestamp = *slice[i].inputLogEvent.Timestamp
   375  	}
   376  	if slice != nil && slice[j].inputLogEvent.Timestamp != nil {
   377  		jTimestamp = *slice[j].inputLogEvent.Timestamp
   378  	}
   379  	if iTimestamp == jTimestamp {
   380  		return slice[i].insertOrder < slice[j].insertOrder
   381  	}
   382  	return iTimestamp < jTimestamp
   383  }
   384  
   385  // Swap swaps two values in a byTimestamp slice with each other.  Swap is
   386  // required by the sort.Interface interface.
   387  func (slice byTimestamp) Swap(i, j int) {
   388  	slice[i], slice[j] = slice[j], slice[i]
   389  }
   390  
   391  func unwrapEvents(events []wrappedEvent) []*cloudwatchlogs.InputLogEvent {
   392  	cwEvents := []*cloudwatchlogs.InputLogEvent{}
   393  	for _, input := range events {
   394  		cwEvents = append(cwEvents, input.inputLogEvent)
   395  	}
   396  	return cwEvents
   397  }