gitlab.com/beacon-software/gadget@v0.0.0-20181217202115-54565ea1ed5e/log/cloudwatch/output.go (about)

     1  package cloudwatch
     2  
     3  import (
     4  	"fmt"
     5  	"regexp"
     6  	"strings"
     7  	"sync"
     8  	"time"
     9  
    10  	"gitlab.com/beacon-software/gadget/timeutil"
    11  
    12  	"github.com/aws/aws-sdk-go/aws/awserr"
    13  	"github.com/aws/aws-sdk-go/aws/session"
    14  	"github.com/aws/aws-sdk-go/service/cloudwatchlogs"
    15  
    16  	"gitlab.com/beacon-software/gadget/dispatcher"
    17  	"gitlab.com/beacon-software/gadget/errors"
    18  	"gitlab.com/beacon-software/gadget/log"
    19  	"gitlab.com/beacon-software/gadget/stringutil"
    20  )
    21  
    22  // 1 mebibyte is the actual max, but pad with a tenth so we don't have to be
    23  // exact when calculating message size (int(1048576 * 0.9))
    24  const (
    25  	defaultSendWait         = 30 * time.Second
    26  	maxPayloadSizeBytes int = 943718
    27  )
    28  
    29  func newSession() (*session.Session, errors.TracerError) {
    30  	session, err := session.NewSessionWithOptions(session.Options{
    31  		SharedConfigState: session.SharedConfigEnable,
    32  	})
    33  	return session, errors.Wrap(err)
    34  }
    35  
    36  type administration struct {
    37  	sync.Mutex
    38  	sendWait   time.Duration
    39  	dispatcher dispatcher.Dispatcher
    40  	cwlogs     *cloudwatchlogs.CloudWatchLogs
    41  	logGroups  map[string]*cloudwatchlogs.LogGroup
    42  	logStreams map[string]*cloudwatchlogs.LogStream
    43  	// wrap destinations
    44  	outputs map[string]*output
    45  }
    46  
    47  // we only need one of these lazy initialized
    48  var admin = &administration{
    49  	logGroups:  make(map[string]*cloudwatchlogs.LogGroup),
    50  	logStreams: make(map[string]*cloudwatchlogs.LogStream),
    51  	outputs:    make(map[string]*output),
    52  }
    53  
    54  // Administration provides a layer that manages the control of cloud watch logs to behave
    55  // like a standard log output.
    56  type Administration interface {
    57  	// GetOutput for the specified group and output name.
    58  	GetOutput(groupName, outputName string, logLevel log.LevelFlag) (log.Output, errors.TracerError)
    59  }
    60  
    61  // GetAdministration for cloud watch logs
    62  func GetAdministration() (Administration, errors.TracerError) {
    63  	if nil != admin.cwlogs {
    64  		return admin, nil
    65  	}
    66  	session, err := newSession()
    67  	if nil != err {
    68  		log.Error(err)
    69  		return nil, err
    70  	}
    71  	admin.cwlogs = cloudwatchlogs.New(session)
    72  	err = admin.UpdateLogGroups()
    73  	log.Error(err)
    74  	return admin, errors.Wrap(err)
    75  }
    76  
    77  func (cwa *administration) Run() {
    78  	cwa.Lock()
    79  	defer cwa.Unlock()
    80  	timeutil.RunEvery(func() {
    81  		for _, output := range cwa.outputs {
    82  			output.SendEvents()
    83  		}
    84  	}, cwa.sendWait)
    85  }
    86  
    87  func createStreamKey(groupName, streamName string) string {
    88  	groupName = EnsureGroupNameIsValid(groupName)
    89  	streamName = EnsureStreamNameIsValid(streamName)
    90  	return fmt.Sprintf("%s.%s", groupName, streamName)
    91  }
    92  
    93  // UpdateLogGroups pulls all the existing log groups from CloudWatch and adds
    94  // them to this instance so that they might be used.
    95  // NOTE: We should not have a ton of log groups so holding all of them in memory
    96  // should not be a big deal. The standard maximum number of log groups in AWS
    97  // is 5000.
    98  func (cwa *administration) UpdateLogGroups() errors.TracerError {
    99  	cwa.Lock()
   100  	defer cwa.Unlock()
   101  	var nextToken string
   102  	var err error
   103  	var input *cloudwatchlogs.DescribeLogGroupsInput
   104  	var output *cloudwatchlogs.DescribeLogGroupsOutput
   105  
   106  	var limit int64 = 50
   107  	for {
   108  		if stringutil.IsWhiteSpace(nextToken) {
   109  			input = &cloudwatchlogs.DescribeLogGroupsInput{
   110  				Limit: &limit,
   111  			}
   112  		} else {
   113  			input = &cloudwatchlogs.DescribeLogGroupsInput{
   114  				Limit:     &limit,
   115  				NextToken: &nextToken,
   116  			}
   117  		}
   118  		output, err = cwa.cwlogs.DescribeLogGroups(input)
   119  		if nil != err {
   120  			break
   121  		}
   122  		for _, group := range output.LogGroups {
   123  			cwa.logGroups[*group.LogGroupName] = group
   124  		}
   125  		if len(output.LogGroups) < int(limit) || nil == output.NextToken || stringutil.IsWhiteSpace(*output.NextToken) {
   126  			break
   127  		}
   128  		nextToken = *output.NextToken
   129  	}
   130  	return errors.Wrap(err)
   131  }
   132  
   133  func (cwa *administration) GetOutput(groupName, streamName string, logLevel log.LevelFlag) (log.Output, errors.TracerError) {
   134  	var err error
   135  	// get the log group
   136  	group, err := cwa.GetLogGroup(groupName)
   137  	if nil != err {
   138  		return nil, errors.Wrap(err)
   139  	}
   140  	// now for the stream
   141  	streamName = EnsureStreamNameIsValid(streamName)
   142  	outputKey := createStreamKey(*group.LogGroupName, streamName)
   143  	logOutput, ok := cwa.outputs[outputKey]
   144  	if !ok {
   145  		stream, err := cwa.GetLogStream(group, streamName)
   146  		if nil != err {
   147  			return nil, errors.Wrap(err)
   148  		}
   149  		// we are gtg
   150  		logOutput = &output{
   151  			name:     createStreamKey(*group.LogGroupName, *stream.LogStreamName),
   152  			group:    group,
   153  			stream:   stream,
   154  			logLevel: logLevel,
   155  			admin:    cwa,
   156  			buffer:   NewEventQueue(),
   157  		}
   158  	}
   159  	return logOutput, errors.Wrap(err)
   160  }
   161  
   162  func (cwa *administration) GetLogGroup(groupName string) (*cloudwatchlogs.LogGroup, errors.TracerError) {
   163  	groupName = EnsureGroupNameIsValid(groupName)
   164  	var err error
   165  	cwa.Lock()
   166  	group, ok := cwa.logGroups[groupName]
   167  	cwa.Unlock()
   168  	if ok {
   169  		return group, nil
   170  	}
   171  	// it does not exist as far as we can tell so try creation
   172  	input := &cloudwatchlogs.CreateLogGroupInput{
   173  		LogGroupName: &groupName,
   174  		// we can put tags here as well as needed
   175  	}
   176  	// the response from this is a marker so we do not need it.
   177  	_, err = cwa.cwlogs.CreateLogGroup(input)
   178  	if nil != err {
   179  		// error handling, return error unless it is an 'already exists' which means we just
   180  		// didn't know about it yet
   181  		if err, ok := err.(awserr.Error); !ok || err.Code() != cloudwatchlogs.ErrCodeResourceAlreadyExistsException {
   182  			return nil, errors.Wrap(err)
   183  		}
   184  	}
   185  	// update to bring it into the fold
   186  	err = cwa.UpdateLogGroups()
   187  	if nil != err {
   188  		return nil, errors.Wrap(err)
   189  	}
   190  	cwa.Lock()
   191  	group, ok = cwa.logGroups[groupName]
   192  	cwa.Unlock()
   193  	if !ok {
   194  		return nil, errors.New("could not create or find cloud watch logs log group %s", groupName)
   195  	}
   196  	// if creation fails as existing try an update
   197  	return group, errors.Wrap(err)
   198  }
   199  
   200  func (cwa *administration) GetLogStream(group *cloudwatchlogs.LogGroup, streamName string) (*cloudwatchlogs.LogStream, errors.TracerError) {
   201  	streamName = EnsureStreamNameIsValid(streamName)
   202  	var err error
   203  	streamKey := createStreamKey(*group.LogGroupName, streamName)
   204  	cwa.Lock()
   205  	stream, ok := cwa.logStreams[streamKey]
   206  	cwa.Unlock()
   207  	if ok {
   208  		return stream, nil
   209  	}
   210  	input := &cloudwatchlogs.CreateLogStreamInput{
   211  		LogGroupName:  group.LogGroupName,
   212  		LogStreamName: &streamName,
   213  	}
   214  	// return is a marker value which can be ignored.
   215  	_, err = cwa.cwlogs.CreateLogStream(input)
   216  	if nil != err {
   217  		if err, ok := err.(awserr.Error); !ok || err.Code() != cloudwatchlogs.ErrCodeResourceAlreadyExistsException {
   218  			return nil, errors.Wrap(err)
   219  		}
   220  	}
   221  	// now actually get the damn thing
   222  	stream, err = cwa.FindLogStream(*group.LogGroupName, streamName)
   223  	if nil != err {
   224  		return nil, errors.Wrap(err)
   225  	}
   226  	// add the reference to our map
   227  	cwa.Lock()
   228  	cwa.logStreams[streamKey] = stream
   229  	cwa.Unlock()
   230  	return stream, errors.Wrap(err)
   231  }
   232  
   233  func (cwa *administration) UpdateLogStream(groupName, streamName string) {
   234  	streamKey := createStreamKey(groupName, streamName)
   235  	stream, err := cwa.FindLogStream(groupName, streamName)
   236  	if nil != err {
   237  		log.Errorf("failed to update log stream: %s", err)
   238  	}
   239  	cwa.Lock()
   240  	s, ok := cwa.logStreams[streamKey]
   241  	if ok {
   242  		// do not replace or existing tasks will lose their reference.
   243  		s.SetUploadSequenceToken(*stream.UploadSequenceToken)
   244  	} else {
   245  		// this would be weird, but handle it just in case
   246  		cwa.logStreams[streamKey] = stream
   247  	}
   248  	cwa.Unlock()
   249  }
   250  
   251  func (cwa *administration) FindLogStream(groupName, streamName string) (*cloudwatchlogs.LogStream, errors.TracerError) {
   252  	groupName = EnsureGroupNameIsValid(groupName)
   253  	streamName = EnsureStreamNameIsValid(streamName)
   254  	input := &cloudwatchlogs.DescribeLogStreamsInput{
   255  		LogGroupName:        &groupName,
   256  		LogStreamNamePrefix: &streamName,
   257  	}
   258  	output, err := cwa.cwlogs.DescribeLogStreams(input)
   259  	if nil != err {
   260  		return nil, errors.Wrap(err)
   261  	}
   262  	for _, stream := range output.LogStreams {
   263  		if *stream.LogStreamName == streamName {
   264  			// we are not locking so don't update anything here
   265  			return stream, nil
   266  		}
   267  	}
   268  	return nil, errors.New("failed to locate log stream '%s' in log group '%s'", streamName, groupName)
   269  }
   270  
   271  // output is a wrapper for a log stream that we can attach our interface methods to.
   272  type output struct {
   273  	sync.Mutex
   274  	name     string
   275  	admin    *administration
   276  	logLevel log.LevelFlag
   277  	group    *cloudwatchlogs.LogGroup
   278  	stream   *cloudwatchlogs.LogStream
   279  	buffer   EventQueue
   280  	// token is unique to the stream and must be set to sequence the events correctly
   281  	token *string
   282  }
   283  
   284  func (o *output) Level() log.LevelFlag {
   285  	return o.logLevel
   286  }
   287  
   288  func (o *output) Log(message log.Message) {
   289  	o.Lock()
   290  	defer o.Unlock()
   291  	payload := message.JSONString()
   292  	// they want milliseconds since epoch and our timestamps are in seconds.
   293  	ts := message.TimestampUnix * 1000
   294  	o.buffer.Push(&cloudwatchlogs.InputLogEvent{
   295  		Message:   &payload,
   296  		Timestamp: &ts,
   297  	})
   298  }
   299  
   300  func (o *output) SendEvents() error {
   301  	o.Lock()
   302  	defer o.Unlock()
   303  	if o.buffer.Size() == 0 {
   304  		return nil
   305  	}
   306  	events := make([]*cloudwatchlogs.InputLogEvent, 0)
   307  	sizeBytes := 0
   308  	for o.buffer.Size() > 0 {
   309  		event, err := o.buffer.Peek()
   310  		if nil != err {
   311  			break
   312  		}
   313  		if sizeBytes+len([]byte(*event.Message)) > maxPayloadSizeBytes {
   314  			break
   315  		}
   316  		// actually pop it now
   317  		o.buffer.Pop()
   318  		events = append(events, event)
   319  	}
   320  	if len(events) == 0 {
   321  		return nil
   322  	}
   323  	input := &cloudwatchlogs.PutLogEventsInput{
   324  		LogEvents:     events,
   325  		LogGroupName:  o.group.LogGroupName,
   326  		LogStreamName: o.stream.LogStreamName,
   327  		SequenceToken: o.stream.UploadSequenceToken,
   328  	}
   329  	resp, err := o.admin.cwlogs.PutLogEvents(input)
   330  	if err == nil {
   331  		o.stream = o.stream.SetUploadSequenceToken(*resp.NextSequenceToken)
   332  	}
   333  	if awsErr, ok := err.(awserr.Error); !ok || awsErr.Code() != cloudwatchlogs.ErrCodeInvalidSequenceTokenException {
   334  		// our sequence token got out of data so refresh it
   335  		o.admin.UpdateLogStream(*o.group.LogGroupName, *o.stream.LogStreamName)
   336  		// don't log this error
   337  		err = nil
   338  	} else if nil != err {
   339  		log.Error(err)
   340  	}
   341  	// if we still have events run again
   342  	if o.buffer.Size() > 0 && nil == err {
   343  		return o.SendEvents()
   344  	}
   345  	return err
   346  }
   347  
   348  var groupNameRegex = regexp.MustCompile("[^a-zA-Z0-9_\\-/.]+")
   349  
   350  // EnsureGroupNameIsValid based upon the rules from aws:
   351  // * Log group names must be unique within a region for an AWS account.
   352  // * Log group names can be between 1 and 512 characters long.
   353  // * Log group names consist of the following characters: a-z, A-Z, 0-9,
   354  // 		'_' (underscore), '-' (hyphen), '/' (forward slash), and '.' (period).
   355  func EnsureGroupNameIsValid(name string) string {
   356  	validName := groupNameRegex.ReplaceAllString(name, "")
   357  	if stringutil.IsWhiteSpace(validName) {
   358  		validName = "EmptyLogGroupName"
   359  	}
   360  	return stringutil.SafeSubstring(validName, 0, 511)
   361  }
   362  
   363  // EnsureStreamNameIsValid based upon the provided rules from AWS
   364  //	* Log stream names must be unique within the log group.
   365  //	* Log stream names can be between 1 and 512 characters long.
   366  //	* The ':' (colon) and '*' (asterisk) characters are not allowed.
   367  func EnsureStreamNameIsValid(name string) string {
   368  	validName := strings.Replace(name, ":", "", -1)
   369  	validName = strings.Replace(validName, "*", "", -1)
   370  	if stringutil.IsWhiteSpace(validName) {
   371  		validName = "EmptyLogStreamName"
   372  	}
   373  	return stringutil.SafeSubstring(validName, 0, 511)
   374  }