gitlab.com/beacon-software/gadget@v0.0.0-20181217202115-54565ea1ed5e/log/cloudwatch/output.go (about) 1 package cloudwatch 2 3 import ( 4 "fmt" 5 "regexp" 6 "strings" 7 "sync" 8 "time" 9 10 "gitlab.com/beacon-software/gadget/timeutil" 11 12 "github.com/aws/aws-sdk-go/aws/awserr" 13 "github.com/aws/aws-sdk-go/aws/session" 14 "github.com/aws/aws-sdk-go/service/cloudwatchlogs" 15 16 "gitlab.com/beacon-software/gadget/dispatcher" 17 "gitlab.com/beacon-software/gadget/errors" 18 "gitlab.com/beacon-software/gadget/log" 19 "gitlab.com/beacon-software/gadget/stringutil" 20 ) 21 22 // 1 mebibyte is the actual max, but pad with a tenth so we don't have to be 23 // exact when calculating message size (int(1048576 * 0.9)) 24 const ( 25 defaultSendWait = 30 * time.Second 26 maxPayloadSizeBytes int = 943718 27 ) 28 29 func newSession() (*session.Session, errors.TracerError) { 30 session, err := session.NewSessionWithOptions(session.Options{ 31 SharedConfigState: session.SharedConfigEnable, 32 }) 33 return session, errors.Wrap(err) 34 } 35 36 type administration struct { 37 sync.Mutex 38 sendWait time.Duration 39 dispatcher dispatcher.Dispatcher 40 cwlogs *cloudwatchlogs.CloudWatchLogs 41 logGroups map[string]*cloudwatchlogs.LogGroup 42 logStreams map[string]*cloudwatchlogs.LogStream 43 // wrap destinations 44 outputs map[string]*output 45 } 46 47 // we only need one of these lazy initialized 48 var admin = &administration{ 49 logGroups: make(map[string]*cloudwatchlogs.LogGroup), 50 logStreams: make(map[string]*cloudwatchlogs.LogStream), 51 outputs: make(map[string]*output), 52 } 53 54 // Administration provides a layer that manages the control of cloud watch logs to behave 55 // like a standard log output. 56 type Administration interface { 57 // GetOutput for the specified group and output name. 58 GetOutput(groupName, outputName string, logLevel log.LevelFlag) (log.Output, errors.TracerError) 59 } 60 61 // GetAdministration for cloud watch logs 62 func GetAdministration() (Administration, errors.TracerError) { 63 if nil != admin.cwlogs { 64 return admin, nil 65 } 66 session, err := newSession() 67 if nil != err { 68 log.Error(err) 69 return nil, err 70 } 71 admin.cwlogs = cloudwatchlogs.New(session) 72 err = admin.UpdateLogGroups() 73 log.Error(err) 74 return admin, errors.Wrap(err) 75 } 76 77 func (cwa *administration) Run() { 78 cwa.Lock() 79 defer cwa.Unlock() 80 timeutil.RunEvery(func() { 81 for _, output := range cwa.outputs { 82 output.SendEvents() 83 } 84 }, cwa.sendWait) 85 } 86 87 func createStreamKey(groupName, streamName string) string { 88 groupName = EnsureGroupNameIsValid(groupName) 89 streamName = EnsureStreamNameIsValid(streamName) 90 return fmt.Sprintf("%s.%s", groupName, streamName) 91 } 92 93 // UpdateLogGroups pulls all the existing log groups from CloudWatch and adds 94 // them to this instance so that they might be used. 95 // NOTE: We should not have a ton of log groups so holding all of them in memory 96 // should not be a big deal. The standard maximum number of log groups in AWS 97 // is 5000. 98 func (cwa *administration) UpdateLogGroups() errors.TracerError { 99 cwa.Lock() 100 defer cwa.Unlock() 101 var nextToken string 102 var err error 103 var input *cloudwatchlogs.DescribeLogGroupsInput 104 var output *cloudwatchlogs.DescribeLogGroupsOutput 105 106 var limit int64 = 50 107 for { 108 if stringutil.IsWhiteSpace(nextToken) { 109 input = &cloudwatchlogs.DescribeLogGroupsInput{ 110 Limit: &limit, 111 } 112 } else { 113 input = &cloudwatchlogs.DescribeLogGroupsInput{ 114 Limit: &limit, 115 NextToken: &nextToken, 116 } 117 } 118 output, err = cwa.cwlogs.DescribeLogGroups(input) 119 if nil != err { 120 break 121 } 122 for _, group := range output.LogGroups { 123 cwa.logGroups[*group.LogGroupName] = group 124 } 125 if len(output.LogGroups) < int(limit) || nil == output.NextToken || stringutil.IsWhiteSpace(*output.NextToken) { 126 break 127 } 128 nextToken = *output.NextToken 129 } 130 return errors.Wrap(err) 131 } 132 133 func (cwa *administration) GetOutput(groupName, streamName string, logLevel log.LevelFlag) (log.Output, errors.TracerError) { 134 var err error 135 // get the log group 136 group, err := cwa.GetLogGroup(groupName) 137 if nil != err { 138 return nil, errors.Wrap(err) 139 } 140 // now for the stream 141 streamName = EnsureStreamNameIsValid(streamName) 142 outputKey := createStreamKey(*group.LogGroupName, streamName) 143 logOutput, ok := cwa.outputs[outputKey] 144 if !ok { 145 stream, err := cwa.GetLogStream(group, streamName) 146 if nil != err { 147 return nil, errors.Wrap(err) 148 } 149 // we are gtg 150 logOutput = &output{ 151 name: createStreamKey(*group.LogGroupName, *stream.LogStreamName), 152 group: group, 153 stream: stream, 154 logLevel: logLevel, 155 admin: cwa, 156 buffer: NewEventQueue(), 157 } 158 } 159 return logOutput, errors.Wrap(err) 160 } 161 162 func (cwa *administration) GetLogGroup(groupName string) (*cloudwatchlogs.LogGroup, errors.TracerError) { 163 groupName = EnsureGroupNameIsValid(groupName) 164 var err error 165 cwa.Lock() 166 group, ok := cwa.logGroups[groupName] 167 cwa.Unlock() 168 if ok { 169 return group, nil 170 } 171 // it does not exist as far as we can tell so try creation 172 input := &cloudwatchlogs.CreateLogGroupInput{ 173 LogGroupName: &groupName, 174 // we can put tags here as well as needed 175 } 176 // the response from this is a marker so we do not need it. 177 _, err = cwa.cwlogs.CreateLogGroup(input) 178 if nil != err { 179 // error handling, return error unless it is an 'already exists' which means we just 180 // didn't know about it yet 181 if err, ok := err.(awserr.Error); !ok || err.Code() != cloudwatchlogs.ErrCodeResourceAlreadyExistsException { 182 return nil, errors.Wrap(err) 183 } 184 } 185 // update to bring it into the fold 186 err = cwa.UpdateLogGroups() 187 if nil != err { 188 return nil, errors.Wrap(err) 189 } 190 cwa.Lock() 191 group, ok = cwa.logGroups[groupName] 192 cwa.Unlock() 193 if !ok { 194 return nil, errors.New("could not create or find cloud watch logs log group %s", groupName) 195 } 196 // if creation fails as existing try an update 197 return group, errors.Wrap(err) 198 } 199 200 func (cwa *administration) GetLogStream(group *cloudwatchlogs.LogGroup, streamName string) (*cloudwatchlogs.LogStream, errors.TracerError) { 201 streamName = EnsureStreamNameIsValid(streamName) 202 var err error 203 streamKey := createStreamKey(*group.LogGroupName, streamName) 204 cwa.Lock() 205 stream, ok := cwa.logStreams[streamKey] 206 cwa.Unlock() 207 if ok { 208 return stream, nil 209 } 210 input := &cloudwatchlogs.CreateLogStreamInput{ 211 LogGroupName: group.LogGroupName, 212 LogStreamName: &streamName, 213 } 214 // return is a marker value which can be ignored. 215 _, err = cwa.cwlogs.CreateLogStream(input) 216 if nil != err { 217 if err, ok := err.(awserr.Error); !ok || err.Code() != cloudwatchlogs.ErrCodeResourceAlreadyExistsException { 218 return nil, errors.Wrap(err) 219 } 220 } 221 // now actually get the damn thing 222 stream, err = cwa.FindLogStream(*group.LogGroupName, streamName) 223 if nil != err { 224 return nil, errors.Wrap(err) 225 } 226 // add the reference to our map 227 cwa.Lock() 228 cwa.logStreams[streamKey] = stream 229 cwa.Unlock() 230 return stream, errors.Wrap(err) 231 } 232 233 func (cwa *administration) UpdateLogStream(groupName, streamName string) { 234 streamKey := createStreamKey(groupName, streamName) 235 stream, err := cwa.FindLogStream(groupName, streamName) 236 if nil != err { 237 log.Errorf("failed to update log stream: %s", err) 238 } 239 cwa.Lock() 240 s, ok := cwa.logStreams[streamKey] 241 if ok { 242 // do not replace or existing tasks will lose their reference. 243 s.SetUploadSequenceToken(*stream.UploadSequenceToken) 244 } else { 245 // this would be weird, but handle it just in case 246 cwa.logStreams[streamKey] = stream 247 } 248 cwa.Unlock() 249 } 250 251 func (cwa *administration) FindLogStream(groupName, streamName string) (*cloudwatchlogs.LogStream, errors.TracerError) { 252 groupName = EnsureGroupNameIsValid(groupName) 253 streamName = EnsureStreamNameIsValid(streamName) 254 input := &cloudwatchlogs.DescribeLogStreamsInput{ 255 LogGroupName: &groupName, 256 LogStreamNamePrefix: &streamName, 257 } 258 output, err := cwa.cwlogs.DescribeLogStreams(input) 259 if nil != err { 260 return nil, errors.Wrap(err) 261 } 262 for _, stream := range output.LogStreams { 263 if *stream.LogStreamName == streamName { 264 // we are not locking so don't update anything here 265 return stream, nil 266 } 267 } 268 return nil, errors.New("failed to locate log stream '%s' in log group '%s'", streamName, groupName) 269 } 270 271 // output is a wrapper for a log stream that we can attach our interface methods to. 272 type output struct { 273 sync.Mutex 274 name string 275 admin *administration 276 logLevel log.LevelFlag 277 group *cloudwatchlogs.LogGroup 278 stream *cloudwatchlogs.LogStream 279 buffer EventQueue 280 // token is unique to the stream and must be set to sequence the events correctly 281 token *string 282 } 283 284 func (o *output) Level() log.LevelFlag { 285 return o.logLevel 286 } 287 288 func (o *output) Log(message log.Message) { 289 o.Lock() 290 defer o.Unlock() 291 payload := message.JSONString() 292 // they want milliseconds since epoch and our timestamps are in seconds. 293 ts := message.TimestampUnix * 1000 294 o.buffer.Push(&cloudwatchlogs.InputLogEvent{ 295 Message: &payload, 296 Timestamp: &ts, 297 }) 298 } 299 300 func (o *output) SendEvents() error { 301 o.Lock() 302 defer o.Unlock() 303 if o.buffer.Size() == 0 { 304 return nil 305 } 306 events := make([]*cloudwatchlogs.InputLogEvent, 0) 307 sizeBytes := 0 308 for o.buffer.Size() > 0 { 309 event, err := o.buffer.Peek() 310 if nil != err { 311 break 312 } 313 if sizeBytes+len([]byte(*event.Message)) > maxPayloadSizeBytes { 314 break 315 } 316 // actually pop it now 317 o.buffer.Pop() 318 events = append(events, event) 319 } 320 if len(events) == 0 { 321 return nil 322 } 323 input := &cloudwatchlogs.PutLogEventsInput{ 324 LogEvents: events, 325 LogGroupName: o.group.LogGroupName, 326 LogStreamName: o.stream.LogStreamName, 327 SequenceToken: o.stream.UploadSequenceToken, 328 } 329 resp, err := o.admin.cwlogs.PutLogEvents(input) 330 if err == nil { 331 o.stream = o.stream.SetUploadSequenceToken(*resp.NextSequenceToken) 332 } 333 if awsErr, ok := err.(awserr.Error); !ok || awsErr.Code() != cloudwatchlogs.ErrCodeInvalidSequenceTokenException { 334 // our sequence token got out of data so refresh it 335 o.admin.UpdateLogStream(*o.group.LogGroupName, *o.stream.LogStreamName) 336 // don't log this error 337 err = nil 338 } else if nil != err { 339 log.Error(err) 340 } 341 // if we still have events run again 342 if o.buffer.Size() > 0 && nil == err { 343 return o.SendEvents() 344 } 345 return err 346 } 347 348 var groupNameRegex = regexp.MustCompile("[^a-zA-Z0-9_\\-/.]+") 349 350 // EnsureGroupNameIsValid based upon the rules from aws: 351 // * Log group names must be unique within a region for an AWS account. 352 // * Log group names can be between 1 and 512 characters long. 353 // * Log group names consist of the following characters: a-z, A-Z, 0-9, 354 // '_' (underscore), '-' (hyphen), '/' (forward slash), and '.' (period). 355 func EnsureGroupNameIsValid(name string) string { 356 validName := groupNameRegex.ReplaceAllString(name, "") 357 if stringutil.IsWhiteSpace(validName) { 358 validName = "EmptyLogGroupName" 359 } 360 return stringutil.SafeSubstring(validName, 0, 511) 361 } 362 363 // EnsureStreamNameIsValid based upon the provided rules from AWS 364 // * Log stream names must be unique within the log group. 365 // * Log stream names can be between 1 and 512 characters long. 366 // * The ':' (colon) and '*' (asterisk) characters are not allowed. 367 func EnsureStreamNameIsValid(name string) string { 368 validName := strings.Replace(name, ":", "", -1) 369 validName = strings.Replace(validName, "*", "", -1) 370 if stringutil.IsWhiteSpace(validName) { 371 validName = "EmptyLogStreamName" 372 } 373 return stringutil.SafeSubstring(validName, 0, 511) 374 }