// Copyright 2015 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

// Low-level functionality for interacting with the logs collection
// and tailing logs from the replication oplog.

package state

import (
	"fmt"
	"regexp"
	"strings"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/names"
	"github.com/juju/utils/deque"
	"github.com/juju/utils/set"
	"gopkg.in/mgo.v2"
	"gopkg.in/mgo.v2/bson"
	"launchpad.net/tomb"

	"github.com/juju/juju/mongo"
)

const (
	logsDB     = "logs"
	logsC      = "logs"
	forwardedC = "forwarded"
)

// ErrNeverForwarded signals to the caller that the timestamp of a
// previously forwarded log record could not be found.
var ErrNeverForwarded = errors.Errorf("cannot find timestamp of the last forwarded record")

// LoggingState describes the methods on State required for logging to
// the database.
type LoggingState interface {
	ModelUUID() string
	MongoSession() *mgo.Session
	IsController() bool
}

// InitDbLogs sets up the indexes for the logs collection. It should
// be called as state is opened. It is idempotent.
func InitDbLogs(session *mgo.Session) error {
	logsColl := session.DB(logsDB).C(logsC)
	for _, key := range [][]string{{"e", "t"}, {"e", "n"}} {
		err := logsColl.EnsureIndex(mgo.Index{Key: key})
		if err != nil {
			return errors.Annotate(err, "cannot create index for logs collection")
		}
	}
	return nil
}

// lastSentDoc captures the timestamp of the last log record forwarded
// to a log sink.
type lastSentDoc struct {
	ID        string `bson:"_id"`
	ModelUUID string `bson:"model-uuid"`
	Sink      string `bson:"sink"`
	Time      int64  `bson:"timestamp"`
}

// NewLastSentLogger returns a DbLoggerLastSent which records and
// retrieves the timestamps of the most recent log records forwarded
// to the given log sink.
func NewLastSentLogger(st LoggingState, sink string) *DbLoggerLastSent {
	return &DbLoggerLastSent{
		id:      fmt.Sprintf("%v#%v", st.ModelUUID(), sink),
		model:   st.ModelUUID(),
		sink:    sink,
		session: st.MongoSession(),
	}
}

// DbLoggerLastSent records and retrieves timestamps of the most
// recent log records forwarded to a log sink.
type DbLoggerLastSent struct {
	session *mgo.Session
	id      string
	model   string
	sink    string
}

// Set records the timestamp.
func (logger *DbLoggerLastSent) Set(t time.Time) error {
	collection := logger.session.DB(logsDB).C(forwardedC)
	_, err := collection.UpsertId(
		logger.id,
		lastSentDoc{
			ID:        logger.id,
			ModelUUID: logger.model,
			Sink:      logger.sink,
			Time:      t.UnixNano(),
		},
	)
	return errors.Trace(err)
}

// Get retrieves the timestamp.
func (logger *DbLoggerLastSent) Get() (time.Time, error) {
	zeroTime := time.Time{}
	collection := logger.session.DB(logsDB).C(forwardedC)
	var lastSent lastSentDoc
	err := collection.FindId(logger.id).One(&lastSent)
	if err != nil {
		if err == mgo.ErrNotFound {
			return zeroTime, errors.Trace(ErrNeverForwarded)
		}
		return zeroTime, errors.Trace(err)
	}
	return time.Unix(0, lastSent.Time).UTC(), nil
}
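// Example (illustrative sketch, not part of the original file): a typical
// round trip with DbLoggerLastSent. The sink name "syslog" is invented for
// the example; ErrNeverForwarded is the expected error the first time a
// sink is queried, before any record has been forwarded.
//
//	tracker := NewLastSentLogger(st, "syslog")
//	ts, err := tracker.Get()
//	if errors.Cause(err) == ErrNeverForwarded {
//		ts = time.Time{} // nothing forwarded yet; start from the beginning
//	}
//	// ... forward records newer than ts, then record progress:
//	err = tracker.Set(time.Now())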
// logDoc describes log messages stored in MongoDB.
//
// Single character field names are used for serialisation to save
// space. These documents will be inserted thousands of times and each
// document includes the field names.
// (alesstimec) It would be really nice if we could store Time as int64
// for increased precision.
type logDoc struct {
	Id        bson.ObjectId `bson:"_id"`
	Time      time.Time     `bson:"t"`
	ModelUUID string        `bson:"e"`
	Entity    string        `bson:"n"` // e.g. "machine-0"
	Module    string        `bson:"m"` // e.g. "juju.worker.firewaller"
	Location  string        `bson:"l"` // "filename:lineno"
	Level     loggo.Level   `bson:"v"`
	Message   string        `bson:"x"`
}

// DbLogger writes log records to the logs collection on behalf of a
// single entity within a model.
type DbLogger struct {
	logsColl  *mgo.Collection
	modelUUID string
	entity    string
}

// NewDbLogger returns a DbLogger instance which is used to write logs
// to the database.
func NewDbLogger(st LoggingState, entity names.Tag) *DbLogger {
	_, logsColl := initLogsSession(st)
	return &DbLogger{
		logsColl:  logsColl,
		modelUUID: st.ModelUUID(),
		entity:    entity.String(),
	}
}

// Log writes a log message to the database.
func (logger *DbLogger) Log(t time.Time, module string, location string, level loggo.Level, msg string) error {
	return logger.logsColl.Insert(&logDoc{
		Id:        bson.NewObjectId(),
		Time:      t,
		ModelUUID: logger.modelUUID,
		Entity:    logger.entity,
		Module:    module,
		Location:  location,
		Level:     level,
		Message:   msg,
	})
}

// Close cleans up resources used by the DbLogger instance.
func (logger *DbLogger) Close() {
	if logger.logsColl != nil {
		logger.logsColl.Database.Session.Close()
	}
}
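// Example (illustrative sketch, not part of the original file): writing a
// single record for machine-0. The module, location and message values are
// invented for the example.
//
//	dbLogger := NewDbLogger(st, names.NewMachineTag("0"))
//	defer dbLogger.Close()
//	err := dbLogger.Log(time.Now(), "juju.worker.firewaller",
//		"firewaller.go:42", loggo.INFO, "worker started")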
// LogTailer allows for retrieval of Juju's logs from MongoDB. It
// first returns any matching already recorded logs and then waits for
// additional matching logs as they appear.
type LogTailer interface {
	// Logs returns the channel through which the LogTailer returns
	// Juju logs. It will be closed when the tailer stops.
	Logs() <-chan *LogRecord

	// Dying returns a channel which will be closed as the LogTailer
	// stops.
	Dying() <-chan struct{}

	// Stop is used to request that the LogTailer stops. It blocks
	// until the LogTailer has stopped.
	Stop() error

	// Err returns the error that caused the LogTailer to stop. If
	// it hasn't stopped, or stopped without error, nil will be
	// returned.
	Err() error
}

// LogRecord defines a single Juju log message as returned by
// LogTailer.
type LogRecord struct {
	Time      time.Time
	Entity    string
	Module    string
	Location  string
	Level     loggo.Level
	Message   string
	ModelUUID string
}

// LogTailerParams specifies the filtering a LogTailer should apply to
// logs in order to decide which to return.
type LogTailerParams struct {
	StartTime     time.Time
	MinLevel      loggo.Level
	InitialLines  int
	NoTail        bool
	IncludeEntity []string
	ExcludeEntity []string
	IncludeModule []string
	ExcludeModule []string
	Oplog         *mgo.Collection // For testing only
	AllModels     bool
}

// oplogOverlap is used to decide on the initial oplog timestamp to
// use when the LogTailer transitions from querying the logs
// collection to tailing the oplog. Oplog records with a timestamp >=
// lastTsFromLogsCollection - oplogOverlap will be considered. This
// is to allow for delayed log writes, clock skew between the Juju
// cluster hosts and log writes that occur during the transition
// period.
const oplogOverlap = time.Minute

// This is the maximum number of log document ids that will be tracked
// to avoid re-reporting logs when transitioning between querying the
// logs collection and tailing the oplog.
//
// The value was calculated by looking at the per-minute peak log
// output of large broken models with logging at DEBUG.
var maxRecentLogIds = int(oplogOverlap.Minutes() * 150000)

// NewLogTailer returns a LogTailer which filters according to the
// parameters given.
func NewLogTailer(st LoggingState, params *LogTailerParams) (LogTailer, error) {
	if !st.IsController() && params.AllModels {
		return nil, errors.NewNotValid(nil, "not allowed to tail logs from all models: not a controller")
	}

	session := st.MongoSession().Copy()
	t := &logTailer{
		modelUUID: st.ModelUUID(),
		session:   session,
		logsColl:  session.DB(logsDB).C(logsC).With(session),
		params:    params,
		logCh:     make(chan *LogRecord),
		recentIds: newRecentIdTracker(maxRecentLogIds),
	}
	go func() {
		err := t.loop()
		t.tomb.Kill(errors.Cause(err))
		close(t.logCh)
		session.Close()
		t.tomb.Done()
	}()
	return t, nil
}

type logTailer struct {
	tomb      tomb.Tomb
	modelUUID string
	session   *mgo.Session
	logsColl  *mgo.Collection
	params    *LogTailerParams
	logCh     chan *LogRecord
	lastTime  time.Time
	recentIds *recentIdTracker
}

// Logs implements the LogTailer interface.
func (t *logTailer) Logs() <-chan *LogRecord {
	return t.logCh
}

// Dying implements the LogTailer interface.
func (t *logTailer) Dying() <-chan struct{} {
	return t.tomb.Dying()
}

// Stop implements the LogTailer interface.
func (t *logTailer) Stop() error {
	t.tomb.Kill(nil)
	return t.tomb.Wait()
}

// Err implements the LogTailer interface.
func (t *logTailer) Err() error {
	return t.tomb.Err()
}
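// Example (illustrative sketch, not part of the original file): tailing
// WARNING-and-above logs for the current model. The parameter values are
// invented for the example.
//
//	tailer, err := NewLogTailer(st, &LogTailerParams{
//		MinLevel:     loggo.WARNING,
//		InitialLines: 100,
//	})
//	if err != nil {
//		return errors.Trace(err)
//	}
//	defer tailer.Stop()
//	for rec := range tailer.Logs() {
//		fmt.Printf("%s %s: %s\n", rec.Time, rec.Entity, rec.Message)
//	}
//	// Logs() is closed once the tailer stops; consult tailer.Err()
//	// afterwards to find out why.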
func (t *logTailer) loop() error {
	err := t.processCollection()
	if err != nil {
		return errors.Trace(err)
	}

	if t.params.NoTail {
		return nil
	}

	err = t.tailOplog()
	return errors.Trace(err)
}

func (t *logTailer) processCollection() error {
	// Create a selector from the params.
	sel := t.paramsToSelector(t.params, "")
	query := t.logsColl.Find(sel)

	if t.params.InitialLines > 0 {
		// This is a little racy but it's good enough.
		count, err := query.Count()
		if err != nil {
			return errors.Annotate(err, "query count failed")
		}
		if skipOver := count - t.params.InitialLines; skipOver > 0 {
			query = query.Skip(skipOver)
		}
	}

	iter := query.Sort("t").Iter()
	doc := new(logDoc)
	for iter.Next(doc) {
		select {
		case <-t.tomb.Dying():
			return errors.Trace(tomb.ErrDying)
		case t.logCh <- logDocToRecord(doc):
			t.lastTime = doc.Time
			t.recentIds.Add(doc.Id)
		}
	}
	return errors.Trace(iter.Close())
}

func (t *logTailer) tailOplog() error {
	recentIds := t.recentIds.AsSet()

	newParams := t.params
	newParams.StartTime = t.lastTime
	oplogSel := append(t.paramsToSelector(newParams, "o."),
		bson.DocElem{"ns", logsDB + "." + logsC},
	)

	oplog := t.params.Oplog
	if oplog == nil {
		oplog = mongo.GetOplog(t.session)
	}

	minOplogTs := t.lastTime.Add(-oplogOverlap)
	oplogTailer := mongo.NewOplogTailer(oplog, oplogSel, minOplogTs)
	defer oplogTailer.Stop()

	logger.Tracef("LogTailer starting oplog tailing: recent id count=%d, lastTime=%s, minOplogTs=%s",
		recentIds.Length(), t.lastTime, minOplogTs)

	skipCount := 0
	for {
		select {
		case <-t.tomb.Dying():
			return errors.Trace(tomb.ErrDying)
		case oplogDoc, ok := <-oplogTailer.Out():
			if !ok {
				return errors.Annotate(oplogTailer.Err(), "oplog tailer died")
			}

			doc := new(logDoc)
			err := oplogDoc.UnmarshalObject(doc)
			if err != nil {
				return errors.Annotate(err, "oplog unmarshalling failed")
			}

			if recentIds.Contains(doc.Id) {
				// This document has already been reported.
				skipCount++
				if skipCount%1000 == 0 {
					logger.Tracef("LogTailer duplicates skipped: %d", skipCount)
				}
				continue
			}
			select {
			case <-t.tomb.Dying():
				return errors.Trace(tomb.ErrDying)
			case t.logCh <- logDocToRecord(doc):
			}
		}
	}
}

func (t *logTailer) paramsToSelector(params *LogTailerParams, prefix string) bson.D {
	sel := bson.D{
		{"t", bson.M{"$gte": params.StartTime}},
	}
	if !params.AllModels {
		sel = append(sel, bson.DocElem{"e", t.modelUUID})
	}
	if params.MinLevel > loggo.UNSPECIFIED {
		sel = append(sel, bson.DocElem{"v", bson.M{"$gte": params.MinLevel}})
	}
	if len(params.IncludeEntity) > 0 {
		sel = append(sel,
			bson.DocElem{"n", bson.RegEx{Pattern: makeEntityPattern(params.IncludeEntity)}})
	}
	if len(params.ExcludeEntity) > 0 {
		sel = append(sel,
			bson.DocElem{"n", bson.M{"$not": bson.RegEx{Pattern: makeEntityPattern(params.ExcludeEntity)}}})
	}
	if len(params.IncludeModule) > 0 {
		sel = append(sel,
			bson.DocElem{"m", bson.RegEx{Pattern: makeModulePattern(params.IncludeModule)}})
	}
	if len(params.ExcludeModule) > 0 {
		sel = append(sel,
			bson.DocElem{"m", bson.M{"$not": bson.RegEx{Pattern: makeModulePattern(params.ExcludeModule)}}})
	}
	if prefix != "" {
		for i, elem := range sel {
			sel[i].Name = prefix + elem.Name
		}
	}
	return sel
}
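// Example (illustrative sketch, not part of the original file): for a
// tailer on a single model with MinLevel loggo.INFO and IncludeEntity
// []string{"machine-*"}, paramsToSelector produces a selector
// equivalent to:
//
//	bson.D{
//		{"t", bson.M{"$gte": params.StartTime}},
//		{"e", t.modelUUID},
//		{"v", bson.M{"$gte": loggo.INFO}},
//		{"n", bson.RegEx{Pattern: `^(machine-.*)$`}},
//	}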
func makeEntityPattern(entities []string) string {
	var patterns []string
	for _, entity := range entities {
		// Convert * wildcard to the regex equivalent. This is safe
		// because * never appears in entity names.
		patterns = append(patterns, strings.Replace(entity, "*", ".*", -1))
	}
	return `^(` + strings.Join(patterns, "|") + `)$`
}

func makeModulePattern(modules []string) string {
	var patterns []string
	for _, module := range modules {
		patterns = append(patterns, regexp.QuoteMeta(module))
	}
	return `^(` + strings.Join(patterns, "|") + `)(\..+)?$`
}
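// Example (illustrative sketch, not part of the original file):
// makeModulePattern([]string{"juju.worker"}) yields
// `^(juju\.worker)(\..+)?$`, which matches "juju.worker" and
// "juju.worker.firewaller" but not "juju.workers".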
func newRecentIdTracker(maxLen int) *recentIdTracker {
	return &recentIdTracker{
		ids: deque.NewWithMaxLen(maxLen),
	}
}

type recentIdTracker struct {
	ids *deque.Deque
}

func (t *recentIdTracker) Add(id bson.ObjectId) {
	t.ids.PushBack(id)
}

func (t *recentIdTracker) AsSet() *objectIdSet {
	out := newObjectIdSet()
	for {
		id, ok := t.ids.PopFront()
		if !ok {
			break
		}
		out.Add(id.(bson.ObjectId))
	}
	return out
}

func newObjectIdSet() *objectIdSet {
	return &objectIdSet{
		ids: set.NewStrings(),
	}
}

type objectIdSet struct {
	ids set.Strings
}

func (s *objectIdSet) Add(id bson.ObjectId) {
	s.ids.Add(string(id))
}

func (s *objectIdSet) Contains(id bson.ObjectId) bool {
	return s.ids.Contains(string(id))
}

func (s *objectIdSet) Length() int {
	return len(s.ids)
}

func logDocToRecord(doc *logDoc) *LogRecord {
	return &LogRecord{
		Time:      doc.Time,
		Entity:    doc.Entity,
		Module:    doc.Module,
		Location:  doc.Location,
		Level:     doc.Level,
		Message:   doc.Message,
		ModelUUID: doc.ModelUUID,
	}
}

// PruneLogs removes old log documents in order to control the size of
// the logs collection. All logs older than minLogTime are
// removed. Further removal is also performed if the logs collection
// size is greater than maxLogsMB.
func PruneLogs(st LoggingState, minLogTime time.Time, maxLogsMB int) error {
	session, logsColl := initLogsSession(st)
	defer session.Close()

	modelUUIDs, err := getEnvsInLogs(logsColl)
	if err != nil {
		return errors.Annotate(err, "failed to get log counts")
	}

	pruneCounts := make(map[string]int)

	// Remove old log entries (per model UUID to take advantage
	// of indexes on the logs collection).
	for _, modelUUID := range modelUUIDs {
		removeInfo, err := logsColl.RemoveAll(bson.M{
			"e": modelUUID,
			"t": bson.M{"$lt": minLogTime},
		})
		if err != nil {
			return errors.Annotate(err, "failed to prune logs by time")
		}
		pruneCounts[modelUUID] = removeInfo.Removed
	}

	// Do further pruning if the logs collection is over the maximum size.
	for {
		collMB, err := getCollectionMB(logsColl)
		if err != nil {
			return errors.Annotate(err, "failed to retrieve log counts")
		}
		if collMB <= maxLogsMB {
			break
		}

		modelUUID, count, err := findEnvWithMostLogs(logsColl, modelUUIDs)
		if err != nil {
			return errors.Annotate(err, "log count query failed")
		}
		if count < 5000 {
			break // Pruning is not worthwhile
		}

		// Remove the oldest 1% of log records for the model.
		toRemove := int(float64(count) * 0.01)

		// Find the threshold timestamp to start removing from.
		// NOTE: this assumes that there are no more logs being added
		// for the time range being pruned (which should be true for
		// any realistic minimum log collection size).
		tsQuery := logsColl.Find(bson.M{"e": modelUUID}).Sort("t")
		tsQuery = tsQuery.Skip(toRemove)
		tsQuery = tsQuery.Select(bson.M{"t": 1})
		var doc bson.M
		err = tsQuery.One(&doc)
		if err != nil {
			return errors.Annotate(err, "log pruning timestamp query failed")
		}
		thresholdTs := doc["t"].(time.Time)

		// Remove old records.
		removeInfo, err := logsColl.RemoveAll(bson.M{
			"e": modelUUID,
			"t": bson.M{"$lt": thresholdTs},
		})
		if err != nil {
			return errors.Annotate(err, "log pruning failed")
		}
		pruneCounts[modelUUID] += removeInfo.Removed
	}

	for modelUUID, count := range pruneCounts {
		if count > 0 {
			logger.Debugf("pruned %d logs for model %s", count, modelUUID)
		}
	}
	return nil
}

// initLogsSession creates a new session suitable for logging updates,
// returning the session and a logs mgo.Collection connected to that
// session.
func initLogsSession(st LoggingState) (*mgo.Session, *mgo.Collection) {
	// To improve throughput, only wait for the logs to be written to
	// the primary. For some reason, this makes a huge difference even
	// when the replicaset only has one member (i.e. a single primary).
	session := st.MongoSession().Copy()
	session.SetSafe(&mgo.Safe{
		W: 1,
	})
	db := session.DB(logsDB)
	return session, db.C(logsC).With(session)
}

// getCollectionMB returns the size of a MongoDB collection in MiB
// (the collStats command is run with a MiByte scale), excluding space
// used by indexes.
func getCollectionMB(coll *mgo.Collection) (int, error) {
	var result bson.M
	err := coll.Database.Run(bson.D{
		{"collStats", coll.Name},
		{"scale", humanize.MiByte},
	}, &result)
	if err != nil {
		return 0, errors.Trace(err)
	}
	return result["size"].(int), nil
}

// getEnvsInLogs returns the unique model UUIDs that exist in
// the logs collection. This uses one of the indexes on the
// collection and should be fast.
func getEnvsInLogs(coll *mgo.Collection) ([]string, error) {
	var modelUUIDs []string
	err := coll.Find(nil).Distinct("e", &modelUUIDs)
	if err != nil {
		return nil, errors.Trace(err)
	}
	return modelUUIDs, nil
}

// findEnvWithMostLogs returns the modelUUID and log count for the
// model with the most logs in the logs collection.
func findEnvWithMostLogs(logsColl *mgo.Collection, modelUUIDs []string) (string, int, error) {
	var maxModelUUID string
	var maxCount int
	for _, modelUUID := range modelUUIDs {
		count, err := getLogCountForEnv(logsColl, modelUUID)
		if err != nil {
			return "", -1, errors.Trace(err)
		}
		if count > maxCount {
			maxModelUUID = modelUUID
			maxCount = count
		}
	}
	return maxModelUUID, maxCount, nil
}

// getLogCountForEnv returns the number of log records stored for a
// given model.
func getLogCountForEnv(coll *mgo.Collection, modelUUID string) (int, error) {
	count, err := coll.Find(bson.M{"e": modelUUID}).Count()
	if err != nil {
		return -1, errors.Annotate(err, "failed to get log count")
	}
	return count, nil
}
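// Example (illustrative sketch, not part of the original file): prune
// everything older than a week and keep the collection under 4096 MiB.
// The retention values are invented for the example.
//
//	err := PruneLogs(st, time.Now().AddDate(0, 0, -7), 4096)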