github.com/keybase/client/go@v0.0.0-20241007131713-f10651d043c8/teams/box_audit.go (about) 1 package teams 2 3 import ( 4 "bytes" 5 "crypto/rand" 6 "errors" 7 "fmt" 8 "math/big" 9 "sync" 10 "time" 11 12 lru "github.com/hashicorp/golang-lru" 13 "github.com/keybase/client/go/libkb" 14 storage "github.com/keybase/client/go/teams/storage" 15 "golang.org/x/net/context" 16 17 "github.com/keybase/client/go/protocol/keybase1" 18 ) 19 20 func ShouldRunBoxAudit(mctx libkb.MetaContext) bool { 21 if !mctx.G().ActiveDevice.Valid() { 22 mctx.Debug("ShouldRunBoxAudit: not logged in") 23 return false 24 } 25 26 if mctx.G().IsMobileAppType() { 27 netState := mctx.G().MobileNetState.State() 28 if netState.IsLimited() { 29 mctx.Debug("ShouldRunBoxAudit: skipping box audit, network state: %v", netState) 30 return false 31 } 32 appState, stateMtime := mctx.G().MobileAppState.StateAndMtime() 33 mctx.Debug("ShouldRunBoxAudit: mobileAppState=%+v, stateMtime=%+v", appState, stateMtime) 34 if stateMtime == nil || appState != keybase1.MobileAppState_FOREGROUND || time.Since(*stateMtime) < 3*time.Minute { 35 mctx.Debug("ShouldRunBoxAudit: mobile and backgrounded") 36 return false 37 } 38 } 39 40 return true 41 } 42 43 const CurrentBoxAuditVersion boxAuditVersion = 6 44 const JailLRUSize = 100 45 const BoxAuditIDLen = 16 46 const MaxBoxAuditRetryAttempts = 6 47 const BoxAuditTag = "BOXAUD" 48 const MaxBoxAuditQueueSize = 100 49 const MaxBoxAuditLogSize = 10 50 51 type contextKey string 52 53 const SkipBoxAuditCheckContextKey contextKey = "skip-box-audit-check" 54 55 type ClientBoxAuditError struct { 56 inner error 57 } 58 59 func (e ClientBoxAuditError) Error() string { 60 return fmt.Sprintf("audit failed due to client-side issue; will be retried later: %s", e.inner) 61 } 62 63 type NonfatalBoxAuditError struct { 64 inner error 65 } 66 67 func (e NonfatalBoxAuditError) Error() string { 68 return fmt.Sprintf("audit failed; will be retried later: %s.", e.inner) 69 } 70 71 type FatalBoxAuditError struct { 72 inner error 73 } 74 75 func (e FatalBoxAuditError) Error() string { 76 return fmt.Sprintf("audit failed fatally; will not be retried until requested: %s", e.inner) 77 } 78 79 func shouldSkipBasedOnRecursion(mctx libkb.MetaContext) (libkb.MetaContext, bool) { 80 shouldSkip, ok := mctx.Ctx().Value(SkipBoxAuditCheckContextKey).(bool) 81 if ok && shouldSkip { 82 return mctx, true 83 } 84 mctx = mctx.WithCtx(context.WithValue(mctx.Ctx(), SkipBoxAuditCheckContextKey, true)) 85 return mctx, false 86 } 87 88 func VerifyBoxAudit(mctx libkb.MetaContext, teamID keybase1.TeamID) (newMctx libkb.MetaContext, shouldReload bool) { 89 mctx, shouldSkip := shouldSkipBasedOnRecursion(mctx) 90 if shouldSkip { 91 return mctx, false 92 } 93 94 didReaudit, err := mctx.G().GetTeamBoxAuditor().AssertUnjailedOrReaudit(mctx, teamID) 95 if err != nil { 96 mctx.G().NotifyRouter.HandleBoxAuditError(mctx.Ctx(), err.Error()) 97 return mctx, true 98 } 99 return mctx, didReaudit 100 } 101 102 // BoxAuditor ensures all of a team's secret boxes are encrypted for the right 103 // people, and that the server has not neglected to notify a team to rotate 104 // their keys in the event of a user revoking a device or resetting their 105 // account. Security depends on the security of the Merkle tree so we know the 106 // current status of all the team's members. BoxAuditor operations are 107 // thread-safe and can be run concurrently for many teams. Security also 108 // relies on team members and the Keybase server not colluding together to sign 109 // box summary hashes into the sigchain that don't match what was actually 110 // encrypted (which is somewhat trivial, since members can leak the secret if 111 // they want regardless of server cooperation). 112 type BoxAuditor struct { 113 Version boxAuditVersion 114 115 // Singleflight lock on team ID. 116 locktab *libkb.LockTable 117 118 // jailMutex and queueMutex are not per-team locks, since they are 119 // collections of multiple team IDs. Two audits of two teams can happen at 120 // the same time, but they cannot access the jail or the retry queue at the 121 // same time. 122 jailMutex sync.Mutex 123 queueMutex sync.Mutex 124 125 // The box audit jail has an LRU for performance, we need a mutex so we 126 // don't use a partially initialized jailLRU. 127 jailLRUMutex sync.Mutex 128 jailLRU *lru.Cache 129 130 delayedSlotsMutex sync.Mutex 131 delayedSlots map[keybase1.TeamID]bool 132 } 133 134 var _ libkb.TeamBoxAuditor = &BoxAuditor{} 135 136 func (a *BoxAuditor) resetJailLRU() { 137 a.jailLRUMutex.Lock() 138 defer a.jailLRUMutex.Unlock() 139 140 jailLRU, err := lru.New(JailLRUSize) 141 // lru.New only returns an error on a negative size, so it's safe to panic 142 // on an error. 143 if err != nil { 144 panic(err) 145 } 146 147 a.jailLRU = jailLRU 148 } 149 150 func (a *BoxAuditor) getJailLRU() *lru.Cache { 151 a.jailLRUMutex.Lock() 152 defer a.jailLRUMutex.Unlock() 153 return a.jailLRU 154 } 155 156 func (a *BoxAuditor) OnLogout(mctx libkb.MetaContext) error { 157 a.resetJailLRU() 158 return nil 159 } 160 161 func (a *BoxAuditor) OnDbNuke(mctx libkb.MetaContext) error { 162 a.jailLRU.Purge() 163 return nil 164 } 165 166 func NewBoxAuditor(g *libkb.GlobalContext) *BoxAuditor { 167 return newBoxAuditorWithVersion(g, CurrentBoxAuditVersion) 168 } 169 170 func newBoxAuditorWithVersion(g *libkb.GlobalContext, version boxAuditVersion) *BoxAuditor { 171 a := &BoxAuditor{ 172 Version: version, 173 locktab: libkb.NewLockTable(), 174 delayedSlots: make(map[keybase1.TeamID]bool), 175 } 176 a.resetJailLRU() 177 return a 178 } 179 180 func NewBoxAuditorAndInstall(g *libkb.GlobalContext) { 181 if g.GetEnv().GetDisableTeamBoxAuditor() { 182 g.Log.CWarningf(context.TODO(), "Box auditor disabled: using dummy auditor") 183 g.SetTeamBoxAuditor(DummyBoxAuditor{}) 184 } else { 185 a := NewBoxAuditor(g) 186 g.SetTeamBoxAuditor(a) 187 g.AddLogoutHook(a, "boxAuditor") 188 g.AddDbNukeHook(a, "boxAuditor") 189 } 190 } 191 192 func (a *BoxAuditor) initMctx(mctx libkb.MetaContext) libkb.MetaContext { 193 mctx = mctx.WithLogTag(BoxAuditTag) 194 mctx = mctx.WithCtx(context.WithValue(mctx.Ctx(), SkipBoxAuditCheckContextKey, true)) 195 return mctx 196 } 197 198 // BoxAuditTeam performs one attempt of a BoxAudit. If one is in progress for 199 // the teamid, make a new attempt. If exceeded max tries or hit a malicious 200 // error, return a fatal error. Otherwise, make a new audit and fill it with 201 // one attempt. If the attempt failed nonfatally, enqueue it in the retry 202 // queue. If it failed fatally, add it to the jail. If it failed for reasons 203 // that are purely client-side, like a disk write error, we retry it as well 204 // but distinguish it from a failure the server could have possibly maliciously 205 // caused. 206 func (a *BoxAuditor) BoxAuditTeam(mctx libkb.MetaContext, teamID keybase1.TeamID) (attempt *keybase1.BoxAuditAttempt, err error) { 207 mctx = a.initMctx(mctx) 208 if !ShouldRunBoxAudit(mctx) { 209 mctx.Debug("Box auditor feature flagged off or not logged in; not auditing...") 210 return nil, nil 211 } 212 defer mctx.Trace(fmt.Sprintf("BoxAuditTeam(%s)", teamID), &err)() 213 defer mctx.PerfTrace(fmt.Sprintf("BoxAuditTeam(%s)", teamID), &err)() 214 start := time.Now() 215 defer func() { 216 var message string 217 if err == nil { 218 message = fmt.Sprintf("Audited boxes for team %s", teamID) 219 } else { 220 message = fmt.Sprintf("Failed to box audit %s", teamID) 221 } 222 mctx.G().RuntimeStats.PushPerfEvent(keybase1.PerfEvent{ 223 EventType: keybase1.PerfEventType_TEAMBOXAUDIT, 224 Message: message, 225 Ctime: keybase1.ToTime(start), 226 }) 227 }() 228 229 lock := a.locktab.AcquireOnName(mctx.Ctx(), mctx.G(), teamID.String()) 230 defer lock.Release(mctx.Ctx()) 231 return a.boxAuditTeamLocked(mctx, teamID) 232 } 233 234 func (a *BoxAuditor) boxAuditTeamLocked(mctx libkb.MetaContext, teamID keybase1.TeamID) (attemptPtr *keybase1.BoxAuditAttempt, err error) { 235 defer mctx.Trace(fmt.Sprintf("boxAuditTeamLocked(%s)", teamID), &err)() 236 237 a.clearDelayedSlotForTeam(teamID) 238 239 log, err := a.maybeGetLog(mctx, teamID) 240 if err != nil { 241 return nil, ClientBoxAuditError{err} 242 } 243 if log == nil { 244 log = NewBoxAuditLog(a.Version) 245 } 246 247 isRetry := log.InProgress 248 // First, attempt the audit. 249 attempt := a.attemptLocked(mctx, teamID, false /* rotateBeforeAudit */, false /* justRotated */) 250 // In the case where it was a retry on a failed audit, *and* the audit that 251 // just happened failed, try to rotate the team before auditing again. This 252 // is so we don't unnecessarily rotate if the previous failure was due to a 253 // network error. If the network is still down, this rotate will fail. 254 if isRetry && !attempt.Result.IsOK() && !mctx.G().TestOptions.NoAutorotateOnBoxAuditRetry { 255 attempt = a.attemptLocked(mctx, teamID, true /* rotateBeforeAudit */, false /* justRotated */) 256 } 257 258 var id BoxAuditID 259 if isRetry { 260 // If there's already an inprogress Audit (i.e., previous failure and 261 // we're doing a retry), rotate and do a new attempt in the same audit 262 mctx.Debug("Retrying failed box audit") 263 lastAudit := log.Last() 264 id = lastAudit.ID 265 newAudit := BoxAudit{ 266 ID: lastAudit.ID, 267 Attempts: append(lastAudit.Attempts, attempt), 268 } 269 log.Audits[len(log.Audits)-1] = newAudit 270 } else { 271 // If the last audit was completed, start a new audit. 272 mctx.Debug("Starting new box audit") 273 id, err = NewBoxAuditID() 274 if err != nil { 275 return &attempt, ClientBoxAuditError{err} 276 } 277 audit := BoxAudit{ 278 ID: id, 279 Attempts: []keybase1.BoxAuditAttempt{attempt}, 280 } 281 log.Audits = append(log.Audits, audit) 282 } 283 if len(log.Audits) > MaxBoxAuditLogSize { 284 mctx.Debug("Truncating box audit log") 285 log.Audits = log.Audits[len(log.Audits)-MaxBoxAuditLogSize:] 286 } 287 288 isOK := attempt.Result.IsOK() 289 retryAttemptsExhausted := len(log.Last().Attempts) >= MaxBoxAuditRetryAttempts 290 isFatal := attempt.Result == keybase1.BoxAuditAttemptResult_FAILURE_MALICIOUS_SERVER || retryAttemptsExhausted 291 mctx.Debug("Box audit diagnosis: isOK=%t, retryAttemptsExhausted=%t, isFatal=%t", isOK, retryAttemptsExhausted, isFatal) 292 293 // NOTE An audit that has failed fatally will *not* be automatically 294 // retried, but it is still considered InProgress because it is not in a 295 // successful state, and more attempts will append to the currently failed 296 // audit, instead of starting a new one. 297 log.InProgress = !isOK 298 299 err = putLogToDisk(mctx, log, teamID) 300 if err != nil { 301 return &attempt, ClientBoxAuditError{err} 302 } 303 304 switch { 305 case isOK: 306 mctx.Debug("Box audit successful") 307 _, err = a.clearRetryQueueOf(mctx, teamID) 308 if err != nil { 309 return &attempt, ClientBoxAuditError{err} 310 } 311 err = a.unjail(mctx, teamID) 312 if err != nil { 313 return &attempt, ClientBoxAuditError{err} 314 } 315 return &attempt, nil 316 case isFatal: 317 mctx.Debug("Box audit failed fatally") 318 _, err = a.clearRetryQueueOf(mctx, teamID) 319 if err != nil { 320 return &attempt, ClientBoxAuditError{err} 321 } 322 err = a.jail(mctx, teamID) 323 if err != nil { 324 return &attempt, ClientBoxAuditError{err} 325 } 326 return &attempt, FatalBoxAuditError{errors.New(*attempt.Error)} 327 default: // retryable error 328 mctx.Debug("Box audit failed nonfatally; retryable") 329 err := a.pushRetryQueue(mctx, teamID, id) 330 if err != nil { 331 return &attempt, ClientBoxAuditError{err} 332 } 333 return &attempt, NonfatalBoxAuditError{errors.New(*attempt.Error)} 334 } 335 } 336 337 func (a *BoxAuditor) AssertUnjailedOrReaudit(mctx libkb.MetaContext, teamID keybase1.TeamID) (didReaudit bool, err error) { 338 mctx = a.initMctx(mctx) 339 defer mctx.Trace("AssertUnjailedOrReaudit", &err)() 340 341 if !ShouldRunBoxAudit(mctx) { 342 mctx.Debug("Box auditor feature flagged off or not logged in; not AssertUnjailedOrReauditing...") 343 return false, nil 344 } 345 346 inJail, err := a.IsInJail(mctx, teamID) 347 if err != nil { 348 return false, fmt.Errorf("failed to check box audit jail during team load: %s", err) 349 } 350 if !inJail { 351 return false, nil 352 } 353 354 mctx.Debug("team in jail; retrying box audit") 355 maxRetries := 3 356 var errs []error 357 for i := 0; i <= maxRetries; i++ { 358 _, err = a.BoxAuditTeam(mctx, teamID) 359 if err != nil { 360 mctx.Debug("AssertUnjailedOrReaudit: box audit try #%d failed...", i+1) 361 errs = append(errs, err) 362 } else { 363 return true, nil 364 } 365 } 366 return false, fmt.Errorf("failed to successfully reaudit team %s in box audit jail after %d retries: %s", teamID, maxRetries, libkb.CombineErrors(errs...)) 367 } 368 369 // RetryNextBoxAudit selects a teamID from the box audit retry queue and performs another box audit. 370 func (a *BoxAuditor) RetryNextBoxAudit(mctx libkb.MetaContext) (attempt *keybase1.BoxAuditAttempt, err error) { 371 mctx = a.initMctx(mctx) 372 defer mctx.Trace("RetryNextBoxAudit", &err)() 373 374 if !ShouldRunBoxAudit(mctx) { 375 mctx.Debug("Box auditor feature flagged off or not logged in; not RetryNextBoxAuditing...") 376 return nil, nil 377 } 378 379 queueItem, err := a.popRetryQueue(mctx) 380 if err != nil { 381 return nil, err 382 } 383 if queueItem == nil { 384 mctx.Debug("Retry queue empty, succeeding vacuously") 385 return nil, nil 386 } 387 return a.BoxAuditTeam(mctx, queueItem.TeamID) 388 } 389 390 // BoxAuditRandomTeam selects a random known team from the slow team or FTL 391 // cache, including implicit teams, and audits it. It may succeed trivially 392 // because, for example, user is a reader and so does not have permissions to 393 // do a box audit or the team is an open team. 394 func (a *BoxAuditor) BoxAuditRandomTeam(mctx libkb.MetaContext) (attempt *keybase1.BoxAuditAttempt, err error) { 395 mctx = a.initMctx(mctx) 396 defer mctx.Trace("BoxAuditRandomTeam", &err)() 397 398 if !ShouldRunBoxAudit(mctx) { 399 mctx.Debug("Box auditor feature flagged off or not logged in; not BoxAuditRandomTeaming...") 400 return nil, nil 401 } 402 403 teamID, err := randomKnownTeamID(mctx) 404 if err != nil { 405 return nil, err 406 } 407 if teamID == nil { 408 mctx.Debug("No known teams to audit in db, skipping box audit") 409 return nil, nil 410 } 411 412 return a.BoxAuditTeam(mctx, *teamID) 413 } 414 415 func (a *BoxAuditor) IsInJail(mctx libkb.MetaContext, teamID keybase1.TeamID) (inJail bool, err error) { 416 mctx = a.initMctx(mctx) 417 418 if !ShouldRunBoxAudit(mctx) { 419 mctx.Debug("Box auditor feature flagged off or not logged in; not IsInJailing...") 420 return false, nil 421 } 422 423 val, ok := a.getJailLRU().Get(teamID) 424 if ok { 425 valBool, ok := val.(bool) 426 if ok { 427 return valBool, nil 428 } 429 mctx.Error("Bad boolean type assertion in IsInJail LRU for %s", teamID) 430 // Fall through to disk if the LRU is corrupted 431 } 432 433 jail, err := a.maybeGetJail(mctx) 434 if err != nil { 435 return false, err 436 } 437 if jail == nil { 438 a.getJailLRU().Add(teamID, false) 439 return false, nil 440 } 441 _, ok = jail.TeamIDs[teamID] 442 a.getJailLRU().Add(teamID, ok) 443 return ok, nil 444 } 445 446 // Attempt tries one time to box audit a Team ID. It does not store any 447 // persistent state to disk related to the box audit, but it may, e.g., refresh 448 // the team cache. 449 func (a *BoxAuditor) Attempt(mctx libkb.MetaContext, teamID keybase1.TeamID, rotateBeforeAudit bool) (attempt keybase1.BoxAuditAttempt) { 450 mctx = a.initMctx(mctx) 451 var err error 452 defer mctx.Trace(fmt.Sprintf("Attempt(%s, %t)", teamID, rotateBeforeAudit), &err)() 453 defer func() { 454 if attempt.Error != nil { 455 err = errors.New(*attempt.Error) 456 } 457 }() 458 lock := a.locktab.AcquireOnName(mctx.Ctx(), mctx.G(), teamID.String()) 459 defer lock.Release(mctx.Ctx()) 460 return a.attemptLocked(mctx, teamID, rotateBeforeAudit, false) 461 } 462 463 func (a *BoxAuditor) attemptLocked(mctx libkb.MetaContext, teamID keybase1.TeamID, rotateBeforeAudit bool, justRotated bool) (attempt keybase1.BoxAuditAttempt) { 464 var err error 465 defer mctx.Trace(fmt.Sprintf("attemptLocked(%s, %t)", teamID, rotateBeforeAudit), &err)() 466 defer func() { 467 if attempt.Error != nil { 468 err = errors.New(*attempt.Error) 469 } 470 }() 471 472 attempt = keybase1.BoxAuditAttempt{ 473 Result: keybase1.BoxAuditAttemptResult_FAILURE_RETRYABLE, 474 Ctime: keybase1.ToUnixTime(time.Now()), 475 } 476 477 getErrorMessage := func(err error) *string { 478 msg := err.Error() 479 return &msg 480 } 481 482 team, err := loadTeamForBoxAudit(mctx, teamID) 483 if err != nil { 484 attempt.Error = getErrorMessage(fmt.Errorf("failed to load team: %s", err)) 485 return attempt 486 } 487 488 g := team.Generation() 489 attempt.Generation = &g 490 491 shouldAudit, shouldAuditResult, err := a.shouldAudit(mctx, *team) 492 if err != nil { 493 attempt.Error = getErrorMessage(err) 494 return attempt 495 } 496 497 rotateType := keybase1.RotationType_VISIBLE 498 if team.Hidden != nil && team.Hidden.NeedRotate { 499 shouldAudit = true 500 rotateBeforeAudit = true 501 rotateType = keybase1.RotationType_CLKR 502 mctx.Debug("Hidden load said need rotate; so we're attempt a CLKR-style rotation, then will reaudit") 503 } 504 505 if !shouldAudit { 506 mctx.Debug("Not attempting box audit attempt; %s", attempt.Result) 507 attempt.Result = *shouldAuditResult 508 return attempt 509 } 510 511 if rotateBeforeAudit { 512 mctx.Debug("rotating before audit") 513 err := team.Rotate(mctx.Ctx(), rotateType) 514 if err != nil { 515 mctx.Warning("failed to rotate team before audit: %s", err) 516 // continue despite having failed to rotate 517 } else { 518 // reload the team 519 return a.attemptLocked(mctx, teamID, false, true) 520 } 521 } 522 523 if justRotated { 524 attempt.Rotated = true 525 } 526 527 pastSummary, err := calculateChainSummary(mctx, team) 528 if err != nil { 529 attempt.Error = getErrorMessage(err) 530 return attempt 531 } 532 533 currentSummary, err := calculateCurrentSummary(mctx, team) 534 if err != nil { 535 attempt.Error = getErrorMessage(err) 536 return attempt 537 } 538 539 if !bytes.Equal(currentSummary.Hash(), pastSummary.Hash()) { 540 // No need to make these Warnings, because these could happen when a 541 // user has just changed their PUK and CLKR hasn't fired yet, or if the 542 // team doesn't have any box summary hashes in the sigchain yet, etc. 543 mctx.Debug("ERROR: Box audit summary mismatch") 544 mctx.Debug("Past summary: %+v", pastSummary.table) 545 mctx.Debug("Current summary: %+v", currentSummary.table) 546 547 attempt.Error = getErrorMessage(fmt.Errorf("box summary hash mismatch")) 548 return attempt 549 } 550 551 attempt.Result = keybase1.BoxAuditAttemptResult_OK_VERIFIED 552 return attempt 553 } 554 555 func (a *BoxAuditor) clearRetryQueueOf(mctx libkb.MetaContext, teamID keybase1.TeamID) (queue *BoxAuditQueue, err error) { 556 defer mctx.Trace(fmt.Sprintf("clearRetryQueueOf(%s)", teamID), &err)() 557 a.queueMutex.Lock() 558 defer a.queueMutex.Unlock() 559 return a.clearRetryQueueOfLocked(mctx, teamID) 560 } 561 562 func (a *BoxAuditor) clearRetryQueueOfLocked(mctx libkb.MetaContext, teamID keybase1.TeamID) (queue *BoxAuditQueue, err error) { 563 defer mctx.Trace(fmt.Sprintf("clearRetryQueueOfLocked(%s)", teamID), &err)() 564 queue, err = a.maybeGetQueue(mctx) 565 if err != nil { 566 return nil, err 567 } 568 if queue == nil { 569 return nil, nil 570 } 571 newItems := make([]BoxAuditQueueItem, 0, len(queue.Items)) 572 for _, item := range queue.Items { 573 if item.TeamID != teamID { 574 newItems = append(newItems, item) 575 } 576 } 577 queue.Items = newItems 578 err = putQueueToDisk(mctx, queue) 579 if err != nil { 580 return nil, err 581 } 582 return queue, nil 583 } 584 585 func (a *BoxAuditor) popRetryQueue(mctx libkb.MetaContext) (itemPtr *BoxAuditQueueItem, err error) { 586 defer mctx.Trace("popRetryQueue", &err)() 587 a.queueMutex.Lock() 588 defer a.queueMutex.Unlock() 589 590 queue, err := a.maybeGetQueue(mctx) 591 if err != nil { 592 return nil, err 593 } 594 if queue == nil { 595 return nil, nil 596 } 597 if len(queue.Items) == 0 { 598 return nil, nil 599 } 600 item, newItems := queue.Items[0], queue.Items[1:] 601 queue.Items = newItems 602 err = putQueueToDisk(mctx, queue) 603 if err != nil { 604 return nil, err 605 } 606 return &item, nil 607 } 608 609 func (a *BoxAuditor) pushRetryQueue(mctx libkb.MetaContext, teamID keybase1.TeamID, auditID BoxAuditID) (err error) { 610 defer mctx.Trace(fmt.Sprintf("pushRetryQueue(%s, %x)", teamID, auditID), &err)() 611 a.queueMutex.Lock() 612 defer a.queueMutex.Unlock() 613 614 queue, err := a.maybeGetQueue(mctx) 615 if err != nil { 616 return err 617 } 618 if queue != nil { 619 // If already in the queue, remove it so we can bump it to the top. 620 queue, err = a.clearRetryQueueOfLocked(mctx, teamID) 621 if err != nil { 622 return err 623 } 624 } else { 625 queue = NewBoxAuditQueue(a.Version) 626 } 627 628 queue.Items = append(queue.Items, BoxAuditQueueItem{Ctime: time.Now(), TeamID: teamID, BoxAuditID: auditID}) 629 if len(queue.Items) > MaxBoxAuditQueueSize { 630 // Truncate oldest first. 631 mctx.Debug("Truncating box audit queue") 632 queue.Items = queue.Items[len(queue.Items)-MaxBoxAuditQueueSize:] 633 } 634 err = putQueueToDisk(mctx, queue) 635 if err != nil { 636 return err 637 } 638 return nil 639 } 640 641 func (a *BoxAuditor) jail(mctx libkb.MetaContext, teamID keybase1.TeamID) (err error) { 642 defer mctx.Trace(fmt.Sprintf("jail(%s)", teamID), &err)() 643 a.jailMutex.Lock() 644 defer a.jailMutex.Unlock() 645 646 a.getJailLRU().Add(teamID, true) 647 648 jail, err := a.maybeGetJail(mctx) 649 if err != nil { 650 return err 651 } 652 if jail == nil { 653 jail = NewBoxAuditJail(a.Version) 654 } 655 jail.TeamIDs[teamID] = true 656 err = putJailToDisk(mctx, jail) 657 if err != nil { 658 return err 659 } 660 return nil 661 } 662 663 func (a *BoxAuditor) unjail(mctx libkb.MetaContext, teamID keybase1.TeamID) (err error) { 664 defer mctx.Trace(fmt.Sprintf("unjail(%s)", teamID), &err)() 665 a.jailMutex.Lock() 666 defer a.jailMutex.Unlock() 667 668 a.getJailLRU().Add(teamID, false) 669 670 jail, err := a.maybeGetJail(mctx) 671 if err != nil { 672 return err 673 } 674 if jail == nil { 675 jail = NewBoxAuditJail(a.Version) 676 } 677 delete(jail.TeamIDs, teamID) 678 err = putJailToDisk(mctx, jail) 679 if err != nil { 680 return err 681 } 682 return nil 683 } 684 685 type DummyBoxAuditor struct{} 686 687 var _ libkb.TeamBoxAuditor = &DummyBoxAuditor{} 688 689 const dummyMsg = "Box auditor disabled; aborting successfully" 690 691 func (d DummyBoxAuditor) AssertUnjailedOrReaudit(mctx libkb.MetaContext, _ keybase1.TeamID) (bool, error) { 692 mctx.Debug(dummyMsg) 693 return false, nil 694 } 695 func (d DummyBoxAuditor) IsInJail(mctx libkb.MetaContext, _ keybase1.TeamID) (bool, error) { 696 mctx.Debug(dummyMsg) 697 return false, nil 698 } 699 func (d DummyBoxAuditor) RetryNextBoxAudit(mctx libkb.MetaContext) (*keybase1.BoxAuditAttempt, error) { 700 mctx.Debug(dummyMsg) 701 return nil, nil 702 } 703 func (d DummyBoxAuditor) BoxAuditRandomTeam(mctx libkb.MetaContext) (*keybase1.BoxAuditAttempt, error) { 704 mctx.Debug(dummyMsg) 705 return nil, nil 706 } 707 func (d DummyBoxAuditor) BoxAuditTeam(mctx libkb.MetaContext, _ keybase1.TeamID) (*keybase1.BoxAuditAttempt, error) { 708 mctx.Debug(dummyMsg) 709 return nil, nil 710 } 711 func (d DummyBoxAuditor) Attempt(mctx libkb.MetaContext, _ keybase1.TeamID, _ bool) keybase1.BoxAuditAttempt { 712 mctx.Debug(dummyMsg) 713 return keybase1.BoxAuditAttempt{ 714 Result: keybase1.BoxAuditAttemptResult_OK_NOT_ATTEMPTED_ROLE, 715 Ctime: keybase1.ToUnixTime(time.Now()), 716 } 717 } 718 func (d DummyBoxAuditor) MaybeScheduleDelayedBoxAuditTeam(mctx libkb.MetaContext, teamID keybase1.TeamID) { 719 } 720 721 // BoxAuditLog is a log of audits for a particular team. 722 type BoxAuditLog struct { 723 // The last entry of Audits is the latest one. 724 Audits []BoxAudit 725 726 // Whether the last Audit is still in progress; false initially. 727 InProgress bool 728 729 Version boxAuditVersion 730 } 731 732 var _ boxAuditVersioned = &BoxAuditLog{} 733 734 func (l *BoxAuditLog) getVersion() boxAuditVersion { 735 return l.Version 736 } 737 738 func NewBoxAuditLog(version boxAuditVersion) *BoxAuditLog { 739 return &BoxAuditLog{ 740 Audits: nil, 741 InProgress: false, 742 Version: version, 743 } 744 } 745 746 func (l *BoxAuditLog) Last() *BoxAudit { 747 if l == nil || len(l.Audits) == 0 { 748 return nil 749 } 750 return &l.Audits[len(l.Audits)-1] 751 } 752 753 // BoxAudit is a single sequence of audit attempts for a single team. 754 type BoxAudit struct { 755 ID BoxAuditID 756 Attempts []keybase1.BoxAuditAttempt 757 } 758 759 type BoxAuditID = []byte 760 761 func NewBoxAuditID() (BoxAuditID, error) { 762 idBytes := make([]byte, BoxAuditIDLen) 763 _, err := rand.Read(idBytes) 764 if err != nil { 765 return nil, err 766 } 767 return idBytes, nil 768 } 769 770 // BoxAuditQueue holds a list of teams that need to be reaudited, because the 771 // previously failed an audit. When a team does pass an audit, it is removed 772 // from the queue. 773 type BoxAuditQueue struct { 774 Items []BoxAuditQueueItem 775 Version boxAuditVersion 776 } 777 778 var _ boxAuditVersioned = &BoxAuditQueue{} 779 780 func (q *BoxAuditQueue) getVersion() boxAuditVersion { 781 return q.Version 782 } 783 784 func NewBoxAuditQueue(version boxAuditVersion) *BoxAuditQueue { 785 return &BoxAuditQueue{ 786 Items: nil, 787 Version: version, 788 } 789 } 790 791 type BoxAuditQueueItem struct { 792 Ctime time.Time 793 TeamID keybase1.TeamID 794 BoxAuditID BoxAuditID 795 } 796 797 // BoxAuditJail contains TeamIDs that have hit a fatal audit failure or the max 798 // number of retryable audit failures. Teams in jail will not be reaudited 799 // unless they are explicitly loaded by the fast or slow team loaders. 800 type BoxAuditJail struct { 801 TeamIDs map[keybase1.TeamID]bool 802 Version boxAuditVersion 803 } 804 805 var _ boxAuditVersioned = &BoxAuditJail{} 806 807 func (j *BoxAuditJail) getVersion() boxAuditVersion { 808 return j.Version 809 } 810 811 func NewBoxAuditJail(version boxAuditVersion) *BoxAuditJail { 812 return &BoxAuditJail{ 813 TeamIDs: make(map[keybase1.TeamID]bool), 814 Version: version, 815 } 816 } 817 818 func (a *BoxAuditor) shouldAudit(mctx libkb.MetaContext, team Team) (bool, *keybase1.BoxAuditAttemptResult, error) { 819 if team.IsOpen() { 820 res := keybase1.BoxAuditAttemptResult_OK_NOT_ATTEMPTED_OPENTEAM 821 return false, &res, nil 822 } 823 role, err := team.MemberRole(mctx.Ctx(), mctx.CurrentUserVersion()) 824 if err != nil { 825 return false, nil, err 826 } 827 if !role.IsOrAbove(keybase1.TeamRole_WRITER) { 828 res := keybase1.BoxAuditAttemptResult_OK_NOT_ATTEMPTED_ROLE 829 return false, &res, nil 830 } 831 832 return true, nil, nil 833 } 834 835 // loadTeamForBoxAudit loads a team once, but if the client 836 // has not yet stored BoxSummaryHashes (due to being an old client) 837 // it does a force full reload so it is populated. 838 func loadTeamForBoxAudit(mctx libkb.MetaContext, teamID keybase1.TeamID) (*Team, error) { 839 return loadTeamForBoxAuditInner(mctx, teamID, false) 840 } 841 842 func loadTeamForBoxAuditInner(mctx libkb.MetaContext, teamID keybase1.TeamID, force bool) (team *Team, err error) { 843 defer mctx.Trace("loadTeamForBoxAuditInner", &err)() 844 arg := keybase1.LoadTeamArg{ 845 ID: teamID, 846 ForceRepoll: true, 847 Public: teamID.IsPublic(), 848 ForceFullReload: force, 849 } 850 851 team, err = Load(mctx.Ctx(), mctx.G(), arg) 852 if err != nil { 853 return nil, err 854 } 855 if team == nil { 856 return nil, fmt.Errorf("got nil team from loader") 857 } 858 859 // If the team sigchain state was constructed with support for the 860 // merkleRoots map, the map will be non-nil but empty. It will only be nil 861 // if the state is cached from a team load before box summary hash support. 862 if team.chain().GetMerkleRoots() == nil { 863 if force { 864 return nil, fmt.Errorf("failed to get a non-nil merkleRoots map after full reload") 865 } 866 mctx.Debug("retrying loadTeamForBoxAuditInner with force reload") 867 return loadTeamForBoxAuditInner(mctx, teamID, true) 868 } 869 return team, nil 870 } 871 872 type merkleSeqno = keybase1.Seqno 873 type merkleCheckpoints map[keybase1.UserVersion]merkleSeqno 874 875 func getPUKCheckpoints(mctx libkb.MetaContext, teamchain *TeamSigChainState, checkpoint merkleSeqno, fastforwardToAddition bool) (merkleCheckpoints, error) { 876 mctx.Debug("getting PUK checkpoints at merkle seqno %v; fastforwardToAddition=%t", checkpoint, fastforwardToAddition) 877 checkpoints := make(merkleCheckpoints) 878 // We only check users currently in the team, which means we skip over any 879 // users, who for example, have reset (and possibly have added a new PUK), 880 // but have not been let back into the team by an admin. 881 for uv, logPoints := range teamchain.inner.UserLog { 882 logPoint := logPoints[len(logPoints)-1] 883 if logPoint.Role == keybase1.TeamRole_NONE { 884 continue 885 } 886 latest := checkpoint 887 if fastforwardToAddition { 888 latest = max(latest, logPoint.SigMeta.PrevMerkleRootSigned.Seqno) 889 } 890 checkpoints[uv] = latest 891 } 892 return checkpoints, nil 893 } 894 895 func max(a, b merkleSeqno) merkleSeqno { 896 if a > b { 897 return a 898 } 899 return b 900 } 901 902 // calculateCurrentSummary calculates the box summary as it is currently for 903 // all users in the team (i.e., if the team were rotated right now, what the summary 904 // should be afterwards). 905 func calculateCurrentSummary(mctx libkb.MetaContext, team *Team) (summary *boxPublicSummary, err error) { 906 defer mctx.Trace(fmt.Sprintf("calculateCurrentSummary(%s)", team.ID), &err)() 907 908 currentRoot, err := mctx.G().GetMerkleClient().FetchRootFromServer(mctx, 5*time.Minute) 909 if err != nil { 910 return nil, err 911 } 912 if currentRoot.Seqno() == nil { 913 return nil, fmt.Errorf("got nil current merkle root") 914 } 915 return calculateSummaryAtMerkleSeqno(mctx, team, *currentRoot.Seqno(), false) 916 } 917 918 // calculateChainSummary calculates the box summary as implied by the team sigchain and previous links, 919 // using the last known rotation and subsequent additions as markers for PUK freshness. 920 func calculateChainSummary(mctx libkb.MetaContext, team *Team) (summary *boxPublicSummary, err error) { 921 defer mctx.Trace(fmt.Sprintf("calculateChainSummary(%s)", team.ID), &err)() 922 923 merkleSeqno, err := merkleSeqnoAtGenerationInception(mctx, team.chain()) 924 if err != nil { 925 return nil, err 926 } 927 928 if !mctx.G().GetMerkleClient().CanExamineHistoricalRoot(mctx, merkleSeqno) { 929 return nil, fmt.Errorf("last rotation was at %d, before the most recent checkpoint, so forcing a rotation", merkleSeqno) 930 } 931 932 return calculateSummaryAtMerkleSeqno(mctx, team, merkleSeqno, true) 933 } 934 935 // calculateSummaryAtMerkleSeqno calculates the summary at the given merkleSeqno. 936 func calculateSummaryAtMerkleSeqno(mctx libkb.MetaContext, team *Team, merkleSeqno merkleSeqno, fastforwardToAddition bool) (summary *boxPublicSummary, err error) { 937 defer mctx.Trace(fmt.Sprintf("calculateSummaryAtMerkleSeqno(%s, %v)", team.ID, merkleSeqno), &err)() 938 939 checkpoints, err := getPUKCheckpoints(mctx, team.chain(), merkleSeqno, fastforwardToAddition) 940 if err != nil { 941 return nil, err 942 } 943 944 if team.IsSubteam() { 945 mctx.Debug("calculating summary for subteam; loading implicit admins") 946 err = mctx.G().GetTeamLoader().MapTeamAncestors(mctx.Ctx(), func(t keybase1.TeamSigChainState, _ keybase1.TeamName) error { 947 chain := TeamSigChainState{inner: t} 948 ancestorCheckpoints, err := getPUKCheckpoints(mctx, &chain, merkleSeqno, fastforwardToAddition) 949 if err != nil { 950 return err 951 } 952 for ancestorUV, ancestorMerkleSeqno := range ancestorCheckpoints { 953 role, err := chain.GetUserRole(ancestorUV) 954 if err != nil { 955 return err 956 } 957 // Only add implicit admins to summary 958 if !role.IsOrAbove(keybase1.TeamRole_ADMIN) { 959 continue 960 } 961 // If the implicit admin is a descendant, only update the 962 // checkpoints if the implicit admin was added to the team at a 963 // later checkpoint (and so would have boxes refreshed at a 964 // newer merkle seqno). 965 currentCheckpoint, ok := checkpoints[ancestorUV] 966 if ok && ancestorMerkleSeqno <= currentCheckpoint { 967 continue 968 } 969 checkpoints[ancestorUV] = ancestorMerkleSeqno 970 } 971 return nil 972 }, team.ID, "team box audit", func(t keybase1.TeamSigChainState) bool { 973 chain := TeamSigChainState{inner: t} 974 return chain.GetMerkleRoots() != nil 975 }) 976 if err != nil { 977 return nil, err 978 } 979 } 980 981 var uvs []keybase1.UserVersion 982 for uv := range checkpoints { 983 uvs = append(uvs, uv) 984 } 985 986 // for UPAK Batcher API 987 getArg := func(idx int) *libkb.LoadUserArg { 988 if idx >= len(uvs) { 989 return nil 990 } 991 arg := libkb.NewLoadUserByUIDArg(mctx.Ctx(), mctx.G(), uvs[idx].Uid).WithPublicKeyOptional().WithForcePoll(true) 992 return &arg 993 } 994 995 d := make(map[keybase1.UserVersion]keybase1.PerUserKey) 996 // for UPAK Batcher API 997 processResult := func(idx int, upak *keybase1.UserPlusKeysV2AllIncarnations) error { 998 uv := uvs[idx] 999 checkpoint := checkpoints[uv] 1000 1001 if upak == nil { 1002 return fmt.Errorf("got nil upak for uv %+v", uv) 1003 } 1004 1005 var perUserKey *keybase1.PerUserKey 1006 leaf, _, err := mctx.G().GetMerkleClient().LookupLeafAtSeqno(mctx, keybase1.UserOrTeamID(uv.Uid), checkpoint) 1007 if err != nil { 1008 return fmt.Errorf("failed to lookup leaf at merkle seqno %v for %v", checkpoint, uv) 1009 } 1010 if leaf == nil { 1011 return fmt.Errorf("got nil leaf at seqno %v for %v", checkpoint, uv) 1012 } 1013 if leaf.Public == nil { 1014 return fmt.Errorf("got nil leaf public at seqno %v for %v (leaf=%+v)", checkpoint, uv, leaf) 1015 } 1016 sigchainSeqno := leaf.Public.Seqno 1017 1018 perUserKey, err = upak.GetPerUserKeyAtSeqno(uv, sigchainSeqno, checkpoint) 1019 if err != nil { 1020 return fmt.Errorf("failed to find peruserkey at seqno %v for upak", sigchainSeqno) 1021 } 1022 if perUserKey == nil { 1023 // Not a critical error, since reset users have no current per user keys, for example. 1024 mctx.Debug("%s has no per-user-key at seqno %v", uv, sigchainSeqno) 1025 return nil 1026 } 1027 1028 d[uv] = *perUserKey 1029 return nil 1030 } 1031 1032 err = mctx.G().GetUPAKLoader().Batcher(mctx.Ctx(), getArg, processResult, 0) 1033 if err != nil { 1034 return nil, err 1035 } 1036 1037 return newBoxPublicSummary(d) 1038 } 1039 1040 // merkleSeqnoAtGenerationInception assumes TeamSigChainState.MerkleRoots is populated 1041 func merkleSeqnoAtGenerationInception(mctx libkb.MetaContext, teamchain *TeamSigChainState) (merkleSeqno keybase1.Seqno, err error) { 1042 _, mr, err := teamchain.getLatestPerTeamKeyWithMerkleSeqno(mctx) 1043 if err != nil { 1044 return 0, err 1045 } 1046 return mr.Seqno, nil 1047 } 1048 1049 // TeamIDKeys takes a set of DBKeys that must all be tid:-style DBKeys and 1050 // extracts the team id from them. Because teams can be loaded via both FTL and 1051 // the slow team loader, we use a set so we don't return duplicate teamIDs. 1052 func keySetToTeamIDs(dbKeySet libkb.DBKeySet) ([]keybase1.TeamID, error) { 1053 seen := make(map[keybase1.TeamID]bool) 1054 teamIDs := make([]keybase1.TeamID, 0, len(dbKeySet)) 1055 for dbKey := range dbKeySet { 1056 teamID, err := storage.ParseTeamIDDBKey(dbKey.Key) 1057 if err != nil { 1058 return nil, err 1059 } 1060 _, ok := seen[teamID] 1061 if !ok { 1062 teamIDs = append(teamIDs, teamID) 1063 seen[teamID] = true 1064 } 1065 } 1066 return teamIDs, nil 1067 } 1068 1069 type boxAuditVersion int 1070 type boxAuditVersioned interface { 1071 getVersion() boxAuditVersion 1072 } 1073 1074 func BoxAuditLogDbKey(mctx libkb.MetaContext, teamID keybase1.TeamID) libkb.DbKey { 1075 return libkb.DbKey{Typ: libkb.DBBoxAuditor, Key: string(teamID) + mctx.ActiveDevice().UID().String()} 1076 } 1077 1078 func BoxAuditQueueDbKey(mctx libkb.MetaContext) libkb.DbKey { 1079 return libkb.DbKey{Typ: libkb.DBBoxAuditorPermanent, Key: "queue" + mctx.ActiveDevice().UID().String()} 1080 } 1081 1082 func BoxAuditJailDbKey(mctx libkb.MetaContext) libkb.DbKey { 1083 return libkb.DbKey{Typ: libkb.DBBoxAuditorPermanent, Key: "jail" + mctx.ActiveDevice().UID().String()} 1084 } 1085 1086 func (a *BoxAuditor) maybeGetLog(mctx libkb.MetaContext, teamID keybase1.TeamID) (*BoxAuditLog, error) { 1087 var log BoxAuditLog 1088 found, err := a.maybeGetIntoVersioned(mctx, &log, BoxAuditLogDbKey(mctx, teamID)) 1089 if err != nil || !found { 1090 return nil, err 1091 } 1092 return &log, nil 1093 } 1094 1095 func (a *BoxAuditor) maybeGetQueue(mctx libkb.MetaContext) (*BoxAuditQueue, error) { 1096 var queue BoxAuditQueue 1097 found, err := a.maybeGetIntoVersioned(mctx, &queue, BoxAuditQueueDbKey(mctx)) 1098 if err != nil || !found { 1099 return nil, err 1100 } 1101 return &queue, nil 1102 } 1103 1104 func (a *BoxAuditor) maybeGetJail(mctx libkb.MetaContext) (*BoxAuditJail, error) { 1105 var jail BoxAuditJail 1106 found, err := a.maybeGetIntoVersioned(mctx, &jail, BoxAuditJailDbKey(mctx)) 1107 if err != nil || !found { 1108 return nil, err 1109 } 1110 return &jail, nil 1111 } 1112 1113 func (a *BoxAuditor) maybeGetIntoVersioned(mctx libkb.MetaContext, v boxAuditVersioned, dbKey libkb.DbKey) (found bool, err error) { 1114 defer mctx.Trace("maybeGetIntoVersioned", &err)() 1115 found, err = mctx.G().LocalDb.GetInto(v, dbKey) 1116 if err != nil { 1117 mctx.Warning("Failed to unmarshal from db for key %+v: %s", dbKey, err) 1118 // Ignoring corruption; pretend it doesn't exist 1119 return false, nil 1120 } 1121 if !found { 1122 return false, nil 1123 } 1124 if v.getVersion() != a.Version { 1125 mctx.Debug("Not returning outdated obj at version %d (now at version %d)", v.getVersion(), a.Version) 1126 // We do not delete the old data. 1127 return false, nil 1128 } 1129 return true, nil 1130 } 1131 1132 func putLogToDisk(mctx libkb.MetaContext, log *BoxAuditLog, teamID keybase1.TeamID) error { 1133 return putToDisk(mctx, BoxAuditLogDbKey(mctx, teamID), log) 1134 } 1135 1136 func putQueueToDisk(mctx libkb.MetaContext, queue *BoxAuditQueue) error { 1137 return putToDisk(mctx, BoxAuditQueueDbKey(mctx), queue) 1138 } 1139 1140 func putJailToDisk(mctx libkb.MetaContext, jail *BoxAuditJail) error { 1141 return putToDisk(mctx, BoxAuditJailDbKey(mctx), jail) 1142 } 1143 1144 func putToDisk(mctx libkb.MetaContext, dbKey libkb.DbKey, i interface{}) error { 1145 return mctx.G().LocalDb.PutObj(dbKey, nil, i) 1146 } 1147 1148 func KnownTeamIDs(mctx libkb.MetaContext) (teamIDs []keybase1.TeamID, err error) { 1149 defer mctx.Trace("KnownTeamID", &err)() 1150 db := mctx.G().LocalDb 1151 if db == nil { 1152 return nil, fmt.Errorf("nil db") 1153 } 1154 dbKeySet, err := db.KeysWithPrefixes([]byte(libkb.PrefixString(libkb.DBSlowTeamsAlias)), []byte(libkb.PrefixString(libkb.DBFTLStorage))) 1155 if err != nil { 1156 return nil, err 1157 } 1158 teamIDs, err = keySetToTeamIDs(dbKeySet) 1159 if err != nil { 1160 return nil, err 1161 } 1162 return teamIDs, nil 1163 } 1164 1165 func randomKnownTeamID(mctx libkb.MetaContext) (teamID *keybase1.TeamID, err error) { 1166 knownTeamIDs, err := KnownTeamIDs(mctx) 1167 if err != nil { 1168 return nil, err 1169 } 1170 N := len(knownTeamIDs) 1171 if N == 0 { 1172 return nil, nil 1173 } 1174 idx, err := rand.Int(rand.Reader, big.NewInt(int64(N))) // [0, n) 1175 if err != nil { 1176 return nil, err 1177 } 1178 return &knownTeamIDs[idx.Int64()], nil 1179 } 1180 1181 func (a *BoxAuditor) getDelayedSlotForTeam(teamID keybase1.TeamID) bool { 1182 a.delayedSlotsMutex.Lock() 1183 defer a.delayedSlotsMutex.Unlock() 1184 found := a.delayedSlots[teamID] 1185 if !found { 1186 a.delayedSlots[teamID] = true 1187 } 1188 return !found 1189 } 1190 1191 func (a *BoxAuditor) clearDelayedSlotForTeam(teamID keybase1.TeamID) { 1192 a.delayedSlotsMutex.Lock() 1193 defer a.delayedSlotsMutex.Unlock() 1194 delete(a.delayedSlots, teamID) 1195 } 1196 1197 func (a *BoxAuditor) MaybeScheduleDelayedBoxAuditTeam(mctx libkb.MetaContext, teamID keybase1.TeamID) { 1198 mctx, shouldSkip := shouldSkipBasedOnRecursion(mctx) 1199 if shouldSkip { 1200 mctx.Debug("no re-scheduling a delayed box audit since we're calling recursively based on context") 1201 return 1202 } 1203 go a.scheduleDelayedBoxAuditTeam(mctx, teamID) 1204 } 1205 1206 func (a *BoxAuditor) scheduleDelayedBoxAuditTeam(mctx libkb.MetaContext, teamID keybase1.TeamID) { 1207 defer mctx.Trace(fmt.Sprintf("BoxAuditor#ScheduleDelayedBoxAuditTeam(%s)", teamID), nil)() 1208 1209 if !a.getDelayedSlotForTeam(teamID) { 1210 mctx.Debug("not scheduling delayed audit, since one is already in progress") 1211 return 1212 } 1213 1214 if mctx.G().Env.GetRunMode() == libkb.ProductionRunMode { 1215 // We don't fire this immediately since likely everyone else on the team is going to try the same thing; 1216 // So randomly backoff and maybe someone is going to win, and we won't all race to fix it. 1217 base := libkb.TeamBackoffBeforeAuditOnNeedRotate 1218 dur := libkb.RandomJitter(base) 1219 mctx.Debug("Sleeping %s random jitter before auditing the team", dur) 1220 mctx.G().Clock().Sleep(dur) 1221 } 1222 1223 _, err := a.BoxAuditTeam(mctx, teamID) 1224 if err != nil { 1225 mctx.Info("Box audit of team failed with error; we will continue to retry: %s", err) 1226 } 1227 }