github.com/keybase/client/go@v0.0.0-20240309051027-028f7c731f8b/teams/box_audit.go (about)

     1  package teams
     2  
     3  import (
     4  	"bytes"
     5  	"crypto/rand"
     6  	"errors"
     7  	"fmt"
     8  	"math/big"
     9  	"sync"
    10  	"time"
    11  
    12  	lru "github.com/hashicorp/golang-lru"
    13  	"github.com/keybase/client/go/libkb"
    14  	storage "github.com/keybase/client/go/teams/storage"
    15  	"golang.org/x/net/context"
    16  
    17  	"github.com/keybase/client/go/protocol/keybase1"
    18  )
    19  
    20  func ShouldRunBoxAudit(mctx libkb.MetaContext) bool {
    21  	if !mctx.G().ActiveDevice.Valid() {
    22  		mctx.Debug("ShouldRunBoxAudit: not logged in")
    23  		return false
    24  	}
    25  
    26  	if mctx.G().IsMobileAppType() {
    27  		netState := mctx.G().MobileNetState.State()
    28  		if netState.IsLimited() {
    29  			mctx.Debug("ShouldRunBoxAudit: skipping box audit, network state: %v", netState)
    30  			return false
    31  		}
    32  		appState, stateMtime := mctx.G().MobileAppState.StateAndMtime()
    33  		mctx.Debug("ShouldRunBoxAudit: mobileAppState=%+v, stateMtime=%+v", appState, stateMtime)
    34  		if stateMtime == nil || appState != keybase1.MobileAppState_FOREGROUND || time.Since(*stateMtime) < 3*time.Minute {
    35  			mctx.Debug("ShouldRunBoxAudit: mobile and backgrounded")
    36  			return false
    37  		}
    38  	}
    39  
    40  	return true
    41  }
    42  
// CurrentBoxAuditVersion is the version NewBoxAuditor gives to new auditors.
const CurrentBoxAuditVersion boxAuditVersion = 6
// JailLRUSize is the capacity of the in-memory LRU that caches jail lookups.
const JailLRUSize = 100
// BoxAuditIDLen is the length in bytes of a randomly generated BoxAuditID.
const BoxAuditIDLen = 16
// MaxBoxAuditRetryAttempts is the number of attempts within a single audit at
// which the audit is treated as fatally failed.
const MaxBoxAuditRetryAttempts = 6
// BoxAuditTag is the log tag attached to contexts used by the box auditor.
const BoxAuditTag = "BOXAUD"
// MaxBoxAuditQueueSize caps the retry queue; the oldest items are dropped
// first when it overflows.
const MaxBoxAuditQueueSize = 100
// MaxBoxAuditLogSize caps the number of audits kept in a team's log; the
// oldest audits are dropped first when it overflows.
const MaxBoxAuditLogSize = 10
    50  
// contextKey is a distinct string type for the context value keys defined in
// this file.
type contextKey string

// SkipBoxAuditCheckContextKey is the context key under which a boolean marker
// is stored; shouldSkipBasedOnRecursion and initMctx set it to true.
const SkipBoxAuditCheckContextKey contextKey = "skip-box-audit-check"
    54  
    55  type ClientBoxAuditError struct {
    56  	inner error
    57  }
    58  
    59  func (e ClientBoxAuditError) Error() string {
    60  	return fmt.Sprintf("audit failed due to client-side issue; will be retried later: %s", e.inner)
    61  }
    62  
    63  type NonfatalBoxAuditError struct {
    64  	inner error
    65  }
    66  
    67  func (e NonfatalBoxAuditError) Error() string {
    68  	return fmt.Sprintf("audit failed; will be retried later: %s.", e.inner)
    69  }
    70  
    71  type FatalBoxAuditError struct {
    72  	inner error
    73  }
    74  
    75  func (e FatalBoxAuditError) Error() string {
    76  	return fmt.Sprintf("audit failed fatally; will not be retried until requested: %s", e.inner)
    77  }
    78  
    79  func shouldSkipBasedOnRecursion(mctx libkb.MetaContext) (libkb.MetaContext, bool) {
    80  	shouldSkip, ok := mctx.Ctx().Value(SkipBoxAuditCheckContextKey).(bool)
    81  	if ok && shouldSkip {
    82  		return mctx, true
    83  	}
    84  	mctx = mctx.WithCtx(context.WithValue(mctx.Ctx(), SkipBoxAuditCheckContextKey, true))
    85  	return mctx, false
    86  }
    87  
    88  func VerifyBoxAudit(mctx libkb.MetaContext, teamID keybase1.TeamID) (newMctx libkb.MetaContext, shouldReload bool) {
    89  	mctx, shouldSkip := shouldSkipBasedOnRecursion(mctx)
    90  	if shouldSkip {
    91  		return mctx, false
    92  	}
    93  
    94  	didReaudit, err := mctx.G().GetTeamBoxAuditor().AssertUnjailedOrReaudit(mctx, teamID)
    95  	if err != nil {
    96  		mctx.G().NotifyRouter.HandleBoxAuditError(mctx.Ctx(), err.Error())
    97  		return mctx, true
    98  	}
    99  	return mctx, didReaudit
   100  }
   101  
// BoxAuditor ensures all of a team's secret boxes are encrypted for the right
// people, and that the server has not neglected to notify a team to rotate
// their keys in the event of a user revoking a device or resetting their
// account. Security depends on the security of the Merkle tree so we know the
// current status of all the team's members.  BoxAuditor operations are
// thread-safe and can be run concurrently for many teams.  Security also
// relies on team members and the Keybase server not colluding together to sign
// box summary hashes into the sigchain that don't match what was actually
// encrypted (which is somewhat trivial, since members can leak the secret if
// they want regardless of server cooperation).
type BoxAuditor struct {
	// Version is stamped onto the logs, queues and jails this auditor creates.
	Version boxAuditVersion

	// Singleflight lock on team ID.
	locktab *libkb.LockTable

	// jailMutex and queueMutex are not per-team locks, since they are
	// collections of multiple team IDs.  Two audits of two teams can happen at
	// the same time, but they cannot access the jail or the retry queue at the
	// same time.
	jailMutex  sync.Mutex
	queueMutex sync.Mutex

	// The box audit jail has an LRU for performance, we need a mutex so we
	// don't use a partially initialized jailLRU.
	jailLRUMutex sync.Mutex
	jailLRU      *lru.Cache

	// delayedSlots is keyed by team ID; a team's entry is cleared at the start
	// of each locked audit. NOTE(review): presumably tracks teams with a
	// pending delayed audit (see MaybeScheduleDelayedBoxAuditTeam) — confirm.
	delayedSlotsMutex sync.Mutex
	delayedSlots      map[keybase1.TeamID]bool
}

// Compile-time check that *BoxAuditor implements libkb.TeamBoxAuditor.
var _ libkb.TeamBoxAuditor = &BoxAuditor{}
   135  
   136  func (a *BoxAuditor) resetJailLRU() {
   137  	a.jailLRUMutex.Lock()
   138  	defer a.jailLRUMutex.Unlock()
   139  
   140  	jailLRU, err := lru.New(JailLRUSize)
   141  	// lru.New only returns an error on a negative size, so it's safe to panic
   142  	// on an error.
   143  	if err != nil {
   144  		panic(err)
   145  	}
   146  
   147  	a.jailLRU = jailLRU
   148  }
   149  
   150  func (a *BoxAuditor) getJailLRU() *lru.Cache {
   151  	a.jailLRUMutex.Lock()
   152  	defer a.jailLRUMutex.Unlock()
   153  	return a.jailLRU
   154  }
   155  
   156  func (a *BoxAuditor) OnLogout(mctx libkb.MetaContext) error {
   157  	a.resetJailLRU()
   158  	return nil
   159  }
   160  
   161  func (a *BoxAuditor) OnDbNuke(mctx libkb.MetaContext) error {
   162  	a.jailLRU.Purge()
   163  	return nil
   164  }
   165  
   166  func NewBoxAuditor(g *libkb.GlobalContext) *BoxAuditor {
   167  	return newBoxAuditorWithVersion(g, CurrentBoxAuditVersion)
   168  }
   169  
   170  func newBoxAuditorWithVersion(g *libkb.GlobalContext, version boxAuditVersion) *BoxAuditor {
   171  	a := &BoxAuditor{
   172  		Version:      version,
   173  		locktab:      libkb.NewLockTable(),
   174  		delayedSlots: make(map[keybase1.TeamID]bool),
   175  	}
   176  	a.resetJailLRU()
   177  	return a
   178  }
   179  
   180  func NewBoxAuditorAndInstall(g *libkb.GlobalContext) {
   181  	if g.GetEnv().GetDisableTeamBoxAuditor() {
   182  		g.Log.CWarningf(context.TODO(), "Box auditor disabled: using dummy auditor")
   183  		g.SetTeamBoxAuditor(DummyBoxAuditor{})
   184  	} else {
   185  		a := NewBoxAuditor(g)
   186  		g.SetTeamBoxAuditor(a)
   187  		g.AddLogoutHook(a, "boxAuditor")
   188  		g.AddDbNukeHook(a, "boxAuditor")
   189  	}
   190  }
   191  
   192  func (a *BoxAuditor) initMctx(mctx libkb.MetaContext) libkb.MetaContext {
   193  	mctx = mctx.WithLogTag(BoxAuditTag)
   194  	mctx = mctx.WithCtx(context.WithValue(mctx.Ctx(), SkipBoxAuditCheckContextKey, true))
   195  	return mctx
   196  }
   197  
   198  // BoxAuditTeam performs one attempt of a BoxAudit. If one is in progress for
   199  // the teamid, make a new attempt. If exceeded max tries or hit a malicious
   200  // error, return a fatal error.  Otherwise, make a new audit and fill it with
   201  // one attempt. If the attempt failed nonfatally, enqueue it in the retry
   202  // queue. If it failed fatally, add it to the jail. If it failed for reasons
   203  // that are purely client-side, like a disk write error, we retry it as well
   204  // but distinguish it from a failure the server could have possibly maliciously
   205  // caused.
   206  func (a *BoxAuditor) BoxAuditTeam(mctx libkb.MetaContext, teamID keybase1.TeamID) (attempt *keybase1.BoxAuditAttempt, err error) {
   207  	mctx = a.initMctx(mctx)
   208  	if !ShouldRunBoxAudit(mctx) {
   209  		mctx.Debug("Box auditor feature flagged off or not logged in; not auditing...")
   210  		return nil, nil
   211  	}
   212  	defer mctx.Trace(fmt.Sprintf("BoxAuditTeam(%s)", teamID), &err)()
   213  	defer mctx.PerfTrace(fmt.Sprintf("BoxAuditTeam(%s)", teamID), &err)()
   214  	start := time.Now()
   215  	defer func() {
   216  		var message string
   217  		if err == nil {
   218  			message = fmt.Sprintf("Audited boxes for team %s", teamID)
   219  		} else {
   220  			message = fmt.Sprintf("Failed to box audit %s", teamID)
   221  		}
   222  		mctx.G().RuntimeStats.PushPerfEvent(keybase1.PerfEvent{
   223  			EventType: keybase1.PerfEventType_TEAMBOXAUDIT,
   224  			Message:   message,
   225  			Ctime:     keybase1.ToTime(start),
   226  		})
   227  	}()
   228  
   229  	lock := a.locktab.AcquireOnName(mctx.Ctx(), mctx.G(), teamID.String())
   230  	defer lock.Release(mctx.Ctx())
   231  	return a.boxAuditTeamLocked(mctx, teamID)
   232  }
   233  
   234  func (a *BoxAuditor) boxAuditTeamLocked(mctx libkb.MetaContext, teamID keybase1.TeamID) (attemptPtr *keybase1.BoxAuditAttempt, err error) {
   235  	defer mctx.Trace(fmt.Sprintf("boxAuditTeamLocked(%s)", teamID), &err)()
   236  
   237  	a.clearDelayedSlotForTeam(teamID)
   238  
   239  	log, err := a.maybeGetLog(mctx, teamID)
   240  	if err != nil {
   241  		return nil, ClientBoxAuditError{err}
   242  	}
   243  	if log == nil {
   244  		log = NewBoxAuditLog(a.Version)
   245  	}
   246  
   247  	isRetry := log.InProgress
   248  	// First, attempt the audit.
   249  	attempt := a.attemptLocked(mctx, teamID, false /* rotateBeforeAudit */, false /* justRotated */)
   250  	// In the case where it was a retry on a failed audit, *and* the audit that
   251  	// just happened failed, try to rotate the team before auditing again. This
   252  	// is so we don't unnecessarily rotate if the previous failure was due to a
   253  	// network error. If the network is still down, this rotate will fail.
   254  	if isRetry && !attempt.Result.IsOK() && !mctx.G().TestOptions.NoAutorotateOnBoxAuditRetry {
   255  		attempt = a.attemptLocked(mctx, teamID, true /* rotateBeforeAudit */, false /* justRotated */)
   256  	}
   257  
   258  	var id BoxAuditID
   259  	if isRetry {
   260  		// If there's already an inprogress Audit (i.e., previous failure and
   261  		// we're doing a retry), rotate and do a new attempt in the same audit
   262  		mctx.Debug("Retrying failed box audit")
   263  		lastAudit := log.Last()
   264  		id = lastAudit.ID
   265  		newAudit := BoxAudit{
   266  			ID:       lastAudit.ID,
   267  			Attempts: append(lastAudit.Attempts, attempt),
   268  		}
   269  		log.Audits[len(log.Audits)-1] = newAudit
   270  	} else {
   271  		// If the last audit was completed, start a new audit.
   272  		mctx.Debug("Starting new box audit")
   273  		id, err = NewBoxAuditID()
   274  		if err != nil {
   275  			return &attempt, ClientBoxAuditError{err}
   276  		}
   277  		audit := BoxAudit{
   278  			ID:       id,
   279  			Attempts: []keybase1.BoxAuditAttempt{attempt},
   280  		}
   281  		log.Audits = append(log.Audits, audit)
   282  	}
   283  	if len(log.Audits) > MaxBoxAuditLogSize {
   284  		mctx.Debug("Truncating box audit log")
   285  		log.Audits = log.Audits[len(log.Audits)-MaxBoxAuditLogSize:]
   286  	}
   287  
   288  	isOK := attempt.Result.IsOK()
   289  	retryAttemptsExhausted := len(log.Last().Attempts) >= MaxBoxAuditRetryAttempts
   290  	isFatal := attempt.Result == keybase1.BoxAuditAttemptResult_FAILURE_MALICIOUS_SERVER || retryAttemptsExhausted
   291  	mctx.Debug("Box audit diagnosis: isOK=%t, retryAttemptsExhausted=%t, isFatal=%t", isOK, retryAttemptsExhausted, isFatal)
   292  
   293  	// NOTE An audit that has failed fatally will *not* be automatically
   294  	// retried, but it is still considered InProgress because it is not in a
   295  	// successful state, and more attempts will append to the currently failed
   296  	// audit, instead of starting a new one.
   297  	log.InProgress = !isOK
   298  
   299  	err = putLogToDisk(mctx, log, teamID)
   300  	if err != nil {
   301  		return &attempt, ClientBoxAuditError{err}
   302  	}
   303  
   304  	switch {
   305  	case isOK:
   306  		mctx.Debug("Box audit successful")
   307  		_, err = a.clearRetryQueueOf(mctx, teamID)
   308  		if err != nil {
   309  			return &attempt, ClientBoxAuditError{err}
   310  		}
   311  		err = a.unjail(mctx, teamID)
   312  		if err != nil {
   313  			return &attempt, ClientBoxAuditError{err}
   314  		}
   315  		return &attempt, nil
   316  	case isFatal:
   317  		mctx.Debug("Box audit failed fatally")
   318  		_, err = a.clearRetryQueueOf(mctx, teamID)
   319  		if err != nil {
   320  			return &attempt, ClientBoxAuditError{err}
   321  		}
   322  		err = a.jail(mctx, teamID)
   323  		if err != nil {
   324  			return &attempt, ClientBoxAuditError{err}
   325  		}
   326  		return &attempt, FatalBoxAuditError{errors.New(*attempt.Error)}
   327  	default: // retryable error
   328  		mctx.Debug("Box audit failed nonfatally; retryable")
   329  		err := a.pushRetryQueue(mctx, teamID, id)
   330  		if err != nil {
   331  			return &attempt, ClientBoxAuditError{err}
   332  		}
   333  		return &attempt, NonfatalBoxAuditError{errors.New(*attempt.Error)}
   334  	}
   335  }
   336  
   337  func (a *BoxAuditor) AssertUnjailedOrReaudit(mctx libkb.MetaContext, teamID keybase1.TeamID) (didReaudit bool, err error) {
   338  	mctx = a.initMctx(mctx)
   339  	defer mctx.Trace("AssertUnjailedOrReaudit", &err)()
   340  
   341  	if !ShouldRunBoxAudit(mctx) {
   342  		mctx.Debug("Box auditor feature flagged off or not logged in; not AssertUnjailedOrReauditing...")
   343  		return false, nil
   344  	}
   345  
   346  	inJail, err := a.IsInJail(mctx, teamID)
   347  	if err != nil {
   348  		return false, fmt.Errorf("failed to check box audit jail during team load: %s", err)
   349  	}
   350  	if !inJail {
   351  		return false, nil
   352  	}
   353  
   354  	mctx.Debug("team in jail; retrying box audit")
   355  	maxRetries := 3
   356  	var errs []error
   357  	for i := 0; i <= maxRetries; i++ {
   358  		_, err = a.BoxAuditTeam(mctx, teamID)
   359  		if err != nil {
   360  			mctx.Debug("AssertUnjailedOrReaudit: box audit try #%d failed...", i+1)
   361  			errs = append(errs, err)
   362  		} else {
   363  			return true, nil
   364  		}
   365  	}
   366  	return false, fmt.Errorf("failed to successfully reaudit team %s in box audit jail after %d retries: %s", teamID, maxRetries, libkb.CombineErrors(errs...))
   367  }
   368  
   369  // RetryNextBoxAudit selects a teamID from the box audit retry queue and performs another box audit.
   370  func (a *BoxAuditor) RetryNextBoxAudit(mctx libkb.MetaContext) (attempt *keybase1.BoxAuditAttempt, err error) {
   371  	mctx = a.initMctx(mctx)
   372  	defer mctx.Trace("RetryNextBoxAudit", &err)()
   373  
   374  	if !ShouldRunBoxAudit(mctx) {
   375  		mctx.Debug("Box auditor feature flagged off or not logged in; not RetryNextBoxAuditing...")
   376  		return nil, nil
   377  	}
   378  
   379  	queueItem, err := a.popRetryQueue(mctx)
   380  	if err != nil {
   381  		return nil, err
   382  	}
   383  	if queueItem == nil {
   384  		mctx.Debug("Retry queue empty, succeeding vacuously")
   385  		return nil, nil
   386  	}
   387  	return a.BoxAuditTeam(mctx, queueItem.TeamID)
   388  }
   389  
   390  // BoxAuditRandomTeam selects a random known team from the slow team or FTL
   391  // cache, including implicit teams, and audits it. It may succeed trivially
   392  // because, for example, user is a reader and so does not have permissions to
   393  // do a box audit or the team is an open team.
   394  func (a *BoxAuditor) BoxAuditRandomTeam(mctx libkb.MetaContext) (attempt *keybase1.BoxAuditAttempt, err error) {
   395  	mctx = a.initMctx(mctx)
   396  	defer mctx.Trace("BoxAuditRandomTeam", &err)()
   397  
   398  	if !ShouldRunBoxAudit(mctx) {
   399  		mctx.Debug("Box auditor feature flagged off or not logged in; not BoxAuditRandomTeaming...")
   400  		return nil, nil
   401  	}
   402  
   403  	teamID, err := randomKnownTeamID(mctx)
   404  	if err != nil {
   405  		return nil, err
   406  	}
   407  	if teamID == nil {
   408  		mctx.Debug("No known teams to audit in db, skipping box audit")
   409  		return nil, nil
   410  	}
   411  
   412  	return a.BoxAuditTeam(mctx, *teamID)
   413  }
   414  
   415  func (a *BoxAuditor) IsInJail(mctx libkb.MetaContext, teamID keybase1.TeamID) (inJail bool, err error) {
   416  	mctx = a.initMctx(mctx)
   417  
   418  	if !ShouldRunBoxAudit(mctx) {
   419  		mctx.Debug("Box auditor feature flagged off or not logged in; not IsInJailing...")
   420  		return false, nil
   421  	}
   422  
   423  	val, ok := a.getJailLRU().Get(teamID)
   424  	if ok {
   425  		valBool, ok := val.(bool)
   426  		if ok {
   427  			return valBool, nil
   428  		}
   429  		mctx.Error("Bad boolean type assertion in IsInJail LRU for %s", teamID)
   430  		// Fall through to disk if the LRU is corrupted
   431  	}
   432  
   433  	jail, err := a.maybeGetJail(mctx)
   434  	if err != nil {
   435  		return false, err
   436  	}
   437  	if jail == nil {
   438  		a.getJailLRU().Add(teamID, false)
   439  		return false, nil
   440  	}
   441  	_, ok = jail.TeamIDs[teamID]
   442  	a.getJailLRU().Add(teamID, ok)
   443  	return ok, nil
   444  }
   445  
   446  // Attempt tries one time to box audit a Team ID. It does not store any
   447  // persistent state to disk related to the box audit, but it may, e.g., refresh
   448  // the team cache.
   449  func (a *BoxAuditor) Attempt(mctx libkb.MetaContext, teamID keybase1.TeamID, rotateBeforeAudit bool) (attempt keybase1.BoxAuditAttempt) {
   450  	mctx = a.initMctx(mctx)
   451  	var err error
   452  	defer mctx.Trace(fmt.Sprintf("Attempt(%s, %t)", teamID, rotateBeforeAudit), &err)()
   453  	defer func() {
   454  		if attempt.Error != nil {
   455  			err = errors.New(*attempt.Error)
   456  		}
   457  	}()
   458  	lock := a.locktab.AcquireOnName(mctx.Ctx(), mctx.G(), teamID.String())
   459  	defer lock.Release(mctx.Ctx())
   460  	return a.attemptLocked(mctx, teamID, rotateBeforeAudit, false)
   461  }
   462  
   463  func (a *BoxAuditor) attemptLocked(mctx libkb.MetaContext, teamID keybase1.TeamID, rotateBeforeAudit bool, justRotated bool) (attempt keybase1.BoxAuditAttempt) {
   464  	var err error
   465  	defer mctx.Trace(fmt.Sprintf("attemptLocked(%s, %t)", teamID, rotateBeforeAudit), &err)()
   466  	defer func() {
   467  		if attempt.Error != nil {
   468  			err = errors.New(*attempt.Error)
   469  		}
   470  	}()
   471  
   472  	attempt = keybase1.BoxAuditAttempt{
   473  		Result: keybase1.BoxAuditAttemptResult_FAILURE_RETRYABLE,
   474  		Ctime:  keybase1.ToUnixTime(time.Now()),
   475  	}
   476  
   477  	getErrorMessage := func(err error) *string {
   478  		msg := err.Error()
   479  		return &msg
   480  	}
   481  
   482  	team, err := loadTeamForBoxAudit(mctx, teamID)
   483  	if err != nil {
   484  		attempt.Error = getErrorMessage(fmt.Errorf("failed to load team: %s", err))
   485  		return attempt
   486  	}
   487  
   488  	g := team.Generation()
   489  	attempt.Generation = &g
   490  
   491  	shouldAudit, shouldAuditResult, err := a.shouldAudit(mctx, *team)
   492  	if err != nil {
   493  		attempt.Error = getErrorMessage(err)
   494  		return attempt
   495  	}
   496  
   497  	rotateType := keybase1.RotationType_VISIBLE
   498  	if team.Hidden != nil && team.Hidden.NeedRotate {
   499  		shouldAudit = true
   500  		rotateBeforeAudit = true
   501  		rotateType = keybase1.RotationType_CLKR
   502  		mctx.Debug("Hidden load said need rotate; so we're attempt a CLKR-style rotation, then will reaudit")
   503  	}
   504  
   505  	if !shouldAudit {
   506  		mctx.Debug("Not attempting box audit attempt; %s", attempt.Result)
   507  		attempt.Result = *shouldAuditResult
   508  		return attempt
   509  	}
   510  
   511  	if rotateBeforeAudit {
   512  		mctx.Debug("rotating before audit")
   513  		err := team.Rotate(mctx.Ctx(), rotateType)
   514  		if err != nil {
   515  			mctx.Warning("failed to rotate team before audit: %s", err)
   516  			// continue despite having failed to rotate
   517  		} else {
   518  			// reload the team
   519  			return a.attemptLocked(mctx, teamID, false, true)
   520  		}
   521  	}
   522  
   523  	if justRotated {
   524  		attempt.Rotated = true
   525  	}
   526  
   527  	pastSummary, err := calculateChainSummary(mctx, team)
   528  	if err != nil {
   529  		attempt.Error = getErrorMessage(err)
   530  		return attempt
   531  	}
   532  
   533  	currentSummary, err := calculateCurrentSummary(mctx, team)
   534  	if err != nil {
   535  		attempt.Error = getErrorMessage(err)
   536  		return attempt
   537  	}
   538  
   539  	if !bytes.Equal(currentSummary.Hash(), pastSummary.Hash()) {
   540  		// No need to make these Warnings, because these could happen when a
   541  		// user has just changed their PUK and CLKR hasn't fired yet, or if the
   542  		// team doesn't have any box summary hashes in the sigchain yet, etc.
   543  		mctx.Debug("ERROR: Box audit summary mismatch")
   544  		mctx.Debug("Past summary: %+v", pastSummary.table)
   545  		mctx.Debug("Current summary: %+v", currentSummary.table)
   546  
   547  		attempt.Error = getErrorMessage(fmt.Errorf("box summary hash mismatch"))
   548  		return attempt
   549  	}
   550  
   551  	attempt.Result = keybase1.BoxAuditAttemptResult_OK_VERIFIED
   552  	return attempt
   553  }
   554  
   555  func (a *BoxAuditor) clearRetryQueueOf(mctx libkb.MetaContext, teamID keybase1.TeamID) (queue *BoxAuditQueue, err error) {
   556  	defer mctx.Trace(fmt.Sprintf("clearRetryQueueOf(%s)", teamID), &err)()
   557  	a.queueMutex.Lock()
   558  	defer a.queueMutex.Unlock()
   559  	return a.clearRetryQueueOfLocked(mctx, teamID)
   560  }
   561  
   562  func (a *BoxAuditor) clearRetryQueueOfLocked(mctx libkb.MetaContext, teamID keybase1.TeamID) (queue *BoxAuditQueue, err error) {
   563  	defer mctx.Trace(fmt.Sprintf("clearRetryQueueOfLocked(%s)", teamID), &err)()
   564  	queue, err = a.maybeGetQueue(mctx)
   565  	if err != nil {
   566  		return nil, err
   567  	}
   568  	if queue == nil {
   569  		return nil, nil
   570  	}
   571  	newItems := make([]BoxAuditQueueItem, 0, len(queue.Items))
   572  	for _, item := range queue.Items {
   573  		if item.TeamID != teamID {
   574  			newItems = append(newItems, item)
   575  		}
   576  	}
   577  	queue.Items = newItems
   578  	err = putQueueToDisk(mctx, queue)
   579  	if err != nil {
   580  		return nil, err
   581  	}
   582  	return queue, nil
   583  }
   584  
   585  func (a *BoxAuditor) popRetryQueue(mctx libkb.MetaContext) (itemPtr *BoxAuditQueueItem, err error) {
   586  	defer mctx.Trace("popRetryQueue", &err)()
   587  	a.queueMutex.Lock()
   588  	defer a.queueMutex.Unlock()
   589  
   590  	queue, err := a.maybeGetQueue(mctx)
   591  	if err != nil {
   592  		return nil, err
   593  	}
   594  	if queue == nil {
   595  		return nil, nil
   596  	}
   597  	if len(queue.Items) == 0 {
   598  		return nil, nil
   599  	}
   600  	item, newItems := queue.Items[0], queue.Items[1:]
   601  	queue.Items = newItems
   602  	err = putQueueToDisk(mctx, queue)
   603  	if err != nil {
   604  		return nil, err
   605  	}
   606  	return &item, nil
   607  }
   608  
   609  func (a *BoxAuditor) pushRetryQueue(mctx libkb.MetaContext, teamID keybase1.TeamID, auditID BoxAuditID) (err error) {
   610  	defer mctx.Trace(fmt.Sprintf("pushRetryQueue(%s, %x)", teamID, auditID), &err)()
   611  	a.queueMutex.Lock()
   612  	defer a.queueMutex.Unlock()
   613  
   614  	queue, err := a.maybeGetQueue(mctx)
   615  	if err != nil {
   616  		return err
   617  	}
   618  	if queue != nil {
   619  		// If already in the queue, remove it so we can bump it to the top.
   620  		queue, err = a.clearRetryQueueOfLocked(mctx, teamID)
   621  		if err != nil {
   622  			return err
   623  		}
   624  	} else {
   625  		queue = NewBoxAuditQueue(a.Version)
   626  	}
   627  
   628  	queue.Items = append(queue.Items, BoxAuditQueueItem{Ctime: time.Now(), TeamID: teamID, BoxAuditID: auditID})
   629  	if len(queue.Items) > MaxBoxAuditQueueSize {
   630  		// Truncate oldest first.
   631  		mctx.Debug("Truncating box audit queue")
   632  		queue.Items = queue.Items[len(queue.Items)-MaxBoxAuditQueueSize:]
   633  	}
   634  	err = putQueueToDisk(mctx, queue)
   635  	if err != nil {
   636  		return err
   637  	}
   638  	return nil
   639  }
   640  
   641  func (a *BoxAuditor) jail(mctx libkb.MetaContext, teamID keybase1.TeamID) (err error) {
   642  	defer mctx.Trace(fmt.Sprintf("jail(%s)", teamID), &err)()
   643  	a.jailMutex.Lock()
   644  	defer a.jailMutex.Unlock()
   645  
   646  	a.getJailLRU().Add(teamID, true)
   647  
   648  	jail, err := a.maybeGetJail(mctx)
   649  	if err != nil {
   650  		return err
   651  	}
   652  	if jail == nil {
   653  		jail = NewBoxAuditJail(a.Version)
   654  	}
   655  	jail.TeamIDs[teamID] = true
   656  	err = putJailToDisk(mctx, jail)
   657  	if err != nil {
   658  		return err
   659  	}
   660  	return nil
   661  }
   662  
   663  func (a *BoxAuditor) unjail(mctx libkb.MetaContext, teamID keybase1.TeamID) (err error) {
   664  	defer mctx.Trace(fmt.Sprintf("unjail(%s)", teamID), &err)()
   665  	a.jailMutex.Lock()
   666  	defer a.jailMutex.Unlock()
   667  
   668  	a.getJailLRU().Add(teamID, false)
   669  
   670  	jail, err := a.maybeGetJail(mctx)
   671  	if err != nil {
   672  		return err
   673  	}
   674  	if jail == nil {
   675  		jail = NewBoxAuditJail(a.Version)
   676  	}
   677  	delete(jail.TeamIDs, teamID)
   678  	err = putJailToDisk(mctx, jail)
   679  	if err != nil {
   680  		return err
   681  	}
   682  	return nil
   683  }
   684  
   685  type DummyBoxAuditor struct{}
   686  
   687  var _ libkb.TeamBoxAuditor = &DummyBoxAuditor{}
   688  
   689  const dummyMsg = "Box auditor disabled; aborting successfully"
   690  
   691  func (d DummyBoxAuditor) AssertUnjailedOrReaudit(mctx libkb.MetaContext, _ keybase1.TeamID) (bool, error) {
   692  	mctx.Debug(dummyMsg)
   693  	return false, nil
   694  }
   695  func (d DummyBoxAuditor) IsInJail(mctx libkb.MetaContext, _ keybase1.TeamID) (bool, error) {
   696  	mctx.Debug(dummyMsg)
   697  	return false, nil
   698  }
   699  func (d DummyBoxAuditor) RetryNextBoxAudit(mctx libkb.MetaContext) (*keybase1.BoxAuditAttempt, error) {
   700  	mctx.Debug(dummyMsg)
   701  	return nil, nil
   702  }
   703  func (d DummyBoxAuditor) BoxAuditRandomTeam(mctx libkb.MetaContext) (*keybase1.BoxAuditAttempt, error) {
   704  	mctx.Debug(dummyMsg)
   705  	return nil, nil
   706  }
   707  func (d DummyBoxAuditor) BoxAuditTeam(mctx libkb.MetaContext, _ keybase1.TeamID) (*keybase1.BoxAuditAttempt, error) {
   708  	mctx.Debug(dummyMsg)
   709  	return nil, nil
   710  }
   711  func (d DummyBoxAuditor) Attempt(mctx libkb.MetaContext, _ keybase1.TeamID, _ bool) keybase1.BoxAuditAttempt {
   712  	mctx.Debug(dummyMsg)
   713  	return keybase1.BoxAuditAttempt{
   714  		Result: keybase1.BoxAuditAttemptResult_OK_NOT_ATTEMPTED_ROLE,
   715  		Ctime:  keybase1.ToUnixTime(time.Now()),
   716  	}
   717  }
   718  func (d DummyBoxAuditor) MaybeScheduleDelayedBoxAuditTeam(mctx libkb.MetaContext, teamID keybase1.TeamID) {
   719  }
   720  
// BoxAuditLog is a log of audits for a particular team.
type BoxAuditLog struct {
	// The last entry of Audits is the latest one.
	Audits []BoxAudit

	// Whether the last Audit is still in progress; false initially. It is set
	// whenever the most recent attempt was not OK, including fatal failures.
	InProgress bool

	// Version is the box audit version this log was created with.
	Version boxAuditVersion
}

// Compile-time check that *BoxAuditLog implements boxAuditVersioned.
var _ boxAuditVersioned = &BoxAuditLog{}
   733  
   734  func (l *BoxAuditLog) getVersion() boxAuditVersion {
   735  	return l.Version
   736  }
   737  
   738  func NewBoxAuditLog(version boxAuditVersion) *BoxAuditLog {
   739  	return &BoxAuditLog{
   740  		Audits:     nil,
   741  		InProgress: false,
   742  		Version:    version,
   743  	}
   744  }
   745  
   746  func (l *BoxAuditLog) Last() *BoxAudit {
   747  	if l == nil || len(l.Audits) == 0 {
   748  		return nil
   749  	}
   750  	return &l.Audits[len(l.Audits)-1]
   751  }
   752  
// BoxAudit is a single sequence of audit attempts for a single team.
type BoxAudit struct {
	// ID identifies this audit; retries of the same audit keep the same ID.
	ID       BoxAuditID
	// Attempts holds the attempts made so far, oldest first.
	Attempts []keybase1.BoxAuditAttempt
}
   758  
// BoxAuditID is the identifier of a BoxAudit; NewBoxAuditID generates one as
// BoxAuditIDLen random bytes.
type BoxAuditID = []byte
   760  
   761  func NewBoxAuditID() (BoxAuditID, error) {
   762  	idBytes := make([]byte, BoxAuditIDLen)
   763  	_, err := rand.Read(idBytes)
   764  	if err != nil {
   765  		return nil, err
   766  	}
   767  	return idBytes, nil
   768  }
   769  
// BoxAuditQueue holds a list of teams that need to be reaudited, because they
// previously failed an audit. When a team does pass an audit, it is removed
// from the queue.
type BoxAuditQueue struct {
	// Items is ordered oldest first: new entries are appended to the end and
	// retries are popped from the front.
	Items   []BoxAuditQueueItem
	// Version is the box audit version this queue was created with.
	Version boxAuditVersion
}

// Compile-time check that *BoxAuditQueue implements boxAuditVersioned.
var _ boxAuditVersioned = &BoxAuditQueue{}
   779  
   780  func (q *BoxAuditQueue) getVersion() boxAuditVersion {
   781  	return q.Version
   782  }
   783  
   784  func NewBoxAuditQueue(version boxAuditVersion) *BoxAuditQueue {
   785  	return &BoxAuditQueue{
   786  		Items:   nil,
   787  		Version: version,
   788  	}
   789  }
   790  
// BoxAuditQueueItem is one entry in the retry queue.
type BoxAuditQueueItem struct {
	// Ctime is when the item was pushed onto the queue.
	Ctime      time.Time
	// TeamID is the team to be reaudited.
	TeamID     keybase1.TeamID
	// BoxAuditID is the ID of the audit whose attempt failed.
	BoxAuditID BoxAuditID
}
   796  
// BoxAuditJail contains TeamIDs that have hit a fatal audit failure or the max
// number of retryable audit failures. Teams in jail will not be reaudited
// unless they are explicitly loaded by the fast or slow team loaders.
type BoxAuditJail struct {
	// TeamIDs is used as a set: a team is jailed if and only if its ID is a
	// key in the map. Unjailing deletes the key.
	TeamIDs map[keybase1.TeamID]bool
	// Version is the box audit version this jail was created with.
	Version boxAuditVersion
}

// Compile-time check that *BoxAuditJail implements boxAuditVersioned.
var _ boxAuditVersioned = &BoxAuditJail{}
   806  
   807  func (j *BoxAuditJail) getVersion() boxAuditVersion {
   808  	return j.Version
   809  }
   810  
   811  func NewBoxAuditJail(version boxAuditVersion) *BoxAuditJail {
   812  	return &BoxAuditJail{
   813  		TeamIDs: make(map[keybase1.TeamID]bool),
   814  		Version: version,
   815  	}
   816  }
   817  
   818  func (a *BoxAuditor) shouldAudit(mctx libkb.MetaContext, team Team) (bool, *keybase1.BoxAuditAttemptResult, error) {
   819  	if team.IsOpen() {
   820  		res := keybase1.BoxAuditAttemptResult_OK_NOT_ATTEMPTED_OPENTEAM
   821  		return false, &res, nil
   822  	}
   823  	role, err := team.MemberRole(mctx.Ctx(), mctx.CurrentUserVersion())
   824  	if err != nil {
   825  		return false, nil, err
   826  	}
   827  	if !role.IsOrAbove(keybase1.TeamRole_WRITER) {
   828  		res := keybase1.BoxAuditAttemptResult_OK_NOT_ATTEMPTED_ROLE
   829  		return false, &res, nil
   830  	}
   831  
   832  	return true, nil, nil
   833  }
   834  
   835  // loadTeamForBoxAudit loads a team once, but if the client
   836  // has not yet stored BoxSummaryHashes (due to being an old client)
   837  // it does a force full reload so it is populated.
   838  func loadTeamForBoxAudit(mctx libkb.MetaContext, teamID keybase1.TeamID) (*Team, error) {
   839  	return loadTeamForBoxAuditInner(mctx, teamID, false)
   840  }
   841  
   842  func loadTeamForBoxAuditInner(mctx libkb.MetaContext, teamID keybase1.TeamID, force bool) (team *Team, err error) {
   843  	defer mctx.Trace("loadTeamForBoxAuditInner", &err)()
   844  	arg := keybase1.LoadTeamArg{
   845  		ID:              teamID,
   846  		ForceRepoll:     true,
   847  		Public:          teamID.IsPublic(),
   848  		ForceFullReload: force,
   849  	}
   850  
   851  	team, err = Load(mctx.Ctx(), mctx.G(), arg)
   852  	if err != nil {
   853  		return nil, err
   854  	}
   855  	if team == nil {
   856  		return nil, fmt.Errorf("got nil team from loader")
   857  	}
   858  
   859  	// If the team sigchain state was constructed with support for the
   860  	// merkleRoots map, the map will be non-nil but empty. It will only be nil
   861  	// if the state is cached from a team load before box summary hash support.
   862  	if team.chain().GetMerkleRoots() == nil {
   863  		if force {
   864  			return nil, fmt.Errorf("failed to get a non-nil merkleRoots map after full reload")
   865  		}
   866  		mctx.Debug("retrying loadTeamForBoxAuditInner with force reload")
   867  		return loadTeamForBoxAuditInner(mctx, teamID, true)
   868  	}
   869  	return team, nil
   870  }
   871  
   872  type merkleSeqno = keybase1.Seqno
   873  type merkleCheckpoints map[keybase1.UserVersion]merkleSeqno
   874  
   875  func getPUKCheckpoints(mctx libkb.MetaContext, teamchain *TeamSigChainState, checkpoint merkleSeqno, fastforwardToAddition bool) (merkleCheckpoints, error) {
   876  	mctx.Debug("getting PUK checkpoints at merkle seqno %v; fastforwardToAddition=%t", checkpoint, fastforwardToAddition)
   877  	checkpoints := make(merkleCheckpoints)
   878  	// We only check users currently in the team, which means we skip over any
   879  	// users, who for example, have reset (and possibly have added a new PUK),
   880  	// but have not been let back into the team by an admin.
   881  	for uv, logPoints := range teamchain.inner.UserLog {
   882  		logPoint := logPoints[len(logPoints)-1]
   883  		if logPoint.Role == keybase1.TeamRole_NONE {
   884  			continue
   885  		}
   886  		latest := checkpoint
   887  		if fastforwardToAddition {
   888  			latest = max(latest, logPoint.SigMeta.PrevMerkleRootSigned.Seqno)
   889  		}
   890  		checkpoints[uv] = latest
   891  	}
   892  	return checkpoints, nil
   893  }
   894  
   895  func max(a, b merkleSeqno) merkleSeqno {
   896  	if a > b {
   897  		return a
   898  	}
   899  	return b
   900  }
   901  
   902  // calculateCurrentSummary calculates the box summary as it is currently for
   903  // all users in the team (i.e., if the team were rotated right now, what the summary
   904  // should be afterwards).
   905  func calculateCurrentSummary(mctx libkb.MetaContext, team *Team) (summary *boxPublicSummary, err error) {
   906  	defer mctx.Trace(fmt.Sprintf("calculateCurrentSummary(%s)", team.ID), &err)()
   907  
   908  	currentRoot, err := mctx.G().GetMerkleClient().FetchRootFromServer(mctx, 5*time.Minute)
   909  	if err != nil {
   910  		return nil, err
   911  	}
   912  	if currentRoot.Seqno() == nil {
   913  		return nil, fmt.Errorf("got nil current merkle root")
   914  	}
   915  	return calculateSummaryAtMerkleSeqno(mctx, team, *currentRoot.Seqno(), false)
   916  }
   917  
   918  // calculateChainSummary calculates the box summary as implied by the team sigchain and previous links,
   919  // using the last known rotation and subsequent additions as markers for PUK freshness.
   920  func calculateChainSummary(mctx libkb.MetaContext, team *Team) (summary *boxPublicSummary, err error) {
   921  	defer mctx.Trace(fmt.Sprintf("calculateChainSummary(%s)", team.ID), &err)()
   922  
   923  	merkleSeqno, err := merkleSeqnoAtGenerationInception(mctx, team.chain())
   924  	if err != nil {
   925  		return nil, err
   926  	}
   927  
   928  	if !mctx.G().GetMerkleClient().CanExamineHistoricalRoot(mctx, merkleSeqno) {
   929  		return nil, fmt.Errorf("last rotation was at %d, before the most recent checkpoint, so forcing a rotation", merkleSeqno)
   930  	}
   931  
   932  	return calculateSummaryAtMerkleSeqno(mctx, team, merkleSeqno, true)
   933  }
   934  
   935  // calculateSummaryAtMerkleSeqno calculates the summary at the given merkleSeqno.
   936  func calculateSummaryAtMerkleSeqno(mctx libkb.MetaContext, team *Team, merkleSeqno merkleSeqno, fastforwardToAddition bool) (summary *boxPublicSummary, err error) {
   937  	defer mctx.Trace(fmt.Sprintf("calculateSummaryAtMerkleSeqno(%s, %v)", team.ID, merkleSeqno), &err)()
   938  
   939  	checkpoints, err := getPUKCheckpoints(mctx, team.chain(), merkleSeqno, fastforwardToAddition)
   940  	if err != nil {
   941  		return nil, err
   942  	}
   943  
   944  	if team.IsSubteam() {
   945  		mctx.Debug("calculating summary for subteam; loading implicit admins")
   946  		err = mctx.G().GetTeamLoader().MapTeamAncestors(mctx.Ctx(), func(t keybase1.TeamSigChainState, _ keybase1.TeamName) error {
   947  			chain := TeamSigChainState{inner: t}
   948  			ancestorCheckpoints, err := getPUKCheckpoints(mctx, &chain, merkleSeqno, fastforwardToAddition)
   949  			if err != nil {
   950  				return err
   951  			}
   952  			for ancestorUV, ancestorMerkleSeqno := range ancestorCheckpoints {
   953  				role, err := chain.GetUserRole(ancestorUV)
   954  				if err != nil {
   955  					return err
   956  				}
   957  				// Only add implicit admins to summary
   958  				if !role.IsOrAbove(keybase1.TeamRole_ADMIN) {
   959  					continue
   960  				}
   961  				// If the implicit admin is a descendant, only update the
   962  				// checkpoints if the implicit admin was added to the team at a
   963  				// later checkpoint (and so would have boxes refreshed at a
   964  				// newer merkle seqno).
   965  				currentCheckpoint, ok := checkpoints[ancestorUV]
   966  				if ok && ancestorMerkleSeqno <= currentCheckpoint {
   967  					continue
   968  				}
   969  				checkpoints[ancestorUV] = ancestorMerkleSeqno
   970  			}
   971  			return nil
   972  		}, team.ID, "team box audit", func(t keybase1.TeamSigChainState) bool {
   973  			chain := TeamSigChainState{inner: t}
   974  			return chain.GetMerkleRoots() != nil
   975  		})
   976  		if err != nil {
   977  			return nil, err
   978  		}
   979  	}
   980  
   981  	var uvs []keybase1.UserVersion
   982  	for uv := range checkpoints {
   983  		uvs = append(uvs, uv)
   984  	}
   985  
   986  	// for UPAK Batcher API
   987  	getArg := func(idx int) *libkb.LoadUserArg {
   988  		if idx >= len(uvs) {
   989  			return nil
   990  		}
   991  		arg := libkb.NewLoadUserByUIDArg(mctx.Ctx(), mctx.G(), uvs[idx].Uid).WithPublicKeyOptional().WithForcePoll(true)
   992  		return &arg
   993  	}
   994  
   995  	d := make(map[keybase1.UserVersion]keybase1.PerUserKey)
   996  	// for UPAK Batcher API
   997  	processResult := func(idx int, upak *keybase1.UserPlusKeysV2AllIncarnations) error {
   998  		uv := uvs[idx]
   999  		checkpoint := checkpoints[uv]
  1000  
  1001  		if upak == nil {
  1002  			return fmt.Errorf("got nil upak for uv %+v", uv)
  1003  		}
  1004  
  1005  		var perUserKey *keybase1.PerUserKey
  1006  		leaf, _, err := mctx.G().GetMerkleClient().LookupLeafAtSeqno(mctx, keybase1.UserOrTeamID(uv.Uid), checkpoint)
  1007  		if err != nil {
  1008  			return fmt.Errorf("failed to lookup leaf at merkle seqno %v for %v", checkpoint, uv)
  1009  		}
  1010  		if leaf == nil {
  1011  			return fmt.Errorf("got nil leaf at seqno %v for %v", checkpoint, uv)
  1012  		}
  1013  		if leaf.Public == nil {
  1014  			return fmt.Errorf("got nil leaf public at seqno %v for %v (leaf=%+v)", checkpoint, uv, leaf)
  1015  		}
  1016  		sigchainSeqno := leaf.Public.Seqno
  1017  
  1018  		perUserKey, err = upak.GetPerUserKeyAtSeqno(uv, sigchainSeqno, checkpoint)
  1019  		if err != nil {
  1020  			return fmt.Errorf("failed to find peruserkey at seqno %v for upak", sigchainSeqno)
  1021  		}
  1022  		if perUserKey == nil {
  1023  			// Not a critical error, since reset users have no current per user keys, for example.
  1024  			mctx.Debug("%s has no per-user-key at seqno %v", uv, sigchainSeqno)
  1025  			return nil
  1026  		}
  1027  
  1028  		d[uv] = *perUserKey
  1029  		return nil
  1030  	}
  1031  
  1032  	err = mctx.G().GetUPAKLoader().Batcher(mctx.Ctx(), getArg, processResult, 0)
  1033  	if err != nil {
  1034  		return nil, err
  1035  	}
  1036  
  1037  	return newBoxPublicSummary(d)
  1038  }
  1039  
  1040  // merkleSeqnoAtGenerationInception assumes TeamSigChainState.MerkleRoots is populated
  1041  func merkleSeqnoAtGenerationInception(mctx libkb.MetaContext, teamchain *TeamSigChainState) (merkleSeqno keybase1.Seqno, err error) {
  1042  	_, mr, err := teamchain.getLatestPerTeamKeyWithMerkleSeqno(mctx)
  1043  	if err != nil {
  1044  		return 0, err
  1045  	}
  1046  	return mr.Seqno, nil
  1047  }
  1048  
  1049  // TeamIDKeys takes a set of DBKeys that must all be tid:-style DBKeys and
  1050  // extracts the team id from them. Because teams can be loaded via both FTL and
  1051  // the slow team loader, we use a set so we don't return duplicate teamIDs.
  1052  func keySetToTeamIDs(dbKeySet libkb.DBKeySet) ([]keybase1.TeamID, error) {
  1053  	seen := make(map[keybase1.TeamID]bool)
  1054  	teamIDs := make([]keybase1.TeamID, 0, len(dbKeySet))
  1055  	for dbKey := range dbKeySet {
  1056  		teamID, err := storage.ParseTeamIDDBKey(dbKey.Key)
  1057  		if err != nil {
  1058  			return nil, err
  1059  		}
  1060  		_, ok := seen[teamID]
  1061  		if !ok {
  1062  			teamIDs = append(teamIDs, teamID)
  1063  			seen[teamID] = true
  1064  		}
  1065  	}
  1066  	return teamIDs, nil
  1067  }
  1068  
  1069  type boxAuditVersion int
  1070  type boxAuditVersioned interface {
  1071  	getVersion() boxAuditVersion
  1072  }
  1073  
  1074  func BoxAuditLogDbKey(mctx libkb.MetaContext, teamID keybase1.TeamID) libkb.DbKey {
  1075  	return libkb.DbKey{Typ: libkb.DBBoxAuditor, Key: string(teamID) + mctx.ActiveDevice().UID().String()}
  1076  }
  1077  
  1078  func BoxAuditQueueDbKey(mctx libkb.MetaContext) libkb.DbKey {
  1079  	return libkb.DbKey{Typ: libkb.DBBoxAuditorPermanent, Key: "queue" + mctx.ActiveDevice().UID().String()}
  1080  }
  1081  
  1082  func BoxAuditJailDbKey(mctx libkb.MetaContext) libkb.DbKey {
  1083  	return libkb.DbKey{Typ: libkb.DBBoxAuditorPermanent, Key: "jail" + mctx.ActiveDevice().UID().String()}
  1084  }
  1085  
  1086  func (a *BoxAuditor) maybeGetLog(mctx libkb.MetaContext, teamID keybase1.TeamID) (*BoxAuditLog, error) {
  1087  	var log BoxAuditLog
  1088  	found, err := a.maybeGetIntoVersioned(mctx, &log, BoxAuditLogDbKey(mctx, teamID))
  1089  	if err != nil || !found {
  1090  		return nil, err
  1091  	}
  1092  	return &log, nil
  1093  }
  1094  
  1095  func (a *BoxAuditor) maybeGetQueue(mctx libkb.MetaContext) (*BoxAuditQueue, error) {
  1096  	var queue BoxAuditQueue
  1097  	found, err := a.maybeGetIntoVersioned(mctx, &queue, BoxAuditQueueDbKey(mctx))
  1098  	if err != nil || !found {
  1099  		return nil, err
  1100  	}
  1101  	return &queue, nil
  1102  }
  1103  
  1104  func (a *BoxAuditor) maybeGetJail(mctx libkb.MetaContext) (*BoxAuditJail, error) {
  1105  	var jail BoxAuditJail
  1106  	found, err := a.maybeGetIntoVersioned(mctx, &jail, BoxAuditJailDbKey(mctx))
  1107  	if err != nil || !found {
  1108  		return nil, err
  1109  	}
  1110  	return &jail, nil
  1111  }
  1112  
  1113  func (a *BoxAuditor) maybeGetIntoVersioned(mctx libkb.MetaContext, v boxAuditVersioned, dbKey libkb.DbKey) (found bool, err error) {
  1114  	defer mctx.Trace("maybeGetIntoVersioned", &err)()
  1115  	found, err = mctx.G().LocalDb.GetInto(v, dbKey)
  1116  	if err != nil {
  1117  		mctx.Warning("Failed to unmarshal from db for key %+v: %s", dbKey, err)
  1118  		// Ignoring corruption; pretend it doesn't exist
  1119  		return false, nil
  1120  	}
  1121  	if !found {
  1122  		return false, nil
  1123  	}
  1124  	if v.getVersion() != a.Version {
  1125  		mctx.Debug("Not returning outdated obj at version %d (now at version %d)", v.getVersion(), a.Version)
  1126  		// We do not delete the old data.
  1127  		return false, nil
  1128  	}
  1129  	return true, nil
  1130  }
  1131  
  1132  func putLogToDisk(mctx libkb.MetaContext, log *BoxAuditLog, teamID keybase1.TeamID) error {
  1133  	return putToDisk(mctx, BoxAuditLogDbKey(mctx, teamID), log)
  1134  }
  1135  
  1136  func putQueueToDisk(mctx libkb.MetaContext, queue *BoxAuditQueue) error {
  1137  	return putToDisk(mctx, BoxAuditQueueDbKey(mctx), queue)
  1138  }
  1139  
  1140  func putJailToDisk(mctx libkb.MetaContext, jail *BoxAuditJail) error {
  1141  	return putToDisk(mctx, BoxAuditJailDbKey(mctx), jail)
  1142  }
  1143  
  1144  func putToDisk(mctx libkb.MetaContext, dbKey libkb.DbKey, i interface{}) error {
  1145  	return mctx.G().LocalDb.PutObj(dbKey, nil, i)
  1146  }
  1147  
  1148  func KnownTeamIDs(mctx libkb.MetaContext) (teamIDs []keybase1.TeamID, err error) {
  1149  	defer mctx.Trace("KnownTeamID", &err)()
  1150  	db := mctx.G().LocalDb
  1151  	if db == nil {
  1152  		return nil, fmt.Errorf("nil db")
  1153  	}
  1154  	dbKeySet, err := db.KeysWithPrefixes([]byte(libkb.PrefixString(libkb.DBSlowTeamsAlias)), []byte(libkb.PrefixString(libkb.DBFTLStorage)))
  1155  	if err != nil {
  1156  		return nil, err
  1157  	}
  1158  	teamIDs, err = keySetToTeamIDs(dbKeySet)
  1159  	if err != nil {
  1160  		return nil, err
  1161  	}
  1162  	return teamIDs, nil
  1163  }
  1164  
  1165  func randomKnownTeamID(mctx libkb.MetaContext) (teamID *keybase1.TeamID, err error) {
  1166  	knownTeamIDs, err := KnownTeamIDs(mctx)
  1167  	if err != nil {
  1168  		return nil, err
  1169  	}
  1170  	N := len(knownTeamIDs)
  1171  	if N == 0 {
  1172  		return nil, nil
  1173  	}
  1174  	idx, err := rand.Int(rand.Reader, big.NewInt(int64(N))) // [0, n)
  1175  	if err != nil {
  1176  		return nil, err
  1177  	}
  1178  	return &knownTeamIDs[idx.Int64()], nil
  1179  }
  1180  
  1181  func (a *BoxAuditor) getDelayedSlotForTeam(teamID keybase1.TeamID) bool {
  1182  	a.delayedSlotsMutex.Lock()
  1183  	defer a.delayedSlotsMutex.Unlock()
  1184  	found := a.delayedSlots[teamID]
  1185  	if !found {
  1186  		a.delayedSlots[teamID] = true
  1187  	}
  1188  	return !found
  1189  }
  1190  
  1191  func (a *BoxAuditor) clearDelayedSlotForTeam(teamID keybase1.TeamID) {
  1192  	a.delayedSlotsMutex.Lock()
  1193  	defer a.delayedSlotsMutex.Unlock()
  1194  	delete(a.delayedSlots, teamID)
  1195  }
  1196  
  1197  func (a *BoxAuditor) MaybeScheduleDelayedBoxAuditTeam(mctx libkb.MetaContext, teamID keybase1.TeamID) {
  1198  	mctx, shouldSkip := shouldSkipBasedOnRecursion(mctx)
  1199  	if shouldSkip {
  1200  		mctx.Debug("no re-scheduling a delayed box audit since we're calling recursively based on context")
  1201  		return
  1202  	}
  1203  	go a.scheduleDelayedBoxAuditTeam(mctx, teamID)
  1204  }
  1205  
  1206  func (a *BoxAuditor) scheduleDelayedBoxAuditTeam(mctx libkb.MetaContext, teamID keybase1.TeamID) {
  1207  	defer mctx.Trace(fmt.Sprintf("BoxAuditor#ScheduleDelayedBoxAuditTeam(%s)", teamID), nil)()
  1208  
  1209  	if !a.getDelayedSlotForTeam(teamID) {
  1210  		mctx.Debug("not scheduling delayed audit, since one is already in progress")
  1211  		return
  1212  	}
  1213  
  1214  	if mctx.G().Env.GetRunMode() == libkb.ProductionRunMode {
  1215  		// We don't fire this immediately since likely everyone else on the team is going to try the same thing;
  1216  		// So randomly backoff and maybe someone is going to win, and we won't all race to fix it.
  1217  		base := libkb.TeamBackoffBeforeAuditOnNeedRotate
  1218  		dur := libkb.RandomJitter(base)
  1219  		mctx.Debug("Sleeping %s random jitter before auditing the team", dur)
  1220  		mctx.G().Clock().Sleep(dur)
  1221  	}
  1222  
  1223  	_, err := a.BoxAuditTeam(mctx, teamID)
  1224  	if err != nil {
  1225  		mctx.Info("Box audit of team failed with error; we will continue to retry: %s", err)
  1226  	}
  1227  }