github.com/keybase/client/go@v0.0.0-20240309051027-028f7c731f8b/kbfs/kbfsedits/tlf_history.go (about)

     1  // Copyright 2018 Keybase Inc. All rights reserved.
     2  // Use of this source code is governed by a BSD
     3  // license that can be found in the LICENSE file.
     4  
     5  package kbfsedits
     6  
     7  import (
     8  	"container/heap"
     9  	"encoding/json"
    10  	"fmt"
    11  	"path"
    12  	"sort"
    13  	"strings"
    14  	"sync"
    15  
    16  	"github.com/keybase/client/go/kbfs/kbfsmd"
    17  )
    18  
const (
	// The max number of edits needed for each writer.
	maxEditsPerWriter = 10
	// The max number of deletes needed for each writer.
	maxDeletesPerWriter  = 10
	maxWritersPerHistory = 10 // cap on the number of writers kept in a recomputed history
)
    26  
// writerNotifications groups the notifications attributed to a single
// writer.  In `TlfHistory.byWriter`, `notifications` holds every
// understood message that was added for the writer.  In a recomputed
// history, `notifications` holds the create/modify edits that were
// kept and `deletes` holds the deletes that were kept.
//
// Some code in this file builds values with positional composite
// literals, so the field order here matters.
type writerNotifications struct {
	writerName    string
	notifications notificationsByRevision
	deletes       notificationsByRevision
}
    32  
// writersByRevision sorts sets of per-writer notifications in reverse
// order by the revision of the latest notification for each writer.
// Together with its Len/Less/Swap/Push/Pop methods it is used in this
// file both with `sort.Sort` and with `container/heap`.
type writersByRevision []*writerNotifications
    36  
    37  func (wbr writersByRevision) Len() int {
    38  	return len(wbr)
    39  }
    40  
    41  func (wbr writersByRevision) Less(i, j int) bool {
    42  	// Some revisions come before no revisions.
    43  	iHasZero := len(wbr[i].notifications) == 0
    44  	jHasZero := len(wbr[j].notifications) == 0
    45  	if jHasZero {
    46  		if iHasZero {
    47  			// If neither has any notifications, sort by the latest
    48  			// delete.
    49  			iHasZeroDeletes := len(wbr[i].deletes) == 0
    50  			jHasZeroDeletes := len(wbr[j].deletes) == 0
    51  			if jHasZeroDeletes {
    52  				return iHasZeroDeletes
    53  			} else if iHasZeroDeletes {
    54  				return false
    55  			}
    56  
    57  			// Reverse sort, so latest deletes come first.
    58  			return wbr[i].deletes[0].Revision > wbr[j].deletes[0].Revision
    59  		}
    60  		return false
    61  	} else if iHasZero {
    62  		return false
    63  	}
    64  
    65  	// Reverse sort, so latest revisions come first.
    66  	return wbr[i].notifications[0].Revision > wbr[j].notifications[0].Revision
    67  }
    68  
    69  func (wbr writersByRevision) Swap(i, j int) {
    70  	wbr[i], wbr[j] = wbr[j], wbr[i]
    71  }
    72  
    73  func (wbr *writersByRevision) Push(x interface{}) {
    74  	wn := x.(*writerNotifications)
    75  	*wbr = append(*wbr, wn)
    76  }
    77  
    78  func (wbr *writersByRevision) Pop() interface{} {
    79  	// The item to remove is the last item; heap has already swapped
    80  	// it to the end.
    81  	old := *wbr
    82  	n := len(old)
    83  	item := old[n-1]
    84  	*wbr = old[0 : n-1]
    85  	return item
    86  }
    87  
// TlfHistory maintains a history of the last N file edits from each
// writer in the TLF.
//
// There will be two users of a TlfHistory instance:
//
//   - One user (likely something outside of the kbfsedits package,
//     e.g. libkbfs.folderBranchOps) will read notifications from the
//     corresponding TLF and add them to this history.  After adding a
//     batch or several batches of messages, it should call
//     `Recompute()`, and if some writers need more, earlier revisions,
//     it should fetch more notifications for the indicated writer and
//     repeat.
//
//   - The other user (within the kbfsedits package) will collate the
//     histories from multiple TlfHistory instances together using
//     `getHistory()` from each one.  It may also construct pretty
//     versions of individual edit histories for a particular TLF.
//
// Fields:
//
//   - `lock` is taken by every method in this file that reads or
//     writes the other fields.
//   - `byWriter` maps a writer name to the notifications added for
//     that writer via `AddNotifications`.
//   - `unflushed` holds the notifications added via
//     `AddUnflushedNotifications`; it is nil when there are none.
//   - `computed` is true while `cachedHistory` reflects the current
//     contents; mutating methods reset it to false.
//   - `cachedHistory` and `cachedLoggedInUser` are the result of the
//     last recompute and the user it was computed for.
type TlfHistory struct {
	lock               sync.RWMutex
	byWriter           map[string]*writerNotifications
	unflushed          *writerNotifications
	computed           bool
	cachedHistory      writersByRevision
	cachedLoggedInUser string
}
   113  
   114  // NewTlfHistory constructs a new TlfHistory instance.
   115  func NewTlfHistory() *TlfHistory {
   116  	return &TlfHistory{
   117  		byWriter: make(map[string]*writerNotifications),
   118  	}
   119  }
   120  
   121  // AddNotifications takes in a set of messages in this TLF by
   122  // `writer`, and adds them to the history.  Once done adding messages,
   123  // the caller should call `Recompute` to find out if more messages
   124  // should be added for any particular writer.  It returns the maximum
   125  // known revision including an update from this writer.
   126  func (th *TlfHistory) AddNotifications(
   127  	writerName string, messages []string) (maxRev kbfsmd.Revision, err error) {
   128  	newEdits := make(notificationsByRevision, 0, len(messages))
   129  
   130  	// Unmarshal and sort the new messages.
   131  	for _, msg := range messages {
   132  		var revList []NotificationMessage
   133  		err := json.Unmarshal([]byte(msg), &revList)
   134  		if err != nil {
   135  			// The messages might be from a new version we don't
   136  			// understand, so swallow the error.
   137  			continue
   138  		}
   139  
   140  		for j := len(revList) - 1; j >= 0; j-- {
   141  			revMsg := revList[j]
   142  			if revMsg.Version != NotificationV2 {
   143  				// Ignore messages that are too new for us to understand.
   144  				continue
   145  			}
   146  			revMsg.numWithinRevision = j
   147  			newEdits = append(newEdits, revMsg)
   148  		}
   149  	}
   150  
   151  	th.lock.Lock()
   152  	defer th.lock.Unlock()
   153  	wn, existed := th.byWriter[writerName]
   154  	if !existed {
   155  		wn = &writerNotifications{writerName, nil, nil}
   156  	}
   157  	oldLen := len(wn.notifications)
   158  	newEdits = append(newEdits, wn.notifications...)
   159  	sort.Sort(newEdits)
   160  	if len(newEdits) > 0 {
   161  		maxRev = newEdits[0].Revision
   162  	}
   163  
   164  	wn.notifications = newEdits.uniquify()
   165  	if len(wn.notifications) == oldLen {
   166  		// No new messages.
   167  		return maxRev, nil
   168  	}
   169  	if !existed {
   170  		th.byWriter[writerName] = wn
   171  	}
   172  	// Invalidate the cached results.
   173  	th.computed = false
   174  	th.cachedLoggedInUser = ""
   175  	return maxRev, nil
   176  }
   177  
   178  // AddUnflushedNotifications adds notifications to a special
   179  // "unflushed" list that takes precedences over the regular
   180  // notifications with revision numbers equal or greater to the minimum
   181  // unflushed revision.
   182  func (th *TlfHistory) AddUnflushedNotifications(
   183  	loggedInUser string, msgs []NotificationMessage) {
   184  	th.lock.Lock()
   185  	defer th.lock.Unlock()
   186  	if th.unflushed == nil {
   187  		th.unflushed = &writerNotifications{loggedInUser, nil, nil}
   188  	}
   189  	if th.unflushed.writerName != loggedInUser {
   190  		panic(fmt.Sprintf("Logged-in user %s doesn't match unflushed user %s",
   191  			loggedInUser, th.unflushed.writerName))
   192  	}
   193  	newEdits := append(
   194  		notificationsByRevision(msgs), th.unflushed.notifications...)
   195  	sort.Sort(newEdits)
   196  	th.unflushed.notifications = newEdits.uniquify()
   197  	// Invalidate the cached results.
   198  	th.computed = false
   199  	th.cachedLoggedInUser = ""
   200  }
   201  
   202  // FlushRevision clears all any unflushed notifications with a
   203  // revision equal or less than `rev`.
   204  func (th *TlfHistory) FlushRevision(rev kbfsmd.Revision) {
   205  	th.lock.Lock()
   206  	defer th.lock.Unlock()
   207  	if th.unflushed == nil {
   208  		return
   209  	}
   210  	lastToKeep := len(th.unflushed.notifications) - 1
   211  	for ; lastToKeep >= 0; lastToKeep-- {
   212  		if th.unflushed.notifications[lastToKeep].Revision > rev {
   213  			break
   214  		}
   215  	}
   216  	if lastToKeep < len(th.unflushed.notifications)-1 {
   217  		th.unflushed.notifications = th.unflushed.notifications[:lastToKeep+1]
   218  		// Invalidate the cached results.
   219  		th.computed = false
   220  		th.cachedLoggedInUser = ""
   221  	}
   222  }
   223  
   224  // ClearAllUnflushed clears all unflushed notifications.
   225  func (th *TlfHistory) ClearAllUnflushed() {
   226  	th.lock.Lock()
   227  	defer th.lock.Unlock()
   228  	if th.unflushed != nil {
   229  		// Invalidate the cached results.
   230  		th.computed = false
   231  		th.cachedLoggedInUser = ""
   232  	}
   233  	th.unflushed = nil
   234  }
   235  
// fileEvent records what later happened to a path, as seen while
// walking notifications from newest to oldest: `delete` is set when
// the path was deleted (or overwritten by a rename), `newName` is set
// when the path was renamed to another name, and `rev` is the revision
// of the notification that produced the event.
type fileEvent struct {
	delete  bool
	newName string
	rev     kbfsmd.Revision
}
   241  
// recomputer carries the working state of a single history
// recomputation.  `byWriter` accumulates the edits and deletes kept for
// each writer, and `minUnflushed` is the revision at or above which
// flushed notifications are ignored (kbfsmd.RevisionUninitialized
// disables that check).
type recomputer struct {
	byWriter      map[string]*writerNotifications
	modifiedFiles map[string]map[string]bool // writer -> file -> bool
	fileEvents    map[string]fileEvent       // currentName -> ultimate fate
	numProcessed  map[string]int             // writer name -> num
	minUnflushed  kbfsmd.Revision
}
   249  
   250  func newRecomputer() *recomputer {
   251  	return &recomputer{
   252  		byWriter:      make(map[string]*writerNotifications),
   253  		modifiedFiles: make(map[string]map[string]bool),
   254  		fileEvents:    make(map[string]fileEvent),
   255  		numProcessed:  make(map[string]int),
   256  		minUnflushed:  kbfsmd.RevisionUninitialized,
   257  	}
   258  }
   259  
// filesToIgnore lists base file names that `ignoreFile` filters out of
// the history.  `ignoreFile` also treats each key as a prefix.
var filesToIgnore = map[string]bool{
	".Trashes":   true,
	".fseventsd": true,
	".DS_Store":  true,
}
   265  
   266  func ignoreFile(filename string) bool {
   267  	_, base := path.Split(filename)
   268  	if filesToIgnore[base] || strings.HasPrefix(base, "._") {
   269  		return true
   270  	}
   271  	// Treat the files to ignore as prefixes, since if they ever
   272  	// conflict they'll have the conflict suffix.
   273  	for prefix := range filesToIgnore {
   274  		if strings.HasPrefix(base, prefix) {
   275  			return true
   276  		}
   277  	}
   278  	return false
   279  }
   280  
// processNotification adds the notification to the recomputer's
// history if it is a create/modify for a file that hasn't yet been
// deleted.  If the file is renamed in a future revision, the added
// notification has the new name of the file.  processNotification
// should be called with notifications in reverse order of their
// revision number.
//
// Deletes of files are recorded in the writer's `deletes` list unless
// the file was already marked deleted or a modification with the same
// name has already been recorded.  Renames and deletes that pass the
// unflushed check are remembered in `r.fileEvents` so that older
// notifications for the same path can be renamed or dropped.
//
// It returns true if it has added enough notifications for the given
// writer, and the caller should not send any more for that writer.
func (r *recomputer) processNotification(
	writer string, notification NotificationMessage) (doTrim bool) {
	// Ignore notifications that come after any present unflushed
	// notifications, as the local client won't be able to see them.
	if r.minUnflushed != kbfsmd.RevisionUninitialized &&
		notification.Revision >= r.minUnflushed {
		return false
	}

	filename := notification.Filename
	// Count this notification for the writer; the count is used by
	// recomputeLocked when it trims a writer's stored notifications.
	r.numProcessed[writer]++

	// If the file is renamed in a future revision, rename it in the
	// notification.
	// eventFilename keeps the name exactly as it appears in this
	// notification; it is the key used for r.fileEvents below.
	eventFilename := filename
	event, hasEvent := r.fileEvents[filename]
	if hasEvent && event.newName != "" {
		notification.Filename = event.newName
		filename = event.newName
	}

	// Keep only the creates and modifies for non-deleted files,
	// but remember the renames and deletes.
	switch notification.Type {
	case NotificationCreate, NotificationModify:
		// Disregard any file that's already been deleted.
		if hasEvent && event.delete {
			return false
		}

		// We only care about files, so skip dir and sym creates.
		if notification.FileType != EntryTypeFile {
			return false
		}

		// Ignore macOS dotfiles.
		if ignoreFile(filename) {
			return false
		}

		wn, ok := r.byWriter[writer]
		if !ok {
			wn = &writerNotifications{writer, nil, nil}
			r.byWriter[writer] = wn
		}

		if len(wn.notifications) == maxEditsPerWriter {
			// We don't need any more edit notifications, but we
			// should continue looking for more deletes.
			return false
		}

		// See if any of the parent directories were renamed, checking
		// backwards until we get to the TLF name.
		// NOTE(review): the threshold of 4 slashes presumably matches
		// paths shaped like /keybase/<type>/<tlf>/... -- confirm.
		prefix := filename
		latestRenameRev := notification.Revision
		suffix := ""
		for strings.Count(prefix, "/") > 4 {
			var finalElem string
			prefix, finalElem = path.Split(prefix)
			prefix = strings.TrimSuffix(prefix, "/")
			suffix = path.Clean(path.Join(finalElem, suffix))
			event, hasEvent := r.fileEvents[prefix]
			// Ignore any rename events that happen at or before the
			// last revision we considered, to avoid weird rename
			// loops (see HOTPOT-856).
			if hasEvent && event.newName != "" && latestRenameRev < event.rev {
				prefix = event.newName
				latestRenameRev = event.rev
			}
		}
		filename = path.Clean(path.Join(prefix, suffix))
		notification.Filename = filename

		// We only need one modify message per writer per file.
		if r.modifiedFiles[writer][filename] {
			return false
		}

		wn.notifications = append(wn.notifications, notification)

		modified, ok := r.modifiedFiles[writer]
		if !ok {
			modified = make(map[string]bool)
			r.modifiedFiles[writer] = modified
		}
		modified[filename] = true

		if len(wn.notifications) == maxEditsPerWriter &&
			len(wn.deletes) == maxDeletesPerWriter {
			// We have enough edits and deletes for this user.
			return true
		}
	case NotificationRename:
		// If the file already has a final event, move that to the old
		// filename.  Otherwise, this is the final event.
		if hasEvent {
			r.fileEvents[notification.Params.OldFilename] = event
			delete(r.fileEvents, eventFilename)
		} else {
			r.fileEvents[notification.Params.OldFilename] =
				fileEvent{
					newName: eventFilename,
					rev:     notification.Revision,
				}
		}

		// If renaming a directory, check whether there are any events
		// for children of the directory, and rename them
		// accordingly. TODO: there's probably a better data structure
		// for doing this when storing events, maybe a multi-layer map
		// structured like a file system.
		// NOTE(review): this loop inserts into and deletes from the map
		// it is ranging over, the prefix match is not anchored at a
		// path separator, and ReplaceAll rewrites every occurrence of
		// the name rather than only the leading one -- confirm this is
		// acceptable for the paths seen in practice.
		if notification.FileType == EntryTypeDir {
			for f, event := range r.fileEvents {
				if strings.HasPrefix(f, eventFilename) {
					oldF := strings.ReplaceAll(
						f, eventFilename, notification.Params.OldFilename)
					r.fileEvents[oldF] = event
					delete(r.fileEvents, f)
				}
			}
		}

		// The renamed file overwrote any existing file with the new
		// name.
		r.fileEvents[eventFilename] = fileEvent{
			delete: true,
			rev:    notification.Revision,
		}
	case NotificationDelete:
		// Remember the delete so that older notifications for this
		// path are disregarded.
		r.fileEvents[eventFilename] = fileEvent{
			delete: true,
			rev:    notification.Revision,
		}

		// We only care about files, so skip dir and sym deletes.
		if notification.FileType != EntryTypeFile {
			return false
		}

		// Ignore macOS dotfiles.
		if ignoreFile(filename) {
			return false
		}

		wn, ok := r.byWriter[writer]
		if !ok {
			wn = &writerNotifications{writer, nil, nil}
			r.byWriter[writer] = wn
		}

		if len(wn.deletes) == maxDeletesPerWriter {
			// We don't need any more deletes, but we
			// should continue looking for more edit notifications.
			return false
		}

		// `event` and `hasEvent` were read before this delete was
		// stored above, so they describe the path's earlier fate.
		if hasEvent && event.delete {
			// It's already been deleted, no need to track it further.
			return false
		}

		// If there are no future modifications of this file, then
		// this delete should be included in the history.
		for _, files := range r.modifiedFiles {
			for f := range files {
				if f == eventFilename {
					return false
				}
			}
		}

		wn.deletes = append(wn.deletes, notification)

		if len(wn.notifications) == maxEditsPerWriter &&
			len(wn.deletes) == maxDeletesPerWriter {
			// We have enough edits and deletes for this user.
			return true
		}

		// TODO: limit the number (or time span) of notifications we
		// process to find the list of deleted files?  Or maybe we
		// stop processing after we hit the last GC'd revision, since
		// deleted files after that point can't be recovered anyway.
	}
	return false
}
   477  
// recomputeLocked rebuilds the history from the stored notifications
// and caches it in `th`.  The callers in this file hold `th.lock` for
// writing while calling it.
//
// It first processes the unflushed notifications (if any), then merges
// the per-writer notifications newest-first using a heap.  The
// returned `history` is sorted with `sort.Sort` and holds at most
// `maxWritersPerHistory` writers.  `writersWhoNeedMore` names the
// writers in `th.byWriter` that ended up with fewer than the maximum
// number of edits.
//
// As side effects, it may trim the stored notifications of a writer
// that has enough history, and it removes writers that fall outside
// the cap from `th.byWriter` (never the logged-in user).
//
// It panics if unflushed notifications exist for a user other than
// `loggedInUser`.
func (th *TlfHistory) recomputeLocked(loggedInUser string) (
	history writersByRevision, writersWhoNeedMore map[string]bool) {
	writersWhoNeedMore = make(map[string]bool)

	r := newRecomputer()

	// First add all of the unflushed notifications for the logged-in
	// writer.
	skipLoggedIn := false
	loggedInProcessed := 0
	if th.unflushed != nil {
		if th.unflushed.writerName != loggedInUser {
			panic(fmt.Sprintf(
				"Logged-in user %s doesn't match unflushed user %s",
				loggedInUser, th.unflushed.writerName))
		}
		for _, n := range th.unflushed.notifications {
			doTrim := r.processNotification(th.unflushed.writerName, n)
			if doTrim {
				skipLoggedIn = true
				break
			}
		}
		// The last unflushed entry is used as the minimum unflushed
		// revision; from here on processNotification ignores anything
		// at or above it.
		if ln := len(th.unflushed.notifications); ln > 0 {
			r.minUnflushed = th.unflushed.notifications[ln-1].Revision
		}
		loggedInProcessed = r.numProcessed[th.unflushed.writerName]
	}

	// Copy the writer notifications into a heap.
	var writersHeap writersByRevision
	for _, wn := range th.byWriter {
		if skipLoggedIn && wn.writerName == loggedInUser {
			// There are enough unflushed notifications already, so
			// skip the logged-in user.
			continue
		}
		wnCopy := writerNotifications{
			writerName:    wn.writerName,
			notifications: make(notificationsByRevision, len(wn.notifications)),
			deletes:       make(notificationsByRevision, len(wn.deletes)),
		}
		copy(wnCopy.notifications, wn.notifications)
		copy(wnCopy.deletes, wn.deletes)
		writersHeap = append(writersHeap, &wnCopy)
	}
	heap.Init(&writersHeap)

	// Iterate through the heap.  The writer with the next highest
	// revision will always be at index 0.  Process that writer's
	// first notification, then remove it and fix the heap so that the
	// next highest revision is at index 0.  That way events that
	// happen more recently (like deletes and renames) can be taken
	// into account when looking at older events.
	for writersHeap.Len() > 0 {
		nextWriter := writersHeap[0].writerName
		nextNotification := writersHeap[0].notifications[0]
		doTrim := r.processNotification(nextWriter, nextNotification)

		// Remove that notification, and fix the heap because this
		// writer has a different newest revision.
		if doTrim {
			// Trim all earlier revisions because they won't be needed
			// for the cached history.
			numProcessed := r.numProcessed[nextWriter]
			if loggedInUser == nextWriter {
				// Don't count the unflushed notifications, which are
				// not part of th.byWriter.
				numProcessed -= loggedInProcessed
			}
			th.byWriter[nextWriter].notifications =
				th.byWriter[nextWriter].notifications[:numProcessed]
		} else {
			writersHeap[0].notifications = writersHeap[0].notifications[1:]
		}
		if len(writersHeap[0].notifications) == 0 || doTrim {
			heap.Pop(&writersHeap)
		} else {
			heap.Fix(&writersHeap, 0)
		}
	}

	history = make(writersByRevision, 0, len(r.byWriter))
	for writerName := range th.byWriter {
		wn := r.byWriter[writerName]
		if wn != nil && (len(wn.notifications) > 0 || len(wn.deletes) > 0) {
			history = append(history, wn)
		}
		// NOTE(review): the second comparison checks
		// len(wn.notifications) against maxDeletesPerWriter rather
		// than len(wn.deletes); with both limits equal it is
		// redundant.  Confirm whether deletes were meant to be
		// considered here before changing it, since that would make
		// callers fetch more history.
		if wn == nil || len(wn.notifications) < maxEditsPerWriter ||
			len(wn.notifications) < maxDeletesPerWriter {
			writersWhoNeedMore[writerName] = true
		}
	}
	if _, ok := th.byWriter[loggedInUser]; !ok {
		// The logged-in user only has unflushed edits.
		wn := r.byWriter[loggedInUser]
		if wn != nil && (len(wn.notifications) > 0 || len(wn.deletes) > 0) {
			history = append(history, wn)
		}
	}
	sort.Sort(history)
	if len(history) > maxWritersPerHistory {
		// Garbage-collect any writers that don't appear in the history.
		loggedInIndex := -1
		for i := maxWritersPerHistory; i < len(history); i++ {
			if history[i].writerName == loggedInUser {
				// Don't purge the logged-in user.
				loggedInIndex = i
				continue
			}
			delete(th.byWriter, history[i].writerName)
			delete(writersWhoNeedMore, history[i].writerName)
		}
		if loggedInIndex > 0 {
			// Keep the logged-in user as the last entry.  Note that
			// `loggedInIndex` is guaranteed to be greater or equal to
			// `maxWritersPerHistory`, so this logic swaps in the
			// loggedIn entry (and doesn't duplicate it).
			history = append(
				history[:maxWritersPerHistory-1], history[loggedInIndex])
		} else {
			history = history[:maxWritersPerHistory]
		}
	}
	th.computed = true
	th.cachedHistory = history
	th.cachedLoggedInUser = loggedInUser
	return history, writersWhoNeedMore
}
   605  
   606  func (th *TlfHistory) getHistoryIfCached() (
   607  	cached bool, history writersByRevision, loggedInUser string) {
   608  	th.lock.RLock()
   609  	defer th.lock.RUnlock()
   610  	if th.computed {
   611  		return true, th.cachedHistory, th.cachedLoggedInUser
   612  	}
   613  	return false, nil, ""
   614  }
   615  
   616  func (th *TlfHistory) getHistory(loggedInUser string) writersByRevision {
   617  	cached, history, cachedLoggedInUser := th.getHistoryIfCached()
   618  	if cached && loggedInUser == cachedLoggedInUser {
   619  		return history
   620  	}
   621  
   622  	th.lock.Lock()
   623  	defer th.lock.Unlock()
   624  	if th.computed {
   625  		// Maybe another goroutine got the lock and recomputed the
   626  		// history since we checked above.
   627  		return th.cachedHistory
   628  	}
   629  	history, _ = th.recomputeLocked(loggedInUser)
   630  	return history
   631  }
   632  
   633  // Recompute processes (and caches) the history so that it reflects
   634  // all recently-added notifications, and returns the names of writers
   635  // which don't yet have the maximum number of edits in the history.
   636  func (th *TlfHistory) Recompute(loggedInUser string) (
   637  	writersWhoNeedMore map[string]bool) {
   638  	th.lock.Lock()
   639  	defer th.lock.Unlock()
   640  	_, writersWhoNeedMore = th.recomputeLocked(loggedInUser)
   641  	return writersWhoNeedMore
   642  }