github.com/keybase/client/go@v0.0.0-20240309051027-028f7c731f8b/kbfs/kbfsedits/tlf_history.go (about) 1 // Copyright 2018 Keybase Inc. All rights reserved. 2 // Use of this source code is governed by a BSD 3 // license that can be found in the LICENSE file. 4 5 package kbfsedits 6 7 import ( 8 "container/heap" 9 "encoding/json" 10 "fmt" 11 "path" 12 "sort" 13 "strings" 14 "sync" 15 16 "github.com/keybase/client/go/kbfs/kbfsmd" 17 ) 18 19 const ( 20 // The max number of edits needed for each writer. 21 maxEditsPerWriter = 10 22 // The max number of deletes needed for each writer. 23 maxDeletesPerWriter = 10 24 maxWritersPerHistory = 10 25 ) 26 27 type writerNotifications struct { 28 writerName string 29 notifications notificationsByRevision 30 deletes notificationsByRevision 31 } 32 33 // writersByRevision sorts sets of per-writer notifications in reverse 34 // order by the revision of the latest notification for each writer. 35 type writersByRevision []*writerNotifications 36 37 func (wbr writersByRevision) Len() int { 38 return len(wbr) 39 } 40 41 func (wbr writersByRevision) Less(i, j int) bool { 42 // Some revisions come before no revisions. 43 iHasZero := len(wbr[i].notifications) == 0 44 jHasZero := len(wbr[j].notifications) == 0 45 if jHasZero { 46 if iHasZero { 47 // If neither has any notifications, sort by the latest 48 // delete. 49 iHasZeroDeletes := len(wbr[i].deletes) == 0 50 jHasZeroDeletes := len(wbr[j].deletes) == 0 51 if jHasZeroDeletes { 52 return iHasZeroDeletes 53 } else if iHasZeroDeletes { 54 return false 55 } 56 57 // Reverse sort, so latest deletes come first. 58 return wbr[i].deletes[0].Revision > wbr[j].deletes[0].Revision 59 } 60 return false 61 } else if iHasZero { 62 return false 63 } 64 65 // Reverse sort, so latest revisions come first. 66 return wbr[i].notifications[0].Revision > wbr[j].notifications[0].Revision 67 } 68 69 func (wbr writersByRevision) Swap(i, j int) { 70 wbr[i], wbr[j] = wbr[j], wbr[i] 71 } 72 73 func (wbr *writersByRevision) Push(x interface{}) { 74 wn := x.(*writerNotifications) 75 *wbr = append(*wbr, wn) 76 } 77 78 func (wbr *writersByRevision) Pop() interface{} { 79 // The item to remove is the last item; heap has already swapped 80 // it to the end. 81 old := *wbr 82 n := len(old) 83 item := old[n-1] 84 *wbr = old[0 : n-1] 85 return item 86 } 87 88 // TlfHistory maintains a history of the last N file edits from each 89 // writer in the TLF. 90 // 91 // There will be two users of a TlfHistory instance: 92 // 93 // - One user (likely something outside of the kbfsedits package, 94 // e.g. libkbfs.folderBranchOps) will read notifications from the 95 // corresponding TLF and add them to this history. After adding a 96 // batch or several batches of messages, it should call 97 // `Recompute()`, and if some writers need more, earlier revisions, 98 // it should fetch more notifications for the indicated writer and 99 // repeat. 100 // 101 // - The other user (within the kbfsedits package) will collate the 102 // histories from multiple TlfHistory instances together using 103 // `getHistory()` from each one. It may also construct pretty 104 // versions of individual edit histories for a particular TLF. 105 type TlfHistory struct { 106 lock sync.RWMutex 107 byWriter map[string]*writerNotifications 108 unflushed *writerNotifications 109 computed bool 110 cachedHistory writersByRevision 111 cachedLoggedInUser string 112 } 113 114 // NewTlfHistory constructs a new TlfHistory instance. 115 func NewTlfHistory() *TlfHistory { 116 return &TlfHistory{ 117 byWriter: make(map[string]*writerNotifications), 118 } 119 } 120 121 // AddNotifications takes in a set of messages in this TLF by 122 // `writer`, and adds them to the history. Once done adding messages, 123 // the caller should call `Recompute` to find out if more messages 124 // should be added for any particular writer. It returns the maximum 125 // known revision including an update from this writer. 126 func (th *TlfHistory) AddNotifications( 127 writerName string, messages []string) (maxRev kbfsmd.Revision, err error) { 128 newEdits := make(notificationsByRevision, 0, len(messages)) 129 130 // Unmarshal and sort the new messages. 131 for _, msg := range messages { 132 var revList []NotificationMessage 133 err := json.Unmarshal([]byte(msg), &revList) 134 if err != nil { 135 // The messages might be from a new version we don't 136 // understand, so swallow the error. 137 continue 138 } 139 140 for j := len(revList) - 1; j >= 0; j-- { 141 revMsg := revList[j] 142 if revMsg.Version != NotificationV2 { 143 // Ignore messages that are too new for us to understand. 144 continue 145 } 146 revMsg.numWithinRevision = j 147 newEdits = append(newEdits, revMsg) 148 } 149 } 150 151 th.lock.Lock() 152 defer th.lock.Unlock() 153 wn, existed := th.byWriter[writerName] 154 if !existed { 155 wn = &writerNotifications{writerName, nil, nil} 156 } 157 oldLen := len(wn.notifications) 158 newEdits = append(newEdits, wn.notifications...) 159 sort.Sort(newEdits) 160 if len(newEdits) > 0 { 161 maxRev = newEdits[0].Revision 162 } 163 164 wn.notifications = newEdits.uniquify() 165 if len(wn.notifications) == oldLen { 166 // No new messages. 167 return maxRev, nil 168 } 169 if !existed { 170 th.byWriter[writerName] = wn 171 } 172 // Invalidate the cached results. 173 th.computed = false 174 th.cachedLoggedInUser = "" 175 return maxRev, nil 176 } 177 178 // AddUnflushedNotifications adds notifications to a special 179 // "unflushed" list that takes precedences over the regular 180 // notifications with revision numbers equal or greater to the minimum 181 // unflushed revision. 182 func (th *TlfHistory) AddUnflushedNotifications( 183 loggedInUser string, msgs []NotificationMessage) { 184 th.lock.Lock() 185 defer th.lock.Unlock() 186 if th.unflushed == nil { 187 th.unflushed = &writerNotifications{loggedInUser, nil, nil} 188 } 189 if th.unflushed.writerName != loggedInUser { 190 panic(fmt.Sprintf("Logged-in user %s doesn't match unflushed user %s", 191 loggedInUser, th.unflushed.writerName)) 192 } 193 newEdits := append( 194 notificationsByRevision(msgs), th.unflushed.notifications...) 195 sort.Sort(newEdits) 196 th.unflushed.notifications = newEdits.uniquify() 197 // Invalidate the cached results. 198 th.computed = false 199 th.cachedLoggedInUser = "" 200 } 201 202 // FlushRevision clears all any unflushed notifications with a 203 // revision equal or less than `rev`. 204 func (th *TlfHistory) FlushRevision(rev kbfsmd.Revision) { 205 th.lock.Lock() 206 defer th.lock.Unlock() 207 if th.unflushed == nil { 208 return 209 } 210 lastToKeep := len(th.unflushed.notifications) - 1 211 for ; lastToKeep >= 0; lastToKeep-- { 212 if th.unflushed.notifications[lastToKeep].Revision > rev { 213 break 214 } 215 } 216 if lastToKeep < len(th.unflushed.notifications)-1 { 217 th.unflushed.notifications = th.unflushed.notifications[:lastToKeep+1] 218 // Invalidate the cached results. 219 th.computed = false 220 th.cachedLoggedInUser = "" 221 } 222 } 223 224 // ClearAllUnflushed clears all unflushed notifications. 225 func (th *TlfHistory) ClearAllUnflushed() { 226 th.lock.Lock() 227 defer th.lock.Unlock() 228 if th.unflushed != nil { 229 // Invalidate the cached results. 230 th.computed = false 231 th.cachedLoggedInUser = "" 232 } 233 th.unflushed = nil 234 } 235 236 type fileEvent struct { 237 delete bool 238 newName string 239 rev kbfsmd.Revision 240 } 241 242 type recomputer struct { 243 byWriter map[string]*writerNotifications 244 modifiedFiles map[string]map[string]bool // writer -> file -> bool 245 fileEvents map[string]fileEvent // currentName -> ultimate fate 246 numProcessed map[string]int // writer name -> num 247 minUnflushed kbfsmd.Revision 248 } 249 250 func newRecomputer() *recomputer { 251 return &recomputer{ 252 byWriter: make(map[string]*writerNotifications), 253 modifiedFiles: make(map[string]map[string]bool), 254 fileEvents: make(map[string]fileEvent), 255 numProcessed: make(map[string]int), 256 minUnflushed: kbfsmd.RevisionUninitialized, 257 } 258 } 259 260 var filesToIgnore = map[string]bool{ 261 ".Trashes": true, 262 ".fseventsd": true, 263 ".DS_Store": true, 264 } 265 266 func ignoreFile(filename string) bool { 267 _, base := path.Split(filename) 268 if filesToIgnore[base] || strings.HasPrefix(base, "._") { 269 return true 270 } 271 // Treat the files to ignore as prefixes, since if they ever 272 // conflict they'll have the conflict suffix. 273 for prefix := range filesToIgnore { 274 if strings.HasPrefix(base, prefix) { 275 return true 276 } 277 } 278 return false 279 } 280 281 // processNotification adds the notification to the recomputer's 282 // history if it is a create/modify for a file that hasn't yet been 283 // deleted. If the file is renamed in a future revision, the added 284 // notification has the new name of the file. processNotification 285 // should be called with notifications in reverse order of their 286 // revision number. 287 // 288 // It returns true if it has added enough notifications for the given 289 // writer, and the caller should not send any more for that writer. 290 func (r *recomputer) processNotification( 291 writer string, notification NotificationMessage) (doTrim bool) { 292 // Ignore notifications that come after any present unflushed 293 // notifications, as the local client won't be able to see them. 294 if r.minUnflushed != kbfsmd.RevisionUninitialized && 295 notification.Revision >= r.minUnflushed { 296 return false 297 } 298 299 filename := notification.Filename 300 r.numProcessed[writer]++ 301 302 // If the file is renamed in a future revision, rename it in the 303 // notification. 304 eventFilename := filename 305 event, hasEvent := r.fileEvents[filename] 306 if hasEvent && event.newName != "" { 307 notification.Filename = event.newName 308 filename = event.newName 309 } 310 311 // Keep only the creates and modifies for non-deleted files, 312 // but remember the renames and deletes. 313 switch notification.Type { 314 case NotificationCreate, NotificationModify: 315 // Disregard any file that's already been deleted. 316 if hasEvent && event.delete { 317 return false 318 } 319 320 // We only care about files, so skip dir and sym creates. 321 if notification.FileType != EntryTypeFile { 322 return false 323 } 324 325 // Ignore macOS dotfiles. 326 if ignoreFile(filename) { 327 return false 328 } 329 330 wn, ok := r.byWriter[writer] 331 if !ok { 332 wn = &writerNotifications{writer, nil, nil} 333 r.byWriter[writer] = wn 334 } 335 336 if len(wn.notifications) == maxEditsPerWriter { 337 // We don't need any more edit notifications, but we 338 // should continue looking for more deletes. 339 return false 340 } 341 342 // See if any of the parent directories were renamed, checking 343 // backwards until we get to the TLF name. 344 prefix := filename 345 latestRenameRev := notification.Revision 346 suffix := "" 347 for strings.Count(prefix, "/") > 4 { 348 var finalElem string 349 prefix, finalElem = path.Split(prefix) 350 prefix = strings.TrimSuffix(prefix, "/") 351 suffix = path.Clean(path.Join(finalElem, suffix)) 352 event, hasEvent := r.fileEvents[prefix] 353 // Ignore any rename events that happen at or before the 354 // last revision we considered, to avoid weird rename 355 // loops (see HOTPOT-856). 356 if hasEvent && event.newName != "" && latestRenameRev < event.rev { 357 prefix = event.newName 358 latestRenameRev = event.rev 359 } 360 } 361 filename = path.Clean(path.Join(prefix, suffix)) 362 notification.Filename = filename 363 364 // We only need one modify message per writer per file. 365 if r.modifiedFiles[writer][filename] { 366 return false 367 } 368 369 wn.notifications = append(wn.notifications, notification) 370 371 modified, ok := r.modifiedFiles[writer] 372 if !ok { 373 modified = make(map[string]bool) 374 r.modifiedFiles[writer] = modified 375 } 376 modified[filename] = true 377 378 if len(wn.notifications) == maxEditsPerWriter && 379 len(wn.deletes) == maxDeletesPerWriter { 380 // We have enough edits and deletes for this user. 381 return true 382 } 383 case NotificationRename: 384 // If the file already has a final event, move that to the old 385 // filename. Otherwise, this is the final event. 386 if hasEvent { 387 r.fileEvents[notification.Params.OldFilename] = event 388 delete(r.fileEvents, eventFilename) 389 } else { 390 r.fileEvents[notification.Params.OldFilename] = 391 fileEvent{ 392 newName: eventFilename, 393 rev: notification.Revision, 394 } 395 } 396 397 // If renaming a directory, check whether there are any events 398 // for children of the directory, and rename them 399 // accordingly. TODO: there's probably a better data structure 400 // for doing this when storing events, maybe a multi-layer map 401 // structured like a file system. 402 if notification.FileType == EntryTypeDir { 403 for f, event := range r.fileEvents { 404 if strings.HasPrefix(f, eventFilename) { 405 oldF := strings.ReplaceAll( 406 f, eventFilename, notification.Params.OldFilename) 407 r.fileEvents[oldF] = event 408 delete(r.fileEvents, f) 409 } 410 } 411 } 412 413 // The renamed file overwrote any existing file with the new 414 // name. 415 r.fileEvents[eventFilename] = fileEvent{ 416 delete: true, 417 rev: notification.Revision, 418 } 419 case NotificationDelete: 420 r.fileEvents[eventFilename] = fileEvent{ 421 delete: true, 422 rev: notification.Revision, 423 } 424 425 // We only care about files, so skip dir and sym creates. 426 if notification.FileType != EntryTypeFile { 427 return false 428 } 429 430 // Ignore macOS dotfiles. 431 if ignoreFile(filename) { 432 return false 433 } 434 435 wn, ok := r.byWriter[writer] 436 if !ok { 437 wn = &writerNotifications{writer, nil, nil} 438 r.byWriter[writer] = wn 439 } 440 441 if len(wn.deletes) == maxDeletesPerWriter { 442 // We don't need any more deletes, but we 443 // should continue looking for more edit notifications. 444 return false 445 } 446 447 if hasEvent && event.delete { 448 // It's already been deleted, no need to track it further. 449 return false 450 } 451 452 // If there are no future modifications of this file, then 453 // this delete should be included in the history. 454 for _, files := range r.modifiedFiles { 455 for f := range files { 456 if f == eventFilename { 457 return false 458 } 459 } 460 } 461 462 wn.deletes = append(wn.deletes, notification) 463 464 if len(wn.notifications) == maxEditsPerWriter && 465 len(wn.deletes) == maxDeletesPerWriter { 466 // We have enough edits and deletes for this user. 467 return true 468 } 469 470 // TODO: limit the number (or time span) of notifications we 471 // process to find the list of deleted files? Or maybe we 472 // stop processing after we hit the last GC'd revision, since 473 // deleted files after that point can't be recovered anyway. 474 } 475 return false 476 } 477 478 func (th *TlfHistory) recomputeLocked(loggedInUser string) ( 479 history writersByRevision, writersWhoNeedMore map[string]bool) { 480 writersWhoNeedMore = make(map[string]bool) 481 482 r := newRecomputer() 483 484 // First add all of the unflushed notifications for the logged-in 485 // writer. 486 skipLoggedIn := false 487 loggedInProcessed := 0 488 if th.unflushed != nil { 489 if th.unflushed.writerName != loggedInUser { 490 panic(fmt.Sprintf( 491 "Logged-in user %s doesn't match unflushed user %s", 492 loggedInUser, th.unflushed.writerName)) 493 } 494 for _, n := range th.unflushed.notifications { 495 doTrim := r.processNotification(th.unflushed.writerName, n) 496 if doTrim { 497 skipLoggedIn = true 498 break 499 } 500 } 501 if ln := len(th.unflushed.notifications); ln > 0 { 502 r.minUnflushed = th.unflushed.notifications[ln-1].Revision 503 } 504 loggedInProcessed = r.numProcessed[th.unflushed.writerName] 505 } 506 507 // Copy the writer notifications into a heap. 508 var writersHeap writersByRevision 509 for _, wn := range th.byWriter { 510 if skipLoggedIn && wn.writerName == loggedInUser { 511 // There are enough unflushed notifications already, so 512 // skip the logged-in user. 513 continue 514 } 515 wnCopy := writerNotifications{ 516 writerName: wn.writerName, 517 notifications: make(notificationsByRevision, len(wn.notifications)), 518 deletes: make(notificationsByRevision, len(wn.deletes)), 519 } 520 copy(wnCopy.notifications, wn.notifications) 521 copy(wnCopy.deletes, wn.deletes) 522 writersHeap = append(writersHeap, &wnCopy) 523 } 524 heap.Init(&writersHeap) 525 526 // Iterate through the heap. The writer with the next highest 527 // revision will always be at index 0. Process that writer's 528 // first notification, then remove it and fix the heap so that the 529 // next highest revision is at index 0. That way events that 530 // happen more recently (like deletes and renames) can be taken 531 // into account when looking at older events. 532 for writersHeap.Len() > 0 { 533 nextWriter := writersHeap[0].writerName 534 nextNotification := writersHeap[0].notifications[0] 535 doTrim := r.processNotification(nextWriter, nextNotification) 536 537 // Remove that notification, and fix the heap because this 538 // writer has a different newest revision. 539 if doTrim { 540 // Trim all earlier revisions because they won't be needed 541 // for the cached history. 542 numProcessed := r.numProcessed[nextWriter] 543 if loggedInUser == nextWriter { 544 numProcessed -= loggedInProcessed 545 } 546 th.byWriter[nextWriter].notifications = 547 th.byWriter[nextWriter].notifications[:numProcessed] 548 } else { 549 writersHeap[0].notifications = writersHeap[0].notifications[1:] 550 } 551 if len(writersHeap[0].notifications) == 0 || doTrim { 552 heap.Pop(&writersHeap) 553 } else { 554 heap.Fix(&writersHeap, 0) 555 } 556 } 557 558 history = make(writersByRevision, 0, len(r.byWriter)) 559 for writerName := range th.byWriter { 560 wn := r.byWriter[writerName] 561 if wn != nil && (len(wn.notifications) > 0 || len(wn.deletes) > 0) { 562 history = append(history, wn) 563 } 564 if wn == nil || len(wn.notifications) < maxEditsPerWriter || 565 len(wn.notifications) < maxDeletesPerWriter { 566 writersWhoNeedMore[writerName] = true 567 } 568 } 569 if _, ok := th.byWriter[loggedInUser]; !ok { 570 // The logged-in user only has unflushed edits. 571 wn := r.byWriter[loggedInUser] 572 if wn != nil && (len(wn.notifications) > 0 || len(wn.deletes) > 0) { 573 history = append(history, wn) 574 } 575 } 576 sort.Sort(history) 577 if len(history) > maxWritersPerHistory { 578 // Garbage-collect any writers that don't appear in the history. 579 loggedInIndex := -1 580 for i := maxWritersPerHistory; i < len(history); i++ { 581 if history[i].writerName == loggedInUser { 582 // Don't purge the logged-in user. 583 loggedInIndex = i 584 continue 585 } 586 delete(th.byWriter, history[i].writerName) 587 delete(writersWhoNeedMore, history[i].writerName) 588 } 589 if loggedInIndex > 0 { 590 // Keep the logged-in user as the last entry. Note that 591 // `loggedInIndex` is guaranteed to be greater or equal to 592 // `maxWritersPerHistory`, so this logic swaps in the 593 // loggedIn entry (and doesn't duplicate it). 594 history = append( 595 history[:maxWritersPerHistory-1], history[loggedInIndex]) 596 } else { 597 history = history[:maxWritersPerHistory] 598 } 599 } 600 th.computed = true 601 th.cachedHistory = history 602 th.cachedLoggedInUser = loggedInUser 603 return history, writersWhoNeedMore 604 } 605 606 func (th *TlfHistory) getHistoryIfCached() ( 607 cached bool, history writersByRevision, loggedInUser string) { 608 th.lock.RLock() 609 defer th.lock.RUnlock() 610 if th.computed { 611 return true, th.cachedHistory, th.cachedLoggedInUser 612 } 613 return false, nil, "" 614 } 615 616 func (th *TlfHistory) getHistory(loggedInUser string) writersByRevision { 617 cached, history, cachedLoggedInUser := th.getHistoryIfCached() 618 if cached && loggedInUser == cachedLoggedInUser { 619 return history 620 } 621 622 th.lock.Lock() 623 defer th.lock.Unlock() 624 if th.computed { 625 // Maybe another goroutine got the lock and recomputed the 626 // history since we checked above. 627 return th.cachedHistory 628 } 629 history, _ = th.recomputeLocked(loggedInUser) 630 return history 631 } 632 633 // Recompute processes (and caches) the history so that it reflects 634 // all recently-added notifications, and returns the names of writers 635 // which don't yet have the maximum number of edits in the history. 636 func (th *TlfHistory) Recompute(loggedInUser string) ( 637 writersWhoNeedMore map[string]bool) { 638 th.lock.Lock() 639 defer th.lock.Unlock() 640 _, writersWhoNeedMore = th.recomputeLocked(loggedInUser) 641 return writersWhoNeedMore 642 }