github.com/keybase/client/go@v0.0.0-20241007131713-f10651d043c8/kbfs/libkbfs/block_journal.go (about) 1 // Copyright 2016 Keybase Inc. All rights reserved. 2 // Use of this source code is governed by a BSD 3 // license that can be found in the LICENSE file. 4 5 package libkbfs 6 7 import ( 8 "fmt" 9 "math" 10 "path/filepath" 11 "reflect" 12 13 "github.com/keybase/client/go/kbfs/data" 14 "github.com/keybase/client/go/kbfs/ioutil" 15 "github.com/keybase/client/go/kbfs/kbfsblock" 16 "github.com/keybase/client/go/kbfs/kbfscodec" 17 "github.com/keybase/client/go/kbfs/kbfscrypto" 18 "github.com/keybase/client/go/kbfs/kbfsmd" 19 "github.com/keybase/client/go/kbfs/tlf" 20 "github.com/keybase/client/go/libkb" 21 "github.com/keybase/client/go/logger" 22 "github.com/keybase/client/go/protocol/keybase1" 23 "github.com/keybase/go-codec/codec" 24 "github.com/pkg/errors" 25 "golang.org/x/net/context" 26 ) 27 28 // blockJournal stores a single ordered list of block operations for a 29 // single TLF, along with the associated block data, in flat files in 30 // a directory on disk. 31 // 32 // The directory layout looks like: 33 // 34 // dir/block_aggregate_info 35 // dir/block_journal/EARLIEST 36 // dir/block_journal/LATEST 37 // dir/block_journal/0...000 38 // dir/block_journal/0...001 39 // dir/block_journal/0...fff 40 // dir/blocks/... 41 // dir/gc_block_journal/EARLIEST 42 // dir/gc_block_journal/LATEST 43 // dir/gc_block_journal/... 44 // 45 // block_aggregate_info holds aggregate info about the block journal; 46 // currently it just holds the count of stored and unflushed bytes. 47 // 48 // Each entry in the journal in dir/block_journal contains the 49 // mutating operation and arguments for a single operation, except for 50 // block data. (See diskJournal comments for more details about the 51 // journal.) 52 // 53 // The block data is stored separately in dir/blocks. See 54 // blockDiskStore comments for more details. 
//
// The maximum number of characters added to the root dir by a block
// journal is 51:
//
//	/blocks/(max 44 characters)
//
// blockJournal is not goroutine-safe, so any code that uses it must
// guarantee that only one goroutine at a time calls its functions.
type blockJournal struct {
	// codec is handed to both disk journals and the block store at
	// construction time, and is used to (de)serialize the aggregate
	// info file.
	codec kbfscodec.Codec
	// dir is the root directory; every path used by the journal is
	// derived from it.
	dir string

	// log is the regular logger. deferLog is a clone of it with one
	// extra level of call depth, used from deferred closures.
	log      traceLogger
	deferLog traceLogger
	vlog     *libkb.VDebugLog

	// j is the main journal.
	j *diskJournal

	// deferredGC is a second journal that receives already-flushed
	// entries whose blocks dropped to zero live references (see
	// removeFlushedEntry). The block data for those entries stays
	// on disk until doGC processes this journal.
	//
	// TODO: We only really need to save a list of IDs, and not a
	// full journal.
	deferredGC *diskJournal

	// s stores all the block data. s should always reflect the
	// state you get by replaying all the entries in j.
	s *blockDiskStore

	// aggregateInfo is the in-memory copy of the byte/file counters
	// that are persisted in the block_aggregate_info file.
	aggregateInfo blockAggregateInfo
}

// blockOpType identifies the kind of operation recorded in a
// blockJournalEntry.
type blockOpType int

// NOTE(review): these values are stored in the exported Op field of
// blockJournalEntry, which exists for serialization, so they presumably
// end up on disk and must not be renumbered -- confirm.
const (
	blockPutOp    blockOpType = 1
	addRefOp      blockOpType = 2
	removeRefsOp  blockOpType = 3
	archiveRefsOp blockOpType = 4
	mdRevMarkerOp blockOpType = 5
)

// String returns a human-readable name for the op, falling back to
// "blockOpType(N)" for values that aren't one of the known ops.
func (t blockOpType) String() string {
	switch t {
	case blockPutOp:
		return "blockPut"
	case addRefOp:
		return "addReference"
	case removeRefsOp:
		return "removeReferences"
	case archiveRefsOp:
		return "archiveReferences"
	case mdRevMarkerOp:
		return "mdRevisionMarker"
	default:
		return fmt.Sprintf("blockOpType(%d)", t)
	}
}

// A blockJournalEntry is just the name of the operation and the
// associated block ID and contexts. Fields are exported only for
// serialization.
type blockJournalEntry struct {
	// Must be one of the five ops above.
	Op blockOpType
	// Must have exactly one entry with one context for blockPutOp and
	// addRefOp. Used for all ops except for mdRevMarkerOp.
	Contexts kbfsblock.ContextMap `codec:",omitempty"`
	// Only used for mdRevMarkerOps.
	Revision kbfsmd.Revision `codec:",omitempty"`
	// Ignore this entry while flushing if this is true.
	Ignore bool `codec:",omitempty"`
	// If the current journal ID doesn't match this journal ID, then
	// ignore this entry while flushing.
	MDJournalID *kbfsmd.ID `codec:",omitempty"`
	// This is an MD rev marker that represents a local squash. TODO:
	// combine this with Ignore using a more generic flags or state
	// field, once we can change the journal format.
	IsLocalSquash bool `codec:",omitempty"`
	// This is legacy and only present for backwards compatibility.
	// It can be removed as soon as we are sure there are no more
	// journal entries in the wild with this set.
	Unignorable bool `codec:",omitempty"`

	codec.UnknownFieldSetHandler
}

// getSingleContext returns the single block ID and context stored in
// this entry. Only applicable to blockPutOp and addRefOp; an error is
// returned for any other op, or if the entry doesn't hold exactly one
// ID with exactly one context.
146 func (e blockJournalEntry) getSingleContext() ( 147 kbfsblock.ID, kbfsblock.Context, error) { 148 switch e.Op { 149 case blockPutOp, addRefOp: 150 if len(e.Contexts) != 1 { 151 return kbfsblock.ID{}, kbfsblock.Context{}, errors.Errorf( 152 "Op %s doesn't have exactly one context: %v", 153 e.Op, e.Contexts) 154 } 155 for id, idContexts := range e.Contexts { 156 if len(idContexts) != 1 { 157 return kbfsblock.ID{}, kbfsblock.Context{}, errors.Errorf( 158 "Op %s doesn't have exactly one context for id=%s: %v", 159 e.Op, id, idContexts) 160 } 161 return id, idContexts[0], nil 162 } 163 } 164 165 return kbfsblock.ID{}, kbfsblock.Context{}, errors.Errorf( 166 "getSingleContext() erroneously called on op %s", e.Op) 167 } 168 169 func (e blockJournalEntry) ignore(mdJournalID kbfsmd.ID) bool { 170 return e.Ignore || 171 (mdJournalID.IsValid() && e.MDJournalID != nil && 172 mdJournalID != *e.MDJournalID) 173 } 174 175 func blockJournalDir(dir string) string { 176 return filepath.Join(dir, "block_journal") 177 } 178 179 func blockJournalStoreDir(dir string) string { 180 return filepath.Join(dir, "blocks") 181 } 182 183 func deferredGCBlockJournalDir(dir string) string { 184 return filepath.Join(dir, "gc_block_journal") 185 } 186 187 // makeBlockJournal returns a new blockJournal for the given 188 // directory. Any existing journal entries are read. 
189 func makeBlockJournal( 190 ctx context.Context, codec kbfscodec.Codec, dir string, 191 log logger.Logger, vlog *libkb.VDebugLog) (*blockJournal, error) { 192 journalPath := blockJournalDir(dir) 193 deferLog := log.CloneWithAddedDepth(1) 194 j, err := makeDiskJournal( 195 codec, journalPath, reflect.TypeOf(blockJournalEntry{})) 196 if err != nil { 197 return nil, err 198 } 199 200 gcJournalPath := deferredGCBlockJournalDir(dir) 201 gcj, err := makeDiskJournal( 202 codec, gcJournalPath, reflect.TypeOf(blockJournalEntry{})) 203 if err != nil { 204 return nil, err 205 } 206 207 storeDir := blockJournalStoreDir(dir) 208 s := makeBlockDiskStore(codec, storeDir) 209 journal := &blockJournal{ 210 codec: codec, 211 dir: dir, 212 log: traceLogger{log}, 213 vlog: vlog, 214 deferLog: traceLogger{deferLog}, 215 j: j, 216 deferredGC: gcj, 217 s: s, 218 } 219 220 // Get initial aggregate info. 221 err = kbfscodec.DeserializeFromFile( 222 codec, aggregateInfoPath(dir), &journal.aggregateInfo) 223 if !ioutil.IsNotExist(err) && err != nil { 224 return nil, err 225 } 226 227 return journal, nil 228 } 229 230 func (j *blockJournal) blockJournalFiles() []string { 231 return []string{ 232 blockJournalDir(j.dir), deferredGCBlockJournalDir(j.dir), 233 blockJournalStoreDir(j.dir), aggregateInfoPath(j.dir), 234 } 235 } 236 237 // The functions below are for reading and writing aggregate info. 238 239 // Ideally, this would be a JSON file, but we'd need a JSON 240 // encoder/decoder that supports unknown fields. 241 type blockAggregateInfo struct { 242 // StoredBytes counts the number of bytes of block data stored 243 // on disk. 244 StoredBytes int64 245 // StoredFiles counts an upper bound for the number of files 246 // of block data stored on disk. 247 StoredFiles int64 248 // UnflushedBytes counts the number of bytes of block data 249 // that is intended to be flushed to the server, but hasn't 250 // been yet. This should be always less than or equal to 251 // StoredBytes. 
252 UnflushedBytes int64 253 254 codec.UnknownFieldSetHandler 255 } 256 257 func aggregateInfoPath(dir string) string { 258 return filepath.Join(dir, "block_aggregate_info") 259 } 260 261 // saturateAdd adds the given delta to the int64 at x; if the result 262 // would be over MaxInt64, *x is instead set to MaxInt64, and if the 263 // result would be negative, *x is instead set to 0. If *x is already 264 // negative, *x is first set to 0 before doing the addition. 265 func saturateAdd(x *int64, delta int64) { 266 if *x < 0 { 267 *x = 0 268 } 269 270 switch { 271 case delta > 0 && *x > (math.MaxInt64-delta): 272 *x = math.MaxInt64 273 case delta < 0 && *x+delta < 0: 274 *x = 0 275 default: 276 *x += delta 277 } 278 } 279 280 func (j *blockJournal) changeCounts( 281 deltaStoredBytes, deltaStoredFiles, deltaUnflushedBytes int64) error { 282 saturateAdd(&j.aggregateInfo.StoredBytes, deltaStoredBytes) 283 saturateAdd(&j.aggregateInfo.StoredFiles, deltaStoredFiles) 284 saturateAdd(&j.aggregateInfo.UnflushedBytes, deltaUnflushedBytes) 285 return kbfscodec.SerializeToFile( 286 j.codec, j.aggregateInfo, aggregateInfoPath(j.dir)) 287 } 288 289 func (j *blockJournal) accumulateBlock(bytes, files int64) error { 290 if bytes < 0 { 291 panic("bytes unexpectedly negative") 292 } 293 if files < 0 { 294 panic("files unexpectedly negative") 295 } 296 return j.changeCounts(bytes, files, bytes) 297 } 298 299 func (j *blockJournal) flushBlock(bytes int64) error { 300 if bytes < 0 { 301 panic("bytes unexpectedly negative") 302 } 303 return j.changeCounts(0, 0, -bytes) 304 } 305 306 func (j *blockJournal) unstoreBlocks(bytes, files int64) error { 307 if bytes < 0 { 308 panic("bytes unexpectedly negative") 309 } 310 if files < 0 { 311 panic("files unexpectedly negative") 312 } 313 return j.changeCounts(-bytes, -files, 0) 314 } 315 316 // The functions below are for reading and writing journal entries. 
317 318 func (j *blockJournal) readJournalEntry(ordinal journalOrdinal) ( 319 blockJournalEntry, error) { 320 entry, err := j.j.readJournalEntry(ordinal) 321 if err != nil { 322 return blockJournalEntry{}, err 323 } 324 325 return entry.(blockJournalEntry), nil 326 } 327 328 func (j *blockJournal) appendJournalEntry( 329 ctx context.Context, entry blockJournalEntry) ( 330 journalOrdinal, error) { 331 ordinal, err := j.j.appendJournalEntry(nil, entry) 332 if err != nil { 333 return 0, err 334 } 335 336 return ordinal, nil 337 } 338 339 func (j *blockJournal) length() uint64 { 340 return j.j.length() 341 } 342 343 func (j *blockJournal) next() (journalOrdinal, error) { 344 last, err := j.j.readLatestOrdinal() 345 if ioutil.IsNotExist(err) { 346 return firstValidJournalOrdinal, nil 347 } else if err != nil { 348 return 0, err 349 } 350 return last + 1, nil 351 } 352 353 func (j *blockJournal) end() (journalOrdinal, error) { 354 last, err := j.j.readLatestOrdinal() 355 if ioutil.IsNotExist(err) { 356 return 0, nil 357 } else if err != nil { 358 return 0, err 359 } 360 return last + 1, nil 361 } 362 363 func (j *blockJournal) hasData( 364 ctx context.Context, id kbfsblock.ID) (bool, error) { 365 return j.s.hasData(ctx, id) 366 } 367 368 func (j *blockJournal) isUnflushed( 369 ctx context.Context, id kbfsblock.ID) (bool, error) { 370 return j.s.isUnflushed(ctx, id) 371 } 372 373 func (j *blockJournal) remove(ctx context.Context, id kbfsblock.ID) ( 374 removedBytes, removedFiles int64, err error) { 375 bytesToRemove, err := j.s.getDataSize(ctx, id) 376 if err != nil { 377 return 0, 0, err 378 } 379 380 err = j.s.remove(ctx, id) 381 if err != nil { 382 return 0, 0, err 383 } 384 385 var filesToRemove int64 386 if bytesToRemove > 0 { 387 filesToRemove = filesPerBlockMax 388 } 389 390 return bytesToRemove, filesToRemove, nil 391 } 392 393 // All functions below are public functions. 
394 395 func (j *blockJournal) empty() bool { 396 return j.j.empty() && j.deferredGC.empty() 397 } 398 399 func (j *blockJournal) getDataWithContext( 400 ctx context.Context, id kbfsblock.ID, context kbfsblock.Context) ( 401 []byte, kbfscrypto.BlockCryptKeyServerHalf, error) { 402 return j.s.getDataWithContext(ctx, id, context) 403 } 404 405 func (j *blockJournal) getData(ctx context.Context, id kbfsblock.ID) ( 406 []byte, kbfscrypto.BlockCryptKeyServerHalf, error) { 407 return j.s.getData(ctx, id) 408 } 409 410 func (j *blockJournal) getDataSize( 411 ctx context.Context, id kbfsblock.ID) (int64, error) { 412 return j.s.getDataSize(ctx, id) 413 } 414 415 func (j *blockJournal) getStoredBytes() int64 { 416 return j.aggregateInfo.StoredBytes 417 } 418 419 func (j *blockJournal) getUnflushedBytes() int64 { 420 return j.aggregateInfo.UnflushedBytes 421 } 422 423 func (j *blockJournal) getStoredFiles() int64 { 424 return j.aggregateInfo.StoredFiles 425 } 426 427 // putBlockData puts the given block data. If err is non-nil, putData will 428 // always be false. 429 func (j *blockJournal) putBlockData( 430 ctx context.Context, id kbfsblock.ID, context kbfsblock.Context, 431 buf []byte, serverHalf kbfscrypto.BlockCryptKeyServerHalf) ( 432 putData bool, err error) { 433 j.vlog.CLogf( 434 ctx, libkb.VLog1, 435 "Putting %d bytes of data for block %s with context %v", 436 len(buf), id, context) 437 defer func() { 438 if err != nil { 439 j.deferLog.CDebugf(ctx, 440 "Put for block %s with context %v failed with %+v", 441 id, context, err) 442 } 443 }() 444 445 putData, err = j.s.put(ctx, true, id, context, buf, serverHalf) 446 if err != nil { 447 return false, err 448 } 449 450 return putData, nil 451 } 452 453 // appendBlock appends an entry for the previously-put block to the 454 // journal, and records the size for the put block. 
455 func (j *blockJournal) appendBlock( 456 ctx context.Context, id kbfsblock.ID, context kbfsblock.Context, 457 bufLenToAdd int64) error { 458 j.vlog.CLogf(ctx, libkb.VLog1, "Appending block %s to journal", id) 459 460 if bufLenToAdd > 0 { 461 var putFiles int64 = filesPerBlockMax 462 err := j.accumulateBlock(bufLenToAdd, putFiles) 463 if err != nil { 464 return err 465 } 466 } 467 468 next, err := j.next() 469 if err != nil { 470 return err 471 } 472 473 err = j.s.addReference(ctx, id, context, next.String()) 474 if err != nil { 475 return err 476 } 477 478 _, err = j.appendJournalEntry(ctx, blockJournalEntry{ 479 Op: blockPutOp, 480 Contexts: kbfsblock.ContextMap{id: {context}}, 481 }) 482 return err 483 } 484 485 func (j *blockJournal) addReference( 486 ctx context.Context, id kbfsblock.ID, context kbfsblock.Context) ( 487 err error) { 488 j.vlog.CLogf( 489 ctx, libkb.VLog1, "Adding reference for block %s with context %v", 490 id, context) 491 defer func() { 492 if err != nil { 493 j.deferLog.CDebugf(ctx, 494 "Adding reference for block %s with context %v failed with %+v", 495 id, context, err) 496 } 497 }() 498 499 next, err := j.next() 500 if err != nil { 501 return err 502 } 503 504 err = j.s.addReference(ctx, id, context, next.String()) 505 if err != nil { 506 return err 507 } 508 509 _, err = j.appendJournalEntry(ctx, blockJournalEntry{ 510 Op: addRefOp, 511 Contexts: kbfsblock.ContextMap{id: {context}}, 512 }) 513 if err != nil { 514 return err 515 } 516 517 return nil 518 } 519 520 func (j *blockJournal) archiveReferences( 521 ctx context.Context, contexts kbfsblock.ContextMap) (err error) { 522 j.vlog.CLogf(ctx, libkb.VLog1, "Archiving references for %v", contexts) 523 defer func() { 524 if err != nil { 525 j.deferLog.CDebugf(ctx, 526 "Archiving references for %+v,", contexts, err) 527 } 528 }() 529 530 next, err := j.next() 531 if err != nil { 532 return err 533 } 534 535 err = j.s.archiveReferences(ctx, contexts, next.String()) 536 if err != nil { 537 
return err 538 } 539 540 _, err = j.appendJournalEntry(ctx, blockJournalEntry{ 541 Op: archiveRefsOp, 542 Contexts: contexts, 543 }) 544 if err != nil { 545 return err 546 } 547 548 return nil 549 } 550 551 // removeReferences removes references for the given contexts from 552 // their respective IDs. 553 func (j *blockJournal) removeReferences( 554 ctx context.Context, contexts kbfsblock.ContextMap) ( 555 liveCounts map[kbfsblock.ID]int, err error) { 556 j.vlog.CLogf(ctx, libkb.VLog1, "Removing references for %v", contexts) 557 defer func() { 558 if err != nil { 559 j.deferLog.CDebugf(ctx, 560 "Removing references for %+v", contexts, err) 561 } 562 }() 563 564 // Add the journal entry first, so that if we crash before 565 // removing the refs, we have at worst un-GCed blocks. 566 567 _, err = j.appendJournalEntry(ctx, blockJournalEntry{ 568 Op: removeRefsOp, 569 Contexts: contexts, 570 }) 571 if err != nil { 572 return nil, err 573 } 574 575 liveCounts = make(map[kbfsblock.ID]int) 576 for id, idContexts := range contexts { 577 // Remove the references unconditionally here (i.e., 578 // with an empty tag), since j.s should reflect the 579 // most recent state. 
580 liveCount, err := j.s.removeReferences(ctx, id, idContexts, "") 581 if err != nil { 582 return nil, err 583 } 584 585 liveCounts[id] = liveCount 586 } 587 588 return liveCounts, nil 589 } 590 591 func (j *blockJournal) markMDRevision(ctx context.Context, 592 rev kbfsmd.Revision, journalID kbfsmd.ID, isPendingLocalSquash bool) ( 593 err error) { 594 j.vlog.CLogf( 595 ctx, libkb.VLog1, "Marking MD revision %d in the block journal", rev) 596 defer func() { 597 if err != nil { 598 j.deferLog.CDebugf(ctx, "Marking MD revision %d error: %+v", 599 rev, err) 600 } 601 }() 602 603 _, err = j.appendJournalEntry(ctx, blockJournalEntry{ 604 Op: mdRevMarkerOp, 605 Revision: rev, 606 MDJournalID: &journalID, 607 // If this MD represents a pending local squash, it should 608 // never be ignored since the revision it refers to can't be 609 // squashed again. 610 IsLocalSquash: isPendingLocalSquash, 611 }) 612 if err != nil { 613 return err 614 } 615 return nil 616 } 617 618 // blockEntriesToFlush is an internal data structure for blockJournal; 619 // its fields shouldn't be accessed outside this file. 
620 type blockEntriesToFlush struct { 621 all []blockJournalEntry 622 first journalOrdinal 623 624 puts blockPutState 625 adds blockPutState 626 other []blockJournalEntry 627 } 628 629 func (be blockEntriesToFlush) length() int { 630 return len(be.all) 631 } 632 633 func (be blockEntriesToFlush) flushNeeded() bool { 634 return be.length() > 0 635 } 636 637 func (be blockEntriesToFlush) revIsLocalSquash( 638 rev kbfsmd.Revision, mdJournalID kbfsmd.ID) bool { 639 for _, entry := range be.other { 640 if !entry.ignore(mdJournalID) && entry.Op == mdRevMarkerOp && 641 entry.Revision == rev { 642 return entry.IsLocalSquash || entry.Unignorable 643 } 644 } 645 return false 646 } 647 648 func (be blockEntriesToFlush) markFlushingBlockIDs(ids map[kbfsblock.ID]bool) { 649 for _, ptr := range be.puts.Ptrs() { 650 ids[ptr.ID] = true 651 } 652 } 653 654 func (be blockEntriesToFlush) clearFlushingBlockIDs(ids map[kbfsblock.ID]bool) { 655 for _, ptr := range be.puts.Ptrs() { 656 delete(ids, ptr.ID) 657 } 658 } 659 660 // Only entries with ordinals less than the given ordinal (assumed to 661 // be <= latest ordinal + 1) are returned. Also returns the maximum 662 // MD revision that can be merged after the returned entries are 663 // successfully flushed; if no entries are returned (i.e., the block 664 // journal is empty) then any MD revision may be flushed even when 665 // kbfsmd.RevisionUninitialized is returned. 
666 func (j *blockJournal) getNextEntriesToFlush( 667 ctx context.Context, end journalOrdinal, maxToFlush int, 668 mdJournalID kbfsmd.ID) ( 669 entries blockEntriesToFlush, bytesToFlush int64, 670 maxMDRevToFlush kbfsmd.Revision, err error) { 671 first, err := j.j.readEarliestOrdinal() 672 if ioutil.IsNotExist(err) { 673 return blockEntriesToFlush{}, 0, kbfsmd.RevisionUninitialized, nil 674 } else if err != nil { 675 return blockEntriesToFlush{}, 0, kbfsmd.RevisionUninitialized, err 676 } 677 678 if first >= end { 679 return blockEntriesToFlush{}, 0, kbfsmd.RevisionUninitialized, 680 errors.Errorf("Trying to flush past the "+ 681 "start of the journal (first=%d, end=%d)", first, end) 682 } 683 684 realEnd, err := j.end() 685 if realEnd == 0 { 686 return blockEntriesToFlush{}, 0, kbfsmd.RevisionUninitialized, 687 errors.Errorf("There was an earliest "+ 688 "ordinal %d, but no latest ordinal", first) 689 } else if err != nil { 690 return blockEntriesToFlush{}, 0, kbfsmd.RevisionUninitialized, err 691 } 692 693 if end > realEnd { 694 return blockEntriesToFlush{}, 0, kbfsmd.RevisionUninitialized, 695 errors.Errorf("Trying to flush past the "+ 696 "end of the journal (realEnd=%d, end=%d)", realEnd, end) 697 } 698 699 entries.puts = newBlockPutStateMemory(int(end - first)) 700 entries.adds = newBlockPutStateMemory(int(end - first)) 701 maxMDRevToFlush = kbfsmd.RevisionUninitialized 702 703 loopEnd := end 704 if first+journalOrdinal(maxToFlush) < end { 705 loopEnd = first + journalOrdinal(maxToFlush) 706 } 707 708 for ordinal := first; ordinal < loopEnd; ordinal++ { 709 entry, err := j.readJournalEntry(ordinal) 710 if err != nil { 711 return blockEntriesToFlush{}, 0, kbfsmd.RevisionUninitialized, err 712 } 713 714 if entry.ignore(mdJournalID) { 715 if loopEnd < end { 716 loopEnd++ 717 } 718 entries.other = append(entries.other, entry) 719 entries.all = append(entries.all, entry) 720 continue 721 } 722 723 var blockData []byte 724 var serverHalf 
kbfscrypto.BlockCryptKeyServerHalf 725 726 switch entry.Op { 727 case blockPutOp: 728 id, bctx, err := entry.getSingleContext() 729 if err != nil { 730 return blockEntriesToFlush{}, 0, 731 kbfsmd.RevisionUninitialized, err 732 } 733 734 blockData, serverHalf, err = j.s.getData(ctx, id) 735 if err != nil { 736 return blockEntriesToFlush{}, 0, 737 kbfsmd.RevisionUninitialized, err 738 } 739 bytesToFlush += int64(len(blockData)) 740 741 err = entries.puts.AddNewBlock( 742 ctx, data.BlockPointer{ID: id, Context: bctx}, 743 nil, /* only used by folderBranchOps */ 744 data.ReadyBlockData{ 745 Buf: blockData, 746 ServerHalf: serverHalf, 747 }, nil) 748 if err != nil { 749 return blockEntriesToFlush{}, 0, 750 kbfsmd.RevisionUninitialized, err 751 } 752 753 case addRefOp: 754 id, bctx, err := entry.getSingleContext() 755 if err != nil { 756 return blockEntriesToFlush{}, 0, 757 kbfsmd.RevisionUninitialized, err 758 } 759 760 err = entries.adds.AddNewBlock( 761 ctx, data.BlockPointer{ID: id, Context: bctx}, 762 nil, /* only used by folderBranchOps */ 763 data.ReadyBlockData{}, nil) 764 if err != nil { 765 return blockEntriesToFlush{}, 0, 766 kbfsmd.RevisionUninitialized, err 767 } 768 769 case mdRevMarkerOp: 770 if entry.Revision < maxMDRevToFlush { 771 return blockEntriesToFlush{}, 0, kbfsmd.RevisionUninitialized, 772 errors.Errorf("Max MD revision decreased in block journal "+ 773 "from %d to %d", entry.Revision, maxMDRevToFlush) 774 } 775 maxMDRevToFlush = entry.Revision 776 entries.other = append(entries.other, entry) 777 778 default: 779 entries.other = append(entries.other, entry) 780 } 781 782 entries.all = append(entries.all, entry) 783 } 784 entries.first = first 785 return entries, bytesToFlush, maxMDRevToFlush, nil 786 } 787 788 // flushNonBPSBlockJournalEntry flushes journal entries that can't be 789 // parallelized via a blockPutState. 
func flushNonBPSBlockJournalEntry(
	ctx context.Context, log logger.Logger,
	bserver BlockServer, tlfID tlf.ID, entry blockJournalEntry) error {
	log.CDebugf(ctx, "Flushing other block op %v", entry)

	switch entry.Op {
	case removeRefsOp:
		_, err := bserver.RemoveBlockReferences(
			ctx, tlfID, entry.Contexts)
		if err != nil {
			return err
		}

	case archiveRefsOp:
		err := bserver.ArchiveBlockReferences(
			ctx, tlfID, entry.Contexts)
		if err != nil {
			return err
		}

	case blockPutOp:
		// A put may only show up here if it was ignored; unignored
		// puts are expected to go through the blockPutState path.
		if !entry.ignore(kbfsmd.ID{}) {
			return errors.New("Trying to flush unignored blockPut as other")
		}
		// Otherwise nothing to do.

	case mdRevMarkerOp:
		// Nothing to do.

	default:
		return errors.Errorf("Unknown op %s", entry.Op)
	}

	return nil
}

// flushBlockEntries sends the given entries to the block server:
// first the puts, then the addRefs, then every "other" entry one at
// a time. It returns on the first error; recoverable block errors
// from the puts/addRefs are additionally logged as warnings. It does
// nothing (and logs nothing) when there are no entries.
func flushBlockEntries(ctx context.Context, log, deferLog traceLogger,
	bserver BlockServer, bcache data.BlockCache, reporter Reporter, tlfID tlf.ID,
	tlfName tlf.CanonicalName, entries blockEntriesToFlush,
	cacheType DiskBlockCacheType) error {
	if !entries.flushNeeded() {
		// Avoid logging anything when there's nothing to flush.
		return nil
	}

	// Do all the put state stuff first, in parallel. We need to do
	// the puts strictly before the addRefs, since the latter might
	// reference the former.
	log.CDebugf(ctx, "Putting %d blocks", entries.puts.numBlocks())
	blocksToRemove, err := doBlockPuts(ctx, bserver, bcache, reporter,
		log, deferLog, tlfID, tlfName, entries.puts, cacheType)
	if err != nil {
		if isRecoverableBlockError(err) {
			log.CWarningf(ctx,
				"Recoverable block error encountered on puts: %+v, ptrs=%v",
				err, blocksToRemove)
		}
		return err
	}

	// Next, do the addrefs.
	log.CDebugf(ctx, "Adding %d block references", entries.adds.numBlocks())
	blocksToRemove, err = doBlockPuts(ctx, bserver, bcache, reporter,
		log, deferLog, tlfID, tlfName, entries.adds, cacheType)
	if err != nil {
		if isRecoverableBlockError(err) {
			log.CWarningf(ctx,
				"Recoverable block error encountered on addRefs: %+v, ptrs=%v",
				err, blocksToRemove)
		}
		return err
	}

	// Now do all the other, non-put/addref entries. TODO:
	// parallelize these as well.
	for _, entry := range entries.other {
		err := flushNonBPSBlockJournalEntry(ctx, log, bserver, tlfID, entry)
		if err != nil {
			return err
		}
	}

	return nil
}

// removeFlushedEntry removes the entry at the given ordinal from the
// main journal; the ordinal must be the journal's earliest one. For
// a blockPutOp that isn't ignored (checked against a zero-valued MD
// journal ID), the block is marked as flushed in the block store and
// its data size is subtracted from the unflushed byte count; that
// size is returned as flushedBytes (0 for all other entries).
func (j *blockJournal) removeFlushedEntry(ctx context.Context,
	ordinal journalOrdinal, entry blockJournalEntry) (
	flushedBytes int64, err error) {
	earliestOrdinal, err := j.j.readEarliestOrdinal()
	if err != nil {
		return 0, err
	}

	if ordinal != earliestOrdinal {
		return 0, errors.Errorf("Expected ordinal %d, got %d",
			ordinal, earliestOrdinal)
	}

	// Store the block byte count if we've finished a Put.
	if entry.Op == blockPutOp && !entry.ignore(kbfsmd.ID{}) {
		id, _, err := entry.getSingleContext()
		if err != nil {
			return 0, err
		}

		err = j.s.markFlushed(ctx, id)
		if err != nil {
			return 0, err
		}

		flushedBytes, err = j.s.getDataSize(ctx, id)
		if err != nil {
			return 0, err
		}

		err = j.flushBlock(flushedBytes)
		if err != nil {
			return 0, err
		}
	}

	// Remove any of the entry's refs that hasn't been modified by
	// a subsequent block op (i.e., that has earliestOrdinal as a
	// tag). Has no effect for removeRefsOp (since those are
	// already removed) or mdRevMarkerOp (which has no
	// references).
	for id, idContexts := range entry.Contexts {
		liveCount, err := j.s.removeReferences(
			ctx, id, idContexts, earliestOrdinal.String())
		if err != nil {
			return 0, err
		}
		// Postpone garbage collection until the next MD flush.
		// Note that the whole entry is appended once per ID whose
		// live count reached zero.
		if liveCount == 0 {
			_, err := j.deferredGC.appendJournalEntry(nil, entry)
			if err != nil {
				return 0, err
			}
		}
	}

	// The entry itself goes last, after its refs were dealt with.
	_, err = j.j.removeEarliest()
	if err != nil {
		return 0, err
	}

	return flushedBytes, nil
}

// removeFlushedEntries removes all the given entries from the main
// journal, in order, starting at ordinal entries.first. A sync status
// notification carrying the flushed byte count is sent to the
// reporter after each entry. It returns the total number of flushed
// bytes, or 0 and the error as soon as one removal fails.
func (j *blockJournal) removeFlushedEntries(ctx context.Context,
	entries blockEntriesToFlush, tlfID tlf.ID, reporter Reporter) (
	totalFlushedBytes int64, err error) {
	// Remove them all!
	for i, entry := range entries.all {
		flushedBytes, err := j.removeFlushedEntry(
			ctx, entries.first+journalOrdinal(i), entry)
		if err != nil {
			return 0, err
		}
		totalFlushedBytes += flushedBytes

		reporter.NotifySyncStatus(ctx, &keybase1.FSPathSyncStatus{
			FolderType: tlfID.Type().FolderType(),
			// Path:         TODO,
			// SyncingBytes: TODO,
			// SyncingOps:   TODO,
			SyncedBytes: flushedBytes,
		})
	}

	// The block journal might be empty, but deferredGC might
	// still be non-empty, so we have to wait for that to be empty
	// before nuking the whole journal (see clearDeferredGCRange).

	return totalFlushedBytes, nil
}

// ignoreBlocksAndMDRevMarkersInJournal walks the given disk journal
// from its latest entry back to its earliest one and marks entries
// as ignored:
//
//   - blockPutOp/addRefOp entries whose block ID is in idsToIgnore;
//   - mdRevMarkerOp entries, until (and including) the first one
//     encountered whose revision equals rev.
//
// When dj is the main journal, the data size of every ignored put is
// also subtracted from the unflushed byte count and added to the
// returned total. Entries that can't be read are logged and skipped.
func (j *blockJournal) ignoreBlocksAndMDRevMarkersInJournal(ctx context.Context,
	idsToIgnore map[kbfsblock.ID]bool, rev kbfsmd.Revision,
	dj *diskJournal) (totalIgnoredBytes int64, err error) {
	first, err := dj.readEarliestOrdinal()
	if ioutil.IsNotExist(err) {
		return 0, nil
	} else if err != nil {
		return 0, err
	}
	last, err := dj.readLatestOrdinal()
	if err != nil {
		return 0, err
	}

	// Journals are told apart by their directory.
	isMainJournal := dj.dir == j.j.dir

	// Iterate backwards since the blocks to ignore are likely to be
	// at the end of the journal.
	ignored := 0
	ignoredRev := false
	// i is unsigned, so make sure to handle overflow when `first` is
	// 0 by checking that it's less than `last`. TODO: handle
	// first==0 and last==maxuint?
	for i := last; i >= first && i <= last; i-- {
		entry, err := dj.readJournalEntry(i)
		if err != nil {
			// If we can't read a particular entry while ignoring old
			// revisions, the entry might be corrupt. But returning
			// an error is harsh and dangerous because at this point a
			// new MD revision has already been appended to the MD
			// journal; if we error to the caller then they won't
			// write another marker and on a restart the whole MD
			// journal can be flushed without waiting for the
			// corresponding blocks to flush. See HOTPOT-193. So
			// instead, log the error and keep going in that case. If
			// the entry continues to be unreadable during flush, then
			// the journal (and eventually another round of conflict
			// resolution) will become stuck and the error will
			// surface up to the user.
			j.log.CWarningf(ctx, "Couldn't read journal entry %d: %+v", i, err)
			continue
		}
		e := entry.(blockJournalEntry)

		switch e.Op {
		case blockPutOp, addRefOp:
			id, _, err := e.getSingleContext()
			if err != nil {
				return 0, err
			}

			if !idsToIgnore[id] {
				continue
			}
			// Counts ignored entries, not distinct IDs.
			ignored++

			e.Ignore = true
			err = dj.writeJournalEntry(i, e)
			if err != nil {
				return 0, err
			}

			if e.Op == blockPutOp && isMainJournal {
				// Treat ignored put ops as flushed
				// for the purposes of accounting.
				ignoredBytes, err := j.s.getDataSize(ctx, id)
				if err != nil {
					return 0, err
				}

				err = j.flushBlock(ignoredBytes)
				if err != nil {
					return 0, err
				}

				totalIgnoredBytes += ignoredBytes
			}

		case mdRevMarkerOp:
			if ignoredRev {
				continue
			}

			e.Ignore = true
			err = dj.writeJournalEntry(i, e)
			if err != nil {
				return 0, err
			}

			// We must ignore all the way up to the MD marker that
			// matches the revision of the squash, otherwise we may
			// put the new squash MD before all the blocks have been
			// put.
			if e.Revision == rev {
				ignoredRev = true
			}
		}

		// If we've ignored all of the block IDs in `idsToIgnore`, and
		// the earliest md marker we care about, we can avoid
		// iterating through the rest of the journal.
		if len(idsToIgnore) == ignored && ignoredRev {
			break
		}
	}

	return totalIgnoredBytes, nil
}

// ignoreBlocksAndMDRevMarkers marks, in the main journal, the
// put/addRef entries for the given block IDs and the MD revision
// markers back to rev as ignored; see
// ignoreBlocksAndMDRevMarkersInJournal for the details.
func (j *blockJournal) ignoreBlocksAndMDRevMarkers(ctx context.Context,
	blocksToIgnore []kbfsblock.ID, rev kbfsmd.Revision) (
	totalIgnoredBytes int64, err error) {
	idsToIgnore := make(map[kbfsblock.ID]bool)
	for _, id := range blocksToIgnore {
		idsToIgnore[id] = true
	}

	return j.ignoreBlocksAndMDRevMarkersInJournal(
		ctx, idsToIgnore, rev, j.j)
}

// getDeferredGCRange gets the earliest and latest ordinal of the
// deferred GC journal. If the returned length is 0, there's no need
// for further GC.
1091 func (j *blockJournal) getDeferredGCRange() ( 1092 len int, earliest, latest journalOrdinal, err error) { 1093 earliest, err = j.deferredGC.readEarliestOrdinal() 1094 if ioutil.IsNotExist(err) { 1095 return 0, 0, 0, nil 1096 } else if err != nil { 1097 return 0, 0, 0, err 1098 } 1099 1100 latest, err = j.deferredGC.readLatestOrdinal() 1101 if ioutil.IsNotExist(err) { 1102 return 0, 0, 0, nil 1103 } else if err != nil { 1104 return 0, 0, 0, err 1105 } 1106 1107 return int(latest - earliest + 1), earliest, latest, nil 1108 } 1109 1110 // doGC collects any unreferenced blocks from flushed 1111 // entries. earliest and latest should be from a call to 1112 // getDeferredGCRange, and clearDeferredGCRange should be called after 1113 // this function. This function only reads the deferred GC journal at 1114 // the given range and reads/writes the block store, so callers may 1115 // use that to relax any synchronization requirements. 1116 func (j *blockJournal) doGC(ctx context.Context, 1117 earliest, latest journalOrdinal) ( 1118 removedBytes, removedFiles int64, err error) { 1119 // Safe to check the earliest ordinal, even if the caller is using 1120 // relaxed synchronization, since this is the only function that 1121 // removes items from the deferred journal. 1122 first, err := j.deferredGC.readEarliestOrdinal() 1123 if err != nil { 1124 return 0, 0, err 1125 } 1126 if first != earliest { 1127 return 0, 0, errors.Errorf("Expected deferred earliest %d, "+ 1128 "but actual earliest is %d", earliest, first) 1129 } 1130 1131 // Delete the block data for anything in the GC journal. 
1132 j.vlog.CLogf( 1133 ctx, libkb.VLog1, "Garbage-collecting blocks for entries [%d, %d]", 1134 earliest, latest) 1135 for i := earliest; i <= latest; i++ { 1136 e, err := j.deferredGC.readJournalEntry(i) 1137 if err != nil { 1138 return 0, 0, err 1139 } 1140 1141 entry, ok := e.(blockJournalEntry) 1142 if !ok { 1143 return 0, 0, errors.New("Unexpected block journal entry type to GC") 1144 } 1145 1146 for id := range entry.Contexts { 1147 // TODO: once we support references, this needs to be made 1148 // goroutine-safe. 1149 hasRef, err := j.s.hasAnyRef(ctx, id) 1150 if err != nil { 1151 return 0, 0, err 1152 } 1153 if !hasRef { 1154 // Garbage-collect the old entry. 1155 idRemovedBytes, idRemovedFiles, err := 1156 j.remove(ctx, id) 1157 if err != nil { 1158 return 0, 0, err 1159 } 1160 removedBytes += idRemovedBytes 1161 removedFiles += idRemovedFiles 1162 } 1163 } 1164 } 1165 1166 return removedBytes, removedFiles, nil 1167 } 1168 1169 // clearDeferredGCRange removes the given range from the deferred 1170 // journal. If the journal goes completely empty, it then nukes the 1171 // journal directories. 1172 func (j *blockJournal) clearDeferredGCRange( 1173 ctx context.Context, removedBytes, removedFiles int64, 1174 earliest, latest journalOrdinal) ( 1175 clearedJournal bool, aggregateInfo blockAggregateInfo, 1176 err error) { 1177 for i := earliest; i <= latest; i++ { 1178 _, err := j.deferredGC.removeEarliest() 1179 if err != nil { 1180 return false, blockAggregateInfo{}, err 1181 } 1182 } 1183 1184 // If we crash before calling this, the journal bytes/files 1185 // counts will be inaccurate. But this will be resolved when 1186 // the journal goes empty in the clause above. 
1187 err = j.unstoreBlocks(removedBytes, removedFiles) 1188 if err != nil { 1189 return false, blockAggregateInfo{}, err 1190 } 1191 1192 aggregateInfo = j.aggregateInfo 1193 1194 if j.empty() { 1195 j.log.CDebugf(ctx, "Block journal is now empty") 1196 1197 j.aggregateInfo = blockAggregateInfo{} 1198 1199 err = j.s.clear() 1200 if err != nil { 1201 return false, blockAggregateInfo{}, err 1202 } 1203 1204 for _, dir := range j.blockJournalFiles() { 1205 j.log.CDebugf(ctx, "Removing all files in %s", dir) 1206 err := ioutil.RemoveAll(dir) 1207 if err != nil { 1208 return false, blockAggregateInfo{}, err 1209 } 1210 } 1211 1212 clearedJournal = true 1213 } 1214 1215 return clearedJournal, aggregateInfo, nil 1216 } 1217 1218 func (j *blockJournal) getAllRefsForTest() (map[kbfsblock.ID]blockRefMap, error) { 1219 refs := make(map[kbfsblock.ID]blockRefMap) 1220 1221 first, err := j.j.readEarliestOrdinal() 1222 if ioutil.IsNotExist(err) { 1223 return refs, nil 1224 } else if err != nil { 1225 return nil, err 1226 } 1227 last, err := j.j.readLatestOrdinal() 1228 if err != nil { 1229 return nil, err 1230 } 1231 1232 for i := first; i <= last; i++ { 1233 e, err := j.readJournalEntry(i) 1234 if err != nil { 1235 return nil, err 1236 } 1237 1238 // Handle single ops separately. 1239 switch e.Op { 1240 case blockPutOp, addRefOp: 1241 id, context, err := e.getSingleContext() 1242 if err != nil { 1243 return nil, err 1244 } 1245 1246 blockRefs := refs[id] 1247 if blockRefs == nil { 1248 blockRefs = make(blockRefMap) 1249 refs[id] = blockRefs 1250 } 1251 1252 err = blockRefs.put(context, liveBlockRef, i.String()) 1253 if err != nil { 1254 return nil, err 1255 } 1256 1257 continue 1258 } 1259 1260 for id, idContexts := range e.Contexts { 1261 blockRefs := refs[id] 1262 1263 switch e.Op { 1264 case removeRefsOp: 1265 if blockRefs == nil { 1266 // All refs are already gone, 1267 // which is not an error. 
1268 continue 1269 } 1270 1271 for _, context := range idContexts { 1272 err := blockRefs.remove(context, "") 1273 if err != nil { 1274 return nil, err 1275 } 1276 } 1277 1278 if len(blockRefs) == 0 { 1279 delete(refs, id) 1280 } 1281 1282 case archiveRefsOp: 1283 if blockRefs == nil { 1284 blockRefs = make(blockRefMap) 1285 refs[id] = blockRefs 1286 } 1287 1288 for _, context := range idContexts { 1289 err := blockRefs.put( 1290 context, archivedBlockRef, i.String()) 1291 if err != nil { 1292 return nil, err 1293 } 1294 } 1295 1296 case mdRevMarkerOp: 1297 // Ignore MD revision markers. 1298 continue 1299 1300 default: 1301 return nil, errors.Errorf("Unknown op %s", e.Op) 1302 } 1303 } 1304 } 1305 return refs, nil 1306 } 1307 1308 func (j *blockJournal) markLatestRevMarkerAsLocalSquash( 1309 mdJournalID kbfsmd.ID) error { 1310 first, err := j.j.readEarliestOrdinal() 1311 if ioutil.IsNotExist(err) { 1312 return nil 1313 } else if err != nil { 1314 return err 1315 } 1316 last, err := j.j.readLatestOrdinal() 1317 if err != nil { 1318 return err 1319 } 1320 1321 // Iterate backwards to find the latest md marker. 1322 for i := last; i >= first && i <= last; i-- { 1323 entry, err := j.j.readJournalEntry(i) 1324 if err != nil { 1325 return err 1326 } 1327 e := entry.(blockJournalEntry) 1328 if e.ignore(mdJournalID) || e.Op != mdRevMarkerOp { 1329 continue 1330 } 1331 1332 e.IsLocalSquash = true 1333 return j.j.writeJournalEntry(i, e) 1334 } 1335 1336 return errors.Errorf("Couldn't find an md rev marker between %d and %d", 1337 first, last) 1338 } 1339 1340 func (j *blockJournal) checkInSyncForTest() error { 1341 journalRefs, err := j.getAllRefsForTest() 1342 if err != nil { 1343 return err 1344 } 1345 1346 storeRefs, err := j.s.getAllRefsForTest() 1347 if err != nil { 1348 return err 1349 } 1350 1351 if !reflect.DeepEqual(journalRefs, storeRefs) { 1352 return errors.Errorf("journal refs = %+v != store refs = %+v", 1353 journalRefs, storeRefs) 1354 } 1355 return nil 1356 }