github.com/keybase/client/go@v0.0.0-20240309051027-028f7c731f8b/kbfs/data/dirty_file.go (about)

     1  // Copyright 2016 Keybase Inc. All rights reserved.
     2  // Use of this source code is governed by a BSD
     3  // license that can be found in the LICENSE file.
     4  
     5  package data
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"sync"
    11  )
    12  
    13  // dirtyBlockSyncState represents that state of a block with respect to
    14  // whether it's currently being synced.  There can be three states:
    15  //  0. Not being synced
    16  //  1. Currently being synced to the server.
    17  //  2. Finished syncing, but the rest of the sync hasn't finished yet.
    18  type dirtyBlockSyncState int
    19  
    20  const (
    21  	blockNotSyncing dirtyBlockSyncState = iota
    22  	blockSyncing
    23  	blockSynced
    24  )
    25  
    26  // dirtyBlockCopyState represents that state of a block with respect
    27  // to whether it needs to be copied if it is modified by the caller.
    28  type dirtyBlockCopyState int
    29  
    30  const (
    31  	blockNeedsCopy dirtyBlockCopyState = iota
    32  	blockAlreadyCopied
    33  )
    34  
// dirtyBlockState is the per-block record that DirtyFile keeps in its
// fileBlockStates map.  Its zero value means "not syncing, needs a
// copy, not orphaned".
//
//   - sync is the block's sync state (not syncing, syncing, or synced).
//   - copy says whether the block must be copied before it is modified.
//   - syncSize is the length of the block's contents as recorded by
//     SetBlockSyncing; it is reset to zero when a failed sync returns
//     the block to the dirty state.
type dirtyBlockState struct {
	sync     dirtyBlockSyncState
	copy     dirtyBlockCopyState
	syncSize int64
	// An "orphaned" block is one that is now referred to in an
	// indirect file block under its new, permanent block ID.  Once a
	// block is orphaned, it is no longer re-dirtiable.
	orphaned bool
}
    44  
// DirtyFile represents a particular file that's been written to, but
// has not yet completed syncing its dirty blocks to the server.  It
// tracks per-block sync/copy state and the byte counts that are
// reported to the dirty block cache.
type DirtyFile struct {
	// Path is the path of the file, and dirtyBcache is the dirty
	// block cache that receives this file's byte-accounting updates.
	// Both are set once by NewDirtyFile.
	Path        Path
	dirtyBcache DirtyBlockCache

	// Protects access to the fields below.  Most, but not all,
	// accesses to a DirtyFile are already protected by
	// folderBlockOps.blockLock, so this lock should always be taken
	// just in case.
	lock sync.Mutex
	// Which blocks are currently being synced and still need copying,
	// so that writes and truncates can do copy-on-write to avoid
	// messing up the ongoing sync.  If it is blockSyncing and
	// blockNeedsCopy, then any write to the block should result in a
	// deep copy and those writes should be deferred; if it is
	// blockSyncing and blockAlreadyCopied, then just defer the
	// writes.
	fileBlockStates map[BlockPointer]dirtyBlockState
	// notYetSyncingBytes is the number of bytes that are dirty in the
	// file, but haven't started syncing to the server yet.
	notYetSyncingBytes int64
	// totalSyncBytes is the total number of outstanding dirty bytes
	// for this file, including those blocks that have already
	// finished syncing.
	totalSyncBytes int64
	// deferredNewBytes is the number of bytes that have been
	// deferred, and will be rewritten after the current sync
	// finishes.  It counts only new bytes that extended the file as
	// part of the deferred write.  This is useful in the case where
	// the current sync gets retried due to a recoverable error, and
	// those bytes get sucked into the retry and need to be accounted
	// for.
	deferredNewBytes int64
	// If there are too many deferred bytes outstanding, writes should
	// add themselves to this list.  They will be able to receive on
	// the channel once an outstanding Sync() completes.  If they
	// receive an error, they should fail the write.
	errListeners []chan<- error
}
    85  
    86  // NewDirtyFile constructs a new `DirtyFile` instance.
    87  func NewDirtyFile(file Path, dirtyBcache DirtyBlockCache) *DirtyFile {
    88  	return &DirtyFile{
    89  		Path:            file,
    90  		dirtyBcache:     dirtyBcache,
    91  		fileBlockStates: make(map[BlockPointer]dirtyBlockState),
    92  	}
    93  }
    94  
    95  // BlockNeedsCopy returns true if the block should be copied by anyone
    96  // who next tries to modify it.
    97  func (df *DirtyFile) BlockNeedsCopy(ptr BlockPointer) bool {
    98  	df.lock.Lock()
    99  	defer df.lock.Unlock()
   100  	return df.fileBlockStates[ptr].copy == blockNeedsCopy
   101  }
   102  
   103  // UpdateNotYetSyncingBytes adds `newBytes` to the number of
   104  // outstanding to-be-synced bytes.
   105  func (df *DirtyFile) UpdateNotYetSyncingBytes(newBytes int64) {
   106  	df.lock.Lock()
   107  	defer df.lock.Unlock()
   108  	df.notYetSyncingBytes += newBytes
   109  	if df.notYetSyncingBytes < 0 {
   110  		// It would be better if we didn't have this check, but it's
   111  		// hard for folderBlockOps to account correctly when bytes in
   112  		// a syncing block are overwritten, and then the write is
   113  		// deferred (see KBFS-2157).
   114  		df.notYetSyncingBytes = 0
   115  	}
   116  	df.dirtyBcache.UpdateUnsyncedBytes(df.Path.Tlf, newBytes, false)
   117  }
   118  
   119  // SetBlockDirty transitions a block to a dirty state, and returns
   120  // whether or not the block needs to be put in the dirty cache
   121  // (because it isn't yet), and whether or not the block is currently
   122  // part of a sync in progress.
   123  func (df *DirtyFile) SetBlockDirty(ptr BlockPointer) (
   124  	needsCaching bool, isSyncing bool) {
   125  	df.lock.Lock()
   126  	defer df.lock.Unlock()
   127  
   128  	state := df.fileBlockStates[ptr]
   129  	needsCaching = state.copy == blockNeedsCopy
   130  	state.copy = blockAlreadyCopied
   131  	isSyncing = state.sync != blockNotSyncing
   132  	df.fileBlockStates[ptr] = state
   133  	return needsCaching, isSyncing
   134  }
   135  
   136  func (df *DirtyFile) setBlockNotDirty(ptr BlockPointer) (
   137  	needsCaching bool, isSyncing bool) {
   138  	df.lock.Lock()
   139  	defer df.lock.Unlock()
   140  	state := df.fileBlockStates[ptr]
   141  	state.copy = blockNeedsCopy
   142  	df.fileBlockStates[ptr] = state
   143  	return
   144  }
   145  
   146  // IsBlockOrphaned returns true if the block has been orphaned and can
   147  // no longer be reached in the file.
   148  func (df *DirtyFile) IsBlockOrphaned(ptr BlockPointer) bool {
   149  	df.lock.Lock()
   150  	defer df.lock.Unlock()
   151  	return df.fileBlockStates[ptr].orphaned
   152  }
   153  
   154  // SetBlockSyncing is called to indicate that the block pointed to by
   155  // `ptr` is currently being synced.
   156  func (df *DirtyFile) SetBlockSyncing(
   157  	ctx context.Context, ptr BlockPointer) error {
   158  	df.lock.Lock()
   159  	defer df.lock.Unlock()
   160  	state := df.fileBlockStates[ptr]
   161  	if state.copy == blockNeedsCopy {
   162  		return fmt.Errorf("Trying to sync a block that isn't dirty: %v", ptr)
   163  	}
   164  	state.copy = blockNeedsCopy
   165  	state.sync = blockSyncing
   166  	block, err := df.dirtyBcache.Get(ctx, df.Path.Tlf, ptr, df.Path.Branch)
   167  	if err != nil {
   168  		// The dirty block cache must always contain the dirty block
   169  		// until the full sync is completely done.  If the block is
   170  		// gone before we have even finished syncing it, that is a
   171  		// fatal bug, because no one would be able to read the dirtied
   172  		// version of the block.
   173  		panic(err)
   174  	}
   175  	fblock, ok := block.(*FileBlock)
   176  	if !ok {
   177  		panic("Dirty file syncing a non-file block")
   178  	}
   179  	state.syncSize = int64(len(fblock.Contents))
   180  	df.totalSyncBytes += state.syncSize
   181  	df.notYetSyncingBytes -= state.syncSize
   182  	df.fileBlockStates[ptr] = state
   183  	df.dirtyBcache.UpdateSyncingBytes(df.Path.Tlf, state.syncSize)
   184  	return nil
   185  }
   186  
   187  // ResetSyncingBlocksToDirty can be called when a sync failed, and all
   188  // the syncing blocks need to transition back to being dirty.
   189  func (df *DirtyFile) ResetSyncingBlocksToDirty() {
   190  	df.lock.Lock()
   191  	defer df.lock.Unlock()
   192  	// Reset all syncing blocks to just be dirty again
   193  	syncFinishedNeeded := false
   194  	for ptr, state := range df.fileBlockStates {
   195  		if state.orphaned {
   196  			// This block will never be sync'd again, so clear any
   197  			// bytes from the buffer.
   198  			if state.sync == blockSyncing {
   199  				df.dirtyBcache.UpdateUnsyncedBytes(df.Path.Tlf,
   200  					-state.syncSize, true)
   201  			} else if state.sync == blockSynced {
   202  				// Some blocks did finish, so we might be able to
   203  				// increase our buffer size.
   204  				syncFinishedNeeded = true
   205  			}
   206  			state.syncSize = 0
   207  			delete(df.fileBlockStates, ptr)
   208  			continue
   209  		}
   210  		if state.sync == blockSynced {
   211  			// Re-dirty the unsynced bytes (but don't touch the total
   212  			// bytes).
   213  			df.dirtyBcache.BlockSyncFinished(df.Path.Tlf, -state.syncSize)
   214  		} else if state.sync == blockSyncing {
   215  			df.dirtyBcache.UpdateSyncingBytes(df.Path.Tlf, -state.syncSize)
   216  		}
   217  		if state.sync != blockNotSyncing {
   218  			state.copy = blockAlreadyCopied
   219  			state.sync = blockNotSyncing
   220  			state.syncSize = 0
   221  			df.fileBlockStates[ptr] = state
   222  		}
   223  	}
   224  	if syncFinishedNeeded {
   225  		df.dirtyBcache.SyncFinished(df.Path.Tlf, df.totalSyncBytes)
   226  	}
   227  	df.totalSyncBytes = 0 // all the blocks need to be re-synced.
   228  }
   229  
   230  func (df *DirtyFile) setBlockSyncedLocked(ptr BlockPointer) error {
   231  	state, ok := df.fileBlockStates[ptr]
   232  	if !ok || (state.copy == blockAlreadyCopied &&
   233  		state.sync == blockNotSyncing) {
   234  		// We've likely already had an resetSyncingBlocksToDirty; ignore.
   235  		return nil
   236  	}
   237  
   238  	if state.sync != blockSyncing && !state.orphaned {
   239  		return fmt.Errorf("Trying to finish a block sync that wasn't in "+
   240  			"progress: %v (%v)", ptr, df.fileBlockStates[ptr])
   241  	}
   242  	state.sync = blockSynced
   243  	df.dirtyBcache.BlockSyncFinished(df.Path.Tlf, state.syncSize)
   244  	// Keep syncSize set in case the block needs to be re-dirtied due
   245  	// to an error.
   246  	df.fileBlockStates[ptr] = state
   247  	return nil
   248  }
   249  
   250  func (df *DirtyFile) setBlockSynced(ptr BlockPointer) error {
   251  	df.lock.Lock()
   252  	defer df.lock.Unlock()
   253  	return df.setBlockSyncedLocked(ptr)
   254  }
   255  
   256  // FinishSync is called to indicate that a sync has finished
   257  // successfully.
   258  func (df *DirtyFile) FinishSync() error {
   259  	// Mark any remaining blocks as finished syncing.  For now, only
   260  	// the top-level indirect block needs this because they are added
   261  	// to the blockPutState by folderBranchOps, not folderBlockOps.
   262  	df.lock.Lock()
   263  	defer df.lock.Unlock()
   264  
   265  	// Reset all syncing blocks to just be dirty again (there should
   266  	// only be one, equal to the original top block).
   267  	found := false
   268  	for ptr, state := range df.fileBlockStates {
   269  		if state.orphaned {
   270  			continue
   271  		}
   272  		if state.sync == blockSyncing {
   273  			if found {
   274  				return fmt.Errorf("Unexpected syncing block %v", ptr)
   275  			}
   276  			if ptr != df.Path.TailPointer() {
   277  				return fmt.Errorf("Unexpected syncing block %v; expected %v",
   278  					ptr, df.Path.TailPointer())
   279  			}
   280  			found = true
   281  			err := df.setBlockSyncedLocked(ptr)
   282  			if err != nil {
   283  				return err
   284  			}
   285  		}
   286  	}
   287  	df.dirtyBcache.SyncFinished(df.Path.Tlf, df.totalSyncBytes)
   288  	df.totalSyncBytes = 0
   289  	df.deferredNewBytes = 0
   290  	if df.notYetSyncingBytes > 0 {
   291  		// The sync will never happen (probably because the underlying
   292  		// file was removed).
   293  		df.dirtyBcache.UpdateUnsyncedBytes(df.Path.Tlf,
   294  			-df.notYetSyncingBytes, false)
   295  		df.notYetSyncingBytes = 0
   296  	}
   297  	return nil
   298  }
   299  
   300  // AddErrListener adds a callback that will be invoked if an error
   301  // happens during the sync.
   302  func (df *DirtyFile) AddErrListener(listener chan<- error) {
   303  	df.lock.Lock()
   304  	defer df.lock.Unlock()
   305  	df.errListeners = append(df.errListeners, listener)
   306  }
   307  
   308  // NotifyErrListeners notifies all registered callbacks that an error
   309  // happened, if `err` is `nil`.  It also resets the registered
   310  // listeners.
   311  func (df *DirtyFile) NotifyErrListeners(err error) {
   312  	df.lock.Lock()
   313  	listeners := df.errListeners
   314  	df.errListeners = nil
   315  	df.lock.Unlock()
   316  	if err == nil {
   317  		return
   318  	}
   319  	for _, listener := range listeners {
   320  		listener <- err
   321  	}
   322  }
   323  
   324  // NumErrListeners returns the number of registered error listeners.
   325  func (df *DirtyFile) NumErrListeners() int {
   326  	df.lock.Lock()
   327  	defer df.lock.Unlock()
   328  	return len(df.errListeners)
   329  }
   330  
   331  // SetBlockOrphaned is called to indicate that a block has been
   332  // orphaned, and can no longer be reached within the file.
   333  func (df *DirtyFile) SetBlockOrphaned(ptr BlockPointer, orphaned bool) {
   334  	df.lock.Lock()
   335  	defer df.lock.Unlock()
   336  	state, ok := df.fileBlockStates[ptr]
   337  	if !ok {
   338  		return
   339  	}
   340  	state.orphaned = orphaned
   341  	df.fileBlockStates[ptr] = state
   342  }
   343  
   344  // AddDeferredNewBytes adds `bytes` to the count of all the bytes that
   345  // have been deferred until after the current sync finishes.
   346  func (df *DirtyFile) AddDeferredNewBytes(bytes int64) {
   347  	df.lock.Lock()
   348  	defer df.lock.Unlock()
   349  	df.deferredNewBytes += bytes
   350  }
   351  
   352  // AssimilateDeferredNewBytes is called to indicate that any deferred
   353  // bytes should be included in the count of the next sync.
   354  func (df *DirtyFile) AssimilateDeferredNewBytes() {
   355  	df.lock.Lock()
   356  	defer df.lock.Unlock()
   357  	if df.deferredNewBytes == 0 {
   358  		return
   359  	}
   360  	df.dirtyBcache.UpdateUnsyncedBytes(df.Path.Tlf, df.deferredNewBytes, false)
   361  	df.deferredNewBytes = 0
   362  }