github.com/keybase/client/go@v0.0.0-20240309051027-028f7c731f8b/kbfs/data/dirty_file.go (about) 1 // Copyright 2016 Keybase Inc. All rights reserved. 2 // Use of this source code is governed by a BSD 3 // license that can be found in the LICENSE file. 4 5 package data 6 7 import ( 8 "context" 9 "fmt" 10 "sync" 11 ) 12 13 // dirtyBlockSyncState represents that state of a block with respect to 14 // whether it's currently being synced. There can be three states: 15 // 0. Not being synced 16 // 1. Currently being synced to the server. 17 // 2. Finished syncing, but the rest of the sync hasn't finished yet. 18 type dirtyBlockSyncState int 19 20 const ( 21 blockNotSyncing dirtyBlockSyncState = iota 22 blockSyncing 23 blockSynced 24 ) 25 26 // dirtyBlockCopyState represents that state of a block with respect 27 // to whether it needs to be copied if it is modified by the caller. 28 type dirtyBlockCopyState int 29 30 const ( 31 blockNeedsCopy dirtyBlockCopyState = iota 32 blockAlreadyCopied 33 ) 34 35 type dirtyBlockState struct { 36 sync dirtyBlockSyncState 37 copy dirtyBlockCopyState 38 syncSize int64 39 // An "orphaned" block is one that is now referred to in an 40 // indirect file block under its new, permanent block ID. Once a 41 // block is orphaned, it is no longer re-dirtiable. 42 orphaned bool 43 } 44 45 // DirtyFile represents a particular file that's been written to, but 46 // has not yet completed syncing its dirty blocks to the server. 47 type DirtyFile struct { 48 Path Path 49 dirtyBcache DirtyBlockCache 50 51 // Protects access to the fields below. Most, but not all, 52 // accesses to dirtyFile is already protected by 53 // folderBlockOps.blockLock, so this lock should always be taken 54 // just in case. 55 lock sync.Mutex 56 // Which blocks are currently being synced and still need copying, 57 // so that writes and truncates can do copy-on-write to avoid 58 // messing up the ongoing sync. If it is blockSyncing and 59 // blockNeedsCopy, then any write to the block should result in a 60 // deep copy and those writes should be deferred; if it is 61 // blockSyncing and blockAlreadyCopied, then just defer the 62 // writes. 63 fileBlockStates map[BlockPointer]dirtyBlockState 64 // notYetSyncingBytes is the number of bytes that are dirty in the 65 // file, but haven't yet started syncing to the server yet. 66 notYetSyncingBytes int64 67 // totalSyncBytes is the total number of outstanding dirty bytes 68 // for this file, including those blocks that have already 69 // finished syncing. 70 totalSyncBytes int64 71 // deferredNewBytes is the number of bytes that have been 72 // deferred, and will be rewritten after the current sync 73 // finishes. It counts only new bytes that extended the file as 74 // part of the deferred write. This is useful in the case where 75 // the current sync gets retried due to a recoverable error, and 76 // those bytes get sucked into the retry and need to be accounted 77 // for. 78 deferredNewBytes int64 79 // If there are too many deferred bytes outstanding, writes should 80 // add themselves to this list. They will be able to receive on 81 // the channel on an outstanding Sync() completes. If they 82 // receive an error, they should fail the write. 83 errListeners []chan<- error 84 } 85 86 // NewDirtyFile constructs a new `DirtyFile` instance. 87 func NewDirtyFile(file Path, dirtyBcache DirtyBlockCache) *DirtyFile { 88 return &DirtyFile{ 89 Path: file, 90 dirtyBcache: dirtyBcache, 91 fileBlockStates: make(map[BlockPointer]dirtyBlockState), 92 } 93 } 94 95 // BlockNeedsCopy returns true if the block should be copied by anyone 96 // who next tries to modify it. 97 func (df *DirtyFile) BlockNeedsCopy(ptr BlockPointer) bool { 98 df.lock.Lock() 99 defer df.lock.Unlock() 100 return df.fileBlockStates[ptr].copy == blockNeedsCopy 101 } 102 103 // UpdateNotYetSyncingBytes adds `newBytes` to the number of 104 // outstanding to-be-synced bytes. 105 func (df *DirtyFile) UpdateNotYetSyncingBytes(newBytes int64) { 106 df.lock.Lock() 107 defer df.lock.Unlock() 108 df.notYetSyncingBytes += newBytes 109 if df.notYetSyncingBytes < 0 { 110 // It would be better if we didn't have this check, but it's 111 // hard for folderBlockOps to account correctly when bytes in 112 // a syncing block are overwritten, and then the write is 113 // deferred (see KBFS-2157). 114 df.notYetSyncingBytes = 0 115 } 116 df.dirtyBcache.UpdateUnsyncedBytes(df.Path.Tlf, newBytes, false) 117 } 118 119 // SetBlockDirty transitions a block to a dirty state, and returns 120 // whether or not the block needs to be put in the dirty cache 121 // (because it isn't yet), and whether or not the block is currently 122 // part of a sync in progress. 123 func (df *DirtyFile) SetBlockDirty(ptr BlockPointer) ( 124 needsCaching bool, isSyncing bool) { 125 df.lock.Lock() 126 defer df.lock.Unlock() 127 128 state := df.fileBlockStates[ptr] 129 needsCaching = state.copy == blockNeedsCopy 130 state.copy = blockAlreadyCopied 131 isSyncing = state.sync != blockNotSyncing 132 df.fileBlockStates[ptr] = state 133 return needsCaching, isSyncing 134 } 135 136 func (df *DirtyFile) setBlockNotDirty(ptr BlockPointer) ( 137 needsCaching bool, isSyncing bool) { 138 df.lock.Lock() 139 defer df.lock.Unlock() 140 state := df.fileBlockStates[ptr] 141 state.copy = blockNeedsCopy 142 df.fileBlockStates[ptr] = state 143 return 144 } 145 146 // IsBlockOrphaned returns true if the block has been orphaned and can 147 // no longer be reached in the file. 148 func (df *DirtyFile) IsBlockOrphaned(ptr BlockPointer) bool { 149 df.lock.Lock() 150 defer df.lock.Unlock() 151 return df.fileBlockStates[ptr].orphaned 152 } 153 154 // SetBlockSyncing is called to indicate that the block pointed to by 155 // `ptr` is currently being synced. 156 func (df *DirtyFile) SetBlockSyncing( 157 ctx context.Context, ptr BlockPointer) error { 158 df.lock.Lock() 159 defer df.lock.Unlock() 160 state := df.fileBlockStates[ptr] 161 if state.copy == blockNeedsCopy { 162 return fmt.Errorf("Trying to sync a block that isn't dirty: %v", ptr) 163 } 164 state.copy = blockNeedsCopy 165 state.sync = blockSyncing 166 block, err := df.dirtyBcache.Get(ctx, df.Path.Tlf, ptr, df.Path.Branch) 167 if err != nil { 168 // The dirty block cache must always contain the dirty block 169 // until the full sync is completely done. If the block is 170 // gone before we have even finished syncing it, that is a 171 // fatal bug, because no one would be able to read the dirtied 172 // version of the block. 173 panic(err) 174 } 175 fblock, ok := block.(*FileBlock) 176 if !ok { 177 panic("Dirty file syncing a non-file block") 178 } 179 state.syncSize = int64(len(fblock.Contents)) 180 df.totalSyncBytes += state.syncSize 181 df.notYetSyncingBytes -= state.syncSize 182 df.fileBlockStates[ptr] = state 183 df.dirtyBcache.UpdateSyncingBytes(df.Path.Tlf, state.syncSize) 184 return nil 185 } 186 187 // ResetSyncingBlocksToDirty can be called when a sync failed, and all 188 // the syncing blocks need to transition back to being dirty. 189 func (df *DirtyFile) ResetSyncingBlocksToDirty() { 190 df.lock.Lock() 191 defer df.lock.Unlock() 192 // Reset all syncing blocks to just be dirty again 193 syncFinishedNeeded := false 194 for ptr, state := range df.fileBlockStates { 195 if state.orphaned { 196 // This block will never be sync'd again, so clear any 197 // bytes from the buffer. 198 if state.sync == blockSyncing { 199 df.dirtyBcache.UpdateUnsyncedBytes(df.Path.Tlf, 200 -state.syncSize, true) 201 } else if state.sync == blockSynced { 202 // Some blocks did finish, so we might be able to 203 // increase our buffer size. 204 syncFinishedNeeded = true 205 } 206 state.syncSize = 0 207 delete(df.fileBlockStates, ptr) 208 continue 209 } 210 if state.sync == blockSynced { 211 // Re-dirty the unsynced bytes (but don't touch the total 212 // bytes). 213 df.dirtyBcache.BlockSyncFinished(df.Path.Tlf, -state.syncSize) 214 } else if state.sync == blockSyncing { 215 df.dirtyBcache.UpdateSyncingBytes(df.Path.Tlf, -state.syncSize) 216 } 217 if state.sync != blockNotSyncing { 218 state.copy = blockAlreadyCopied 219 state.sync = blockNotSyncing 220 state.syncSize = 0 221 df.fileBlockStates[ptr] = state 222 } 223 } 224 if syncFinishedNeeded { 225 df.dirtyBcache.SyncFinished(df.Path.Tlf, df.totalSyncBytes) 226 } 227 df.totalSyncBytes = 0 // all the blocks need to be re-synced. 228 } 229 230 func (df *DirtyFile) setBlockSyncedLocked(ptr BlockPointer) error { 231 state, ok := df.fileBlockStates[ptr] 232 if !ok || (state.copy == blockAlreadyCopied && 233 state.sync == blockNotSyncing) { 234 // We've likely already had an resetSyncingBlocksToDirty; ignore. 235 return nil 236 } 237 238 if state.sync != blockSyncing && !state.orphaned { 239 return fmt.Errorf("Trying to finish a block sync that wasn't in "+ 240 "progress: %v (%v)", ptr, df.fileBlockStates[ptr]) 241 } 242 state.sync = blockSynced 243 df.dirtyBcache.BlockSyncFinished(df.Path.Tlf, state.syncSize) 244 // Keep syncSize set in case the block needs to be re-dirtied due 245 // to an error. 246 df.fileBlockStates[ptr] = state 247 return nil 248 } 249 250 func (df *DirtyFile) setBlockSynced(ptr BlockPointer) error { 251 df.lock.Lock() 252 defer df.lock.Unlock() 253 return df.setBlockSyncedLocked(ptr) 254 } 255 256 // FinishSync is called to indicate that a sync has finished 257 // successfully. 258 func (df *DirtyFile) FinishSync() error { 259 // Mark any remaining blocks as finished syncing. For now, only 260 // the top-level indirect block needs this because they are added 261 // to the blockPutState by folderBranchOps, not folderBlockOps. 262 df.lock.Lock() 263 defer df.lock.Unlock() 264 265 // Reset all syncing blocks to just be dirty again (there should 266 // only be one, equal to the original top block). 267 found := false 268 for ptr, state := range df.fileBlockStates { 269 if state.orphaned { 270 continue 271 } 272 if state.sync == blockSyncing { 273 if found { 274 return fmt.Errorf("Unexpected syncing block %v", ptr) 275 } 276 if ptr != df.Path.TailPointer() { 277 return fmt.Errorf("Unexpected syncing block %v; expected %v", 278 ptr, df.Path.TailPointer()) 279 } 280 found = true 281 err := df.setBlockSyncedLocked(ptr) 282 if err != nil { 283 return err 284 } 285 } 286 } 287 df.dirtyBcache.SyncFinished(df.Path.Tlf, df.totalSyncBytes) 288 df.totalSyncBytes = 0 289 df.deferredNewBytes = 0 290 if df.notYetSyncingBytes > 0 { 291 // The sync will never happen (probably because the underlying 292 // file was removed). 293 df.dirtyBcache.UpdateUnsyncedBytes(df.Path.Tlf, 294 -df.notYetSyncingBytes, false) 295 df.notYetSyncingBytes = 0 296 } 297 return nil 298 } 299 300 // AddErrListener adds a callback that will be invoked if an error 301 // happens during the sync. 302 func (df *DirtyFile) AddErrListener(listener chan<- error) { 303 df.lock.Lock() 304 defer df.lock.Unlock() 305 df.errListeners = append(df.errListeners, listener) 306 } 307 308 // NotifyErrListeners notifies all registered callbacks that an error 309 // happened, if `err` is `nil`. It also resets the registered 310 // listeners. 311 func (df *DirtyFile) NotifyErrListeners(err error) { 312 df.lock.Lock() 313 listeners := df.errListeners 314 df.errListeners = nil 315 df.lock.Unlock() 316 if err == nil { 317 return 318 } 319 for _, listener := range listeners { 320 listener <- err 321 } 322 } 323 324 // NumErrListeners returns the number of registered error listeners. 325 func (df *DirtyFile) NumErrListeners() int { 326 df.lock.Lock() 327 defer df.lock.Unlock() 328 return len(df.errListeners) 329 } 330 331 // SetBlockOrphaned is called to indicate that a block has been 332 // orphaned, and can no longer be reached within the file. 333 func (df *DirtyFile) SetBlockOrphaned(ptr BlockPointer, orphaned bool) { 334 df.lock.Lock() 335 defer df.lock.Unlock() 336 state, ok := df.fileBlockStates[ptr] 337 if !ok { 338 return 339 } 340 state.orphaned = orphaned 341 df.fileBlockStates[ptr] = state 342 } 343 344 // AddDeferredNewBytes adds `bytes` to the count of all the bytes that 345 // have been deferred until after the current sync finishes. 346 func (df *DirtyFile) AddDeferredNewBytes(bytes int64) { 347 df.lock.Lock() 348 defer df.lock.Unlock() 349 df.deferredNewBytes += bytes 350 } 351 352 // AssimilateDeferredNewBytes is called to indicate that any deferred 353 // bytes should be included in the count of the next sync. 354 func (df *DirtyFile) AssimilateDeferredNewBytes() { 355 df.lock.Lock() 356 defer df.lock.Unlock() 357 if df.deferredNewBytes == 0 { 358 return 359 } 360 df.dirtyBcache.UpdateUnsyncedBytes(df.Path.Tlf, df.deferredNewBytes, false) 361 df.deferredNewBytes = 0 362 }