github.com/swiftstack/ProxyFS@v0.0.0-20210203235616-4017c267d62f/inode/file_flusher.go (about) 1 // Copyright (c) 2015-2021, NVIDIA CORPORATION. 2 // SPDX-License-Identifier: Apache-2.0 3 4 package inode 5 6 import ( 7 "fmt" 8 9 "github.com/swiftstack/ProxyFS/blunder" 10 "github.com/swiftstack/ProxyFS/logger" 11 "github.com/swiftstack/ProxyFS/stats" 12 "github.com/swiftstack/ProxyFS/swiftclient" 13 "github.com/swiftstack/ProxyFS/utils" 14 ) 15 16 func openLogSegmentLRUInsertWhileLocked(inFlightLogSegment *inFlightLogSegmentStruct) { 17 // Place inode at the MRU end of openLogSegmentLRU 18 19 if 0 == globals.openLogSegmentLRUItems { 20 globals.openLogSegmentLRUHead = inFlightLogSegment 21 globals.openLogSegmentLRUTail = inFlightLogSegment 22 globals.openLogSegmentLRUItems = 1 23 } else { 24 inFlightLogSegment.openLogSegmentLRUPrev = globals.openLogSegmentLRUTail 25 inFlightLogSegment.openLogSegmentLRUPrev.openLogSegmentLRUNext = inFlightLogSegment 26 27 globals.openLogSegmentLRUTail = inFlightLogSegment 28 globals.openLogSegmentLRUItems++ 29 } 30 } 31 32 func openLogSegmentLRUInsert(inFlightLogSegment *inFlightLogSegmentStruct) { 33 globals.Lock() 34 openLogSegmentLRUInsertWhileLocked(inFlightLogSegment) 35 globals.Unlock() 36 } 37 38 func openLogSegmentLRUTouchWhileLocked(inFlightLogSegment *inFlightLogSegmentStruct) { 39 // Move inode to the MRU end of openLogSegmentLRU 40 41 if inFlightLogSegment != globals.openLogSegmentLRUTail { 42 if inFlightLogSegment == globals.openLogSegmentLRUHead { 43 globals.openLogSegmentLRUHead = inFlightLogSegment.openLogSegmentLRUNext 44 globals.openLogSegmentLRUHead.openLogSegmentLRUPrev = nil 45 46 inFlightLogSegment.openLogSegmentLRUPrev = globals.openLogSegmentLRUTail 47 inFlightLogSegment.openLogSegmentLRUNext = nil 48 49 globals.openLogSegmentLRUTail.openLogSegmentLRUNext = inFlightLogSegment 50 globals.openLogSegmentLRUTail = inFlightLogSegment 51 } else { 52 inFlightLogSegment.openLogSegmentLRUPrev.openLogSegmentLRUNext = inFlightLogSegment.openLogSegmentLRUNext 53 inFlightLogSegment.openLogSegmentLRUNext.openLogSegmentLRUPrev = inFlightLogSegment.openLogSegmentLRUPrev 54 55 inFlightLogSegment.openLogSegmentLRUNext = nil 56 inFlightLogSegment.openLogSegmentLRUPrev = globals.openLogSegmentLRUTail 57 58 globals.openLogSegmentLRUTail.openLogSegmentLRUNext = inFlightLogSegment 59 globals.openLogSegmentLRUTail = inFlightLogSegment 60 } 61 } 62 } 63 64 func openLogSegmentLRUTouch(inFlightLogSegment *inFlightLogSegmentStruct) { 65 globals.Lock() 66 openLogSegmentLRUTouchWhileLocked(inFlightLogSegment) 67 globals.Unlock() 68 } 69 70 func openLogSegmentLRURemoveWhileLocked(inFlightLogSegment *inFlightLogSegmentStruct) { 71 if inFlightLogSegment == globals.openLogSegmentLRUHead { 72 if inFlightLogSegment == globals.openLogSegmentLRUTail { 73 globals.openLogSegmentLRUHead = nil 74 globals.openLogSegmentLRUTail = nil 75 globals.openLogSegmentLRUItems = 0 76 } else { 77 globals.openLogSegmentLRUHead = inFlightLogSegment.openLogSegmentLRUNext 78 globals.openLogSegmentLRUHead.openLogSegmentLRUPrev = nil 79 globals.openLogSegmentLRUItems-- 80 81 inFlightLogSegment.openLogSegmentLRUNext = nil 82 } 83 } else { 84 if inFlightLogSegment == globals.openLogSegmentLRUTail { 85 globals.openLogSegmentLRUTail = inFlightLogSegment.openLogSegmentLRUPrev 86 globals.openLogSegmentLRUTail.openLogSegmentLRUNext = nil 87 globals.openLogSegmentLRUItems-- 88 89 inFlightLogSegment.openLogSegmentLRUPrev = nil 90 } else { 91 inFlightLogSegment.openLogSegmentLRUPrev.openLogSegmentLRUNext = inFlightLogSegment.openLogSegmentLRUNext 92 inFlightLogSegment.openLogSegmentLRUNext.openLogSegmentLRUPrev = inFlightLogSegment.openLogSegmentLRUPrev 93 globals.openLogSegmentLRUItems-- 94 95 inFlightLogSegment.openLogSegmentLRUNext = nil 96 inFlightLogSegment.openLogSegmentLRUPrev = nil 97 } 98 } 99 } 100 101 func openLogSegmentLRURemove(inFlightLogSegment *inFlightLogSegmentStruct) { 102 globals.Lock() 103 openLogSegmentLRURemoveWhileLocked(inFlightLogSegment) 104 globals.Unlock() 105 } 106 107 func (volumeGroup *volumeGroupStruct) capReadCacheWhileLocked() { 108 for uint64(len(volumeGroup.readCache)) > volumeGroup.readCacheLineCount { 109 110 delete(volumeGroup.readCache, volumeGroup.readCacheLRU.readCacheKey) 111 volumeGroup.readCacheLRU = volumeGroup.readCacheLRU.prev 112 volumeGroup.readCacheLRU.next = nil 113 } 114 } 115 116 func (volumeGroup *volumeGroupStruct) insertReadCacheElementWhileLocked(readCacheElement *readCacheElementStruct) { 117 volumeGroup.readCache[readCacheElement.readCacheKey] = readCacheElement 118 if nil == volumeGroup.readCacheMRU { 119 volumeGroup.readCacheMRU = readCacheElement 120 volumeGroup.readCacheLRU = readCacheElement 121 } else { 122 readCacheElement.next = volumeGroup.readCacheMRU 123 readCacheElement.next.prev = readCacheElement 124 volumeGroup.readCacheMRU = readCacheElement 125 } 126 volumeGroup.capReadCacheWhileLocked() 127 } 128 129 func (volumeGroup *volumeGroupStruct) touchReadCacheElementWhileLocked(readCacheElement *readCacheElementStruct) { 130 if volumeGroup.readCacheMRU != readCacheElement { 131 if readCacheElement == volumeGroup.readCacheLRU { 132 volumeGroup.readCacheLRU = readCacheElement.prev 133 volumeGroup.readCacheLRU.next = nil 134 } else { 135 readCacheElement.prev.next = readCacheElement.next 136 readCacheElement.next.prev = readCacheElement.prev 137 } 138 readCacheElement.next = volumeGroup.readCacheMRU 139 readCacheElement.prev = nil 140 volumeGroup.readCacheMRU.prev = readCacheElement 141 volumeGroup.readCacheMRU = readCacheElement 142 } 143 } 144 145 func (vS *volumeStruct) doReadPlan(fileInode *inMemoryInodeStruct, readPlan []ReadPlanStep, readPlanBytes uint64) (buf []byte, err error) { 146 var ( 147 cacheLine []byte 148 cacheLineHitLength uint64 149 cacheLineHitOffset uint64 150 cacheLineStartOffset uint64 151 chunkOffset uint64 152 inFlightHit bool 153 inFlightHitBuf []byte 154 inFlightLogSegment *inFlightLogSegmentStruct 155 readCacheElement *readCacheElementStruct 156 readCacheHit bool 157 readCacheKey readCacheKeyStruct 158 readCacheLineSize uint64 159 remainingLength uint64 160 step ReadPlanStep 161 stepIndex int 162 volumeGroup *volumeGroupStruct 163 ) 164 165 volumeGroup = vS.volumeGroup 166 readCacheLineSize = volumeGroup.readCacheLineSize 167 readCacheKey.volumeName = vS.volumeName 168 169 if 1 == len(readPlan) { 170 // Possibly a trivial case (allowing for a potential zero-copy return)... three exist: 171 // Case 1: The lone step calls for a zero-filled []byte 172 // Case 2: The lone step is satisfied by reading from an inFlightLogSegment 173 // Case 3: The lone step is satisfied by landing completely within a single Read Cache Line 174 175 step = readPlan[0] 176 177 if 0 == step.LogSegmentNumber { 178 // Case 1: The lone step calls for a zero-filled []byte 179 buf = make([]byte, step.Length) 180 stats.IncrementOperationsAndBucketedBytes(stats.FileRead, step.Length) 181 err = nil 182 return 183 } 184 185 fileInode.Lock() 186 187 inFlightLogSegment, inFlightHit = fileInode.inFlightLogSegmentMap[step.LogSegmentNumber] 188 if inFlightHit { 189 // Case 2: The lone step is satisfied by reading from an inFlightLogSegment 190 openLogSegmentLRUTouch(inFlightLogSegment) 191 buf, err = inFlightLogSegment.Read(step.Offset, step.Length) 192 if nil != err { 193 fileInode.Unlock() 194 logger.ErrorfWithError(err, "Reading back inFlightLogSegment failed - optimal case") 195 err = blunder.AddError(err, blunder.SegReadError) 196 return 197 } 198 fileInode.Unlock() 199 stats.IncrementOperations(&stats.FileWritebackHitOps) 200 stats.IncrementOperationsAndBucketedBytes(stats.FileRead, step.Length) 201 return 202 } 203 204 stats.IncrementOperations(&stats.FileWritebackMissOps) 205 206 fileInode.Unlock() 207 208 cacheLineHitOffset = step.Offset % readCacheLineSize 209 210 if (cacheLineHitOffset + step.Length) <= readCacheLineSize { 211 // Case 3: The lone step is satisfied by landing completely within a single Read Cache Line 212 readCacheKey.logSegmentNumber = step.LogSegmentNumber 213 readCacheKey.cacheLineTag = step.Offset / readCacheLineSize 214 215 volumeGroup.Lock() 216 217 readCacheElement, readCacheHit = volumeGroup.readCache[readCacheKey] 218 219 if readCacheHit { 220 volumeGroup.touchReadCacheElementWhileLocked(readCacheElement) 221 cacheLine = readCacheElement.cacheLine 222 volumeGroup.Unlock() 223 stats.IncrementOperations(&stats.FileReadcacheHitOps) 224 } else { 225 volumeGroup.Unlock() 226 stats.IncrementOperations(&stats.FileReadcacheMissOps) 227 // Make readCacheHit true (at MRU, likely kicking out LRU) 228 cacheLineStartOffset = readCacheKey.cacheLineTag * readCacheLineSize 229 cacheLine, err = swiftclient.ObjectGet(step.AccountName, step.ContainerName, step.ObjectName, cacheLineStartOffset, readCacheLineSize) 230 if nil != err { 231 logger.ErrorfWithError(err, "Reading from LogSegment object failed - optimal case") 232 err = blunder.AddError(err, blunder.SegReadError) 233 return 234 } 235 readCacheElement = &readCacheElementStruct{ 236 readCacheKey: readCacheKey, 237 next: nil, 238 prev: nil, 239 cacheLine: cacheLine, 240 } 241 volumeGroup.Lock() 242 volumeGroup.insertReadCacheElementWhileLocked(readCacheElement) 243 volumeGroup.Unlock() 244 } 245 246 if (cacheLineHitOffset + step.Length) > uint64(len(cacheLine)) { 247 err = fmt.Errorf("Invalid range for LogSegment object - optimal case") 248 logger.ErrorWithError(err) 249 err = blunder.AddError(err, blunder.SegReadError) 250 return 251 } 252 253 buf = cacheLine[cacheLineHitOffset:(cacheLineHitOffset + step.Length)] 254 255 stats.IncrementOperationsAndBucketedBytes(stats.FileRead, step.Length) 256 257 err = nil 258 return 259 } 260 } 261 262 // If we reach here, normal readPlan processing will be performed... no zero-copy opportunity 263 264 buf = make([]byte, 0, readPlanBytes) 265 266 for stepIndex, step = range readPlan { 267 if 0 == step.LogSegmentNumber { 268 // The step calls for a zero-filled []byte 269 buf = append(buf, make([]byte, step.Length)...) 270 } else { 271 fileInode.Lock() 272 inFlightLogSegment, inFlightHit = fileInode.inFlightLogSegmentMap[step.LogSegmentNumber] 273 if inFlightHit { 274 // The step is satisfied by reading from an inFlightLogSegment 275 openLogSegmentLRUTouch(inFlightLogSegment) 276 inFlightHitBuf, err = inFlightLogSegment.Read(step.Offset, step.Length) 277 if nil != err { 278 fileInode.Unlock() 279 logger.ErrorfWithError(err, "Reading back inFlightLogSegment failed - general case") 280 err = blunder.AddError(err, blunder.SegReadError) 281 return 282 } 283 fileInode.Unlock() 284 buf = append(buf, inFlightHitBuf...) 285 stats.IncrementOperations(&stats.FileWritebackHitOps) 286 } else { 287 fileInode.Unlock() 288 if (0 == stepIndex) && (1 == len(readPlan)) { 289 // No need to increment stats.FileWritebackMissOps since it was incremented above 290 } else { 291 stats.IncrementOperations(&stats.FileWritebackMissOps) 292 } 293 } 294 if !inFlightHit { 295 // The step is satisfied by hitting or missing the Read Cache 296 readCacheKey.logSegmentNumber = step.LogSegmentNumber 297 chunkOffset = step.Offset 298 remainingLength = step.Length 299 for 0 < remainingLength { 300 readCacheKey.cacheLineTag = chunkOffset / readCacheLineSize 301 cacheLineHitOffset = chunkOffset % readCacheLineSize 302 if (cacheLineHitOffset + remainingLength) > readCacheLineSize { 303 // When we've got a cache hit, the read extends beyond the cache line 304 cacheLineHitLength = readCacheLineSize - cacheLineHitOffset 305 } else { 306 // When we've got a cache hit, all the data is inside the cache line 307 cacheLineHitLength = remainingLength 308 } 309 volumeGroup.Lock() 310 readCacheElement, readCacheHit = volumeGroup.readCache[readCacheKey] 311 if readCacheHit { 312 volumeGroup.touchReadCacheElementWhileLocked(readCacheElement) 313 cacheLine = readCacheElement.cacheLine 314 volumeGroup.Unlock() 315 stats.IncrementOperations(&stats.FileReadcacheHitOps) 316 } else { 317 volumeGroup.Unlock() 318 stats.IncrementOperations(&stats.FileReadcacheMissOps) 319 // Make readCacheHit true (at MRU, likely kicking out LRU) 320 cacheLineStartOffset = readCacheKey.cacheLineTag * readCacheLineSize 321 cacheLine, err = swiftclient.ObjectGet(step.AccountName, step.ContainerName, step.ObjectName, cacheLineStartOffset, readCacheLineSize) 322 if nil != err { 323 logger.ErrorfWithError(err, "Reading from LogSegment object failed - general case") 324 err = blunder.AddError(err, blunder.SegReadError) 325 return 326 } 327 readCacheElement = &readCacheElementStruct{ 328 readCacheKey: readCacheKey, 329 next: nil, 330 prev: nil, 331 cacheLine: cacheLine, 332 } 333 volumeGroup.Lock() 334 volumeGroup.insertReadCacheElementWhileLocked(readCacheElement) 335 volumeGroup.Unlock() 336 } 337 if (cacheLineHitOffset + cacheLineHitLength) > uint64(len(cacheLine)) { 338 err = fmt.Errorf("Invalid range for LogSegment object - general case") 339 logger.ErrorWithError(err) 340 err = blunder.AddError(err, blunder.SegReadError) 341 return 342 } 343 buf = append(buf, cacheLine[cacheLineHitOffset:(cacheLineHitOffset+cacheLineHitLength)]...) 344 chunkOffset += cacheLineHitLength 345 remainingLength -= cacheLineHitLength 346 } 347 } 348 } 349 } 350 351 stats.IncrementOperationsAndBucketedBytes(stats.FileRead, uint64(len(buf))) 352 353 err = nil 354 return 355 } 356 357 func (vS *volumeStruct) doSendChunk(fileInode *inMemoryInodeStruct, buf []byte) (logSegmentNumber uint64, logSegmentOffset uint64, err error) { 358 var ( 359 inFlightLogSegment *inFlightLogSegmentStruct 360 openLogSegmentContainerName string 361 openLogSegmentObjectNumber uint64 362 ) 363 364 fileInode.Lock() 365 366 if nil == fileInode.openLogSegment { 367 // Drop fileInode Lock while preparing an inFlightLogSegment. This is to avoid a deadlock where 368 // starvation for ChunkedPutContext's might need to grab this fileInode's Lock to check a previous 369 // openLogSegment associated with this fileInode (and, hence, when we looked was then on the 370 // openLogSegmentLRU). 371 372 fileInode.Unlock() 373 374 openLogSegmentContainerName, openLogSegmentObjectNumber, err = fileInode.volume.provisionObject() 375 if nil != err { 376 logger.ErrorfWithError(err, "Provisioning LogSegment failed") 377 return 378 } 379 380 err = fileInode.volume.setLogSegmentContainer(openLogSegmentObjectNumber, openLogSegmentContainerName) 381 if nil != err { 382 logger.ErrorfWithError(err, "Recording LogSegment ContainerName failed") 383 return 384 } 385 386 inFlightLogSegment = &inFlightLogSegmentStruct{ 387 logSegmentNumber: openLogSegmentObjectNumber, 388 fileInode: fileInode, 389 accountName: fileInode.volume.accountName, 390 containerName: openLogSegmentContainerName, 391 objectName: utils.Uint64ToHexStr(openLogSegmentObjectNumber), 392 } 393 394 inFlightLogSegment.ChunkedPutContext, err = swiftclient.ObjectFetchChunkedPutContext(inFlightLogSegment.accountName, inFlightLogSegment.containerName, inFlightLogSegment.objectName, "") 395 if nil != err { 396 logger.ErrorfWithError(err, "Starting Chunked PUT to LogSegment failed") 397 return 398 } 399 400 // Now reestablish the fileInode Lock before continuing 401 402 fileInode.Lock() 403 404 fileInode.inFlightLogSegmentMap[inFlightLogSegment.logSegmentNumber] = inFlightLogSegment 405 406 fileInode.openLogSegment = inFlightLogSegment 407 openLogSegmentLRUInsert(inFlightLogSegment) 408 } else { 409 inFlightLogSegment = fileInode.openLogSegment 410 openLogSegmentLRUTouch(inFlightLogSegment) 411 } 412 413 logSegmentNumber = inFlightLogSegment.logSegmentNumber 414 415 logSegmentOffset, err = inFlightLogSegment.BytesPut() 416 if nil != err { 417 fileInode.Unlock() 418 logger.ErrorfWithError(err, "Failed to get current LogSegmentOffset") 419 return 420 } 421 422 err = inFlightLogSegment.ChunkedPutContext.SendChunk(buf) 423 if nil != err { 424 fileInode.Unlock() 425 logger.ErrorfWithError(err, "Sending Chunked PUT chunk to LogSegment failed") 426 return 427 } 428 429 if (logSegmentOffset + uint64(len(buf))) >= fileInode.volume.maxFlushSize { 430 fileInode.Add(1) 431 go vS.inFlightLogSegmentFlusher(inFlightLogSegment, true) 432 // No need to wait for it to complete now... that's only in doFileInodeDataFlush() 433 } 434 435 fileInode.Unlock() 436 437 err = nil 438 return 439 } 440 441 func (vS *volumeStruct) doFileInodeDataFlush(fileInode *inMemoryInodeStruct) (err error) { 442 var ( 443 inFlightLogSegment *inFlightLogSegmentStruct 444 ) 445 446 fileInode.Lock() 447 if nil != fileInode.openLogSegment { 448 inFlightLogSegment = fileInode.openLogSegment 449 fileInode.Add(1) 450 go vS.inFlightLogSegmentFlusher(inFlightLogSegment, true) 451 } 452 fileInode.Unlock() 453 454 // Wait for all invocations of inFlightLogSegmentFlusher() for this fileInode have completed 455 456 fileInode.Wait() 457 458 // REVIEW TODO: Does anybody ever empty the errors map? Should they? Would this mask prior errors? 459 // File system could go "read only" if that's sufficient... 460 // Problem with write-back data... must discard it... 461 462 if 0 == len(fileInode.inFlightLogSegmentErrors) { 463 err = nil 464 } else { 465 err = fmt.Errorf("Errors encountered while flushing inFlightLogSegments") 466 } 467 468 return 469 } 470 471 func (vS *volumeStruct) inFlightLogSegmentFlusher(inFlightLogSegment *inFlightLogSegmentStruct, doDone bool) { 472 var ( 473 err error 474 fileInode *inMemoryInodeStruct 475 ) 476 477 // Handle the race between a DLM-serialized Flush triggering this versus the starvatation condition 478 // doing so... Either one will perform the appropriate steps to enable the Flush() to complete. 479 480 fileInode = inFlightLogSegment.fileInode 481 482 fileInode.Lock() 483 484 if inFlightLogSegment != fileInode.openLogSegment { 485 // Either a Close() is already in progress or has already completed 486 487 fileInode.Unlock() 488 if doDone { 489 fileInode.Done() 490 } 491 return 492 } 493 494 // This, and inFlightLogSegment still being in fileInode.inFlightLogSegmentMap, 495 // means "a Close() is already in progress" 496 497 fileInode.openLogSegment = nil 498 499 // Terminate Chunked PUT while not holding fileInode.Lock 500 501 fileInode.Unlock() 502 err = inFlightLogSegment.Close() 503 fileInode.Lock() 504 505 // Finish up... recording error (if any) in the process 506 507 if nil != err { 508 err = blunder.AddError(err, blunder.InodeFlushError) 509 fileInode.inFlightLogSegmentErrors[inFlightLogSegment.logSegmentNumber] = err 510 } 511 512 delete(inFlightLogSegment.fileInode.inFlightLogSegmentMap, inFlightLogSegment.logSegmentNumber) 513 514 openLogSegmentLRURemove(inFlightLogSegment) 515 516 fileInode.Unlock() 517 518 if doDone { 519 fileInode.Done() 520 } 521 } 522 523 func chunkedPutConnectionPoolStarvationCallback() { 524 var ( 525 fileInode *inMemoryInodeStruct 526 inFlightLogSegment *inFlightLogSegmentStruct 527 volume *volumeStruct 528 ) 529 530 globals.Lock() 531 532 if 0 == globals.openLogSegmentLRUItems { 533 globals.Unlock() 534 return 535 } 536 537 inFlightLogSegment = globals.openLogSegmentLRUHead 538 539 fileInode = inFlightLogSegment.fileInode 540 volume = fileInode.volume 541 542 globals.Unlock() 543 544 // Call inFlightLogSegmentFlusher() synchronously because we only want to return when it completes 545 // and we don't want to call fileInode.Wait() as this would wait until all invocations of 546 // inFlightLogSegmentFlusher() for the fileInode have completed. 547 548 volume.inFlightLogSegmentFlusher(inFlightLogSegment, false) 549 }