github.com/rohankumardubey/proxyfs@v0.0.0-20210108201508-653efa9ab00e/inode/file_flusher.go (about) 1 package inode 2 3 import ( 4 "fmt" 5 6 "github.com/swiftstack/ProxyFS/blunder" 7 "github.com/swiftstack/ProxyFS/logger" 8 "github.com/swiftstack/ProxyFS/stats" 9 "github.com/swiftstack/ProxyFS/swiftclient" 10 "github.com/swiftstack/ProxyFS/utils" 11 ) 12 13 func openLogSegmentLRUInsertWhileLocked(inFlightLogSegment *inFlightLogSegmentStruct) { 14 // Place inode at the MRU end of openLogSegmentLRU 15 16 if 0 == globals.openLogSegmentLRUItems { 17 globals.openLogSegmentLRUHead = inFlightLogSegment 18 globals.openLogSegmentLRUTail = inFlightLogSegment 19 globals.openLogSegmentLRUItems = 1 20 } else { 21 inFlightLogSegment.openLogSegmentLRUPrev = globals.openLogSegmentLRUTail 22 inFlightLogSegment.openLogSegmentLRUPrev.openLogSegmentLRUNext = inFlightLogSegment 23 24 globals.openLogSegmentLRUTail = inFlightLogSegment 25 globals.openLogSegmentLRUItems++ 26 } 27 } 28 29 func openLogSegmentLRUInsert(inFlightLogSegment *inFlightLogSegmentStruct) { 30 globals.Lock() 31 openLogSegmentLRUInsertWhileLocked(inFlightLogSegment) 32 globals.Unlock() 33 } 34 35 func openLogSegmentLRUTouchWhileLocked(inFlightLogSegment *inFlightLogSegmentStruct) { 36 // Move inode to the MRU end of openLogSegmentLRU 37 38 if inFlightLogSegment != globals.openLogSegmentLRUTail { 39 if inFlightLogSegment == globals.openLogSegmentLRUHead { 40 globals.openLogSegmentLRUHead = inFlightLogSegment.openLogSegmentLRUNext 41 globals.openLogSegmentLRUHead.openLogSegmentLRUPrev = nil 42 43 inFlightLogSegment.openLogSegmentLRUPrev = globals.openLogSegmentLRUTail 44 inFlightLogSegment.openLogSegmentLRUNext = nil 45 46 globals.openLogSegmentLRUTail.openLogSegmentLRUNext = inFlightLogSegment 47 globals.openLogSegmentLRUTail = inFlightLogSegment 48 } else { 49 inFlightLogSegment.openLogSegmentLRUPrev.openLogSegmentLRUNext = inFlightLogSegment.openLogSegmentLRUNext 50 inFlightLogSegment.openLogSegmentLRUNext.openLogSegmentLRUPrev = inFlightLogSegment.openLogSegmentLRUPrev 51 52 inFlightLogSegment.openLogSegmentLRUNext = nil 53 inFlightLogSegment.openLogSegmentLRUPrev = globals.openLogSegmentLRUTail 54 55 globals.openLogSegmentLRUTail.openLogSegmentLRUNext = inFlightLogSegment 56 globals.openLogSegmentLRUTail = inFlightLogSegment 57 } 58 } 59 } 60 61 func openLogSegmentLRUTouch(inFlightLogSegment *inFlightLogSegmentStruct) { 62 globals.Lock() 63 openLogSegmentLRUTouchWhileLocked(inFlightLogSegment) 64 globals.Unlock() 65 } 66 67 func openLogSegmentLRURemoveWhileLocked(inFlightLogSegment *inFlightLogSegmentStruct) { 68 if inFlightLogSegment == globals.openLogSegmentLRUHead { 69 if inFlightLogSegment == globals.openLogSegmentLRUTail { 70 globals.openLogSegmentLRUHead = nil 71 globals.openLogSegmentLRUTail = nil 72 globals.openLogSegmentLRUItems = 0 73 } else { 74 globals.openLogSegmentLRUHead = inFlightLogSegment.openLogSegmentLRUNext 75 globals.openLogSegmentLRUHead.openLogSegmentLRUPrev = nil 76 globals.openLogSegmentLRUItems-- 77 78 inFlightLogSegment.openLogSegmentLRUNext = nil 79 } 80 } else { 81 if inFlightLogSegment == globals.openLogSegmentLRUTail { 82 globals.openLogSegmentLRUTail = inFlightLogSegment.openLogSegmentLRUPrev 83 globals.openLogSegmentLRUTail.openLogSegmentLRUNext = nil 84 globals.openLogSegmentLRUItems-- 85 86 inFlightLogSegment.openLogSegmentLRUPrev = nil 87 } else { 88 inFlightLogSegment.openLogSegmentLRUPrev.openLogSegmentLRUNext = inFlightLogSegment.openLogSegmentLRUNext 89 inFlightLogSegment.openLogSegmentLRUNext.openLogSegmentLRUPrev = inFlightLogSegment.openLogSegmentLRUPrev 90 globals.openLogSegmentLRUItems-- 91 92 inFlightLogSegment.openLogSegmentLRUNext = nil 93 inFlightLogSegment.openLogSegmentLRUPrev = nil 94 } 95 } 96 } 97 98 func openLogSegmentLRURemove(inFlightLogSegment *inFlightLogSegmentStruct) { 99 globals.Lock() 100 openLogSegmentLRURemoveWhileLocked(inFlightLogSegment) 101 globals.Unlock() 102 } 103 104 func (volumeGroup *volumeGroupStruct) capReadCacheWhileLocked() { 105 for uint64(len(volumeGroup.readCache)) > volumeGroup.readCacheLineCount { 106 delete(volumeGroup.readCache, volumeGroup.readCacheLRU.readCacheKey) 107 volumeGroup.readCacheLRU = volumeGroup.readCacheLRU.prev 108 volumeGroup.readCacheLRU.next = nil 109 } 110 } 111 112 func (volumeGroup *volumeGroupStruct) insertReadCacheElementWhileLocked(readCacheElement *readCacheElementStruct) { 113 volumeGroup.readCache[readCacheElement.readCacheKey] = readCacheElement 114 if nil == volumeGroup.readCacheMRU { 115 volumeGroup.readCacheMRU = readCacheElement 116 volumeGroup.readCacheLRU = readCacheElement 117 } else { 118 readCacheElement.next = volumeGroup.readCacheMRU 119 readCacheElement.next.prev = readCacheElement 120 volumeGroup.readCacheMRU = readCacheElement 121 } 122 volumeGroup.capReadCacheWhileLocked() 123 } 124 125 func (volumeGroup *volumeGroupStruct) touchReadCacheElementWhileLocked(readCacheElement *readCacheElementStruct) { 126 if volumeGroup.readCacheMRU != readCacheElement { 127 if readCacheElement == volumeGroup.readCacheLRU { 128 volumeGroup.readCacheLRU = readCacheElement.prev 129 volumeGroup.readCacheLRU.next = nil 130 } else { 131 readCacheElement.prev.next = readCacheElement.next 132 readCacheElement.next.prev = readCacheElement.prev 133 } 134 readCacheElement.next = volumeGroup.readCacheMRU 135 readCacheElement.prev = nil 136 volumeGroup.readCacheMRU.prev = readCacheElement 137 volumeGroup.readCacheMRU = readCacheElement 138 } 139 } 140 141 func (vS *volumeStruct) doReadPlan(fileInode *inMemoryInodeStruct, readPlan []ReadPlanStep, readPlanBytes uint64) (buf []byte, err error) { 142 var ( 143 cacheLine []byte 144 cacheLineHitLength uint64 145 cacheLineHitOffset uint64 146 cacheLineStartOffset uint64 147 chunkOffset uint64 148 inFlightHit bool 149 inFlightHitBuf []byte 150 inFlightLogSegment *inFlightLogSegmentStruct 151 readCacheElement *readCacheElementStruct 152 readCacheHit bool 153 readCacheKey readCacheKeyStruct 154 readCacheLineSize uint64 155 remainingLength uint64 156 step ReadPlanStep 157 stepIndex int 158 volumeGroup *volumeGroupStruct 159 ) 160 161 volumeGroup = vS.volumeGroup 162 readCacheLineSize = volumeGroup.readCacheLineSize 163 readCacheKey.volumeName = vS.volumeName 164 165 if 1 == len(readPlan) { 166 // Possibly a trivial case (allowing for a potential zero-copy return)... three exist: 167 // Case 1: The lone step calls for a zero-filled []byte 168 // Case 2: The lone step is satisfied by reading from an inFlightLogSegment 169 // Case 3: The lone step is satisfied by landing completely within a single Read Cache Line 170 171 step = readPlan[0] 172 173 if 0 == step.LogSegmentNumber { 174 // Case 1: The lone step calls for a zero-filled []byte 175 buf = make([]byte, step.Length) 176 stats.IncrementOperationsAndBucketedBytes(stats.FileRead, step.Length) 177 err = nil 178 return 179 } 180 181 fileInode.Lock() 182 183 inFlightLogSegment, inFlightHit = fileInode.inFlightLogSegmentMap[step.LogSegmentNumber] 184 if inFlightHit { 185 // Case 2: The lone step is satisfied by reading from an inFlightLogSegment 186 openLogSegmentLRUTouch(inFlightLogSegment) 187 buf, err = inFlightLogSegment.Read(step.Offset, step.Length) 188 if nil != err { 189 fileInode.Unlock() 190 logger.ErrorfWithError(err, "Reading back inFlightLogSegment failed - optimal case") 191 err = blunder.AddError(err, blunder.SegReadError) 192 return 193 } 194 fileInode.Unlock() 195 stats.IncrementOperations(&stats.FileWritebackHitOps) 196 stats.IncrementOperationsAndBucketedBytes(stats.FileRead, step.Length) 197 return 198 } 199 200 stats.IncrementOperations(&stats.FileWritebackMissOps) 201 202 fileInode.Unlock() 203 204 cacheLineHitOffset = step.Offset % readCacheLineSize 205 206 if (cacheLineHitOffset + step.Length) <= readCacheLineSize { 207 // Case 3: The lone step is satisfied by landing completely within a single Read Cache Line 208 readCacheKey.logSegmentNumber = step.LogSegmentNumber 209 readCacheKey.cacheLineTag = step.Offset / readCacheLineSize 210 211 volumeGroup.Lock() 212 213 readCacheElement, readCacheHit = volumeGroup.readCache[readCacheKey] 214 215 if readCacheHit { 216 volumeGroup.touchReadCacheElementWhileLocked(readCacheElement) 217 cacheLine = readCacheElement.cacheLine 218 volumeGroup.Unlock() 219 stats.IncrementOperations(&stats.FileReadcacheHitOps) 220 } else { 221 volumeGroup.Unlock() 222 stats.IncrementOperations(&stats.FileReadcacheMissOps) 223 // Make readCacheHit true (at MRU, likely kicking out LRU) 224 cacheLineStartOffset = readCacheKey.cacheLineTag * readCacheLineSize 225 cacheLine, err = swiftclient.ObjectGet(step.AccountName, step.ContainerName, step.ObjectName, cacheLineStartOffset, readCacheLineSize) 226 if nil != err { 227 logger.ErrorfWithError(err, "Reading from LogSegment object failed - optimal case") 228 err = blunder.AddError(err, blunder.SegReadError) 229 return 230 } 231 readCacheElement = &readCacheElementStruct{ 232 readCacheKey: readCacheKey, 233 next: nil, 234 prev: nil, 235 cacheLine: cacheLine, 236 } 237 volumeGroup.Lock() 238 volumeGroup.insertReadCacheElementWhileLocked(readCacheElement) 239 volumeGroup.Unlock() 240 } 241 242 if (cacheLineHitOffset + step.Length) > uint64(len(cacheLine)) { 243 err = fmt.Errorf("Invalid range for LogSegment object - optimal case") 244 logger.ErrorWithError(err) 245 err = blunder.AddError(err, blunder.SegReadError) 246 return 247 } 248 249 buf = cacheLine[cacheLineHitOffset:(cacheLineHitOffset + step.Length)] 250 251 stats.IncrementOperationsAndBucketedBytes(stats.FileRead, step.Length) 252 253 err = nil 254 return 255 } 256 } 257 258 // If we reach here, normal readPlan processing will be performed... no zero-copy opportunity 259 260 buf = make([]byte, 0, readPlanBytes) 261 262 for stepIndex, step = range readPlan { 263 if 0 == step.LogSegmentNumber { 264 // The step calls for a zero-filled []byte 265 buf = append(buf, make([]byte, step.Length)...) 266 } else { 267 fileInode.Lock() 268 inFlightLogSegment, inFlightHit = fileInode.inFlightLogSegmentMap[step.LogSegmentNumber] 269 if inFlightHit { 270 // The step is satisfied by reading from an inFlightLogSegment 271 openLogSegmentLRUTouch(inFlightLogSegment) 272 inFlightHitBuf, err = inFlightLogSegment.Read(step.Offset, step.Length) 273 if nil != err { 274 fileInode.Unlock() 275 logger.ErrorfWithError(err, "Reading back inFlightLogSegment failed - general case") 276 err = blunder.AddError(err, blunder.SegReadError) 277 return 278 } 279 fileInode.Unlock() 280 buf = append(buf, inFlightHitBuf...) 281 stats.IncrementOperations(&stats.FileWritebackHitOps) 282 } else { 283 fileInode.Unlock() 284 if (0 == stepIndex) && (1 == len(readPlan)) { 285 // No need to increment stats.FileWritebackMissOps since it was incremented above 286 } else { 287 stats.IncrementOperations(&stats.FileWritebackMissOps) 288 } 289 } 290 if !inFlightHit { 291 // The step is satisfied by hitting or missing the Read Cache 292 readCacheKey.logSegmentNumber = step.LogSegmentNumber 293 chunkOffset = step.Offset 294 remainingLength = step.Length 295 for 0 < remainingLength { 296 readCacheKey.cacheLineTag = chunkOffset / readCacheLineSize 297 cacheLineHitOffset = chunkOffset % readCacheLineSize 298 if (cacheLineHitOffset + remainingLength) > readCacheLineSize { 299 // When we've got a cache hit, the read extends beyond the cache line 300 cacheLineHitLength = readCacheLineSize - cacheLineHitOffset 301 } else { 302 // When we've got a cache hit, all the data is inside the cache line 303 cacheLineHitLength = remainingLength 304 } 305 volumeGroup.Lock() 306 readCacheElement, readCacheHit = volumeGroup.readCache[readCacheKey] 307 if readCacheHit { 308 volumeGroup.touchReadCacheElementWhileLocked(readCacheElement) 309 cacheLine = readCacheElement.cacheLine 310 volumeGroup.Unlock() 311 stats.IncrementOperations(&stats.FileReadcacheHitOps) 312 } else { 313 volumeGroup.Unlock() 314 stats.IncrementOperations(&stats.FileReadcacheMissOps) 315 // Make readCacheHit true (at MRU, likely kicking out LRU) 316 cacheLineStartOffset = readCacheKey.cacheLineTag * readCacheLineSize 317 cacheLine, err = swiftclient.ObjectGet(step.AccountName, step.ContainerName, step.ObjectName, cacheLineStartOffset, readCacheLineSize) 318 if nil != err { 319 logger.ErrorfWithError(err, "Reading from LogSegment object failed - general case") 320 err = blunder.AddError(err, blunder.SegReadError) 321 return 322 } 323 readCacheElement = &readCacheElementStruct{ 324 readCacheKey: readCacheKey, 325 next: nil, 326 prev: nil, 327 cacheLine: cacheLine, 328 } 329 volumeGroup.Lock() 330 volumeGroup.insertReadCacheElementWhileLocked(readCacheElement) 331 volumeGroup.Unlock() 332 } 333 if (cacheLineHitOffset + cacheLineHitLength) > uint64(len(cacheLine)) { 334 err = fmt.Errorf("Invalid range for LogSegment object - general case") 335 logger.ErrorWithError(err) 336 err = blunder.AddError(err, blunder.SegReadError) 337 return 338 } 339 buf = append(buf, cacheLine[cacheLineHitOffset:(cacheLineHitOffset+cacheLineHitLength)]...) 340 chunkOffset += cacheLineHitLength 341 remainingLength -= cacheLineHitLength 342 } 343 } 344 } 345 } 346 347 stats.IncrementOperationsAndBucketedBytes(stats.FileRead, uint64(len(buf))) 348 349 err = nil 350 return 351 } 352 353 func (vS *volumeStruct) doSendChunk(fileInode *inMemoryInodeStruct, buf []byte) (logSegmentNumber uint64, logSegmentOffset uint64, err error) { 354 var ( 355 inFlightLogSegment *inFlightLogSegmentStruct 356 openLogSegmentContainerName string 357 openLogSegmentObjectNumber uint64 358 ) 359 360 fileInode.Lock() 361 362 if nil == fileInode.openLogSegment { 363 // Drop fileInode Lock while preparing an inFlightLogSegment. This is to avoid a deadlock where 364 // starvation for ChunkedPutContext's might need to grab this fileInode's Lock to check a previous 365 // openLogSegment associated with this fileInode (and, hence, when we looked was then on the 366 // openLogSegmentLRU). 367 368 fileInode.Unlock() 369 370 openLogSegmentContainerName, openLogSegmentObjectNumber, err = fileInode.volume.provisionObject() 371 if nil != err { 372 logger.ErrorfWithError(err, "Provisioning LogSegment failed") 373 return 374 } 375 376 err = fileInode.volume.setLogSegmentContainer(openLogSegmentObjectNumber, openLogSegmentContainerName) 377 if nil != err { 378 logger.ErrorfWithError(err, "Recording LogSegment ContainerName failed") 379 return 380 } 381 382 inFlightLogSegment = &inFlightLogSegmentStruct{ 383 logSegmentNumber: openLogSegmentObjectNumber, 384 fileInode: fileInode, 385 accountName: fileInode.volume.accountName, 386 containerName: openLogSegmentContainerName, 387 objectName: utils.Uint64ToHexStr(openLogSegmentObjectNumber), 388 } 389 390 inFlightLogSegment.ChunkedPutContext, err = swiftclient.ObjectFetchChunkedPutContext(inFlightLogSegment.accountName, inFlightLogSegment.containerName, inFlightLogSegment.objectName, "") 391 if nil != err { 392 logger.ErrorfWithError(err, "Starting Chunked PUT to LogSegment failed") 393 return 394 } 395 396 // Now reestablish the fileInode Lock before continuing 397 398 fileInode.Lock() 399 400 fileInode.inFlightLogSegmentMap[inFlightLogSegment.logSegmentNumber] = inFlightLogSegment 401 402 fileInode.openLogSegment = inFlightLogSegment 403 openLogSegmentLRUInsert(inFlightLogSegment) 404 } else { 405 inFlightLogSegment = fileInode.openLogSegment 406 openLogSegmentLRUTouch(inFlightLogSegment) 407 } 408 409 logSegmentNumber = inFlightLogSegment.logSegmentNumber 410 411 logSegmentOffset, err = inFlightLogSegment.BytesPut() 412 if nil != err { 413 fileInode.Unlock() 414 logger.ErrorfWithError(err, "Failed to get current LogSegmentOffset") 415 return 416 } 417 418 err = inFlightLogSegment.ChunkedPutContext.SendChunk(buf) 419 if nil != err { 420 fileInode.Unlock() 421 logger.ErrorfWithError(err, "Sending Chunked PUT chunk to LogSegment failed") 422 return 423 } 424 425 if (logSegmentOffset + uint64(len(buf))) >= fileInode.volume.maxFlushSize { 426 fileInode.Add(1) 427 go vS.inFlightLogSegmentFlusher(inFlightLogSegment, true) 428 // No need to wait for it to complete now... that's only in doFileInodeDataFlush() 429 } 430 431 fileInode.Unlock() 432 433 err = nil 434 return 435 } 436 437 func (vS *volumeStruct) doFileInodeDataFlush(fileInode *inMemoryInodeStruct) (err error) { 438 var ( 439 inFlightLogSegment *inFlightLogSegmentStruct 440 ) 441 442 fileInode.Lock() 443 if nil != fileInode.openLogSegment { 444 inFlightLogSegment = fileInode.openLogSegment 445 fileInode.Add(1) 446 go vS.inFlightLogSegmentFlusher(inFlightLogSegment, true) 447 } 448 fileInode.Unlock() 449 450 // Wait for all invocations of inFlightLogSegmentFlusher() for this fileInode have completed 451 452 fileInode.Wait() 453 454 // REVIEW TODO: Does anybody ever empty the errors map? Should they? Would this mask prior errors? 455 // File system could go "read only" if that's sufficient... 456 // Problem with write-back data... must discard it... 457 458 if 0 == len(fileInode.inFlightLogSegmentErrors) { 459 err = nil 460 } else { 461 err = fmt.Errorf("Errors encountered while flushing inFlightLogSegments") 462 } 463 464 return 465 } 466 467 func (vS *volumeStruct) inFlightLogSegmentFlusher(inFlightLogSegment *inFlightLogSegmentStruct, doDone bool) { 468 var ( 469 err error 470 fileInode *inMemoryInodeStruct 471 ) 472 473 // Handle the race between a DLM-serialized Flush triggering this versus the starvatation condition 474 // doing so... Either one will perform the appropriate steps to enable the Flush() to complete. 475 476 fileInode = inFlightLogSegment.fileInode 477 478 fileInode.Lock() 479 480 if inFlightLogSegment != fileInode.openLogSegment { 481 // Either a Close() is already in progress or has already completed 482 483 fileInode.Unlock() 484 if doDone { 485 fileInode.Done() 486 } 487 return 488 } 489 490 // This, and inFlightLogSegment still being in fileInode.inFlightLogSegmentMap, 491 // means "a Close() is already in progress" 492 493 fileInode.openLogSegment = nil 494 495 // Terminate Chunked PUT while not holding fileInode.Lock 496 497 fileInode.Unlock() 498 err = inFlightLogSegment.Close() 499 fileInode.Lock() 500 501 // Finish up... recording error (if any) in the process 502 503 if nil != err { 504 err = blunder.AddError(err, blunder.InodeFlushError) 505 fileInode.inFlightLogSegmentErrors[inFlightLogSegment.logSegmentNumber] = err 506 } 507 508 delete(inFlightLogSegment.fileInode.inFlightLogSegmentMap, inFlightLogSegment.logSegmentNumber) 509 510 openLogSegmentLRURemove(inFlightLogSegment) 511 512 fileInode.Unlock() 513 514 if doDone { 515 fileInode.Done() 516 } 517 } 518 519 func chunkedPutConnectionPoolStarvationCallback() { 520 var ( 521 fileInode *inMemoryInodeStruct 522 inFlightLogSegment *inFlightLogSegmentStruct 523 volume *volumeStruct 524 ) 525 526 globals.Lock() 527 528 if 0 == globals.openLogSegmentLRUItems { 529 globals.Unlock() 530 return 531 } 532 533 inFlightLogSegment = globals.openLogSegmentLRUHead 534 535 fileInode = inFlightLogSegment.fileInode 536 volume = fileInode.volume 537 538 globals.Unlock() 539 540 // Call inFlightLogSegmentFlusher() synchronously because we only want to return when it completes 541 // and we don't want to call fileInode.Wait() as this would wait until all invocations of 542 // inFlightLogSegmentFlusher() for the fileInode have completed. 543 544 volume.inFlightLogSegmentFlusher(inFlightLogSegment, false) 545 }