gitlab.com/SiaPrime/SiaPrime@v1.4.1/modules/host/contractmanager/writeaheadlogsync.go

package contractmanager

import (
	"encoding/json"
	"path/filepath"
	"reflect"
	"sync"
	"sync/atomic"
	"time"

	"gitlab.com/SiaPrime/SiaPrime/build"
)

// syncResources will call Sync on all resources that the WAL has open. The
// storage folder files will be left open, as they are not updated atomically.
// The settings file and WAL tmp files will be synced and closed, to perform
// an atomic update to the files.
func (wal *writeAheadLog) syncResources() {
	// Syncing occurs over multiple files and disks, and is done in parallel
	// to minimize the amount of time that a lock is held over the contract
	// manager.
	var wg sync.WaitGroup

	// Sync the settings file.
	wg.Add(1)
	go func() {
		defer wg.Done()

		if wal.fileSettingsTmp == nil {
			// Nothing to sync.
			return
		}

		tmpFilename := filepath.Join(wal.cm.persistDir, settingsFileTmp)
		filename := filepath.Join(wal.cm.persistDir, settingsFile)
		err := wal.fileSettingsTmp.Sync()
		if err != nil {
			wal.cm.log.Severe("ERROR: unable to sync the contract manager settings:", err)
		}
		err = wal.fileSettingsTmp.Close()
		if err != nil {
			wal.cm.log.Println("unable to close the temporary contract manager settings file:", err)
		}

		// For testing, provide a place to interrupt the saving of the sync
		// file. This makes it easy to simulate certain types of unclean
		// shutdown.
		if wal.cm.dependencies.Disrupt("settingsSyncRename") {
			// The current settings file that is being re-written will not be
			// saved.
			return
		}

		err = wal.cm.dependencies.RenameFile(tmpFilename, filename)
		if err != nil {
			wal.cm.log.Severe("ERROR: unable to atomically copy the contract manager settings:", err)
		}
	}()

	// Sync all of the storage folders.
	for _, sf := range wal.cm.storageFolders {
		// Skip operation on unavailable storage folders.
		if atomic.LoadUint64(&sf.atomicUnavailable) == 1 {
			continue
		}

		wg.Add(2)
		go func(sf *storageFolder) {
			defer wg.Done()
			err := sf.metadataFile.Sync()
			if err != nil {
				wal.cm.log.Severe("ERROR: unable to sync a storage folder:", err)
			}
		}(sf)
		go func(sf *storageFolder) {
			defer wg.Done()
			err := sf.sectorFile.Sync()
			if err != nil {
				wal.cm.log.Severe("ERROR: unable to sync a storage folder:", err)
			}
		}(sf)
	}

	// Sync the temp WAL file, but do not perform the atomic rename - the
	// atomic rename must be guaranteed to happen after all of the other files
	// have been synced.
	wg.Add(1)
	go func() {
		defer wg.Done()
		if len(wal.uncommittedChanges) == 0 {
			// Nothing to sync.
			return
		}

		err := wal.fileWALTmp.Sync()
		if err != nil {
			wal.cm.log.Severe("Unable to sync the write-ahead-log:", err)
		}
		err = wal.fileWALTmp.Close()
		if err != nil {
			// Log that the host is having trouble saving the uncommitted
			// changes.
			wal.cm.log.Println("ERROR: could not close temporary write-ahead-log in contract manager:", err)
			return
		}
	}()

	// Wait for all of the sync calls to finish.
	wg.Wait()
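
	// The settings update above relies on the standard write-to-temp, sync,
	// then rename pattern for atomically replacing a file on disk. A minimal
	// standalone sketch of the same pattern, using plain os calls instead of
	// the injected dependencies, with hypothetical filenames and contents:
	//
	//	tmp, _ := os.Create("settings.json_temp")
	//	tmp.Write(newContents) // write the complete new file contents
	//	tmp.Sync()             // flush to disk before renaming
	//	tmp.Close()
	//	os.Rename("settings.json_temp", "settings.json") // atomic replacement on POSIX filesystems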

	// Now that all the Sync calls have completed, rename the WAL tmp file to
	// update the WAL.
	if len(wal.uncommittedChanges) != 0 && !wal.cm.dependencies.Disrupt("walRename") {
		walTmpName := filepath.Join(wal.cm.persistDir, walFileTmp)
		walFileName := filepath.Join(wal.cm.persistDir, walFile)
		err := wal.cm.dependencies.RenameFile(walTmpName, walFileName)
		if err != nil {
			// Log that the host is having trouble saving the uncommitted
			// changes.
			wal.cm.log.Severe("ERROR: could not rename temporary write-ahead-log in contract manager:", err)
		}
	}

	// Perform any cleanup actions on the updates.
	for _, sc := range wal.uncommittedChanges {
		for _, sfe := range sc.StorageFolderExtensions {
			wal.commitStorageFolderExtension(sfe)
		}
		for _, sfr := range sc.StorageFolderReductions {
			wal.commitStorageFolderReduction(sfr)
		}
		for _, sfr := range sc.StorageFolderRemovals {
			wal.commitStorageFolderRemoval(sfr)
		}

		// TODO: Virtual sector handling here.
	}

	// Now that the WAL is synced and updated, any calls waiting on ACID
	// guarantees can safely return.
	close(wal.syncChan)
	wal.syncChan = make(chan struct{})
}

// commit will take all of the changes that have been added to the WAL and
// atomically commit the WAL to disk, then apply the actions in the WAL to
// the state. commit will do lots of syncing disk I/O, and so can take a
// while, especially if there are a large number of actions queued up.
//
// commit should only be called from threadedSyncLoop.
func (wal *writeAheadLog) commit() {
	// Sync all open, non-WAL files on the host.
	wal.syncResources()

	// Begin writing to the settings file.
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()

		newSettings := wal.cm.savedSettings()
		if reflect.DeepEqual(newSettings, wal.committedSettings) {
			// No need to write the settings file.
			wal.fileSettingsTmp = nil
			return
		}
		wal.committedSettings = newSettings

		// Begin writing to the settings file, which will be synced during
		// the next iteration of the sync loop.
		var err error
		wal.fileSettingsTmp, err = wal.cm.dependencies.CreateFile(filepath.Join(wal.cm.persistDir, settingsFileTmp))
		if err != nil {
			wal.cm.log.Severe("Unable to open temporary settings file for writing:", err)
		}
		b, err := json.MarshalIndent(newSettings, "", "\t")
		if err != nil {
			wal.cm.log.Severe(build.ExtendErr("unable to marshal settings data", err))
		}
		enc := json.NewEncoder(wal.fileSettingsTmp)
		if err := enc.Encode(settingsMetadata.Header); err != nil {
			wal.cm.log.Severe(build.ExtendErr("unable to write header to settings temp file", err))
		}
		if err := enc.Encode(settingsMetadata.Version); err != nil {
			wal.cm.log.Severe(build.ExtendErr("unable to write version to settings temp file", err))
		}
		if _, err = wal.fileSettingsTmp.Write(b); err != nil {
			wal.cm.log.Severe(build.ExtendErr("unable to write data to settings temp file", err))
		}
	}()

	// Begin writing new changes to the WAL.
	wg.Add(1)
	go func() {
		defer wg.Done()

		if len(wal.uncommittedChanges) == 0 {
			// No need to recreate the WAL.
			return
		}
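
		// On disk, the WAL is a metadata header followed by a sequence of
		// JSON-encoded stateChange values. A rough sketch of how a reader
		// could walk the file during recovery (hypothetical; the actual
		// recovery logic lives elsewhere in this package):
		//
		//	dec := json.NewDecoder(walFile)
		//	// ... read and verify the metadata header first ...
		//	for {
		//		var sc stateChange
		//		if err := dec.Decode(&sc); err != nil {
		//			break // io.EOF, or a partially written final entry
		//		}
		//		// Replay sc against the contract manager state.
		//	}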

		// Extract any unfinished long-running jobs from the list of WAL
		// items.
		unfinishedAdditions := findUnfinishedStorageFolderAdditions(wal.uncommittedChanges)
		unfinishedExtensions := findUnfinishedStorageFolderExtensions(wal.uncommittedChanges)

		// Recreate the WAL file so that it can receive new updates.
		var err error
		walTmpName := filepath.Join(wal.cm.persistDir, walFileTmp)
		wal.fileWALTmp, err = wal.cm.dependencies.CreateFile(walTmpName)
		if err != nil {
			wal.cm.log.Severe("ERROR: unable to create write-ahead-log:", err)
		}
		// Write the metadata into the WAL.
		err = writeWALMetadata(wal.fileWALTmp)
		if err != nil {
			wal.cm.log.Severe("Unable to properly initialize WAL file, crashing to prevent corruption:", err)
		}

		// Append all of the remaining long-running uncommitted changes to
		// the WAL.
		wal.appendChange(stateChange{
			UnfinishedStorageFolderAdditions:  unfinishedAdditions,
			UnfinishedStorageFolderExtensions: unfinishedExtensions,
		})

		// Clear the set of uncommitted changes.
		wal.uncommittedChanges = nil
	}()
	wg.Wait()
}

// spawnSyncLoop prepares and establishes the loop which will be running in
// the background to coordinate disk synchronizations. Disk syncing is done
// in a background loop to help with performance, and to allow multiple
// things to modify the WAL simultaneously.
func (wal *writeAheadLog) spawnSyncLoop() (err error) {
	// Create a signal so we know when the sync loop has stopped, which means
	// there will be no more open commits.
	threadsStopped := make(chan struct{})
	syncLoopStopped := make(chan struct{})
	wal.syncChan = make(chan struct{})
	go wal.threadedSyncLoop(threadsStopped, syncLoopStopped)
	wal.cm.tg.AfterStop(func() {
		// Wait for another iteration of the sync loop, so that the
		// in-progress settings can be saved atomically to disk.
		wal.mu.Lock()
		syncChan := wal.syncChan
		wal.mu.Unlock()
		<-syncChan

		// Close the threadsStopped channel to let the sync loop know that
		// all calls to tg.Add() in the contract manager have cleaned up.
		close(threadsStopped)

		// Because this is being called in an 'AfterStop' routine, all open
		// calls to the contract manager should have completed, and all open
		// threads should have closed. The last call to change the contract
		// manager should have completed, so the number of uncommitted
		// changes should be zero.
		<-syncLoopStopped // Wait for the sync loop to signal proper termination.

		// Allow unclean shutdown to be simulated by disrupting the removal
		// of the WAL file.
		if !wal.cm.dependencies.Disrupt("cleanWALFile") {
			err = wal.cm.dependencies.RemoveFile(filepath.Join(wal.cm.persistDir, walFile))
			if err != nil {
				wal.cm.log.Println("Error removing WAL during contract manager shutdown:", err)
			}
		}
	})
	return nil
}
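
// Disrupt acts as a fault-injection hook: production dependencies return
// false for every string, while test dependencies can return true for a
// specific name to simulate a crash at that exact point. A minimal sketch of
// the pattern with hypothetical types (the real interface is defined with
// the rest of this package's dependencies):
//
//	type productionDependencies struct{}
//
//	func (productionDependencies) Disrupt(string) bool { return false }
//
//	// faultyRename simulates a crash before the WAL rename completes.
//	type faultyRename struct{ productionDependencies }
//
//	func (faultyRename) Disrupt(s string) bool { return s == "walRename" }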

// threadedSyncLoop is a background thread that occasionally commits the WAL
// to the state as an ACID transaction. This process can be very slow, so
// transactions to the contract manager are batched automatically and
// occasionally committed together.
func (wal *writeAheadLog) threadedSyncLoop(threadsStopped chan struct{}, syncLoopStopped chan struct{}) {
	// Provide a place for the testing to disable the sync loop.
	if wal.cm.dependencies.Disrupt("threadedSyncLoopStart") {
		close(syncLoopStopped)
		return
	}

	syncInterval := 500 * time.Millisecond
	for {
		select {
		case <-threadsStopped:
			close(syncLoopStopped)
			return
		case <-time.After(syncInterval):
			// Commit all of the changes in the WAL to disk, and then apply
			// the changes.
			wal.mu.Lock()
			wal.commit()
			wal.mu.Unlock()
		}
	}
}
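
// A typical mutating call elsewhere in this package gets its durability
// guarantee by appending a change and then blocking on syncChan, which
// syncResources closes once the WAL rename has completed. A condensed sketch
// with a hypothetical stateChange sc (real callers live in the other
// writeaheadlog*.go files of this package):
//
//	wal.mu.Lock()
//	wal.appendChange(sc)     // queue the stateChange for the next commit
//	syncChan := wal.syncChan // grab the channel for the current sync cycle
//	wal.mu.Unlock()
//	<-syncChan               // returns once the change is durable on disk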