gitlab.com/SiaPrime/SiaPrime@v1.4.1/modules/host/contractmanager/writeaheadlog.go (about) 1 package contractmanager 2 3 import ( 4 "encoding/json" 5 "errors" 6 "io" 7 "os" 8 "path/filepath" 9 "sync" 10 11 "gitlab.com/SiaPrime/SiaPrime/build" 12 "gitlab.com/SiaPrime/SiaPrime/modules" 13 "gitlab.com/SiaPrime/SiaPrime/persist" 14 ) 15 16 type ( 17 // sectorUpdate is an idempotent update to the sector metadata. 18 sectorUpdate struct { 19 Count uint16 20 Folder uint16 21 ID sectorID 22 Index uint32 23 } 24 25 // stateChange defines an idempotent change to the state that has not yet 26 // been applied to the contract manager. The state change is a single 27 // transaction in the WAL. 28 // 29 // All changes in the stateChange object need to be idempotent, as it's 30 // possible that consecutive unclean shutdowns will result in changes being 31 // committed to the state multiple times. 32 stateChange struct { 33 // These fields relate to adding a storage folder. Adding a storage 34 // folder happens in several stages. 35 // 36 // First the storage folder is added as an 37 // 'UnfinishedStorageFolderAddition', because there is large amount of 38 // I/O preprocessing that is performed when adding a storage folder. 39 // This I/O must be nonblocking and must resume in the event of unclean 40 // or early shutdown. 41 // 42 // When the preprocessing is complete, the storage folder is moved to a 43 // 'StorageFolderAddition', which can be safely applied to the contract 44 // manager but hasn't yet. 45 // 46 // ErroredStorageFolderAdditions are signals to the WAL that an 47 // unfinished storage folder addition has failed and can be cleared 48 // out. The WAL is append-only, which is why an error needs to be 49 // logged instead of just automatically clearning out the unfinished 50 // storage folder addition. 51 ErroredStorageFolderAdditions []uint16 52 ErroredStorageFolderExtensions []uint16 53 StorageFolderAdditions []savedStorageFolder 54 StorageFolderExtensions []storageFolderExtension 55 StorageFolderRemovals []storageFolderRemoval 56 StorageFolderReductions []storageFolderReduction 57 UnfinishedStorageFolderAdditions []savedStorageFolder 58 UnfinishedStorageFolderExtensions []unfinishedStorageFolderExtension 59 60 // Updates to the sector metadata. Careful ordering of events ensures 61 // that a sector update will not make it into the synced WAL unless the 62 // sector data is already on-disk and synced. 63 SectorUpdates []sectorUpdate 64 } 65 66 // writeAheadLog coordinates ACID transactions which update the state of 67 // the contract manager. Consistency on a field is only guaranteed by 68 // looking it up through the WAL, and is not guaranteed by direct access. 69 writeAheadLog struct { 70 // The primary feature of the WAL is a file on disk that records all of 71 // the changes which have been proposed. The data is written to a temp 72 // file and then renamed atomically to a non-corrupt commitment of 73 // actions to be committed to the state. Data is written to the temp 74 // file continuously for performance reasons - when a Sync() -> 75 // Rename() occurs, most of the data will have already been flushed to 76 // disk, making the operation faster. The same is done with the 77 // settings file, which might be multiple MiB large for larger storage 78 // arrays. 79 // 80 // To further increase throughput, the WAL will batch as many 81 // operations as possible. These operations can happen concurrently, 82 // and will block until the contract manager can provide an ACID 83 // guarantee that the operation has completed. Syncing of multiple 84 // operations happens all at once, and the syncChan is used to signal 85 // that a sync operation has completed, providing ACID guarantees to 86 // any operation waiting on it. The mechanism of announcing is to close 87 // the syncChan, and then to create a new one for new operations to 88 // listen on. 89 // 90 // uncommittedChanges details a list of operations which have been 91 // suggested or queued to be made to the state, but are not yet 92 // guaranteed to have completed. 93 fileSettingsTmp modules.File 94 fileWALTmp modules.File 95 syncChan chan struct{} 96 uncommittedChanges []stateChange 97 committedSettings savedSettings 98 99 // Utilities. The WAL needs access to the ContractManager because all 100 // mutations to ACID fields of the contract manager happen through the 101 // WAL. 102 cm *ContractManager 103 mu sync.Mutex 104 } 105 ) 106 107 // readWALMetadata reads WAL metadata from the input file, returning an error 108 // if the result is unexpected. 109 func readWALMetadata(decoder *json.Decoder) error { 110 var md persist.Metadata 111 err := decoder.Decode(&md) 112 if err != nil { 113 return build.ExtendErr("error reading WAL metadata", err) 114 } 115 if md.Header != walMetadata.Header { 116 return errors.New("WAL metadata header does not match header found in WAL file") 117 } 118 if md.Version != walMetadata.Version { 119 return errors.New("WAL metadata version does not match version found in WAL file") 120 } 121 return nil 122 } 123 124 // writeWALMetadata writes WAL metadata to the input file. 125 func writeWALMetadata(f modules.File) error { 126 changeBytes, err := json.MarshalIndent(walMetadata, "", "\t") 127 if err != nil { 128 return build.ExtendErr("could not marshal WAL metadata", err) 129 } 130 _, err = f.Write(changeBytes) 131 if err != nil { 132 return build.ExtendErr("unable to write WAL metadata", err) 133 } 134 return nil 135 } 136 137 // appendChange will add a change to the WAL, writing the details of the change 138 // to the WAL file but not syncing - syncing is orchestrated by the sync loop. 139 // 140 // The WAL is append only, which means that changes can only be revoked by 141 // appending an error. This is common for long running operations like adding a 142 // storage folder. 143 func (wal *writeAheadLog) appendChange(sc stateChange) { 144 // Marshal the change and then write the change to the WAL file. Syncing 145 // happens in the sync loop. 146 changeBytes, err := json.MarshalIndent(sc, "", "\t") 147 if err != nil { 148 wal.cm.log.Severe("Unable to marshal state change:", err) 149 panic("unable to append a change to the WAL, crashing to prevent corruption") 150 } 151 _, err = wal.fileWALTmp.Write(changeBytes) 152 if err != nil { 153 wal.cm.log.Severe("Unable to write state change to WAL:", err) 154 panic("unable to append a change to the WAL, crashing to prevent corruption") 155 } 156 157 // Update the WAL to include the new storage folder in the uncommitted 158 // changes. 159 wal.uncommittedChanges = append(wal.uncommittedChanges, sc) 160 } 161 162 // commitChange will commit the provided change to the contract manager, 163 // updating both the in-memory state and the on-disk state. 164 // 165 // It should be noted that long running tasks are ignored during calls to 166 // commitChange, as they haven't completed and are being managed by a separate 167 // thread. Upon completion, they will be converted into a different type of 168 // commitment. 169 func (wal *writeAheadLog) commitChange(sc stateChange) { 170 for _, sfa := range sc.StorageFolderAdditions { 171 for i := uint64(0); i < wal.cm.dependencies.AtLeastOne(); i++ { 172 wal.commitAddStorageFolder(sfa) 173 } 174 } 175 for _, sfe := range sc.StorageFolderExtensions { 176 for i := uint64(0); i < wal.cm.dependencies.AtLeastOne(); i++ { 177 wal.commitStorageFolderExtension(sfe) 178 } 179 } 180 for _, sfr := range sc.StorageFolderReductions { 181 for i := uint64(0); i < wal.cm.dependencies.AtLeastOne(); i++ { 182 wal.commitStorageFolderReduction(sfr) 183 } 184 } 185 for _, sfr := range sc.StorageFolderRemovals { 186 for i := uint64(0); i < wal.cm.dependencies.AtLeastOne(); i++ { 187 wal.commitStorageFolderRemoval(sfr) 188 } 189 } 190 for _, su := range sc.SectorUpdates { 191 for i := uint64(0); i < wal.cm.dependencies.AtLeastOne(); i++ { 192 wal.commitUpdateSector(su) 193 } 194 } 195 } 196 197 // createWALTmp will open up the temporary WAL file. 198 func (wal *writeAheadLog) createWALTmp() { 199 var err error 200 walTmpName := filepath.Join(wal.cm.persistDir, walFileTmp) 201 wal.fileWALTmp, err = wal.cm.dependencies.CreateFile(walTmpName) 202 if err != nil { 203 wal.cm.log.Severe("Unable to create WAL temporary file:", err) 204 panic("unable to create WAL temporary file, crashing to avoid corruption") 205 } 206 err = writeWALMetadata(wal.fileWALTmp) 207 if err != nil { 208 wal.cm.log.Severe("Unable to write WAL metadata:", err) 209 panic("unable to create WAL temporary file, crashing to prevent corruption") 210 } 211 } 212 213 // recoverWAL will read a previous WAL and re-commit all of the changes inside, 214 // restoring the program to consistency after an unclean shutdown. The tmp WAL 215 // file needs to be open before this function is called. 216 func (wal *writeAheadLog) recoverWAL(walFile modules.File) error { 217 // Read the WAL metadata to make sure that the version is correct. 218 decoder := json.NewDecoder(walFile) 219 err := readWALMetadata(decoder) 220 if err != nil { 221 wal.cm.log.Println("ERROR: error while reading WAL metadata:", err) 222 return build.ExtendErr("walFile metadata mismatch", err) 223 } 224 225 // Read changes from the WAL one at a time and load them back into memory. 226 // A full list of changes is kept so that modifications to long running 227 // changes can be parsed properly. 228 var sc stateChange 229 var scs []stateChange 230 for err == nil { 231 err = decoder.Decode(&sc) 232 if err == nil { 233 // The uncommitted changes are loaded into memory using a simple 234 // append, because the tmp WAL file has not been created yet, and 235 // will not be created until the sync loop is spawned. The sync 236 // loop spawner will make sure that the uncommitted changes are 237 // written to the tmp WAL file. 238 wal.commitChange(sc) 239 scs = append(scs, sc) 240 } 241 } 242 if err != io.EOF { 243 wal.cm.log.Println("ERROR: could not load WAL json:", err) 244 return build.ExtendErr("error loading WAL json", err) 245 } 246 247 // Do any cleanup regarding long-running unfinished tasks. Long running 248 // task cleanup cannot be handled in the 'commitChange' loop because future 249 // state changes may indicate that the long running task has actually been 250 // completed. 251 wal.cleanupUnfinishedStorageFolderAdditions(scs) 252 wal.cleanupUnfinishedStorageFolderExtensions(scs) 253 return nil 254 } 255 256 // load will pull any changes from the uncommitted WAL into memory, decoding 257 // them and doing any necessary preprocessing. In the most common case (any 258 // time the previous shutdown was clean), there will not be a WAL file. 259 func (wal *writeAheadLog) load() error { 260 // Create the walTmpFile, which needs to be open before recovery can start. 261 wal.createWALTmp() 262 263 // Close the WAL tmp file upon shutdown. 264 wal.cm.tg.AfterStop(func() { 265 wal.mu.Lock() 266 defer wal.mu.Unlock() 267 268 err := wal.fileWALTmp.Close() 269 if err != nil { 270 wal.cm.log.Println("ERROR: error closing wal file during contract manager shutdown:", err) 271 return 272 } 273 err = wal.cm.dependencies.RemoveFile(filepath.Join(wal.cm.persistDir, walFileTmp)) 274 if err != nil { 275 wal.cm.log.Println("ERROR: error removing temporary WAL during contract manager shutdown:", err) 276 return 277 } 278 }) 279 280 // Try opening the WAL file. 281 walFileName := filepath.Join(wal.cm.persistDir, walFile) 282 walFile, err := wal.cm.dependencies.OpenFile(walFileName, os.O_RDONLY, 0600) 283 if err == nil { 284 // err == nil indicates that there is a WAL file, which means that the 285 // previous shutdown was not clean. Re-commit the changes in the WAL to 286 // bring the program back to consistency. 287 wal.cm.log.Println("WARN: WAL file detected, performing recovery after unclean shutdown.") 288 err = wal.recoverWAL(walFile) 289 if err != nil { 290 return build.ExtendErr("failed to recover WAL", err) 291 } 292 err = walFile.Close() 293 if err != nil { 294 return build.ExtendErr("error closing WAL after performing a recovery", err) 295 } 296 } else if !os.IsNotExist(err) { 297 return build.ExtendErr("walFile was not opened successfully", err) 298 } 299 // err == os.IsNotExist, suggesting a successful, clean shutdown. No action 300 // is taken. 301 302 // Create the tmp settings file and initialize the first write to it. This 303 // is necessary before kicking off the sync loop. 304 wal.fileSettingsTmp, err = wal.cm.dependencies.CreateFile(filepath.Join(wal.cm.persistDir, settingsFileTmp)) 305 if err != nil { 306 return build.ExtendErr("unable to prepare the settings temp file", err) 307 } 308 wal.cm.tg.AfterStop(func() { 309 wal.mu.Lock() 310 defer wal.mu.Unlock() 311 if wal.fileSettingsTmp == nil { 312 return 313 } 314 err := wal.fileSettingsTmp.Close() 315 if err != nil { 316 wal.cm.log.Println("ERROR: unable to close settings temporary file") 317 return 318 } 319 err = wal.cm.dependencies.RemoveFile(filepath.Join(wal.cm.persistDir, settingsFileTmp)) 320 if err != nil { 321 wal.cm.log.Println("ERROR: unable to remove settings temporary file") 322 return 323 } 324 }) 325 ss := wal.cm.savedSettings() 326 b, err := json.MarshalIndent(ss, "", "\t") 327 if err != nil { 328 build.ExtendErr("unable to marshal settings data", err) 329 } 330 enc := json.NewEncoder(wal.fileSettingsTmp) 331 if err := enc.Encode(settingsMetadata.Header); err != nil { 332 build.ExtendErr("unable to write header to settings temp file", err) 333 } 334 if err := enc.Encode(settingsMetadata.Version); err != nil { 335 build.ExtendErr("unable to write version to settings temp file", err) 336 } 337 if _, err = wal.fileSettingsTmp.Write(b); err != nil { 338 build.ExtendErr("unable to write data settings temp file", err) 339 } 340 return nil 341 }