gitlab.com/SiaPrime/SiaPrime@v1.4.1/modules/host/contractmanager/writeaheadlogsync.go

package contractmanager

import (
	"encoding/json"
	"path/filepath"
	"reflect"
	"sync"
	"sync/atomic"
	"time"

	"gitlab.com/SiaPrime/SiaPrime/build"
)

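// The commit flow in this file is pipelined across iterations of the sync
// loop: syncResources syncs and atomically renames the tmp files that were
// written during the previous call to commit, and commit then writes fresh
// tmp files for the settings and the WAL, to be published on the next
// iteration. Callers that need ACID guarantees grab the current syncChan
// under the WAL mutex and block until it is closed, as spawnSyncLoop does:
//
//	wal.mu.Lock()
//	syncChan := wal.syncChan
//	wal.mu.Unlock()
//	<-syncChan
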
// syncResources will call Sync on all resources that the WAL has open. The
// storage folder files will be left open, as they are not updated atomically.
// The settings file and WAL tmp files will be synced and closed, to perform an
// atomic update to the files.
func (wal *writeAheadLog) syncResources() {
	// Syncing occurs over multiple files and disks, and is done in parallel to
	// minimize the amount of time that a lock is held over the contract
	// manager.
	var wg sync.WaitGroup

	// Sync the settings file.
	wg.Add(1)
	go func() {
		defer wg.Done()

		if wal.fileSettingsTmp == nil {
			// nothing to sync
			return
		}

		tmpFilename := filepath.Join(wal.cm.persistDir, settingsFileTmp)
		filename := filepath.Join(wal.cm.persistDir, settingsFile)
		err := wal.fileSettingsTmp.Sync()
		if err != nil {
			wal.cm.log.Severe("ERROR: unable to sync the contract manager settings:", err)
		}
		err = wal.fileSettingsTmp.Close()
		if err != nil {
			wal.cm.log.Println("unable to close the temporary contract manager settings file:", err)
		}

		// For testing, provide a place to interrupt the saving of the sync
		// file. This makes it easy to simulate certain types of unclean
		// shutdown.
		if wal.cm.dependencies.Disrupt("settingsSyncRename") {
			// The current settings file that is being re-written will not be
			// saved.
			return
		}

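		// Renaming the tmp file over the live settings file is atomic on
		// POSIX filesystems, so a crash leaves either the old settings or
		// the fully-synced new settings on disk, never a partial write.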
		err = wal.cm.dependencies.RenameFile(tmpFilename, filename)
		if err != nil {
			wal.cm.log.Severe("ERROR: unable to atomically rename the contract manager settings:", err)
		}
	}()

	// Sync all of the storage folders.
	for _, sf := range wal.cm.storageFolders {
		// Skip operation on unavailable storage folders.
		if atomic.LoadUint64(&sf.atomicUnavailable) == 1 {
			continue
		}

		wg.Add(2)
		go func(sf *storageFolder) {
			defer wg.Done()
			err := sf.metadataFile.Sync()
			if err != nil {
				wal.cm.log.Severe("ERROR: unable to sync a storage folder:", err)
			}
		}(sf)
		go func(sf *storageFolder) {
			defer wg.Done()
			err := sf.sectorFile.Sync()
			if err != nil {
				wal.cm.log.Severe("ERROR: unable to sync a storage folder:", err)
			}
		}(sf)
	}

	// Sync the temp WAL file, but do not perform the atomic rename - the
	// atomic rename must be guaranteed to happen after all of the other files
	// have been synced.
	wg.Add(1)
	go func() {
		defer wg.Done()
		if len(wal.uncommittedChanges) == 0 {
			// nothing to sync
			return
		}

		err := wal.fileWALTmp.Sync()
		if err != nil {
			wal.cm.log.Severe("Unable to sync the write-ahead-log:", err)
		}
		err = wal.fileWALTmp.Close()
		if err != nil {
			// Log that the host is having trouble saving the uncommitted changes.
			// Crash if the list of uncommitted changes has grown very large.
			wal.cm.log.Println("ERROR: could not close temporary write-ahead-log in contract manager:", err)
			return
		}
	}()

	// Wait for all of the sync calls to finish.
	wg.Wait()

	// Now that all the Sync calls have completed, rename the WAL tmp file to
	// update the WAL.
	if len(wal.uncommittedChanges) != 0 && !wal.cm.dependencies.Disrupt("walRename") {
		walTmpName := filepath.Join(wal.cm.persistDir, walFileTmp)
		walFileName := filepath.Join(wal.cm.persistDir, walFile)
		err := wal.cm.dependencies.RenameFile(walTmpName, walFileName)
		if err != nil {
			// Log that the host is having trouble saving the uncommitted changes.
			// Crash if the list of uncommitted changes has grown very large.
			wal.cm.log.Severe("ERROR: could not rename temporary write-ahead-log in contract manager:", err)
		}
	}

	// Perform any cleanup actions on the updates.
	for _, sc := range wal.uncommittedChanges {
		for _, sfe := range sc.StorageFolderExtensions {
			wal.commitStorageFolderExtension(sfe)
		}
		for _, sfr := range sc.StorageFolderReductions {
			wal.commitStorageFolderReduction(sfr)
		}
		for _, sfr := range sc.StorageFolderRemovals {
			wal.commitStorageFolderRemoval(sfr)
		}

		// TODO: Virtual sector handling here.
	}

	// Now that the WAL is sync'd and updated, any calls waiting on ACID
	// guarantees can safely return.
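	// Closing syncChan acts as a broadcast that releases every goroutine
	// blocked on the old channel at once; a fresh channel is then installed
	// for callers that arrive after this commit.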
	close(wal.syncChan)
	wal.syncChan = make(chan struct{})
}

// commit will take all of the changes that have been added to the WAL and
// atomically commit the WAL to disk, then apply the actions in the WAL to the
// state. commit will do lots of syncing disk I/O, and so can take a while,
// especially if there are a large number of actions queued up.
//
// commit has no return value; failures are logged, and a commit that
// performs no work is not an error, as there may simply have been nothing
// to do.
//
// commit should only be called from threadedSyncLoop.
func (wal *writeAheadLog) commit() {
	// Sync all open, non-WAL files on the host.
	wal.syncResources()

	// Begin writing to the settings file.
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()

		newSettings := wal.cm.savedSettings()
		if reflect.DeepEqual(newSettings, wal.committedSettings) {
			// no need to write the settings file
			wal.fileSettingsTmp = nil
			return
		}
		wal.committedSettings = newSettings

		// Begin writing to the settings file, which will be synced during the
		// next iteration of the sync loop.
		var err error
		wal.fileSettingsTmp, err = wal.cm.dependencies.CreateFile(filepath.Join(wal.cm.persistDir, settingsFileTmp))
		if err != nil {
			wal.cm.log.Severe("Unable to open temporary settings file for writing:", err)
		}
		b, err := json.MarshalIndent(newSettings, "", "\t")
		if err != nil {
			wal.cm.log.Severe(build.ExtendErr("unable to marshal settings data", err))
		}
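		// The settings file layout is the persist metadata header and
		// version, followed by the indented JSON encoding of the settings.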
		enc := json.NewEncoder(wal.fileSettingsTmp)
		if err := enc.Encode(settingsMetadata.Header); err != nil {
			wal.cm.log.Severe(build.ExtendErr("unable to write header to settings temp file", err))
		}
		if err := enc.Encode(settingsMetadata.Version); err != nil {
			wal.cm.log.Severe(build.ExtendErr("unable to write version to settings temp file", err))
		}
		if _, err = wal.fileSettingsTmp.Write(b); err != nil {
			wal.cm.log.Severe(build.ExtendErr("unable to write data to settings temp file", err))
		}
	}()

	// Begin writing new changes to the WAL.
	wg.Add(1)
	go func() {
		defer wg.Done()

		if len(wal.uncommittedChanges) == 0 {
			// no need to recreate wal
			return
		}

		// Extract any unfinished long-running jobs from the list of WAL items.
		unfinishedAdditions := findUnfinishedStorageFolderAdditions(wal.uncommittedChanges)
		unfinishedExtensions := findUnfinishedStorageFolderExtensions(wal.uncommittedChanges)

		// Recreate the WAL file so that it can receive new updates.
		var err error
		walTmpName := filepath.Join(wal.cm.persistDir, walFileTmp)
		wal.fileWALTmp, err = wal.cm.dependencies.CreateFile(walTmpName)
		if err != nil {
			wal.cm.log.Severe("ERROR: unable to create write-ahead-log:", err)
		}
		// Write the metadata into the WAL.
		err = writeWALMetadata(wal.fileWALTmp)
		if err != nil {
			wal.cm.log.Severe("Unable to properly initialize WAL file, crashing to prevent corruption:", err)
		}

		// Append all of the remaining long running uncommitted changes to the WAL.
		wal.appendChange(stateChange{
			UnfinishedStorageFolderAdditions:  unfinishedAdditions,
			UnfinishedStorageFolderExtensions: unfinishedExtensions,
		})

		// Clear the set of uncommitted changes.
		wal.uncommittedChanges = nil
	}()
	wg.Wait()
}

// spawnSyncLoop prepares and establishes the loop which will be running in the
// background to coordinate disk synchronizations. Disk syncing is done in a
// background loop to help with performance, and to allow multiple things to
// modify the WAL simultaneously.
func (wal *writeAheadLog) spawnSyncLoop() (err error) {
	// Create a signal so we know when the sync loop has stopped, which means
	// there will be no more open commits.
	threadsStopped := make(chan struct{})
	syncLoopStopped := make(chan struct{})
	wal.syncChan = make(chan struct{})
	go wal.threadedSyncLoop(threadsStopped, syncLoopStopped)
	wal.cm.tg.AfterStop(func() {
		// Wait for another iteration of the sync loop, so that the in-progress
		// settings can be saved atomically to disk.
		wal.mu.Lock()
		syncChan := wal.syncChan
		wal.mu.Unlock()
		<-syncChan

		// Close the threadsStopped channel to let the sync loop know that all
		// calls to tg.Add() in the contract manager have cleaned up.
		close(threadsStopped)

		// Because this is being called in an 'AfterStop' routine, all open
		// calls to the contract manager should have completed, and all open
		// threads should have closed. The last call to change the contract
		// manager should have completed, so the number of uncommitted changes
		// should be zero.
		<-syncLoopStopped // Wait for the sync loop to signal proper termination.

		// Allow unclean shutdown to be simulated by disrupting the removal of
		// the WAL file.
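		// Removing the WAL also marks the shutdown as clean: a WAL file that
		// is still present at startup indicates that logged changes may need
		// to be replayed.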
		if !wal.cm.dependencies.Disrupt("cleanWALFile") {
			err = wal.cm.dependencies.RemoveFile(filepath.Join(wal.cm.persistDir, walFile))
			if err != nil {
				wal.cm.log.Println("Error removing WAL during contract manager shutdown:", err)
			}
		}
	})
	return nil
}

// threadedSyncLoop is a background thread that occasionally commits the WAL to
// the state as an ACID transaction. This process can be very slow, so
// transactions to the contract manager are batched automatically and
// occasionally committed together.
func (wal *writeAheadLog) threadedSyncLoop(threadsStopped chan struct{}, syncLoopStopped chan struct{}) {
	// Provide a place for the testing to disable the sync loop.
	if wal.cm.dependencies.Disrupt("threadedSyncLoopStart") {
		close(syncLoopStopped)
		return
	}

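	// Committing at a fixed interval batches every change that arrived in
	// the window into a single round of fsync calls, trading up to one
	// interval of durability latency for much higher throughput.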
	syncInterval := 500 * time.Millisecond
	for {
		select {
		case <-threadsStopped:
			close(syncLoopStopped)
			return
		case <-time.After(syncInterval):
			// Commit all of the changes in the WAL to disk, and then apply the
			// changes.
			wal.mu.Lock()
			wal.commit()
			wal.mu.Unlock()
		}
	}
}