gitlab.com/SiaPrime/SiaPrime@v1.4.1/modules/host/contractmanager/writeaheadlog.go (about)

     1  package contractmanager
     2  
     3  import (
     4  	"encoding/json"
     5  	"errors"
     6  	"io"
     7  	"os"
     8  	"path/filepath"
     9  	"sync"
    10  
    11  	"gitlab.com/SiaPrime/SiaPrime/build"
    12  	"gitlab.com/SiaPrime/SiaPrime/modules"
    13  	"gitlab.com/SiaPrime/SiaPrime/persist"
    14  )
    15  
type (
	// sectorUpdate is an idempotent update to the sector metadata.
	//
	// NOTE(review): the meaning of the individual fields is not visible in
	// this file; presumably Folder and Index locate the sector inside a
	// storage folder and Count is a usage counter - confirm against
	// commitUpdateSector.
	sectorUpdate struct {
		Count  uint16
		Folder uint16
		ID     sectorID
		Index  uint32
	}

	// stateChange defines an idempotent change to the state that has not yet
	// been applied to the contract manager. The state change is a single
	// transaction in the WAL, and is serialized to the WAL file as JSON.
	//
	// All changes in the stateChange object need to be idempotent, as it's
	// possible that consecutive unclean shutdowns will result in changes being
	// committed to the state multiple times.
	stateChange struct {
		// These fields relate to adding a storage folder. Adding a storage
		// folder happens in several stages.
		//
		// First the storage folder is added as an
		// 'UnfinishedStorageFolderAddition', because there is a large amount
		// of I/O preprocessing that is performed when adding a storage
		// folder. This I/O must be nonblocking and must resume in the event
		// of unclean or early shutdown.
		//
		// When the preprocessing is complete, the storage folder is moved to a
		// 'StorageFolderAddition', which can be safely applied to the contract
		// manager but hasn't yet.
		//
		// ErroredStorageFolderAdditions are signals to the WAL that an
		// unfinished storage folder addition has failed and can be cleared
		// out. The WAL is append-only, which is why an error needs to be
		// logged instead of just automatically clearing out the unfinished
		// storage folder addition.
		//
		// NOTE(review): the extension fields presumably follow the same
		// unfinished -> finished / errored lifecycle as the addition fields -
		// confirm against the storage folder extension code.
		ErroredStorageFolderAdditions     []uint16
		ErroredStorageFolderExtensions    []uint16
		StorageFolderAdditions            []savedStorageFolder
		StorageFolderExtensions           []storageFolderExtension
		StorageFolderRemovals             []storageFolderRemoval
		StorageFolderReductions           []storageFolderReduction
		UnfinishedStorageFolderAdditions  []savedStorageFolder
		UnfinishedStorageFolderExtensions []unfinishedStorageFolderExtension

		// Updates to the sector metadata. Careful ordering of events ensures
		// that a sector update will not make it into the synced WAL unless the
		// sector data is already on-disk and synced.
		SectorUpdates []sectorUpdate
	}

	// writeAheadLog coordinates ACID transactions which update the state of
	// the contract manager. Consistency on a field is only guaranteed by
	// looking it up through the WAL, and is not guaranteed by direct access.
	writeAheadLog struct {
		// The primary feature of the WAL is a file on disk that records all of
		// the changes which have been proposed. The data is written to a temp
		// file and then renamed atomically to a non-corrupt commitment of
		// actions to be committed to the state. Data is written to the temp
		// file continuously for performance reasons - when a Sync() ->
		// Rename() occurs, most of the data will have already been flushed to
		// disk, making the operation faster. The same is done with the
		// settings file, which might be multiple MiB large for larger storage
		// arrays.
		//
		// To further increase throughput, the WAL will batch as many
		// operations as possible. These operations can happen concurrently,
		// and will block until the contract manager can provide an ACID
		// guarantee that the operation has completed. Syncing of multiple
		// operations happens all at once, and the syncChan is used to signal
		// that a sync operation has completed, providing ACID guarantees to
		// any operation waiting on it. The mechanism of announcing is to close
		// the syncChan, and then to create a new one for new operations to
		// listen on.
		//
		// uncommittedChanges details a list of operations which have been
		// suggested or queued to be made to the state, but are not yet
		// guaranteed to have completed.
		//
		// NOTE(review): committedSettings is not referenced in this part of
		// the file; presumably it holds the settings most recently synced to
		// disk - confirm against the sync loop.
		fileSettingsTmp    modules.File
		fileWALTmp         modules.File
		syncChan           chan struct{}
		uncommittedChanges []stateChange
		committedSettings  savedSettings

		// Utilities. The WAL needs access to the ContractManager because all
		// mutations to ACID fields of the contract manager happen through the
		// WAL. The mutex is held by the shutdown hooks registered in load
		// while the temporary files are closed and removed.
		cm *ContractManager
		mu sync.Mutex
	}
)
   106  
   107  // readWALMetadata reads WAL metadata from the input file, returning an error
   108  // if the result is unexpected.
   109  func readWALMetadata(decoder *json.Decoder) error {
   110  	var md persist.Metadata
   111  	err := decoder.Decode(&md)
   112  	if err != nil {
   113  		return build.ExtendErr("error reading WAL metadata", err)
   114  	}
   115  	if md.Header != walMetadata.Header {
   116  		return errors.New("WAL metadata header does not match header found in WAL file")
   117  	}
   118  	if md.Version != walMetadata.Version {
   119  		return errors.New("WAL metadata version does not match version found in WAL file")
   120  	}
   121  	return nil
   122  }
   123  
   124  // writeWALMetadata writes WAL metadata to the input file.
   125  func writeWALMetadata(f modules.File) error {
   126  	changeBytes, err := json.MarshalIndent(walMetadata, "", "\t")
   127  	if err != nil {
   128  		return build.ExtendErr("could not marshal WAL metadata", err)
   129  	}
   130  	_, err = f.Write(changeBytes)
   131  	if err != nil {
   132  		return build.ExtendErr("unable to write WAL metadata", err)
   133  	}
   134  	return nil
   135  }
   136  
   137  // appendChange will add a change to the WAL, writing the details of the change
   138  // to the WAL file but not syncing - syncing is orchestrated by the sync loop.
   139  //
   140  // The WAL is append only, which means that changes can only be revoked by
   141  // appending an error. This is common for long running operations like adding a
   142  // storage folder.
   143  func (wal *writeAheadLog) appendChange(sc stateChange) {
   144  	// Marshal the change and then write the change to the WAL file. Syncing
   145  	// happens in the sync loop.
   146  	changeBytes, err := json.MarshalIndent(sc, "", "\t")
   147  	if err != nil {
   148  		wal.cm.log.Severe("Unable to marshal state change:", err)
   149  		panic("unable to append a change to the WAL, crashing to prevent corruption")
   150  	}
   151  	_, err = wal.fileWALTmp.Write(changeBytes)
   152  	if err != nil {
   153  		wal.cm.log.Severe("Unable to write state change to WAL:", err)
   154  		panic("unable to append a change to the WAL, crashing to prevent corruption")
   155  	}
   156  
   157  	// Update the WAL to include the new storage folder in the uncommitted
   158  	// changes.
   159  	wal.uncommittedChanges = append(wal.uncommittedChanges, sc)
   160  }
   161  
   162  // commitChange will commit the provided change to the contract manager,
   163  // updating both the in-memory state and the on-disk state.
   164  //
   165  // It should be noted that long running tasks are ignored during calls to
   166  // commitChange, as they haven't completed and are being managed by a separate
   167  // thread. Upon completion, they will be converted into a different type of
   168  // commitment.
   169  func (wal *writeAheadLog) commitChange(sc stateChange) {
   170  	for _, sfa := range sc.StorageFolderAdditions {
   171  		for i := uint64(0); i < wal.cm.dependencies.AtLeastOne(); i++ {
   172  			wal.commitAddStorageFolder(sfa)
   173  		}
   174  	}
   175  	for _, sfe := range sc.StorageFolderExtensions {
   176  		for i := uint64(0); i < wal.cm.dependencies.AtLeastOne(); i++ {
   177  			wal.commitStorageFolderExtension(sfe)
   178  		}
   179  	}
   180  	for _, sfr := range sc.StorageFolderReductions {
   181  		for i := uint64(0); i < wal.cm.dependencies.AtLeastOne(); i++ {
   182  			wal.commitStorageFolderReduction(sfr)
   183  		}
   184  	}
   185  	for _, sfr := range sc.StorageFolderRemovals {
   186  		for i := uint64(0); i < wal.cm.dependencies.AtLeastOne(); i++ {
   187  			wal.commitStorageFolderRemoval(sfr)
   188  		}
   189  	}
   190  	for _, su := range sc.SectorUpdates {
   191  		for i := uint64(0); i < wal.cm.dependencies.AtLeastOne(); i++ {
   192  			wal.commitUpdateSector(su)
   193  		}
   194  	}
   195  }
   196  
   197  // createWALTmp will open up the temporary WAL file.
   198  func (wal *writeAheadLog) createWALTmp() {
   199  	var err error
   200  	walTmpName := filepath.Join(wal.cm.persistDir, walFileTmp)
   201  	wal.fileWALTmp, err = wal.cm.dependencies.CreateFile(walTmpName)
   202  	if err != nil {
   203  		wal.cm.log.Severe("Unable to create WAL temporary file:", err)
   204  		panic("unable to create WAL temporary file, crashing to avoid corruption")
   205  	}
   206  	err = writeWALMetadata(wal.fileWALTmp)
   207  	if err != nil {
   208  		wal.cm.log.Severe("Unable to write WAL metadata:", err)
   209  		panic("unable to create WAL temporary file, crashing to prevent corruption")
   210  	}
   211  }
   212  
   213  // recoverWAL will read a previous WAL and re-commit all of the changes inside,
   214  // restoring the program to consistency after an unclean shutdown. The tmp WAL
   215  // file needs to be open before this function is called.
   216  func (wal *writeAheadLog) recoverWAL(walFile modules.File) error {
   217  	// Read the WAL metadata to make sure that the version is correct.
   218  	decoder := json.NewDecoder(walFile)
   219  	err := readWALMetadata(decoder)
   220  	if err != nil {
   221  		wal.cm.log.Println("ERROR: error while reading WAL metadata:", err)
   222  		return build.ExtendErr("walFile metadata mismatch", err)
   223  	}
   224  
   225  	// Read changes from the WAL one at a time and load them back into memory.
   226  	// A full list of changes is kept so that modifications to long running
   227  	// changes can be parsed properly.
   228  	var sc stateChange
   229  	var scs []stateChange
   230  	for err == nil {
   231  		err = decoder.Decode(&sc)
   232  		if err == nil {
   233  			// The uncommitted changes are loaded into memory using a simple
   234  			// append, because the tmp WAL file has not been created yet, and
   235  			// will not be created until the sync loop is spawned. The sync
   236  			// loop spawner will make sure that the uncommitted changes are
   237  			// written to the tmp WAL file.
   238  			wal.commitChange(sc)
   239  			scs = append(scs, sc)
   240  		}
   241  	}
   242  	if err != io.EOF {
   243  		wal.cm.log.Println("ERROR: could not load WAL json:", err)
   244  		return build.ExtendErr("error loading WAL json", err)
   245  	}
   246  
   247  	// Do any cleanup regarding long-running unfinished tasks. Long running
   248  	// task cleanup cannot be handled in the 'commitChange' loop because future
   249  	// state changes may indicate that the long running task has actually been
   250  	// completed.
   251  	wal.cleanupUnfinishedStorageFolderAdditions(scs)
   252  	wal.cleanupUnfinishedStorageFolderExtensions(scs)
   253  	return nil
   254  }
   255  
   256  // load will pull any changes from the uncommitted WAL into memory, decoding
   257  // them and doing any necessary preprocessing. In the most common case (any
   258  // time the previous shutdown was clean), there will not be a WAL file.
   259  func (wal *writeAheadLog) load() error {
   260  	// Create the walTmpFile, which needs to be open before recovery can start.
   261  	wal.createWALTmp()
   262  
   263  	// Close the WAL tmp file upon shutdown.
   264  	wal.cm.tg.AfterStop(func() {
   265  		wal.mu.Lock()
   266  		defer wal.mu.Unlock()
   267  
   268  		err := wal.fileWALTmp.Close()
   269  		if err != nil {
   270  			wal.cm.log.Println("ERROR: error closing wal file during contract manager shutdown:", err)
   271  			return
   272  		}
   273  		err = wal.cm.dependencies.RemoveFile(filepath.Join(wal.cm.persistDir, walFileTmp))
   274  		if err != nil {
   275  			wal.cm.log.Println("ERROR: error removing temporary WAL during contract manager shutdown:", err)
   276  			return
   277  		}
   278  	})
   279  
   280  	// Try opening the WAL file.
   281  	walFileName := filepath.Join(wal.cm.persistDir, walFile)
   282  	walFile, err := wal.cm.dependencies.OpenFile(walFileName, os.O_RDONLY, 0600)
   283  	if err == nil {
   284  		// err == nil indicates that there is a WAL file, which means that the
   285  		// previous shutdown was not clean. Re-commit the changes in the WAL to
   286  		// bring the program back to consistency.
   287  		wal.cm.log.Println("WARN: WAL file detected, performing recovery after unclean shutdown.")
   288  		err = wal.recoverWAL(walFile)
   289  		if err != nil {
   290  			return build.ExtendErr("failed to recover WAL", err)
   291  		}
   292  		err = walFile.Close()
   293  		if err != nil {
   294  			return build.ExtendErr("error closing WAL after performing a recovery", err)
   295  		}
   296  	} else if !os.IsNotExist(err) {
   297  		return build.ExtendErr("walFile was not opened successfully", err)
   298  	}
   299  	// err == os.IsNotExist, suggesting a successful, clean shutdown. No action
   300  	// is taken.
   301  
   302  	// Create the tmp settings file and initialize the first write to it. This
   303  	// is necessary before kicking off the sync loop.
   304  	wal.fileSettingsTmp, err = wal.cm.dependencies.CreateFile(filepath.Join(wal.cm.persistDir, settingsFileTmp))
   305  	if err != nil {
   306  		return build.ExtendErr("unable to prepare the settings temp file", err)
   307  	}
   308  	wal.cm.tg.AfterStop(func() {
   309  		wal.mu.Lock()
   310  		defer wal.mu.Unlock()
   311  		if wal.fileSettingsTmp == nil {
   312  			return
   313  		}
   314  		err := wal.fileSettingsTmp.Close()
   315  		if err != nil {
   316  			wal.cm.log.Println("ERROR: unable to close settings temporary file")
   317  			return
   318  		}
   319  		err = wal.cm.dependencies.RemoveFile(filepath.Join(wal.cm.persistDir, settingsFileTmp))
   320  		if err != nil {
   321  			wal.cm.log.Println("ERROR: unable to remove settings temporary file")
   322  			return
   323  		}
   324  	})
   325  	ss := wal.cm.savedSettings()
   326  	b, err := json.MarshalIndent(ss, "", "\t")
   327  	if err != nil {
   328  		build.ExtendErr("unable to marshal settings data", err)
   329  	}
   330  	enc := json.NewEncoder(wal.fileSettingsTmp)
   331  	if err := enc.Encode(settingsMetadata.Header); err != nil {
   332  		build.ExtendErr("unable to write header to settings temp file", err)
   333  	}
   334  	if err := enc.Encode(settingsMetadata.Version); err != nil {
   335  		build.ExtendErr("unable to write version to settings temp file", err)
   336  	}
   337  	if _, err = wal.fileSettingsTmp.Write(b); err != nil {
   338  		build.ExtendErr("unable to write data settings temp file", err)
   339  	}
   340  	return nil
   341  }