gitlab.com/SiaPrime/SiaPrime@v1.4.1/modules/host/contractmanager/contractmanager.go

     1  package contractmanager
     2  
     3  // TODO: Need to sync the directory after doing rename and create operations.
     4  
     5  // TODO: Use fallocate when adding + growing storage folders.
     6  
     7  // TODO: Long-running operations (add, empty) don't tally progress, and don't
     8  // indicate what operation is running.
     9  
    10  // TODO: Add disk failure testing.
    11  
    12  // TODO: Write some code into the production dependencies that will, during
    13  // testing, arbitrarily write less than the full data to a file until Sync()
    14  // has been called. That way, disruptions can effectively simulate partial
    15  // writes even though the disk writes are actually completing.
    16  
    17  // TODO: emptyStorageFolder should be able to move sectors into folders that
    18  // are being resized, into the sector slots that are not affected by the resize.
    19  
    20  // TODO: Re-write the WAL to not need to do group syncing, and also to not need
    21  // to use the rename call at all.
    22  
    23  // TODO: When a storage folder is missing, operations on the sectors in that
    24  // storage folder (Add, Remove, Delete, etc.) may result in corruption and
    25  // inconsistent internal state for the contractor. For now, this is fine because
    26  // it's a rare situation, but it should be addressed eventually.
    27  
    28  import (
    29  	"errors"
    30  	"path/filepath"
    31  	"sync/atomic"
    32  
    33  	"gitlab.com/SiaPrime/SiaPrime/build"
    34  	"gitlab.com/SiaPrime/SiaPrime/crypto"
    35  	"gitlab.com/SiaPrime/SiaPrime/modules"
    36  	"gitlab.com/SiaPrime/SiaPrime/persist"
    37  	siasync "gitlab.com/SiaPrime/SiaPrime/sync"
    38  )
    39  
    40  // ContractManager is responsible for managing contracts that the host has with
    41  // renters, including storing the data, submitting storage proofs, and deleting
    42  // the data when a contract is complete.
    43  type ContractManager struct {
    44  	// The contract manager controls many resources which are spread across
    45  	// multiple files yet must all be consistent and durable. ACID properties
    46  	// are achieved by using a write-ahead log (WAL). The in-memory state
    47  	// represents currently uncommitted data; however, reading from the
    48  	// uncommitted state does not threaten consistency. It is okay if the user
    49  	// sees uncommitted data, so long as other ACID operations do not return
    50  	// early. Any changes to the state must be documented in the WAL to prevent
    51  	// inconsistency.
    52  
    53  	// The contract manager is highly concurrent. Most fields are protected by
    54  	// the mutex in the WAL, but storage folders and sectors can be accessed
    55  	// individually. A map of locked sectors ensures that each sector is only
    56  	// accessed by one thread at a time, but allows many sectors across a
    57  	// single file to be accessed concurrently. Any interaction with a sector
    58  	// requires a sector lock.
    59  	//
    60  	// If sectors are being added to a storage folder, a readlock is required
    61  	// on the storage folder. Reads and deletes do not require any locks on the
    62  	// storage folder. If a storage folder operation is happening (add, resize,
    63  	// remove), a writelock is required on the storage folder lock.
    64  
    65  	// The contract manager is expected to be consistent, durable, atomic, and
    66  	// error-free in the face of unclean shutdown and disk error. Failure of
    67  	// the controlling disk (containing the settings file and WAL file) is not
    68  	// tolerated and will cause a panic, but any disk failures for the storage
    69  	// folders should be tolerated gracefully. Threads should perform complete
    70  	// cleanup before returning, which can be achieved with threadgroups.
    71  
    72  	// sectorSalt is a persistent security field that is set the first time
    73  	// the contract manager is initialized and never modified afterwards.
    74  	// It's used to randomize the location on-disk that a sector gets stored,
    75  	// so that an adversary cannot maliciously add sectors to specific disks,
    76  	// or otherwise perform manipulations that may degrade performance.
    77  	//
    78  	// sectorLocations is a giant lookup table that keeps a mapping from every
    79  	// sector in the host to the location on-disk where it is stored. For
    80  	// performance information, see the BenchmarkSectorLocations docstring.
    81  	// sectorLocations is persisted on disk through a combination of the WAL
    82  	// and through metadata that is stored directly in each storage folder.
    83  	//
    84  	// The storageFolders field stores information about each storage folder,
    85  	// including metadata about which sector slots are currently populated vs.
    86  	// which sector slots are available. For performance information, see
    87  	// BenchmarkStorageFolders.
    88  	sectorSalt      crypto.Hash
    89  	sectorLocations map[sectorID]sectorLocation
    90  	storageFolders  map[uint16]*storageFolder
    91  
    92  	// lockedSectors contains a list of sectors that are currently being read
    93  	// or modified.
    94  	lockedSectors map[sectorID]*sectorLock
    95  
    96  	// Utilities.
    97  	dependencies modules.Dependencies
    98  	log          *persist.Logger
    99  	persistDir   string
   100  	tg           siasync.ThreadGroup
   101  	wal          writeAheadLog
   102  }
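
// The sectorSalt comment above says the salt randomizes where a sector lands
// on disk. The helper below is a minimal sketch of that idea; the name
// exampleSaltedSectorID is hypothetical, and the real sector ID derivation
// used by this package may differ.
func (cm *ContractManager) exampleSaltedSectorID(sectorRoot crypto.Hash) (id sectorID) {
	// Hash the persistent salt together with the sector's Merkle root so the
	// resulting ID, and therefore the sector's placement, cannot be chosen by
	// a renter who only controls the sector data.
	salted := crypto.HashAll(cm.sectorSalt, sectorRoot)
	copy(id[:], salted[:])
	return id
}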
   103  
   104  // Close will cleanly shut down the contract manager.
   105  func (cm *ContractManager) Close() error {
   106  	return build.ExtendErr("error while stopping contract manager", cm.tg.Stop())
   107  }
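
// The lockedSectors map documented in the ContractManager struct serializes
// access to individual sectors. Below is a hedged sketch of how such a lock
// might be acquired; exampleLockSector is a hypothetical name and the mu
// field on sectorLock is assumed, so the package's real locking code may
// look different.
func (cm *ContractManager) exampleLockSector(id sectorID) *sectorLock {
	// The WAL mutex only protects the lockedSectors map itself. The
	// per-sector lock then serializes work on that one sector without
	// blocking operations on unrelated sectors.
	cm.wal.mu.Lock()
	sl, exists := cm.lockedSectors[id]
	if !exists {
		sl = new(sectorLock)
		cm.lockedSectors[id] = sl
	}
	cm.wal.mu.Unlock()

	sl.mu.Lock()
	return sl
}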
   108  
   109  // newContractManager returns a contract manager that is ready to be used with
   110  // the provided dependencies.
   111  func newContractManager(dependencies modules.Dependencies, persistDir string) (*ContractManager, error) {
   112  	cm := &ContractManager{
   113  		storageFolders:  make(map[uint16]*storageFolder),
   114  		sectorLocations: make(map[sectorID]sectorLocation),
   115  
   116  		lockedSectors: make(map[sectorID]*sectorLock),
   117  
   118  		dependencies: dependencies,
   119  		persistDir:   persistDir,
   120  	}
   121  	cm.wal.cm = cm
   122  	cm.tg.AfterStop(func() {
   123  		dependencies.Destruct()
   124  	})
   125  
   126  	// Perform clean shutdown of already-initialized features if startup fails.
   127  	var err error
   128  	defer func() {
   129  		if err != nil {
   130  			err1 := build.ExtendErr("error during contract manager startup", err)
   131  			err2 := build.ExtendErr("error while stopping a partially started contract manager", cm.tg.Stop())
   132  			err = build.ComposeErrors(err1, err2)
   133  		}
   134  	}()
   135  
   136  	// Create the persist directory if it does not yet exist.
   137  	err = dependencies.MkdirAll(cm.persistDir, 0700)
   138  	if err != nil {
   139  		return nil, build.ExtendErr("error while creating the persist directory for the contract manager", err)
   140  	}
   141  
   142  	// Logger is always the first thing initialized.
   143  	cm.log, err = dependencies.NewLogger(filepath.Join(cm.persistDir, logFile))
   144  	if err != nil {
   145  		return nil, build.ExtendErr("error while creating the logger for the contract manager", err)
   146  	}
   147  	// Set up the clean shutdown of the logger.
   148  	cm.tg.AfterStop(func() {
   149  		err = build.ComposeErrors(cm.log.Close(), err)
   150  	})
   151  
   152  	// Load the atomic state of the contract manager. An unclean shutdown may
   153  	// have wiped out recent changes; anything important will be recovered when
   154  	// the WAL is loaded.
   155  	err = cm.loadSettings()
   156  	if err != nil {
   157  		cm.log.Println("ERROR: Unable to load contract manager settings:", err)
   158  		return nil, build.ExtendErr("error while loading contract manager atomic data", err)
   159  	}
   160  
   161  	// Load the WAL, repairing any corruption caused by unclean shutdown.
   162  	err = cm.wal.load()
   163  	if err != nil {
   164  		cm.log.Println("ERROR: Unable to load the contract manager write-ahead-log:", err)
   165  		return nil, build.ExtendErr("error while loading the WAL at startup", err)
   166  	}
   167  	// Upon shutdown, unload all of the files.
   168  	cm.tg.AfterStop(func() {
   169  		cm.wal.mu.Lock()
   170  		defer cm.wal.mu.Unlock()
   171  
   172  		for _, sf := range cm.storageFolders {
   173  			// No storage folder to close if the folder is not available.
   174  			if atomic.LoadUint64(&sf.atomicUnavailable) == 1 {
   175  				// File handles will either already be closed or may even be
   176  				// nil.
   177  				continue
   178  			}
   179  
   180  			err = sf.metadataFile.Close()
   181  			if err != nil {
   182  				cm.log.Println("Error closing the storage folder metadata file handle", err)
   183  			}
   184  			err = sf.sectorFile.Close()
   185  			if err != nil {
   186  				cm.log.Println("Error closing the storage folder sector file handle", err)
   187  			}
   188  		}
   189  	})
   190  
   191  	// The sector location data is loaded last. Any corruption that happened
   192  	// during unclean shutdown has already been fixed by the WAL.
   193  	for _, sf := range cm.storageFolders {
   194  		if atomic.LoadUint64(&sf.atomicUnavailable) == 1 {
   195  			// The metadata is unavailable, so just count the number of sectors
   196  			// instead of loading them.
   197  			sf.sectors = uint64(len(usageSectors(sf.usage)))
   198  			continue
   199  		}
   200  		cm.loadSectorLocations(sf)
   201  	}
   202  
   203  	// Launch the sync loop that periodically flushes changes from the WAL to
   204  	// disk.
   205  	err = cm.wal.spawnSyncLoop()
   206  	if err != nil {
   207  		cm.log.Println("ERROR: Unable to spawn the contract manager synchronization loop:", err)
   208  		return nil, build.ExtendErr("error while spawning contract manager sync loop", err)
   209  	}
   210  
   211  	// Spin up the thread that continuously looks for missing storage folders
   212  	// and adds them if they are discovered.
   213  	go cm.threadedFolderRecheck()
   214  
   215  	// Simulate an error to make sure the cleanup code is triggered correctly.
   216  	if cm.dependencies.Disrupt("erroredStartup") {
   217  		err = errors.New("startup disrupted")
   218  		return nil, err
   219  	}
   220  	return cm, nil
   221  }
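
// The Disrupt("erroredStartup") check above is a fault-injection hook for
// tests. The type below is a hedged sketch of how a test-only dependency set
// might trigger it; the name erroredStartupDeps is hypothetical, though
// embedding modules.ProductionDependencies and overriding Disrupt is a common
// way to build test dependencies.
type erroredStartupDeps struct {
	modules.ProductionDependencies
}

// Disrupt reports true for the "erroredStartup" trigger, which forces
// newContractManager down its deferred cleanup path and makes it return the
// "startup disrupted" error. A test would pass &erroredStartupDeps{} as the
// dependencies argument.
func (*erroredStartupDeps) Disrupt(s string) bool {
	return s == "erroredStartup"
}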
   222  
   223  // New returns a new ContractManager.
   224  func New(persistDir string) (*ContractManager, error) {
   225  	return newContractManager(new(modules.ProductionDependencies), persistDir)
   226  }
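
// exampleUsage is a hedged sketch of constructing and shutting down a
// contract manager. The directory and folder size are placeholder values,
// and AddStorageFolder is assumed to be the exported method defined
// elsewhere in this package.
func exampleUsage() error {
	cm, err := New(filepath.Join("/var/lib/spd", "contractmanager"))
	if err != nil {
		return err
	}
	// Always stop the thread group so file handles and the WAL are released.
	defer cm.Close()

	// Register 256 GiB of storage under a placeholder mount point.
	return cm.AddStorageFolder("/mnt/storage", 1<<38)
}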