gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/renter/projectchunkworkerset_test.go (about)

     1  package renter
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"math"
     8  	"path/filepath"
     9  	"strings"
    10  	"testing"
    11  	"time"
    12  
    13  	"github.com/opentracing/opentracing-go"
    14  	"gitlab.com/NebulousLabs/errors"
    15  	"gitlab.com/NebulousLabs/fastrand"
    16  	"gitlab.com/SkynetLabs/skyd/build"
    17  	"gitlab.com/SkynetLabs/skyd/persist"
    18  	"gitlab.com/SkynetLabs/skyd/skymodules"
    19  	"go.sia.tech/siad/crypto"
    20  	"go.sia.tech/siad/modules"
    21  	"go.sia.tech/siad/types"
    22  )
    23  
    24  // TestPCWS verifies the functionality of the PCWS.
    25  func TestPCWS(t *testing.T) {
    26  	if testing.Short() {
    27  		t.SkipNow()
    28  	}
    29  
    30  	// create a worker tester
    31  	wt, err := newWorkerTester(t.Name())
    32  	if err != nil {
    33  		t.Fatal(err)
    34  	}
    35  	defer func() {
    36  		err := wt.Close()
    37  		if err != nil {
    38  			t.Fatal(err)
    39  		}
    40  	}()
    41  
    42  	t.Run("basic", func(t *testing.T) { testBasic(t, wt) })
    43  	t.Run("multiple", func(t *testing.T) { testMultiple(t, wt) })
    44  	t.Run("newPCWSByRoots", testNewPCWSByRoots)
    45  }
    46  
    47  // testBasic verifies the PCWS using a simple setup with a single host, looking
    48  // for a single sector.
    49  func testBasic(t *testing.T, wt *workerTester) {
    50  	// create a ctx with test span
    51  	ctx := opentracing.ContextWithSpan(context.Background(), testSpan())
    52  
    53  	// create a random sector
    54  	sectorData := fastrand.Bytes(int(modules.SectorSize))
    55  	sectorRoot := crypto.MerkleRoot(sectorData)
    56  
    57  	// create a passthrough EC and a passhtrough cipher key
    58  	ptec := skymodules.NewPassthroughErasureCoder()
    59  	ptck, err := crypto.NewSiaKey(crypto.TypePlain, nil)
    60  	if err != nil {
    61  		t.Fatal(err)
    62  	}
    63  
    64  	// define a helper function that waits for an update
    65  	waitForUpdate := func(ws *pcwsWorkerState) {
    66  		ws.mu.Lock()
    67  		wu := ws.registerForWorkerUpdate()
    68  		ws.mu.Unlock()
    69  		select {
    70  		case <-wu:
    71  		case <-time.After(5 * time.Second):
    72  			t.Fatal("timed out")
    73  		}
    74  	}
    75  
    76  	// create PCWS with empty root. This should fail.
    77  	pcws, err := wt.staticRenter.newPCWSByRoots(ctx, []crypto.Hash{{}}, ptec, ptck, 0)
    78  	if err == nil {
    79  		t.Fatal("should fail")
    80  	}
    81  
    82  	// create PCWS
    83  	pcws, err = wt.staticRenter.newPCWSByRoots(ctx, []crypto.Hash{sectorRoot}, ptec, ptck, 0)
    84  	if err != nil {
    85  		t.Fatal(err)
    86  	}
    87  
    88  	// get the current state update
    89  	pcws.mu.Lock()
    90  	ws := pcws.workerState
    91  	wslt := pcws.workerStateLaunchTime
    92  	pcws.mu.Unlock()
    93  
    94  	// verify launch time was set
    95  	unset := time.Time{}
    96  	if wslt == unset {
    97  		t.Fatal("launch time not set")
    98  	}
    99  
   100  	// register for worker update and wait
   101  	waitForUpdate(ws)
   102  
   103  	// verify resolved and unresolved workers
   104  	ws.mu.Lock()
   105  	resolved := ws.resolvedWorkers
   106  	numResolved := len(ws.resolvedWorkers)
   107  	numUnresolved := len(ws.unresolvedWorkers)
   108  	ws.mu.Unlock()
   109  
   110  	if numResolved != 1 || numUnresolved != 0 {
   111  		t.Fatal("unexpected")
   112  	}
   113  	if len(resolved[0].pieceIndices) != 0 {
   114  		t.Fatal("unexpected")
   115  	}
   116  
   117  	// add the sector to the host
   118  	err = wt.host.AddSector(sectorRoot, sectorData)
   119  	if err != nil {
   120  		t.Fatal(err)
   121  	}
   122  
   123  	// reset the launch time - allowing us to force a state update
   124  	pcws.mu.Lock()
   125  	pcws.workerStateLaunchTime = unset
   126  	pcws.mu.Unlock()
   127  	err = pcws.managedTryUpdateWorkerState()
   128  	if err != nil {
   129  		t.Fatal(err)
   130  	}
   131  
   132  	// get the current worker state (!important)
   133  	ws = pcws.managedWorkerState()
   134  
   135  	// register for worker update and wait
   136  	waitForUpdate(ws)
   137  
   138  	// verify resolved and unresolved workers
   139  	ws.mu.Lock()
   140  	resolved = ws.resolvedWorkers
   141  	ws.mu.Unlock()
   142  
   143  	// expect we found sector at index 0
   144  	if len(resolved) != 1 || len(resolved[0].pieceIndices) != 1 {
   145  		t.Fatal("unexpected", len(resolved), len(resolved[0].pieceIndices))
   146  	}
   147  }
   148  
   149  // testMultiple verifies the PCWS for a multiple sector lookup on multiple
   150  // hosts.
   151  func testMultiple(t *testing.T, wt *workerTester) {
   152  	// create a ctx with test span
   153  	ctx := opentracing.ContextWithSpan(context.Background(), testSpan())
   154  
   155  	// create a helper function that adds a host
   156  	numHosts := 0
   157  	addHost := func() modules.Host {
   158  		testdir := filepath.Join(wt.rt.dir, fmt.Sprintf("host%d", numHosts))
   159  		host, err := wt.rt.addCustomHost(testdir, modules.ProdDependencies)
   160  		if err != nil {
   161  			t.Fatal(err)
   162  		}
   163  		numHosts++
   164  		return host
   165  	}
   166  
   167  	// create a helper function that adds a random sector on a given host
   168  	addSector := func(h modules.Host) crypto.Hash {
   169  		// create a random sector
   170  		sectorData := fastrand.Bytes(int(modules.SectorSize))
   171  		sectorRoot := crypto.MerkleRoot(sectorData)
   172  
   173  		// add the sector to the host
   174  		err := h.AddSector(sectorRoot, sectorData)
   175  		if err != nil {
   176  			t.Fatal(err)
   177  		}
   178  		return sectorRoot
   179  	}
   180  
   181  	// create a helper function that waits for an update
   182  	waitForUpdate := func(ws *pcwsWorkerState) {
   183  		ws.mu.Lock()
   184  		wu := ws.registerForWorkerUpdate()
   185  		ws.mu.Unlock()
   186  		select {
   187  		case <-wu:
   188  		case <-time.After(time.Minute):
   189  			t.Fatal("timed out")
   190  		}
   191  	}
   192  
   193  	// create a helper function that compares uint64 slices for equality
   194  	isEqualTo := func(a, b []uint64) bool {
   195  		if len(a) != len(b) {
   196  			return false
   197  		}
   198  		for i, v := range a {
   199  			if v != b[i] {
   200  				return false
   201  			}
   202  		}
   203  		return true
   204  	}
   205  
   206  	h1 := addHost()
   207  	h2 := addHost()
   208  	h3 := addHost()
   209  
   210  	h1PK := h1.PublicKey().String()
   211  	h2PK := h2.PublicKey().String()
   212  	h3PK := h3.PublicKey().String()
   213  
   214  	r1 := addSector(h1)
   215  	r2 := addSector(h1)
   216  	r3 := addSector(h2)
   217  	r4 := addSector(h3)
   218  	r5 := crypto.MerkleRoot(fastrand.Bytes(int(modules.SectorSize)))
   219  	roots := []crypto.Hash{r1, r2, r3, r4, r5}
   220  
   221  	// create an EC and a passhtrough cipher key
   222  	ec, err := skymodules.NewRSCode(1, 4)
   223  	if err != nil {
   224  		t.Fatal(err)
   225  	}
   226  	ptck, err := crypto.NewSiaKey(crypto.TypePlain, nil)
   227  	if err != nil {
   228  		t.Fatal(err)
   229  	}
   230  
   231  	// wait until the renter has a worker for all hosts
   232  	err = build.Retry(600, 100*time.Millisecond, func() error {
   233  		ws, err := wt.staticRenter.WorkerPoolStatus()
   234  		if err != nil {
   235  			t.Fatal(err)
   236  		}
   237  		if ws.NumWorkers < 3 {
   238  			_, err = wt.rt.miner.AddBlock()
   239  			if err != nil {
   240  				t.Fatal(err)
   241  			}
   242  
   243  			return errors.New("workers not ready yet")
   244  		}
   245  		return nil
   246  	})
   247  	if err != nil {
   248  		t.Fatal(err)
   249  	}
   250  
   251  	// wait until we're certain all workers are fit for duty
   252  	err = build.Retry(100, 100*time.Millisecond, func() error {
   253  		ws, err := wt.staticRenter.WorkerPoolStatus()
   254  		if err != nil {
   255  			t.Fatal(err)
   256  		}
   257  		for _, w := range ws.Workers {
   258  			if w.AccountStatus.AvailableBalance.IsZero() ||
   259  				!w.PriceTableStatus.Active ||
   260  				w.MaintenanceOnCooldown {
   261  				return errors.New("worker is not ready yet")
   262  			}
   263  		}
   264  		return nil
   265  	})
   266  
   267  	// create PCWS
   268  	pcws, err := wt.staticRenter.newPCWSByRoots(ctx, roots, ec, ptck, 0)
   269  	if err != nil {
   270  		t.Fatal(err)
   271  	}
   272  	ws := pcws.managedWorkerState()
   273  
   274  	// wait until all workers have resolved
   275  	numWorkers := len(ws.staticWorkerPool.callWorkers())
   276  	for {
   277  		waitForUpdate(ws)
   278  		ws.mu.Lock()
   279  		numResolved := len(ws.resolvedWorkers)
   280  		ws.mu.Unlock()
   281  		if numResolved == numWorkers {
   282  			break
   283  		}
   284  	}
   285  
   286  	// fetch piece indices per host
   287  	ws.mu.Lock()
   288  	resolved := ws.resolvedWorkers
   289  	ws.mu.Unlock()
   290  	for _, rw := range resolved {
   291  		var expected []uint64
   292  		var hostname string
   293  		switch rw.worker.staticHostPubKeyStr {
   294  		case h1PK:
   295  			expected = []uint64{0, 1}
   296  			hostname = "host1"
   297  		case h2PK:
   298  			expected = []uint64{2}
   299  			hostname = "host2"
   300  		case h3PK:
   301  			expected = []uint64{3}
   302  			hostname = "host3"
   303  		default:
   304  			hostname = "other"
   305  			continue
   306  		}
   307  
   308  		if !isEqualTo(rw.pieceIndices, expected) {
   309  			t.Error("unexpected pieces", hostname, rw.worker.staticHostPubKeyStr[64:], rw.pieceIndices, rw.err)
   310  		}
   311  	}
   312  }
   313  
   314  // testNewPCWSByRoots verifies the 'newPCWSByRoots' constructor function and its
   315  // edge cases
   316  func testNewPCWSByRoots(t *testing.T) {
   317  	r := new(Renter)
   318  	r.staticWorkerPool = new(workerPool)
   319  
   320  	// create a ctx with test span
   321  	ctx := opentracing.ContextWithSpan(context.Background(), testSpan())
   322  
   323  	// create random roots
   324  	var root1 crypto.Hash
   325  	var root2 crypto.Hash
   326  	fastrand.Read(root1[:])
   327  	fastrand.Read(root2[:])
   328  	roots := []crypto.Hash{root1, root2}
   329  
   330  	// create a passthrough EC and a passhtrough cipher key
   331  	ptec := skymodules.NewPassthroughErasureCoder()
   332  	ptck, err := crypto.NewSiaKey(crypto.TypePlain, nil)
   333  	if err != nil {
   334  		t.Fatal(err)
   335  	}
   336  
   337  	// verify basic case
   338  	_, err = r.newPCWSByRoots(ctx, roots[:1], ptec, ptck, 0)
   339  	if err != nil {
   340  		t.Fatal("unexpected")
   341  	}
   342  
   343  	// verify the case where we the amount of roots does not equal num pieces
   344  	// defined in the erasure coder
   345  	_, err = r.newPCWSByRoots(ctx, roots, ptec, ptck, 0)
   346  	if err == nil || !strings.Contains(err.Error(), "but erasure coder specifies 1 pieces") {
   347  		t.Fatal(err)
   348  	}
   349  
   350  	// verify the legacy case where 1-of-N only needs 1 root
   351  	ec, err := skymodules.NewRSCode(1, 10)
   352  	if err != nil {
   353  		t.Fatal("unexpected")
   354  	}
   355  
   356  	// verify the amount of roots provided **does not** equal num pieces,
   357  	// usually causing an error
   358  	if len(roots[:1]) == ec.NumPieces() {
   359  		t.Fatal("unexpected")
   360  	}
   361  	_, err = r.newPCWSByRoots(ctx, roots[:1], ec, ptck, 0)
   362  	if err != nil {
   363  		t.Fatal("unexpected")
   364  	}
   365  
   366  	// verify passing nil for the master key returns an error
   367  	_, err = r.newPCWSByRoots(ctx, roots[:1], ptec, nil, 0)
   368  	if err == nil {
   369  		t.Fatal("unexpected")
   370  	}
   371  }
   372  
   373  // TestProjectChunkWorsetSet_managedLaunchWorker probes the
   374  // 'managedLaunchWorker' function on the PCWS.
   375  func TestProjectChunkWorsetSet_managedLaunchWorker(t *testing.T) {
   376  	t.Parallel()
   377  
   378  	// create EC + key
   379  	ec := skymodules.NewPassthroughErasureCoder()
   380  	ck, err := crypto.NewSiaKey(crypto.TypePlain, nil)
   381  	if err != nil {
   382  		t.Fatal(err)
   383  	}
   384  
   385  	// create renter
   386  	renter := new(Renter)
   387  	renter.staticWorkerPool = new(workerPool)
   388  
   389  	// create PCWS
   390  	pcws := &projectChunkWorkerSet{
   391  		staticChunkIndex:   0,
   392  		staticErasureCoder: ec,
   393  		staticMasterKey:    ck,
   394  		staticPieceRoots:   []crypto.Hash{},
   395  
   396  		staticBaseSectorDownloadStats:   renter.staticBaseSectorDownloadStats,
   397  		staticFanoutSectorDownloadStats: renter.staticFanoutSectorDownloadStats,
   398  
   399  		staticCtx:        context.Background(),
   400  		staticDeps:       renter.staticDeps,
   401  		staticLog:        renter.staticLog,
   402  		staticWorkerPool: renter.staticWorkerPool,
   403  	}
   404  
   405  	// create PCWS worker state
   406  	ws := &pcwsWorkerState{
   407  		unresolvedWorkers: make(map[string]*pcwsUnresolvedWorker),
   408  		staticDeps:        pcws.staticDeps,
   409  		staticLog:         pcws.staticLog,
   410  		staticWorkerPool:  pcws.staticWorkerPool,
   411  	}
   412  
   413  	// mock the worker
   414  	w := new(worker)
   415  	w.newCache()
   416  	w.newPriceTable()
   417  	w.newMaintenanceState()
   418  	w.initJobHasSectorQueue()
   419  
   420  	// give it a name and set an initial estimate on the HS queue
   421  	seed := 123 * time.Second
   422  	w.staticJobHasSectorQueue.staticDT.AddDataPoint(seed)
   423  	w.staticJobHasSectorQueue.weightedJobTime = float64(seed)
   424  	w.staticHostPubKeyStr = "myworker"
   425  
   426  	// ensure PT is valid
   427  	w.staticPriceTable().staticExpiryTime = time.Now().Add(time.Hour)
   428  
   429  	// launch the worker - shouldn't work
   430  	responseChan := make(chan *jobHasSectorResponse, 0)
   431  	err = pcws.managedLaunchWorker(w, responseChan, ws)
   432  	if !errors.Contains(err, errEstimateAboveMax) {
   433  		t.Fatal(err)
   434  	}
   435  
   436  	// verify the worker didn't launch.
   437  	uw, exists := ws.unresolvedWorkers["myworker"]
   438  	if exists {
   439  		t.Log(ws.unresolvedWorkers)
   440  		t.Fatal("unexpected")
   441  	}
   442  
   443  	// launch the worker
   444  	w.staticJobHasSectorQueue.staticDT = skymodules.NewDistributionTrackerStandard()
   445  	w.staticJobHasSectorQueue.staticDT.AddDataPoint(pcwsHasSectorTimeout)
   446  	w.staticJobHasSectorQueue.weightedJobTime = float64(pcwsHasSectorTimeout)
   447  	responseChan = make(chan *jobHasSectorResponse, 0)
   448  	err = pcws.managedLaunchWorker(w, responseChan, ws)
   449  	if err != nil {
   450  		t.Fatal(err)
   451  	}
   452  
   453  	// verify the worker launched successfully
   454  	uw, exists = ws.unresolvedWorkers["myworker"]
   455  	if !exists {
   456  		t.Log(ws.unresolvedWorkers)
   457  		t.Fatal("unexpected")
   458  	}
   459  
   460  	// verify the expected dur matches the initial queue estimate
   461  	expectedDur := time.Until(uw.staticExpectedResolvedTime)
   462  	expectedDurInS := math.Round(expectedDur.Seconds())
   463  	if expectedDurInS != pcwsHasSectorTimeout.Seconds() {
   464  		t.Log(expectedDurInS)
   465  		t.Fatal("unexpected")
   466  	}
   467  
   468  	// tweak the maintenancestate, putting it on a cooldown
   469  	minuteFromNow := time.Now().Add(time.Minute)
   470  	w.staticMaintenanceState.cooldownUntil = minuteFromNow
   471  	err = pcws.managedLaunchWorker(w, responseChan, ws)
   472  	if err != nil {
   473  		t.Fatal(err)
   474  	}
   475  
   476  	// verify the cooldown is being reflected in the estimate
   477  	uw = ws.unresolvedWorkers["myworker"]
   478  	expectedDur = time.Until(uw.staticExpectedResolvedTime)
   479  	expectedDurInS = math.Round(expectedDur.Seconds())
   480  	if expectedDurInS != pcwsHasSectorTimeout.Seconds()+60 {
   481  		t.Log(expectedDurInS)
   482  		t.Fatal("unexpected")
   483  	}
   484  }
   485  
   486  // TestWaitForResult is a unit test for the worker state's WaitForResults
   487  // method.
   488  func TestWaitForResult(t *testing.T) {
   489  	if testing.Short() {
   490  		t.SkipNow()
   491  	}
   492  	t.Parallel()
   493  
   494  	// Create a plain worker state.
   495  	ws := &pcwsWorkerState{
   496  		unresolvedWorkers: make(map[string]*pcwsUnresolvedWorker),
   497  	}
   498  
   499  	// Add unresolved worker.
   500  	_, pk1 := crypto.GenerateKeyPair()
   501  	hpk1 := types.Ed25519PublicKey(pk1)
   502  	ws.unresolvedWorkers[hpk1.String()] = &pcwsUnresolvedWorker{}
   503  
   504  	// Wait for its result in a separate goroutine.
   505  	done := make(chan struct{})
   506  	var result []pcwsWorkerResponse
   507  	go func() {
   508  		result = ws.WaitForResults(context.Background())
   509  		close(done)
   510  	}()
   511  
   512  	// Wait some time. Should still not be done.
   513  	select {
   514  	case <-done:
   515  		t.Fatal("wait finished")
   516  	case <-time.After(100 * time.Millisecond):
   517  	}
   518  
   519  	// Move the unresolved workers to resolved.
   520  	ws.mu.Lock()
   521  	delete(ws.unresolvedWorkers, hpk1.String())
   522  	resolvedWorker := pcwsWorkerResponse{err: errors.New("test")}
   523  	ws.resolvedWorkers = append(ws.resolvedWorkers, resolvedWorker)
   524  
   525  	// Close the update chan.
   526  	ws.closeUpdateChans()
   527  	ws.mu.Unlock()
   528  
   529  	// Should be done now.
   530  	select {
   531  	case <-time.After(100 * time.Millisecond):
   532  		t.Fatal("still not done")
   533  	case <-done:
   534  	}
   535  
   536  	// Result should have length 1.
   537  	if len(result) != 1 {
   538  		t.Fatal("unexpected", len(result))
   539  	}
   540  
   541  	// Add another unresolved worker.
   542  	_, pk2 := crypto.GenerateKeyPair()
   543  	hpk2 := types.Ed25519PublicKey(pk2)
   544  	ws.unresolvedWorkers[hpk2.String()] = &pcwsUnresolvedWorker{}
   545  
   546  	// Use a timeout this time.
   547  	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
   548  	defer cancel()
   549  
   550  	// Measure the time the call takes.
   551  	start := time.Now()
   552  	result = ws.WaitForResults(ctx)
   553  
   554  	// Should have taken at least 100ms to return.
   555  	if time.Since(start) < 100*time.Millisecond {
   556  		t.Fatal("returned too early")
   557  	}
   558  
   559  	// Result should have length 1 again. Because we got the same resolved
   560  	// worker.
   561  	if len(result) != 1 {
   562  		t.Fatal("unexpected", len(result))
   563  	}
   564  }
   565  
   566  // newTestProjectChunkWorkerSet returns a PCWS used for testing
   567  func newTestProjectChunkWorkerSet() *projectChunkWorkerSet {
   568  	return newCustomTestProjectChunkWorkerSet(skymodules.NewRSSubCodeDefault())
   569  }
   570  
   571  // newCustomTestProjectChunkWorkerSet returns a PCWS used for testing and allows
   572  // to pass a custom erasure coder
   573  func newCustomTestProjectChunkWorkerSet(ec skymodules.ErasureCoder) *projectChunkWorkerSet {
   574  	// create a passhtrough cipher key
   575  	ck, err := crypto.NewSiaKey(crypto.TypePlain, nil)
   576  	if err != nil {
   577  		return nil
   578  	}
   579  
   580  	// create renter
   581  	renter := new(Renter)
   582  	renter.staticBaseSectorDownloadStats = skymodules.NewSectorDownloadStats()
   583  	renter.staticDeps = skymodules.SkydProdDependencies
   584  	renter.staticFanoutSectorDownloadStats = skymodules.NewSectorDownloadStats()
   585  
   586  	// create discard logger
   587  	logger, err := persist.NewLogger(ioutil.Discard)
   588  	if err != nil {
   589  		return nil
   590  	}
   591  	renter.staticLog = logger
   592  
   593  	// create PCWS manually
   594  	return &projectChunkWorkerSet{
   595  		workerState: &pcwsWorkerState{
   596  			unresolvedWorkers: make(map[string]*pcwsUnresolvedWorker),
   597  			staticDeps:        renter.staticDeps,
   598  			staticLog:         renter.staticLog,
   599  			staticWorkerPool:  renter.staticWorkerPool,
   600  		},
   601  
   602  		staticChunkIndex:   0,
   603  		staticErasureCoder: ec,
   604  		staticMasterKey:    ck,
   605  		staticPieceRoots:   make([]crypto.Hash, ec.NumPieces()),
   606  
   607  		staticBaseSectorDownloadStats:   renter.staticBaseSectorDownloadStats,
   608  		staticFanoutSectorDownloadStats: renter.staticFanoutSectorDownloadStats,
   609  
   610  		staticCtx:        context.Background(),
   611  		staticDeps:       renter.staticDeps,
   612  		staticLog:        renter.staticLog,
   613  		staticWorkerPool: renter.staticWorkerPool,
   614  	}
   615  }