github.com/sunrise-zone/sunrise-node@v0.13.1-sr2/share/ipld/get.go

package ipld

import (
	"context"
	"errors"
	"sync"
	"sync/atomic"

	"github.com/gammazero/workerpool"
	"github.com/ipfs/boxo/blockservice"
	"github.com/ipfs/go-cid"
	ipld "github.com/ipfs/go-ipld-format"

	"github.com/sunrise-zone/sunrise-node/share"
)

// NumWorkersLimit sets the global limit for workers spawned by GetShares.
// GetShares can be called up to MaxSquareSize (128) times per data square, with
// each call spawning up to 128/2 goroutines, which altogether is 8192. Considering
// that N blocks can be fetched at the same time, e.g. while catching up on data
// from the past, we multiply this number by the allowed number of concurrent
// data square fetches (NumConcurrentSquares).
//
// NOTE: This value only limits the number of simultaneously running workers,
// which are spawned as the load increases and killed once the load declines.
//
// TODO(@Wondertan): This assumes we have a parallelized DASer implemented. Sync the values once it is shipped.
// TODO(@Wondertan): Allow configuration of values without global state.
var NumWorkersLimit = share.MaxSquareSize * share.MaxSquareSize / 2 * NumConcurrentSquares

// NumConcurrentSquares limits the number of squares that are fetched
// concurrently/simultaneously.
var NumConcurrentSquares = 8
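
// For reference (illustrative arithmetic only): with MaxSquareSize = 128, as
// noted above, and NumConcurrentSquares = 8, NumWorkersLimit resolves to
// 128 * 128 / 2 * 8 = 65536 simultaneously allowed workers.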

// ErrNodeNotFound is used to signal when an nmt Node could not be found.
var ErrNodeNotFound = errors.New("nmt node not found")

// Global worker pool that controls and limits the goroutines spawned by
// GetShares.
//
//	TODO(@Wondertan): Idle timeout for workers needs to be configured to around block time,
//		so that workers spawned between each reconstruction for every new block are reused.
var pool = workerpool.New(NumWorkersLimit)

// GetLeaf fetches and returns the raw leaf.
// It walks down the IPLD NMT tree until it finds the requested leaf.
func GetLeaf(
	ctx context.Context,
	bGetter blockservice.BlockGetter,
	root cid.Cid,
	leaf, total int,
) (ipld.Node, error) {
	// request the node
	nd, err := GetNode(ctx, bGetter, root)
	if err != nil {
		return nil, err
	}

	// look for links
	lnks := nd.Links()
	if len(lnks) == 0 {
		// in case there are none, we reached the tree's bottom, so finally return the leaf.
		return nd, err
	}

	// route the walk to the appropriate child
	total /= 2 // as we are using a binary tree, every step halves the total number of leaves
	if leaf < total {
		root = lnks[0].Cid // if the target leaf is on the left, walk down the first child
	} else {
		root, leaf = lnks[1].Cid, leaf-total // otherwise go down the second
	}

	// recursively walk down through the selected child
	return GetLeaf(ctx, bGetter, root, leaf, total)
}
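
// Illustrative usage sketch (hypothetical helper, not part of the original
// API): fetch the raw data of the leaf at index idx under root, where total
// is the number of leaves under that root.
func exampleGetLeafData(
	ctx context.Context,
	bGetter blockservice.BlockGetter,
	root cid.Cid,
	idx, total int,
) ([]byte, error) {
	nd, err := GetLeaf(ctx, bGetter, root, idx, total)
	if err != nil {
		return nil, err
	}
	// leaf nodes have no links; their raw block data carries the share bytes
	return nd.RawData(), nil
}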

// GetLeaves gets leaves from either local storage or, if not found, requests
// them from immediate/connected peers. It puts each leaf into the slice at the
// index of the node's position in the tree (bin-tree-feat).
// It does not return any error and returns/unblocks only on success
// (all shares fetched) or on context cancellation.
//
// It works concurrently by spawning workers in the pool which do one basic
// thing - block until data is fetched, s.t. share processing is never
// sequential, and thus we request *all* the available shares without waiting
// for others to finish. This is the property required to maximize data
// availability. As a side effect, we get concurrent tree traversal, reducing
// time to data.
//
// GetLeaves relies on the fact that the underlying data structure is a binary
// tree, so it's not suitable for anything else. Parts of the
// implementation that rely on this property are explicitly tagged with
// (bin-tree-feat).
func GetLeaves(ctx context.Context,
	bGetter blockservice.BlockGetter,
	root cid.Cid,
	maxShares int,
	put func(int, ipld.Node),
) {
	// this buffer ensures writes to 'jobs' are never blocking (bin-tree-feat)
	jobs := make(chan *job, (maxShares+1)/2) // +1 for the case where 'maxShares' is 1
	jobs <- &job{cid: root, ctx: ctx}
	// total is the number of routines spawned and the total number of nodes we process (bin-tree-feat),
	// so we can specify the exact number of loop iterations and wait for that
	// many routines to finish processing
	total := maxShares*2 - 1
	wg := sync.WaitGroup{}
	wg.Add(total)

	// all preparations are done, so begin processing jobs
	for i := 0; i < total; i++ {
		select {
		case j := <-jobs:
			// work over each job concurrently, s.t. shares do not block
			// processing of each other
			pool.Submit(func() {
				defer wg.Done()

				nd, err := GetNode(ctx, bGetter, j.cid)
				if err != nil {
					// we don't really care about errors here
					// just fetch as much as possible
					return
				}
				// check links to know what we should do with the node
				lnks := nd.Links()
				if len(lnks) == 0 {
					// successfully fetched a share/leaf
					// ladies and gentlemen, we got em!
					put(j.sharePos, nd)
					return
				}
				// ok, we found more links
				for i, lnk := range lnks {
					// send those to be processed
					select {
					case jobs <- &job{
						cid: lnk.Cid,
						// calc position for child nodes (bin-tree-feat),
						// s.t. 'if' above knows where to put a share
						sharePos: j.sharePos*2 + i,
						// we pass the context to job so that spans are tracked in a tree
						// structure
						ctx: ctx,
					}:
					case <-ctx.Done():
						return
					}
				}
			})
		case <-ctx.Done():
			return
		}
	}
	// "tick-tack, how much more should I wait before you get those shares?" - the goroutine
	wg.Wait()
}
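
// Illustrative usage sketch (hypothetical helper, not part of the original
// API): collect all leaves under root into a slice indexed by leaf position.
// Concurrent workers write to distinct slice elements, which is safe, and
// GetLeaves unblocks only once every leaf is fetched or the context is
// cancelled (leaving nil entries in the latter case).
func exampleCollectLeaves(
	ctx context.Context,
	bGetter blockservice.BlockGetter,
	root cid.Cid,
	maxShares int,
) []ipld.Node {
	leaves := make([]ipld.Node, maxShares)
	GetLeaves(ctx, bGetter, root, maxShares, func(pos int, nd ipld.Node) {
		leaves[pos] = nd
	})
	return leaves
}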

// GetProof fetches and returns the leaf's Merkle proof.
// It walks down the IPLD NMT tree until it reaches the leaf and returns the collected proof.
func GetProof(
	ctx context.Context,
	bGetter blockservice.BlockGetter,
	root cid.Cid,
	proof []cid.Cid,
	leaf, total int,
) ([]cid.Cid, error) {
	// request the node
	nd, err := GetNode(ctx, bGetter, root)
	if err != nil {
		return nil, err
	}
	// look for links
	lnks := nd.Links()
	if len(lnks) == 0 {
		p := make([]cid.Cid, len(proof))
		copy(p, proof)
		return p, nil
	}

	// route the walk to the appropriate child
	total /= 2 // as we are using a binary tree, every step halves the total number of leaves
	if leaf < total {
		root = lnks[0].Cid // if the target leaf is on the left, walk down the first child
		proof = append(proof, lnks[1].Cid)
	} else {
		root, leaf = lnks[1].Cid, leaf-total // otherwise go down the second
		proof, err = GetProof(ctx, bGetter, root, proof, leaf, total)
		if err != nil {
			return nil, err
		}
		return append(proof, lnks[0].Cid), nil
	}

	// recursively walk down through the selected child
	return GetProof(ctx, bGetter, root, proof, leaf, total)
}
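
// Illustrative usage sketch (hypothetical helper, not part of the original
// API): collect the Merkle proof path for the leaf at index idx under root,
// where total is the number of leaves under that root. The proof starts out
// empty and is filled with sibling CIDs as the walk descends the tree.
func exampleLeafProof(
	ctx context.Context,
	bGetter blockservice.BlockGetter,
	root cid.Cid,
	idx, total int,
) ([]cid.Cid, error) {
	return GetProof(ctx, bGetter, root, nil, idx, total)
}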

// chanGroup implements an atomic wait group, closing a jobs chan
// when fully done.
type chanGroup struct {
	jobs    chan job
	counter int64
}

func (w *chanGroup) add(count int64) {
	atomic.AddInt64(&w.counter, count)
}

func (w *chanGroup) done() {
	numRemaining := atomic.AddInt64(&w.counter, -1)

	// Close channel if this job was the last one
	if numRemaining == 0 {
		close(w.jobs)
	}
}
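
// Illustrative usage sketch (hypothetical, not part of the original file):
// the counter is seeded with the number of initially pending jobs and every
// processed job calls done() exactly once, so ranging over the jobs channel
// terminates when the last job is marked done and the channel closes.
func exampleChanGroup() {
	cg := chanGroup{jobs: make(chan job, 1)}
	cg.add(1)
	cg.jobs <- job{}
	for range cg.jobs {
		// a real worker would process the job here and may add() follow-up jobs
		cg.done()
	}
}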

// job represents an encountered node to investigate during the `GetLeaves`
// and `CollectLeavesByNamespace` routines.
type job struct {
	// we pass the context to job so that spans are tracked in a tree
	// structure
	ctx context.Context
	// cid of the node that will be handled
	cid cid.Cid
	// sharePos represents the potential share position in the share slice
	sharePos int
	// depth represents the number of edges on the path from the tree's root node to this node
	depth int
	// isAbsent indicates that the target namespaceID is not included; only absence proofs are collected
	isAbsent bool
}

func (j job) next(direction direction, cid cid.Cid, isAbsent bool) job {
	var i int
	if direction == right {
		i++
	}
	return job{
		ctx:      j.ctx,
		cid:      cid,
		sharePos: j.sharePos*2 + i,
		depth:    j.depth + 1,
		isAbsent: isAbsent,
	}
}
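
// For example (illustrative): a parent job at sharePos 3 produces children at
// sharePos 6 for the left link and sharePos 7 for the right one, i.e. the
// child's position within the next level of the binary tree.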