storj.io/uplink@v1.13.0/private/storage/streams/segmentupload/single.go

// Copyright (C) 2023 Storj Labs, Inc.
// See LICENSE for copying information.

package segmentupload

import (
	"context"
	"fmt"
	"io"
	"sync"
	"sync/atomic"

	"github.com/spacemonkeygo/monkit/v3"
	"github.com/zeebo/errs"

	"storj.io/common/encryption"
	"storj.io/uplink/private/eestream"
	"storj.io/uplink/private/eestream/scheduler"
	"storj.io/uplink/private/metaclient"
	"storj.io/uplink/private/storage/streams/pieceupload"
	"storj.io/uplink/private/storage/streams/splitter"
	"storj.io/uplink/private/testuplink"
)

var (
	mon        = monkit.Package()
	uploadTask = mon.TaskNamed("segment-upload")
)

// Scheduler is used to coordinate and constrain resources between
// concurrent segment uploads.
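// Begin obtains a handle with Join before starting piece uploads and treats a
// false return as a failure; each piece upload then acquires a resource from
// the handle and releases it when that piece upload finishes.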
type Scheduler interface {
	Join(ctx context.Context) (scheduler.Handle, bool)
}

// Begin starts a segment upload identified by the segment ID provided in the
// beginSegment response. It starts enough piece uploads to satisfy the optimal
// threshold of the segment redundancy strategy plus a small long tail margin,
// capped by the number of available limits, and cancels the remaining piece
// uploads once the optimal threshold of pieces has uploaded successfully.
func Begin(ctx context.Context,
	beginSegment *metaclient.BeginSegmentResponse,
	segment splitter.Segment,
	limitsExchanger pieceupload.LimitsExchanger,
	piecePutter pieceupload.PiecePutter,
	scheduler Scheduler,
	longTailMargin int,
) (_ *Upload, err error) {
	defer mon.Task()(&ctx)(&err)

	ctx = testuplink.WithLogWriterContext(ctx, "seg_pos", fmt.Sprint(segment.Position()))
	testuplink.Log(ctx, "Begin upload segment...")
	defer testuplink.Log(ctx, "Done begin upload segment.")

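	// The monkit task is finished here only if Begin fails. On success the
	// returned Upload takes ownership of taskDone (as it does for the
	// scheduler handle, the cancel func, and the wait group below) and
	// finishes it in Wait.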
	taskDone := uploadTask(&ctx)
	defer func() {
		if err != nil {
			taskDone(&err)
		}
	}()

	// Join the scheduler so the concurrency can be limited appropriately.
	handle, ok := scheduler.Join(ctx)
	if !ok {
		return nil, errs.New("failed to obtain piece upload handle")
	}
	defer func() {
		if err != nil {
			handle.Done()
		}
	}()

	if beginSegment.RedundancyStrategy.ErasureScheme == nil {
		return nil, errs.New("begin segment response is missing redundancy strategy")
	}
	if beginSegment.PiecePrivateKey.IsZero() {
		return nil, errs.New("begin segment response is missing piece private key")
	}

	optimalThreshold := beginSegment.RedundancyStrategy.OptimalThreshold()
	if optimalThreshold > len(beginSegment.Limits) {
		return nil, errs.New("begin segment response needs at least %d limits to meet optimal threshold but has %d", optimalThreshold, len(beginSegment.Limits))
	}

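	// Illustrative arithmetic (the numbers are hypothetical): with 110 limits,
	// an optimal threshold of 80, and a long tail margin of 5, 85 piece
	// uploads are started; a negative margin disables the cap and starts an
	// upload for every limit.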
	uploaderCount := len(beginSegment.Limits)
	if longTailMargin >= 0 {
		// The number of uploads is enough to satisfy the optimal threshold plus
		// a small long tail margin, capped by the number of limits.
		uploaderCount = optimalThreshold + longTailMargin
		if uploaderCount > len(beginSegment.Limits) {
			uploaderCount = len(beginSegment.Limits)
		}
	}

	mgr := pieceupload.NewManager(
		limitsExchanger,
		&pieceReader{segment, beginSegment.RedundancyStrategy},
		beginSegment.SegmentID,
		beginSegment.Limits,
	)

	wg := new(sync.WaitGroup)
	defer func() {
		if err != nil {
			wg.Wait()
		}
	}()

	// Create a context that we can use to cancel piece uploads when we have enough.
	longTailCtx, cancel := context.WithCancel(ctx)
	defer func() {
		if err != nil {
			cancel()
		}
	}()

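	// The results channel is buffered with one slot per uploader so that every
	// piece upload goroutine can deliver its result without blocking, even
	// before Wait begins draining the channel.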
	results := make(chan segmentResult, uploaderCount)
	var successful int32
	for i := 0; i < uploaderCount; i++ {
		res, ok := handle.Get(ctx)
		if !ok {
			return nil, errs.New("failed to obtain piece upload resource")
		}

		wg.Add(1)
		go func() {
			defer wg.Done()

			// Whether the upload is ultimately successful or not, when this
			// function returns, the scheduler resource MUST be released to
			// allow other piece uploads to take place.
			defer res.Done()
			uploaded, err := pieceupload.UploadOne(longTailCtx, ctx, mgr, piecePutter, beginSegment.PiecePrivateKey)
			results <- segmentResult{uploaded: uploaded, err: err}
			if uploaded {
				// Piece upload was successful. If we have met the optimal threshold, we
				// can cancel the rest.
				if int(atomic.AddInt32(&successful, 1)) == optimalThreshold {
					testuplink.Log(ctx, "Segment reached optimal threshold of", optimalThreshold, "pieces.")
					cancel()
				}
			}
		}()
	}

	return &Upload{
		ctx:              ctx,
		taskDone:         taskDone,
		optimalThreshold: beginSegment.RedundancyStrategy.OptimalThreshold(),
		handle:           handle,
		results:          results,
		cancel:           cancel,
		wg:               wg,
		mgr:              mgr,
		segment:          segment,
	}, nil
}
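
// An illustrative sketch of how a caller outside this package might drive a
// single segment upload (the beginSegment response, segment, exchanger,
// putter, sched, and longTailMargin values are assumed to be supplied by
// higher layers of the stream upload):
//
//	upload, err := segmentupload.Begin(ctx, beginSegment, segment, exchanger, putter, sched, longTailMargin)
//	if err != nil {
//		return err
//	}
//	// Wait blocks until enough pieces have finished and returns the
//	// parameters used by the metainfo layer to commit the segment.
//	commit, err := upload.Wait()
//	if err != nil {
//		return err
//	}
//	_ = commit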

// pieceReader adapts a segment and its redundancy strategy so that the piece
// upload manager can obtain erasure-encoded data for any piece number.
type pieceReader struct {
	segment    splitter.Segment
	redundancy eestream.RedundancyStrategy
}

// PieceReader returns a reader over the erasure share identified by num. The
// segment data is padded to a multiple of the stripe size before it is
// erasure encoded.
func (r *pieceReader) PieceReader(num int) io.Reader {
	segment := r.segment.Reader()
	stripeSize := r.redundancy.StripeSize()
	paddedData := encryption.PadReader(io.NopCloser(segment), stripeSize)
	return NewEncodedReader(paddedData, r.redundancy, num)
}

// segmentResult is the outcome of a single piece upload goroutine.
type segmentResult struct {
	uploaded bool
	err      error
}

// Upload is a segment upload that has been started by Begin and is completed
// by calling Wait.
type Upload struct {
	ctx              context.Context
	taskDone         func(*error)
	optimalThreshold int
	handle           scheduler.Handle
	results          chan segmentResult
	cancel           context.CancelFunc
	wg               *sync.WaitGroup
	mgr              *pieceupload.Manager
	segment          splitter.Segment
}

// Wait blocks until the segment upload completes. The upload succeeds as long
// as at least the optimal threshold of pieces uploaded successfully.
func (upload *Upload) Wait() (_ *metaclient.CommitSegmentParams, err error) {
	defer upload.taskDone(&err)
	defer upload.handle.Done()
	defer upload.cancel()

	var eg errs.Group
	var successful int
	for i := 0; i < cap(upload.results); i++ {
		result := <-upload.results
		if result.uploaded {
			successful++
		}
		eg.Add(result.err)
	}

	// The goroutines should all be on their way to exiting since the loop
	// above guarantees they have written their results to the channel. Wait
	// for them to all finish and release the scheduler resource. This is
	// really only necessary for deterministic testing.
	upload.wg.Wait()

	if successful < upload.optimalThreshold {
		err = errs.Combine(errs.New("failed to upload enough pieces (needed at least %d but got %d)", upload.optimalThreshold, successful), eg.Err())
	}
	upload.segment.DoneReading(err)

	testuplink.Log(upload.ctx, "Done waiting for segment.",
		"successful:", successful,
		"optimal:", upload.optimalThreshold,
		"errs:", eg.Err(),
	)

	if err != nil {
		return nil, err
	}

	info := upload.segment.Finalize()
	segmentID, results := upload.mgr.Results()

	return &metaclient.CommitSegmentParams{
		SegmentID:         segmentID,
		Encryption:        info.Encryption,
		SizeEncryptedData: info.EncryptedSize,
		PlainSize:         info.PlainSize,
		EncryptedTag:      nil, // encrypted eTag is injected by a different layer
		UploadResult:      results,
	}, nil
}