storj.io/uplink@v1.13.0/private/eestream/encode.go

// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.

package eestream

import (
	"context"
	"io"
	"os"

	"storj.io/common/encryption"
	"storj.io/common/fpath"
	"storj.io/common/memory"
	"storj.io/common/pb"
	"storj.io/common/ranger"
	"storj.io/common/readcloser"
	"storj.io/common/storj"
	"storj.io/common/sync2"
	"storj.io/infectious"
)

// RedundancyStrategy is an ErasureScheme with repair and optimal thresholds.
type RedundancyStrategy struct {
	ErasureScheme
	repairThreshold  int
	optimalThreshold int
}

// NewRedundancyStrategy creates a RedundancyStrategy from the given ErasureScheme
// and the repair and optimal thresholds.
//
// repairThreshold is the number of available pieces below which the data must be
// repaired to avoid loss. If set to 0, it is reset to the TotalCount of the
// ErasureScheme.
// optimalThreshold is the number of available pieces above which no repair is
// needed. If set to 0, it is reset to the TotalCount of the ErasureScheme.
func NewRedundancyStrategy(es ErasureScheme, repairThreshold, optimalThreshold int) (RedundancyStrategy, error) {
	if repairThreshold == 0 {
		repairThreshold = es.TotalCount()
	}

	if optimalThreshold == 0 {
		optimalThreshold = es.TotalCount()
	}
	if repairThreshold < 0 {
		return RedundancyStrategy{}, Error.New("negative repair threshold")
	}
	if repairThreshold > 0 && repairThreshold < es.RequiredCount() {
		return RedundancyStrategy{}, Error.New("repair threshold less than required count")
	}
	if repairThreshold > es.TotalCount() {
		return RedundancyStrategy{}, Error.New("repair threshold greater than total count")
	}
	if optimalThreshold < 0 {
		return RedundancyStrategy{}, Error.New("negative optimal threshold")
	}
	if optimalThreshold > 0 && optimalThreshold < es.RequiredCount() {
		return RedundancyStrategy{}, Error.New("optimal threshold less than required count")
	}
	if optimalThreshold > es.TotalCount() {
		return RedundancyStrategy{}, Error.New("optimal threshold greater than total count")
	}
	if repairThreshold > optimalThreshold {
		return RedundancyStrategy{}, Error.New("repair threshold greater than optimal threshold")
	}
	return RedundancyStrategy{ErasureScheme: es, repairThreshold: repairThreshold, optimalThreshold: optimalThreshold}, nil
}
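
// A minimal usage sketch (not part of the upstream file): building a strategy
// for a hypothetical 4-of-8 scheme with a repair threshold of 6 and an optimal
// threshold of 8. The 256-byte erasure share size is an arbitrary value chosen
// for illustration.
//
//	fc, err := infectious.NewFEC(4, 8)
//	if err != nil {
//		return err
//	}
//	es := NewRSScheme(fc, 256)
//	rs, err := NewRedundancyStrategy(es, 6, 8)
//	if err != nil {
//		return err
//	}
//	_ = rs.RepairThreshold()  // 6
//	_ = rs.OptimalThreshold() // 8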

// NewRedundancyStrategyFromProto creates a new RedundancyStrategy from the given
// RedundancyScheme protobuf.
func NewRedundancyStrategyFromProto(scheme *pb.RedundancyScheme) (RedundancyStrategy, error) {
	fc, err := infectious.NewFEC(int(scheme.GetMinReq()), int(scheme.GetTotal()))
	if err != nil {
		return RedundancyStrategy{}, Error.Wrap(err)
	}
	es := NewRSScheme(fc, int(scheme.GetErasureShareSize()))
	return NewRedundancyStrategy(es, int(scheme.GetRepairThreshold()), int(scheme.GetSuccessThreshold()))
}

// NewRedundancyStrategyFromStorj creates a new RedundancyStrategy from the given
// storj.RedundancyScheme.
func NewRedundancyStrategyFromStorj(scheme storj.RedundancyScheme) (RedundancyStrategy, error) {
	fc, err := infectious.NewFEC(int(scheme.RequiredShares), int(scheme.TotalShares))
	if err != nil {
		return RedundancyStrategy{}, Error.Wrap(err)
	}
	es := NewRSScheme(fc, int(scheme.ShareSize))
	return NewRedundancyStrategy(es, int(scheme.RepairShares), int(scheme.OptimalShares))
}
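
// A hedged sketch of building the same strategy from a storj.RedundancyScheme;
// the numbers mirror the illustrative 4-of-8 example above. Only the fields
// this constructor reads (the share counts and the share size) are set.
//
//	scheme := storj.RedundancyScheme{
//		RequiredShares: 4,
//		RepairShares:   6,
//		OptimalShares:  8,
//		TotalShares:    8,
//		ShareSize:      256,
//	}
//	rs, err := NewRedundancyStrategyFromStorj(scheme)
//	if err != nil {
//		return err
//	}
//	_ = rs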

// RepairThreshold is the number of available erasure pieces below which
// the data must be repaired to avoid loss.
func (rs *RedundancyStrategy) RepairThreshold() int {
	return rs.repairThreshold
}

// OptimalThreshold is the number of available erasure pieces above which
// there is no need for the data to be repaired.
func (rs *RedundancyStrategy) OptimalThreshold() int {
	return rs.optimalThreshold
}

type encodedReader struct {
	ctx    context.Context
	rs     RedundancyStrategy
	pieces map[int]*encodedPiece
}

// EncodeReader2 takes a Reader and a RedundancyStrategy and returns a slice of
// io.ReadClosers, one per erasure share.
func EncodeReader2(ctx context.Context, r io.Reader, rs RedundancyStrategy) (_ []io.ReadCloser, err error) {
	defer mon.Task()(&ctx)(&err)

	er := &encodedReader{
		ctx:    ctx,
		rs:     rs,
		pieces: make(map[int]*encodedPiece, rs.TotalCount()),
	}

	var pipeReaders []sync2.PipeReader
	var pipeWriter sync2.PipeWriter

	tempDir, inmemory, _ := fpath.GetTempData(ctx)
	if inmemory {
		// TODO: figure out what default in-memory buffer size will be enough.
		pipeReaders, pipeWriter, err = sync2.NewTeeInmemory(rs.TotalCount(), memory.MiB.Int64())
	} else {
		if tempDir == "" {
			tempDir = os.TempDir()
		}
		pipeReaders, pipeWriter, err = sync2.NewTeeFile(rs.TotalCount(), tempDir)
	}
	if err != nil {
		return nil, err
	}

	readers := make([]io.ReadCloser, 0, rs.TotalCount())
	for i := 0; i < rs.TotalCount(); i++ {
		er.pieces[i] = &encodedPiece{
			er:         er,
			pipeReader: pipeReaders[i],
			num:        i,
			stripeBuf:  make([]byte, rs.StripeSize()),
			shareBuf:   make([]byte, rs.ErasureShareSize()),
		}
		readers = append(readers, er.pieces[i])
	}

	go er.fillBuffer(ctx, r, pipeWriter)

	return readers, nil
}
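
// A minimal usage sketch for EncodeReader2. The names ctx, rs, paddedInput and
// uploadPiece are placeholders; the input is assumed to already be padded to a
// multiple of rs.StripeSize(), since each piece reads the source in full
// stripes (see encodedPiece.Read below). Every returned ReadCloser carries one
// erasure share stream and must be drained and closed by the caller.
//
//	readers, err := EncodeReader2(ctx, paddedInput, rs)
//	if err != nil {
//		return err
//	}
//	for num, r := range readers {
//		go func(num int, r io.ReadCloser) {
//			defer func() { _ = r.Close() }()
//			uploadPiece(ctx, num, r) // hypothetical uploader, one goroutine per piece
//		}(num, r)
//	}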

func (er *encodedReader) fillBuffer(ctx context.Context, r io.Reader, w sync2.PipeWriter) {
	var err error
	defer mon.Task()(&ctx)(&err)
	_, err = sync2.Copy(ctx, w, r)

	// We probably cannot do anything reasonable with the error here.
	// This would indicate failure to close a temporary file, which doesn't need to be persisted.
	_ = w.CloseWithError(err)
}

type encodedPiece struct {
	er            *encodedReader
	pipeReader    sync2.PipeReader
	num           int
	currentStripe int64
	stripeBuf     []byte
	shareBuf      []byte
	available     int
	err           error
}

func (ep *encodedPiece) Read(p []byte) (n int, err error) {
	// No need to trace this function because it's very fast and called many times.
	if ep.err != nil {
		return 0, ep.err
	}

	if ep.available == 0 {
		// take the next stripe from the segment buffer
		_, err := io.ReadFull(ep.pipeReader, ep.stripeBuf)
		if err != nil {
			return 0, err
		}

		// encode the num-th erasure share
		err = ep.er.rs.EncodeSingle(ep.stripeBuf, ep.shareBuf, ep.num)
		if err != nil {
			return 0, err
		}

		ep.currentStripe++
		ep.available = ep.er.rs.ErasureShareSize()
	}

	// we have some buffer remaining for this piece. write it to the output
	off := len(ep.shareBuf) - ep.available
	n = copy(p, ep.shareBuf[off:])
	ep.available -= n

	return n, nil
}

func (ep *encodedPiece) Close() (err error) {
	ctx := ep.er.ctx
	defer mon.Task()(&ctx)(&err)
	return ep.pipeReader.Close()
}

// EncodedRanger will take an existing Ranger and provide a means to get
// multiple ranged sub-readers. EncodedRanger does not match the normal Ranger
// interface.
type EncodedRanger struct {
	rr ranger.Ranger
	rs RedundancyStrategy
}

// NewEncodedRanger creates an EncodedRanger from the given Ranger and
// RedundancyStrategy. See the comments on NewRedundancyStrategy about the
// repair and optimal thresholds.
func NewEncodedRanger(rr ranger.Ranger, rs RedundancyStrategy) (*EncodedRanger, error) {
	if rr.Size()%int64(rs.StripeSize()) != 0 {
		return nil, Error.New("invalid erasure encoder and range reader combo. range reader size must be a multiple of erasure encoder block size")
	}
	return &EncodedRanger{
		rs: rs,
		rr: rr,
	}, nil
}
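
// A hedged usage sketch: rr stands for any ranger.Ranger whose Size is a
// multiple of rs.StripeSize(), which is exactly the precondition
// NewEncodedRanger checks. Range offsets and lengths are expressed in the
// coordinates of the encoded (per-piece) output.
//
//	var rr ranger.Ranger // placeholder: some stripe-aligned data source
//	er, err := NewEncodedRanger(rr, rs)
//	if err != nil {
//		return err
//	}
//	readers, err := er.Range(ctx, 0, er.OutputSize()) // full encoded range of every piece
//	if err != nil {
//		return err
//	}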

// OutputSize is like Ranger.Size but returns the size of each erasure-encoded
// piece that comes out.
func (er *EncodedRanger) OutputSize() int64 {
	blocks := er.rr.Size() / int64(er.rs.StripeSize())
	return blocks * int64(er.rs.ErasureShareSize())
}
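
// Worked example with illustrative numbers: with a 256-byte stripe size and a
// 64-byte erasure share size, a 1024-byte ranger spans 1024/256 = 4 stripes,
// so each encoded piece is 4*64 = 256 bytes long.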

// Range is like Ranger.Range, but returns a slice of Readers.
func (er *EncodedRanger) Range(ctx context.Context, offset, length int64) (_ []io.ReadCloser, err error) {
	defer mon.Task()(&ctx)(&err)
	// the offset and length given may not be block-aligned, so let's figure
	// out which blocks contain the request.
	firstBlock, blockCount := encryption.CalcEncompassingBlocks(
		offset, length, er.rs.ErasureShareSize())
	// okay, now let's encode the reader for the range containing the blocks
	r, err := er.rr.Range(ctx,
		firstBlock*int64(er.rs.StripeSize()),
		blockCount*int64(er.rs.StripeSize()))
	if err != nil {
		return nil, err
	}
	readers, err := EncodeReader2(ctx, r, er.rs)
	if err != nil {
		return nil, err
	}
	for i, r := range readers {
		// the offset might start a few bytes in, so we potentially have to
		// discard the beginning bytes
		_, err := io.CopyN(io.Discard, r,
			offset-firstBlock*int64(er.rs.ErasureShareSize()))
		if err != nil {
			return nil, Error.Wrap(err)
		}
		// the length might be shorter than a multiple of the block size, so
		// limit it
		readers[i] = readcloser.LimitReadCloser(r, length)
	}
	return readers, nil
}

// CalcPieceSize calculates the piece size that would result from erasure
// coding data of dataSize bytes with the given ErasureScheme.
func CalcPieceSize(dataSize int64, scheme ErasureScheme) int64 {
	const uint32Size = 4
	stripeSize := int64(scheme.StripeSize())
	stripes := (dataSize + uint32Size + stripeSize - 1) / stripeSize

	encodedSize := stripes * int64(scheme.StripeSize())
	pieceSize := encodedSize / int64(scheme.RequiredCount())

	return pieceSize
}
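
// Worked example with illustrative numbers: with RequiredCount = 4 and
// StripeSize = 256, a dataSize of 1000 bytes gives
// stripes = ceil((1000+4)/256) = 4, encodedSize = 4*256 = 1024, and a piece
// size of 1024/4 = 256 bytes. The 4 extra bytes come from the uint32Size
// constant above; judging by its name it leaves room for a 4-byte length
// marker added by an upstream padding step, though this file does not state
// that explicitly.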