gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/erasure.go (about)

     1  package skymodules
     2  
     3  // erasure.go defines an interface for an erasure coder, as well as an erasure
     4  // type for data that is not erasure coded.
     5  
     6  import (
     7  	"bytes"
     8  	"encoding/binary"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  
    13  	"github.com/klauspost/reedsolomon"
    14  	"gitlab.com/SkynetLabs/skyd/build"
    15  	"go.sia.tech/siad/crypto"
    16  )
    17  
    18  var (
    19  	// RenterDefaultDataPieces is the number of data pieces per erasure-coded
    20  	// chunk used in the renter.
    21  	RenterDefaultDataPieces = build.Select(build.Var{
    22  		Dev:      2,
    23  		Standard: 10,
    24  		Testing:  1,
    25  	}).(int)
    26  
    27  	// RenterDefaultParityPieces is the number of parity pieces per
    28  	// erasure-coded chunk used in the renter.
    29  	RenterDefaultParityPieces = build.Select(build.Var{
    30  		Dev:      1,
    31  		Standard: 20,
    32  		Testing:  4,
    33  	}).(int)
    34  
    35  	// RenterDefaultNumPieces is the sum of the renter's default data and parity
    36  	// pieces.
    37  	RenterDefaultNumPieces = RenterDefaultDataPieces + RenterDefaultParityPieces
    38  
    39  	// ECReedSolomon is the marshaled type of the reed solomon coder.
    40  	ECReedSolomon = ErasureCoderType{0, 0, 0, 1}
    41  
    42  	// ECReedSolomonSubShards64 is the marshaled type of the reed solomon coder
    43  	// for files where every 64 bytes of an encoded piece can be decoded
    44  	// separately.
    45  	ECReedSolomonSubShards64 = ErasureCoderType{0, 0, 0, 2}
    46  
    47  	// ECPassthrough defines the erasure coder type for an erasure coder that
    48  	// does nothing.
    49  	ECPassthrough = ErasureCoderType{0, 0, 0, 3}
    50  )
    51  
    52  type (
    53  	// ErasureCoderType is an identifier for the individual types of erasure
    54  	// coders.
    55  	ErasureCoderType [4]byte
    56  
    57  	// ErasureCoderIdentifier is an identifier that only matches another
    58  	// ErasureCoder's identifier if they both are of the same type and settings.
    59  	ErasureCoderIdentifier string
    60  
    61  	// An ErasureCoder is an error-correcting encoder and decoder.
    62  	ErasureCoder interface {
    63  		// NumPieces is the number of pieces returned by Encode.
    64  		NumPieces() int
    65  
    66  		// MinPieces is the minimum number of pieces that must be present to
    67  		// recover the original data.
    68  		MinPieces() int
    69  
    70  		// Encode splits data into equal-length pieces, with some pieces
    71  		// containing parity data.
    72  		Encode(data []byte) ([][]byte, error)
    73  
    74  		// Identifier returns the ErasureCoderIdentifier of the ErasureCoder.
    75  		Identifier() ErasureCoderIdentifier
    76  
    77  		// EncodeShards encodes the input data like Encode but accepts an already
    78  		// sharded input.
    79  		EncodeShards(data [][]byte) ([][]byte, error)
    80  
    81  		// Reconstruct recovers the full set of encoded shards from the provided
    82  		// pieces, of which at least MinPieces must be non-nil.
    83  		Reconstruct(pieces [][]byte) error
    84  
    85  		// Recover recovers the original data from pieces and writes it to w.
    86  		// pieces should be identical to the slice returned by Encode (length
    87  		// and order must be preserved), but with missing elements set to nil. n
    88  		// is the number of bytes to be written to w; this is necessary because
    89  		// pieces may have been padded with zeros during encoding.
    90  		Recover(pieces [][]byte, n uint64, w io.Writer) error
    91  
    92  		// SupportsPartialEncoding returns true if partial encoding is
    93  		// supported. The piece segment size will be returned. Otherwise the
    94  		// numerical return value is set to zero.
    95  		SupportsPartialEncoding() (uint64, bool)
    96  
    97  		// Type returns the type identifier of the ErasureCoder.
    98  		Type() ErasureCoderType
    99  	}
   100  
   101  	// RSCode is a Reed-Solomon encoder/decoder. It implements the
   102  	// ErasureCoder interface.
   103  	RSCode struct {
   104  		enc reedsolomon.Encoder
   105  
   106  		numPieces  int
   107  		dataPieces int
   108  	}
   109  
   110  	// RSSubCode is a Reed-Solomon encoder/decoder. It implements the
   111  	// ErasureCoder interface in a way that every crypto.SegmentSize bytes of
   112  	// encoded data can be recovered separately.
   113  	RSSubCode struct {
   114  		RSCode
   115  		staticSegmentSize uint64
   116  		staticType        ErasureCoderType
   117  	}
   118  
   119  	// PassthroughErasureCoder is a blank type that signifies no erasure coding.
   120  	PassthroughErasureCoder struct{}
   121  )
   122  
   123  // NewRSCode creates a new Reed-Solomon encoder/decoder using the supplied
   124  // parameters.
   125  func NewRSCode(nData, nParity int) (ErasureCoder, error) {
   126  	return newRSCode(nData, nParity)
   127  }
   128  
   129  // NewRSCodeDefault creates a new Reed-Solomon encoder/decoder using the
   130  // default parameters.
   131  func NewRSCodeDefault() ErasureCoder {
   132  	ec, err := newRSCode(RenterDefaultDataPieces, RenterDefaultParityPieces)
   133  	if err != nil {
   134  		build.Critical("defaults are not accepted")
   135  	}
   136  	return ec
   137  }
   138  
   139  // NewRSSubCode creates a new Reed-Solomon encoder/decoder using the supplied
   140  // parameters.
   141  func NewRSSubCode(nData, nParity int, segmentSize uint64) (ErasureCoder, error) {
   142  	rs, err := newRSCode(nData, nParity)
   143  	if err != nil {
   144  		return nil, err
   145  	}
   146  	// Get the correct type from the segmentSize.
   147  	var t ErasureCoderType
   148  	switch segmentSize {
   149  	case 64:
   150  		t = ECReedSolomonSubShards64
   151  	default:
   152  		return nil, errors.New("unsupported segmentSize")
   153  	}
   154  	// Create the encoder.
   155  	return &RSSubCode{
   156  		*rs,
   157  		segmentSize,
   158  		t,
   159  	}, nil
   160  }
   161  
   162  // NewRSSubCodeDefault creates a new Reed-Solomon encoder/decoder using the
   163  // default parameters and the default segment size.
   164  func NewRSSubCodeDefault() ErasureCoder {
   165  	ec, err := NewRSSubCode(RenterDefaultDataPieces, RenterDefaultParityPieces, crypto.SegmentSize)
   166  	if err != nil {
   167  		build.Critical("defaults are not accepted")
   168  	}
   169  	return ec
   170  }
   171  
   172  // NewPassthroughErasureCoder will return an erasure coder that does not encode
   173  // the data. It uses 1-of-1 redundancy and always returns itself or some subset
   174  // of itself.
   175  func NewPassthroughErasureCoder() ErasureCoder {
   176  	return new(PassthroughErasureCoder)
   177  }
   178  
   179  // NumPieces returns the number of pieces returned by Encode.
   180  func (rs *RSCode) NumPieces() int { return rs.numPieces }
   181  
   182  // MinPieces return the minimum number of pieces that must be present to
   183  // recover the original data.
   184  func (rs *RSCode) MinPieces() int { return rs.dataPieces }
   185  
   186  // Encode splits data into equal-length pieces, some containing the original
   187  // data and some containing parity data.
   188  func (rs *RSCode) Encode(data []byte) ([][]byte, error) {
   189  	pieces, err := rs.enc.Split(data)
   190  	if err != nil {
   191  		return nil, err
   192  	}
   193  	return rs.EncodeShards(pieces)
   194  }
   195  
   196  // EncodeShards creates the parity shards for an already sharded input.
   197  func (rs *RSCode) EncodeShards(pieces [][]byte) ([][]byte, error) {
   198  	// Check that the caller provided the minimum amount of pieces.
   199  	if len(pieces) < rs.MinPieces() {
   200  		return nil, fmt.Errorf("invalid number of pieces given %v < %v", len(pieces), rs.MinPieces())
   201  	}
   202  	// Since all the pieces should have the same length, get the pieceSize from
   203  	// the first one.
   204  	pieceSize := len(pieces[0])
   205  	// Add the parity shards to pieces.
   206  	for len(pieces) < rs.NumPieces() {
   207  		pieces = append(pieces, make([]byte, pieceSize))
   208  	}
   209  	err := rs.enc.Encode(pieces)
   210  	if err != nil {
   211  		return nil, err
   212  	}
   213  	return pieces, nil
   214  }
   215  
   216  // Identifier returns an identifier for an erasure coder which can be used to
   217  // identify erasure coders of the same type, dataPieces and parityPieces.
   218  func (rs *RSCode) Identifier() ErasureCoderIdentifier {
   219  	t := rs.Type()
   220  	dataPieces := rs.MinPieces()
   221  	parityPieces := rs.NumPieces() - dataPieces
   222  	id := fmt.Sprintf("%v+%v+%v", binary.BigEndian.Uint32(t[:]), dataPieces, parityPieces)
   223  	return ErasureCoderIdentifier(id)
   224  }
   225  
   226  // Reconstruct recovers the full set of encoded shards from the provided pieces,
   227  // of which at least MinPieces must be non-nil.
   228  func (rs *RSCode) Reconstruct(pieces [][]byte) error {
   229  	return rs.enc.Reconstruct(pieces)
   230  }
   231  
   232  // Recover recovers the original data from pieces and writes it to w.
   233  // pieces should be identical to the slice returned by Encode (length and
   234  // order must be preserved), but with missing elements set to nil.
   235  func (rs *RSCode) Recover(pieces [][]byte, n uint64, w io.Writer) error {
   236  	err := rs.enc.ReconstructData(pieces)
   237  	if err != nil {
   238  		return err
   239  	}
   240  	return rs.enc.Join(w, pieces, int(n))
   241  }
   242  
   243  // SupportsPartialEncoding returns false for the basic reed-solomon encoder and
   244  // a size of 0.
   245  func (rs *RSCode) SupportsPartialEncoding() (uint64, bool) {
   246  	return 0, false
   247  }
   248  
   249  // Type returns the erasure coders type identifier.
   250  func (rs *RSCode) Type() ErasureCoderType {
   251  	return ECReedSolomon
   252  }
   253  
   254  // newRSCode creates a new Reed-Solomon encoder/decoder using the supplied
   255  // parameters.
   256  func newRSCode(nData, nParity int) (*RSCode, error) {
   257  	enc, err := reedsolomon.New(nData, nParity)
   258  	if err != nil {
   259  		return nil, err
   260  	}
   261  	return &RSCode{
   262  		enc:        enc,
   263  		numPieces:  nData + nParity,
   264  		dataPieces: nData,
   265  	}, nil
   266  }
   267  
   268  // Encode splits data into equal-length pieces, some containing the original
   269  // data and some containing parity data.
   270  func (rs *RSSubCode) Encode(data []byte) ([][]byte, error) {
   271  	pieces, err := rs.enc.Split(data)
   272  	if err != nil {
   273  		return nil, err
   274  	}
   275  	return rs.EncodeShards(pieces[:rs.MinPieces()])
   276  }
   277  
   278  // EncodeShards encodes data in a way that every segmentSize bytes of the
   279  // encoded data can be decoded independently.
   280  func (rs *RSSubCode) EncodeShards(pieces [][]byte) ([][]byte, error) {
   281  	// Check that there are enough pieces.
   282  	if len(pieces) != rs.MinPieces() {
   283  		return nil, fmt.Errorf("not enough segments expected %v but was %v",
   284  			rs.MinPieces(), len(pieces))
   285  	}
   286  	// Since all the pieces should have the same length, get the pieceSize from
   287  	// the first one.
   288  	pieceSize := uint64(len(pieces[0]))
   289  	// pieceSize must be divisible by segmentSize
   290  	if pieceSize%rs.staticSegmentSize != 0 {
   291  		return nil, errors.New("pieceSize not divisible by segmentSize")
   292  	}
   293  	// Each piece should have pieceSize bytes.
   294  	for _, piece := range pieces {
   295  		if uint64(len(piece)) != pieceSize {
   296  			return nil, fmt.Errorf("pieces don't have right size expected %v but was %v",
   297  				pieceSize, len(piece))
   298  		}
   299  	}
   300  	// Flatten the pieces into a byte slice.
   301  	data := make([]byte, uint64(len(pieces))*pieceSize)
   302  	for i, piece := range pieces {
   303  		copy(data[uint64(i)*pieceSize:], piece)
   304  		pieces[i] = pieces[i][:0]
   305  	}
   306  	// Add parity shards to pieces.
   307  	parityShards := make([][]byte, rs.NumPieces()-len(pieces))
   308  	pieces = append(pieces, parityShards...)
   309  	// Encode the pieces.
   310  	segmentOffset := uint64(0)
   311  	for buf := bytes.NewBuffer(data); buf.Len() > 0; {
   312  		// Get the next segments to encode.
   313  		s := buf.Next(int(rs.staticSegmentSize) * rs.MinPieces())
   314  
   315  		// Create a copy of it.
   316  		segments := make([]byte, len(s))
   317  		copy(segments, s)
   318  
   319  		// Encode the segment
   320  		encodedSegments, err := rs.RSCode.Encode(segments)
   321  		if err != nil {
   322  			return nil, err
   323  		}
   324  
   325  		// Write the encoded segments back to pieces.
   326  		for i, segment := range encodedSegments {
   327  			pieces[i] = append(pieces[i], segment...)
   328  		}
   329  		segmentOffset += rs.staticSegmentSize
   330  	}
   331  	return pieces, nil
   332  }
   333  
   334  // Identifier returns an identifier for an erasure coder which can be used to
   335  // identify erasure coders of the same type, dataPieces and parityPieces.
   336  func (rs *RSSubCode) Identifier() ErasureCoderIdentifier {
   337  	t := rs.Type()
   338  	dataPieces := rs.MinPieces()
   339  	parityPieces := rs.NumPieces() - dataPieces
   340  	id := fmt.Sprintf("%v+%v+%v", binary.BigEndian.Uint32(t[:]), dataPieces, parityPieces)
   341  	return ErasureCoderIdentifier(id)
   342  }
   343  
// Reconstruct recovers the full set of encoded shards from the provided
// pieces, of which at least MinPieces must be non-nil.
//
// pieces must have exactly NumPieces entries; entries of length zero are
// treated as missing. Reconstruction is performed one segment of
// staticSegmentSize bytes at a time, and pieces is modified in place: every
// entry is truncated and then regrown by appending the segment returned for it
// by the underlying coder.
func (rs *RSSubCode) Reconstruct(pieces [][]byte) error {
	// Check the length of pieces.
	if len(pieces) != rs.NumPieces() {
		return fmt.Errorf("expected pieces to have len %v but was %v",
			rs.NumPieces(), len(pieces))
	}

	// Since all the pieces should have the same length, get the pieceSize from
	// the first piece that was set.
	var pieceSize uint64
	for _, piece := range pieces {
		if uint64(len(piece)) > pieceSize {
			pieceSize = uint64(len(piece))
			break
		}
	}

	// pieceSize must be divisible by segmentSize
	if pieceSize%rs.staticSegmentSize != 0 {
		return errors.New("pieceSize not divisible by segmentSize")
	}

	// Remember which pieces are missing (length zero), then truncate every
	// piece to length zero. Truncating keeps the backing array, so the data of
	// the present pieces stays reachable through the slice's capacity.
	isNil := make([]bool, len(pieces))
	for i := range pieces {
		isNil[i] = len(pieces[i]) == 0
		pieces[i] = pieces[i][:0]
	}

	// Extract the segment from the pieces.
	segment := make([][]byte, len(pieces))
	for segmentIndex := 0; uint64(segmentIndex) < pieceSize/rs.staticSegmentSize; segmentIndex++ {
		off := uint64(segmentIndex) * rs.staticSegmentSize
		for i, piece := range pieces {
			if isNil[i] {
				// Missing piece: hand an empty slice to the coder.
				segment[i] = piece[off:off]
			} else {
				// Present piece: re-slice past the current length into the
				// retained capacity to reach the original bytes at off.
				// NOTE(review): relies on the piece having grown by exactly
				// one segment per iteration and on its capacity covering
				// pieceSize bytes; a shorter present piece would panic here.
				segment[i] = piece[off:][:rs.staticSegmentSize]
			}
		}
		// Reconstruct the segment.
		if err := rs.RSCode.Reconstruct(segment); err != nil {
			return err
		}
		// Append the segment of every piece back onto that piece.
		for i := range pieces {
			pieces[i] = append(pieces[i], segment[i]...)
		}
	}
	return nil
}
   395  
// Recover accepts encoded pieces, decodes them one segment at a time and
// writes up to n bytes of decoded data to w. Each segment decodes to at most
// segmentSize * dataPieces bytes; the final write is shortened so that no more
// than n bytes are written in total.
//
// pieces must have exactly NumPieces entries. A piece that is too short to
// contain a given segment is passed to the underlying coder as an empty shard
// for that segment.
//
// NOTE(review): if the pieces contain fewer segments than are needed to
// produce n bytes, the loop ends and nil is returned after writing fewer than
// n bytes — confirm callers expect this.
func (rs *RSSubCode) Recover(pieces [][]byte, n uint64, w io.Writer) error {
	// Check the length of pieces.
	if len(pieces) != rs.NumPieces() {
		return fmt.Errorf("expected pieces to have len %v but was %v",
			rs.NumPieces(), len(pieces))
	}
	// Since all the pieces should have the same length, get the pieceSize from
	// the first piece that was set.
	var pieceSize uint64
	for _, piece := range pieces {
		if uint64(len(piece)) > pieceSize {
			pieceSize = uint64(len(piece))
			break
		}
	}

	// pieceSize must be divisible by segmentSize
	if pieceSize%rs.staticSegmentSize != 0 {
		return errors.New("pieceSize not divisible by segmentSize")
	}

	// Extract the segment from the pieces.
	decodedSegmentSize := rs.staticSegmentSize * uint64(rs.MinPieces())
	// Scratch buffers, one per piece, reused for every segment.
	segment := make([][]byte, len(pieces))
	for i := range segment {
		segment[i] = make([]byte, 0, rs.staticSegmentSize)
	}
	for segmentIndex := 0; uint64(segmentIndex) < pieceSize/rs.staticSegmentSize && n > 0; segmentIndex++ {
		off := uint64(segmentIndex) * rs.staticSegmentSize
		for i, piece := range pieces {
			if uint64(len(piece)) >= off+rs.staticSegmentSize {
				// Copy the segment so the input piece is not handed to the
				// underlying coder directly.
				segment[i] = append(segment[i][:0], piece[off:off+rs.staticSegmentSize]...)
			} else {
				// The piece does not cover this segment; pass an empty shard.
				segment[i] = segment[i][:0]
			}
		}
		// Reconstruct the segment. The last write is capped at the number of
		// bytes still requested.
		if n < decodedSegmentSize {
			decodedSegmentSize = n
		}
		if err := rs.RSCode.Recover(segment, decodedSegmentSize, w); err != nil {
			return err
		}
		n -= decodedSegmentSize
	}
	return nil
}
   445  
   446  // SupportsPartialEncoding returns true for the custom reed-solomon encoder and
   447  // returns the segment size.
   448  func (rs *RSSubCode) SupportsPartialEncoding() (uint64, bool) {
   449  	return crypto.SegmentSize, true
   450  }
   451  
   452  // Type returns the erasure coders type identifier.
   453  func (rs *RSSubCode) Type() ErasureCoderType {
   454  	return rs.staticType
   455  }
   456  
   457  // ExtractSegment is a convenience method that extracts the data of the segment
   458  // at segmentIndex from pieces.
   459  func ExtractSegment(pieces [][]byte, segmentIndex int, segmentSize uint64) [][]byte {
   460  	segment := make([][]byte, len(pieces))
   461  	off := uint64(segmentIndex) * segmentSize
   462  	for i, piece := range pieces {
   463  		if uint64(len(piece)) >= off+segmentSize {
   464  			segment[i] = piece[off : off+segmentSize]
   465  		} else {
   466  			segment[i] = nil
   467  		}
   468  	}
   469  	return segment
   470  }
   471  
   472  // NumPieces is the number of pieces returned by Encode. For the passthrough
   473  // this is hardcoded to 1.
   474  func (pec *PassthroughErasureCoder) NumPieces() int {
   475  	return 1
   476  }
   477  
   478  // MinPieces is the minimum number of pieces that must be present to recover the
   479  // original data. For the passthrough this is hardcoded to 1.
   480  func (pec *PassthroughErasureCoder) MinPieces() int {
   481  	return 1
   482  }
   483  
   484  // Encode splits data into equal-length pieces, with some pieces containing
   485  // parity data. For the passthrough this is a no-op.
   486  func (pec *PassthroughErasureCoder) Encode(data []byte) ([][]byte, error) {
   487  	return [][]byte{data}, nil
   488  }
   489  
   490  // Identifier returns the ErasureCoderIdentifier of the ErasureCoder.
   491  func (pec *PassthroughErasureCoder) Identifier() ErasureCoderIdentifier {
   492  	return "ECPassthrough"
   493  }
   494  
   495  // EncodeShards encodes the input data like Encode but accepts an already
   496  // sharded input. For the passthrough this is a no-op.
   497  func (pec *PassthroughErasureCoder) EncodeShards(pieces [][]byte) ([][]byte, error) {
   498  	return pieces, nil
   499  }
   500  
   501  // Reconstruct recovers the full set of encoded shards from the provided pieces,
   502  // of which at least MinPieces must be non-nil. For the passthrough this is a
   503  // no-op.
   504  func (pec *PassthroughErasureCoder) Reconstruct(pieces [][]byte) error {
   505  	return nil
   506  }
   507  
   508  // Recover recovers the original data from pieces and writes it to w. pieces
   509  // should be identical to the slice returned by Encode (length and order must be
   510  // preserved), but with missing elements set to nil. n is the number of bytes to
   511  // be written to w; this is necessary because pieces may have been padded with
   512  // zeros during encoding.
   513  func (pec *PassthroughErasureCoder) Recover(pieces [][]byte, n uint64, w io.Writer) error {
   514  	_, err := w.Write(pieces[0][:n])
   515  	return err
   516  }
   517  
   518  // SupportsPartialEncoding returns true if partial encoding is supported. The
   519  // piece segment size will be returned. Otherwise the numerical return value is
   520  // set to zero.
   521  func (pec *PassthroughErasureCoder) SupportsPartialEncoding() (uint64, bool) {
   522  	// The actual protocol is in some places restricted to using an atomic
   523  	// request size of crypto.SegmentSize, so that's what we use here.
   524  	//
   525  	// TODO: I'm not sure if the above comment is completely true, may be okay
   526  	// to return a segment size of 1.
   527  	return crypto.SegmentSize, true
   528  }
   529  
   530  // Type returns the type identifier of the ErasureCoder.
   531  func (pec *PassthroughErasureCoder) Type() ErasureCoderType {
   532  	return ECPassthrough
   533  }