storj.io/uplink@v1.13.0/private/eestream/encode.go

// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.

package eestream

import (
	"context"
	"io"
	"os"

	"storj.io/common/encryption"
	"storj.io/common/fpath"
	"storj.io/common/memory"
	"storj.io/common/pb"
	"storj.io/common/ranger"
	"storj.io/common/readcloser"
	"storj.io/common/storj"
	"storj.io/common/sync2"
	"storj.io/infectious"
)

// RedundancyStrategy is an ErasureScheme with repair and optimal thresholds.
type RedundancyStrategy struct {
	ErasureScheme
	repairThreshold  int
	optimalThreshold int
}

// NewRedundancyStrategy creates a RedundancyStrategy from the given
// ErasureScheme and the repair and optimal thresholds.
//
// repairThreshold is the minimum repair threshold.
// If set to 0, it will be reset to the TotalCount of the ErasureScheme.
// optimalThreshold is the optimal threshold.
// If set to 0, it will be reset to the TotalCount of the ErasureScheme.
func NewRedundancyStrategy(es ErasureScheme, repairThreshold, optimalThreshold int) (RedundancyStrategy, error) {
	if repairThreshold == 0 {
		repairThreshold = es.TotalCount()
	}

	if optimalThreshold == 0 {
		optimalThreshold = es.TotalCount()
	}
	if repairThreshold < 0 {
		return RedundancyStrategy{}, Error.New("negative repair threshold")
	}
	if repairThreshold > 0 && repairThreshold < es.RequiredCount() {
		return RedundancyStrategy{}, Error.New("repair threshold less than required count")
	}
	if repairThreshold > es.TotalCount() {
		return RedundancyStrategy{}, Error.New("repair threshold greater than total count")
	}
	if optimalThreshold < 0 {
		return RedundancyStrategy{}, Error.New("negative optimal threshold")
	}
	if optimalThreshold > 0 && optimalThreshold < es.RequiredCount() {
		return RedundancyStrategy{}, Error.New("optimal threshold less than required count")
	}
	if optimalThreshold > es.TotalCount() {
		return RedundancyStrategy{}, Error.New("optimal threshold greater than total count")
	}
	if repairThreshold > optimalThreshold {
		return RedundancyStrategy{}, Error.New("repair threshold greater than optimal threshold")
	}
	return RedundancyStrategy{ErasureScheme: es, repairThreshold: repairThreshold, optimalThreshold: optimalThreshold}, nil
}

// NewRedundancyStrategyFromProto creates a new RedundancyStrategy from the given
// RedundancyScheme protobuf.
func NewRedundancyStrategyFromProto(scheme *pb.RedundancyScheme) (RedundancyStrategy, error) {
	fc, err := infectious.NewFEC(int(scheme.GetMinReq()), int(scheme.GetTotal()))
	if err != nil {
		return RedundancyStrategy{}, Error.Wrap(err)
	}
	es := NewRSScheme(fc, int(scheme.GetErasureShareSize()))
	return NewRedundancyStrategy(es, int(scheme.GetRepairThreshold()), int(scheme.GetSuccessThreshold()))
}

// NewRedundancyStrategyFromStorj creates a new RedundancyStrategy from the given
// storj.RedundancyScheme.
func NewRedundancyStrategyFromStorj(scheme storj.RedundancyScheme) (RedundancyStrategy, error) {
	fc, err := infectious.NewFEC(int(scheme.RequiredShares), int(scheme.TotalShares))
	if err != nil {
		return RedundancyStrategy{}, Error.Wrap(err)
	}
	es := NewRSScheme(fc, int(scheme.ShareSize))
	return NewRedundancyStrategy(es, int(scheme.RepairShares), int(scheme.OptimalShares))
}
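// Construction sketch (illustrative only; the 4-of-6 scheme and the 1 KiB
// share size below are arbitrary example values, not defaults of this
// package):
//
//	fc, err := infectious.NewFEC(4, 6) // 4 required shares out of 6 total
//	if err != nil {
//		return err
//	}
//	es := NewRSScheme(fc, 1024) // 1 KiB erasure share size
//	// Passing 0 for both thresholds resets them to es.TotalCount().
//	rs, err := NewRedundancyStrategy(es, 0, 0)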
// RepairThreshold is the number of available erasure pieces below which
// the data must be repaired to avoid loss.
func (rs *RedundancyStrategy) RepairThreshold() int {
	return rs.repairThreshold
}

// OptimalThreshold is the number of available erasure pieces above which
// there is no need for the data to be repaired.
func (rs *RedundancyStrategy) OptimalThreshold() int {
	return rs.optimalThreshold
}

type encodedReader struct {
	ctx    context.Context
	rs     RedundancyStrategy
	pieces map[int]*encodedPiece
}

// EncodeReader2 takes a Reader and a RedundancyStrategy and returns a slice of
// io.ReadClosers.
func EncodeReader2(ctx context.Context, r io.Reader, rs RedundancyStrategy) (_ []io.ReadCloser, err error) {
	defer mon.Task()(&ctx)(&err)

	er := &encodedReader{
		ctx:    ctx,
		rs:     rs,
		pieces: make(map[int]*encodedPiece, rs.TotalCount()),
	}

	var pipeReaders []sync2.PipeReader
	var pipeWriter sync2.PipeWriter

	tempDir, inmemory, _ := fpath.GetTempData(ctx)
	if inmemory {
		// TODO: what default in-memory size will be enough?
		pipeReaders, pipeWriter, err = sync2.NewTeeInmemory(rs.TotalCount(), memory.MiB.Int64())
	} else {
		if tempDir == "" {
			tempDir = os.TempDir()
		}
		pipeReaders, pipeWriter, err = sync2.NewTeeFile(rs.TotalCount(), tempDir)
	}
	if err != nil {
		return nil, err
	}

	readers := make([]io.ReadCloser, 0, rs.TotalCount())
	for i := 0; i < rs.TotalCount(); i++ {
		er.pieces[i] = &encodedPiece{
			er:         er,
			pipeReader: pipeReaders[i],
			num:        i,
			stripeBuf:  make([]byte, rs.StripeSize()),
			shareBuf:   make([]byte, rs.ErasureShareSize()),
		}
		readers = append(readers, er.pieces[i])
	}

	go er.fillBuffer(ctx, r, pipeWriter)

	return readers, nil
}

func (er *encodedReader) fillBuffer(ctx context.Context, r io.Reader, w sync2.PipeWriter) {
	var err error
	defer mon.Task()(&ctx)(&err)
	_, err = sync2.Copy(ctx, w, r)

	// We probably cannot do anything reasonable with the error here.
	// This would indicate failure to close a temporary file, which doesn't need to be persisted.
	_ = w.CloseWithError(err)
}

type encodedPiece struct {
	er            *encodedReader
	pipeReader    sync2.PipeReader
	num           int
	currentStripe int64
	stripeBuf     []byte
	shareBuf      []byte
	available     int
	err           error
}

func (ep *encodedPiece) Read(p []byte) (n int, err error) {
	// No need to trace this function because it's very fast and called many times.
	if ep.err != nil {
		return 0, ep.err
	}

	if ep.available == 0 {
		// take the next stripe from the segment buffer
		_, err := io.ReadFull(ep.pipeReader, ep.stripeBuf)
		if err != nil {
			return 0, err
		}

		// encode the num-th erasure share
		err = ep.er.rs.EncodeSingle(ep.stripeBuf, ep.shareBuf, ep.num)
		if err != nil {
			return 0, err
		}

		ep.currentStripe++
		ep.available = ep.er.rs.ErasureShareSize()
	}

	// we have some buffer remaining for this piece. write it to the output
	off := len(ep.shareBuf) - ep.available
	n = copy(p, ep.shareBuf[off:])
	ep.available -= n

	return n, nil
}

func (ep *encodedPiece) Close() (err error) {
	ctx := ep.er.ctx
	defer mon.Task()(&ctx)(&err)
	return ep.pipeReader.Close()
}
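// Usage sketch for EncodeReader2 (illustrative only; bytes.NewReader and the
// io.Discard sink below are stand-ins for a real segment source and real
// per-piece uploads):
//
//	readers, err := EncodeReader2(ctx, bytes.NewReader(segment), rs)
//	if err != nil {
//		return err
//	}
//	for i, r := range readers {
//		// readers[i] streams the erasure share for piece i; consume it
//		// (typically concurrently) and close it when done.
//		go func(i int, r io.ReadCloser) {
//			defer func() { _ = r.Close() }()
//			_, _ = io.Copy(io.Discard, r)
//		}(i, r)
//	}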
// EncodedRanger will take an existing Ranger and provide a means to get
// multiple Ranged sub-Readers. EncodedRanger does not match the normal Ranger
// interface.
type EncodedRanger struct {
	rr ranger.Ranger
	rs RedundancyStrategy
}

// NewEncodedRanger creates an EncodedRanger from the given Ranger and
// RedundancyStrategy. See the comments for EncodeReader about the repair and
// success thresholds.
func NewEncodedRanger(rr ranger.Ranger, rs RedundancyStrategy) (*EncodedRanger, error) {
	if rr.Size()%int64(rs.StripeSize()) != 0 {
		return nil, Error.New("invalid erasure encoder and range reader combo. range reader size must be a multiple of erasure encoder block size")
	}
	return &EncodedRanger{
		rs: rs,
		rr: rr,
	}, nil
}

// OutputSize is like Ranger.Size but returns the size of the erasure encoded
// pieces that come out.
func (er *EncodedRanger) OutputSize() int64 {
	blocks := er.rr.Size() / int64(er.rs.StripeSize())
	return blocks * int64(er.rs.ErasureShareSize())
}

// Range is like Ranger.Range, but returns a slice of Readers.
func (er *EncodedRanger) Range(ctx context.Context, offset, length int64) (_ []io.ReadCloser, err error) {
	defer mon.Task()(&ctx)(&err)
	// the offset and length given may not be block-aligned, so let's figure
	// out which blocks contain the request.
	firstBlock, blockCount := encryption.CalcEncompassingBlocks(
		offset, length, er.rs.ErasureShareSize())
	// okay, now let's encode the reader for the range containing the blocks
	r, err := er.rr.Range(ctx,
		firstBlock*int64(er.rs.StripeSize()),
		blockCount*int64(er.rs.StripeSize()))
	if err != nil {
		return nil, err
	}
	readers, err := EncodeReader2(ctx, r, er.rs)
	if err != nil {
		return nil, err
	}
	for i, r := range readers {
		// the offset might start a few bytes in, so we potentially have to
		// discard the beginning bytes
		_, err := io.CopyN(io.Discard, r,
			offset-firstBlock*int64(er.rs.ErasureShareSize()))
		if err != nil {
			return nil, Error.Wrap(err)
		}
		// the length might be shorter than a multiple of the block size, so
		// limit it
		readers[i] = readcloser.LimitReadCloser(r, length)
	}
	return readers, nil
}

// CalcPieceSize calculates what the piece size of the encoded data would be
// after erasure coding data of dataSize with the given ErasureScheme.
func CalcPieceSize(dataSize int64, scheme ErasureScheme) int64 {
	const uint32Size = 4
	stripeSize := int64(scheme.StripeSize())
	stripes := (dataSize + uint32Size + stripeSize - 1) / stripeSize

	encodedSize := stripes * int64(scheme.StripeSize())
	pieceSize := encodedSize / int64(scheme.RequiredCount())

	return pieceSize
}
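// Worked example for CalcPieceSize (hypothetical numbers, assuming a scheme
// whose StripeSize() is 1024 and RequiredCount() is 4):
//
//	dataSize    = 4000
//	stripes     = ceil((4000 + 4) / 1024) = 4  // 4-byte overhead, rounded up to whole stripes
//	encodedSize = 4 * 1024 = 4096
//	pieceSize   = 4096 / 4  = 1024
//
// So each piece of a 4000-byte segment would be 1024 bytes under that scheme.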