storj.io/uplink@v1.13.0/private/eestream/piece.go

// Copyright (C) 2023 Storj Labs, Inc.
// See LICENSE for copying information.

package eestream

import (
	"errors"
	"fmt"
	"io"
	"sync/atomic"

	"storj.io/common/sync2/race2"
)

var freedBatch = &Batch{}

// A StreamingPiece is an in-memory storage location for a stream of bytes being
// operated on by a single producer and a single consumer in atomic units of a
// given erasure share size. The StreamingPiece type must know its full expected
// size up front, and allocates slots for each BatchPool-sized batch of erasure
// shares up to that total size. It will hydrate these slots on demand and free
// them back to the BatchPool as they are consumed.
type StreamingPiece struct {
	// batches is a list of atomic.Pointer[Batch].
	// The value of each slot is either nil, freedBatch, or a valid Batch
	// returned from the BatchPool. nil means the batch has yet to be
	// instantiated. freedBatch means that the batch is freed or no longer
	// needed.
	batches             []atomic.Pointer[Batch]
	pool                *BatchPool
	receivedBytesSoFar  int64
	receivedSharesSoFar atomic.Int64
	shareSize           int
	err                 atomic.Value
	completedBatches    atomic.Int32
}

// NewStreamingPiece creates a buffer that stores shares of size shareSize, up
// to a total of totalSize bytes. It uses pool to hydrate and return buffers in
// its slots.
func NewStreamingPiece(shareSize int, totalSize int64, pool *BatchPool) *StreamingPiece {
	poolSize := int64(pool.Size())
	batches := (totalSize + poolSize - 1) / poolSize

	return &StreamingPiece{
		batches:   make([]atomic.Pointer[Batch], batches+1),
		pool:      pool,
		shareSize: shareSize,
	}
}

// ensureBatch returns a batch for writing that is either a full-length batch
// from the BatchPool, or specifically the freedBatch value, which means that
// the consumer has already indicated that all units belonging to this batch
// are no longer needed.
func (b *StreamingPiece) ensureBatch(idx int) *Batch {
	for {
		if batch := b.batches[idx].Load(); batch != nil {
			return batch
		}
		batch := b.pool.GetAndClaim()
		if b.batches[idx].CompareAndSwap(nil, batch) {
			return batch
		}
		batch.Release()
	}
}

// byteToBatch determines which batch, and which offset within that batch, the
// byte at byteOffset in the overall stream lives in.
func (b *StreamingPiece) byteToBatch(byteOffset int64) (batchIdx, batchOffset int) {
	poolSize := int64(b.pool.Size())
	batchIdx = int(byteOffset / poolSize)
	batchOffset = int(byteOffset % poolSize)
	return batchIdx, batchOffset
}

// ReadSharesFrom calls r.Read() exactly once and returns the number of full
// shares that are newly completely read as a result of this call. If r.Read()
// returns an error or io.EOF, or no more data is expected, done will be true.
// The read error, if any, is available from Err() or ReadShare().
func (b *StreamingPiece) ReadSharesFrom(r io.Reader) (shareCount int, done bool) {
	// find our current buffer
	currentBatchIdx, currentBatchOffset := b.byteToBatch(b.receivedBytesSoFar)

	currentBatch := b.ensureBatch(currentBatchIdx)

	// okay, there are two main cases for the batch we just grabbed:
	//  1) it has already been freed. in this case, currentBatch == freedBatch.
	//  2) we can attempt to claim it. this has two subcases:
	//     a) if the claim fails, then it was in the process of being freed.
	//     b) if the claim succeeds, then we know that no one is going to free
	//        it while we have it, at least until we release it.
	// in cases 1 and 2a, we don't have a batch to use. only case 2b gives us
	// a usable batch. all of this claiming exists because in case 2b we want
	// to make sure no one tries to free this batch while we're using it.
	if currentBatch == freedBatch || !currentBatch.Claim() {
		// this batch isn't ours, but we still need to read off the stream
		// as long as someone is still interested in it, so we need to use a
		// throwaway buffer. we do this instead of using io.Discard or similar
		// because this way all of the logic and bookkeeping stays exactly the
		// same.
		currentBatch = b.pool.GetAndClaim()
	}
	defer currentBatch.Release()

	currentSlice := currentBatch.Slice()[currentBatchOffset:]
	race2.WriteSlice(currentSlice)

	// okay, read into the current buffer
	n, err := r.Read(currentSlice)
	// keep track of how many bytes we've read from the stream
	b.receivedBytesSoFar += int64(n)

	// we may have only read a partial share last time, so we need to
	// recalculate how many bytes are covered by prior completed
	// ReadSharesFrom shareCount returns.
	receivedSharesSoFar := b.receivedSharesSoFar.Load()
	notifiedBytesSoFar := receivedSharesSoFar * int64(b.shareSize)

	// okay, let's see how many completed shares we can tell the caller about.
	unnotifiedBytes := b.receivedBytesSoFar - notifiedBytesSoFar
	unnotifiedShares := unnotifiedBytes / int64(b.shareSize)

	// make a note of how many we've told the caller about.
	b.receivedSharesSoFar.Add(unnotifiedShares)

	// keep track of the error, if there was a read error.
	if err != nil && !errors.Is(err, io.EOF) {
		b.err.Store(err)
	}

	return int(unnotifiedShares), err != nil
}
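
// The following is a minimal usage sketch, not part of the original file: a
// producer loop that drains an io.Reader into a StreamingPiece using
// ReadSharesFrom above. The fillPiece name and the onShares callback are
// hypothetical; only the StreamingPiece methods come from this file.
func fillPiece(r io.Reader, piece *StreamingPiece, onShares func(newShares int)) error {
	for {
		// each call performs exactly one r.Read() and reports how many new
		// full shares became available because of it.
		newShares, done := piece.ReadSharesFrom(r)
		if newShares > 0 {
			onShares(newShares)
		}
		if done {
			// Err() is nil on a clean io.EOF, since ReadSharesFrom only
			// stores non-EOF read errors.
			return piece.Err()
		}
	}
}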

// ReadShare returns the byte slice that references the read data in a buffer
// representing the share with index shareIdx. Note that shareIdx is not the
// Reed-Solomon share number, since all shares in this buffer have the same
// Reed-Solomon share number. If a share at shareIdx cannot be returned, it
// will return an error, which may be a read error determined by
// ReadSharesFrom. The release callback must be called when the share is done
// being read from.
func (b *StreamingPiece) ReadShare(shareIdx int) (data []byte, release func(), err error) {
	// first, let's see if we even have data for this share. have we read this
	// far yet?
	receivedSharesSoFar := b.receivedSharesSoFar.Load()
	if int64(shareIdx) >= receivedSharesSoFar {
		if err, ok := b.err.Load().(error); ok {
			// oh, there's a stored error. let's return that; it probably says
			// what happened.
			return nil, nil, err
		}
		return nil, nil, Error.New("read past end of buffer: %w", io.ErrUnexpectedEOF)
	}

	// find our batch and batch offset for this share
	byteOffset := int64(shareIdx) * int64(b.shareSize)
	if debugEnabled {
		fmt.Println("buffer reading byte offset", byteOffset, "for share", shareIdx)
	}
	batchIdx, batchOffset := b.byteToBatch(byteOffset)

	// okay, let's go find our batch
	batch := b.batches[batchIdx].Load()
	if batch == nil {
		// huh! someone asked for a batch that we haven't received yet, even
		// though we checked up top. some major bookkeeping problem happened.
		return nil, nil, Error.New("unreachable - this batch should be hydrated")
	}
	if batch == freedBatch || !batch.Claim() {
		// this batch was already marked as completed, so we probably returned
		// it back to the BatchPool.
		return nil, nil, Error.New("read completed buffer")
	}

	data = batch.Slice()[batchOffset:][:b.shareSize]
	race2.ReadSlice(data)

	// okay, we have the data.
	if debugEnabled {
		fmt.Println("buffer reading unit", shareIdx, "from", batchIdx, batchOffset, fmt.Sprintf("%x", data[:3]))
	}
	return data, batch.Release, nil
}
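
// A minimal consumption sketch, not part of the original file: copying one
// share out of the pooled batch. The release callback returned by ReadShare
// must be called once the data is no longer needed, or the underlying batch
// can never be returned to the BatchPool. copyShare is a hypothetical name.
func copyShare(piece *StreamingPiece, shareIdx int) ([]byte, error) {
	data, release, err := piece.ReadShare(shareIdx)
	if err != nil {
		return nil, err
	}
	defer release()
	// copy out of the pooled batch so the bytes stay valid after release
	// hands the batch back to the pool.
	out := make([]byte, len(data))
	copy(out, data)
	return out, nil
}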

// Err returns the last error encountered during reading.
func (b *StreamingPiece) Err() error {
	if err, ok := b.err.Load().(error); ok {
		return err
	}
	return nil
}

// MarkCompleted tells the StreamingPiece to return some internal batches back
// to the BatchPool, since we don't need them anymore. It will assume that none
// of the first sharesCompleted shares will be asked for again.
func (b *StreamingPiece) MarkCompleted(sharesCompleted int) {
	// okay, figure out which batch is needed for share index sharesCompleted
	// (this is the next incomplete share). This will be completedBatches, and
	// the batch with index completedBatches is still in use! Everything before
	// it is free to reclaim though.
	completedBatches, _ := b.byteToBatch(int64(sharesCompleted) * int64(b.shareSize))

	for {
		// what do we think we've already marked completed?
		oldCompletedBatches := int(b.completedBatches.Load())
		if completedBatches <= oldCompletedBatches {
			// already done
			break
		}
		// okay, let's mark all of the batches we don't think we've marked
		// completed before as completed.
		for i := oldCompletedBatches; i < completedBatches; i++ {
			if batch := b.batches[i].Swap(freedBatch); batch != nil && batch != freedBatch {
				// a live batch! let's return it to the BatchPool.
				batch.Release()
			}
		}
		// okay, let's see if we're racing with any other MarkCompleteds and
		// if we need to rethink what we've done here. (we wouldn't want to
		// stomp on another MarkCompleted that did more than us.)
		if b.completedBatches.CompareAndSwap(int32(oldCompletedBatches), int32(completedBatches)) {
			// we're okay, we're done.
			break
		}
	}
}
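
// A sliding-window sketch, not part of the original file, tying the methods
// together: consume shares in order and call MarkCompleted as we go, so that
// fully consumed batches are returned to the BatchPool early. The
// consumeInOrder name and the process callback are hypothetical.
func consumeInOrder(piece *StreamingPiece, totalShares int, process func(share []byte) error) error {
	for i := 0; i < totalShares; i++ {
		data, release, err := piece.ReadShare(i)
		if err != nil {
			return err
		}
		err = process(data)
		release()
		if err != nil {
			return err
		}
		// promise that shares [0, i] will never be asked for again.
		piece.MarkCompleted(i + 1)
	}
	return nil
}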