github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/block/azure/chunkwriting.go (about) 1 package azure 2 3 import ( 4 "bytes" 5 "context" 6 "encoding/base64" 7 "encoding/binary" 8 "errors" 9 "fmt" 10 "io" 11 "sync" 12 "sync/atomic" 13 14 "github.com/Azure/azure-sdk-for-go/sdk/azcore/streaming" 15 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob" 16 guuid "github.com/google/uuid" 17 ) 18 19 var ErrEmptyBuffer = errors.New("BufferManager returned a 0 size buffer, this is a bug in the manager") 20 21 // This code adapted from azblob chunkwriting.go 22 // The reason is that the original code commit the data at the end of the copy 23 // In order to support multipart upload we need to save the blockIDs instead of committing them 24 // And once complete multipart is called we commit all the blockIDs 25 26 // blockWriter provides methods to upload blocks that represent a file to a server and commit them. 27 // This allows us to provide a local implementation that fakes the server for hermetic testing. 28 type blockWriter interface { 29 StageBlock(context.Context, string, io.ReadSeekCloser, *blockblob.StageBlockOptions) (blockblob.StageBlockResponse, error) 30 Upload(context.Context, io.ReadSeekCloser, *blockblob.UploadOptions) (blockblob.UploadResponse, error) 31 CommitBlockList(context.Context, []string, *blockblob.CommitBlockListOptions) (blockblob.CommitBlockListResponse, error) 32 } 33 34 // copyFromReader copies a source io.Reader to blob storage using concurrent uploads. 35 func copyFromReader(ctx context.Context, from io.Reader, to blockWriter, o blockblob.UploadStreamOptions) (*blockblob.CommitBlockListResponse, error) { 36 ctx, cancel := context.WithCancel(ctx) 37 defer cancel() 38 39 buffers := newMMBPool(o.Concurrency, o.BlockSize) 40 defer buffers.Free() 41 42 cp := &copier{ 43 ctx: ctx, 44 cancel: cancel, 45 reader: from, 46 to: to, 47 id: newID(), 48 o: o, 49 errCh: make(chan error, 1), 50 buffers: buffers, 51 } 52 53 // Send all our chunks until we get an error. 54 var ( 55 err error 56 buffer []byte 57 ) 58 for { 59 select { 60 case buffer = <-buffers.Acquire(): 61 // got a buffer 62 default: 63 // no buffer available; allocate a new buffer if possible 64 buffers.Grow() 65 // either grab the newly allocated buffer or wait for one to become available 66 buffer = <-buffers.Acquire() 67 } 68 err = cp.sendChunk(buffer) 69 if err != nil { 70 break 71 } 72 } 73 cp.wg.Wait() 74 // If the error is not EOF, then we have a problem. 75 if err != nil && !errors.Is(err, io.EOF) { 76 return nil, err 77 } 78 79 // Close out our upload. 80 if err := cp.close(); err != nil { 81 return nil, err 82 } 83 84 return &cp.result, nil 85 } 86 87 // copier streams a file via chunks in parallel from a reader representing a file. 88 // Do not use directly, instead use copyFromReader(). 89 type copier struct { 90 // ctx holds the context of a copier. This is normally a faux pas to store a Context in a struct. In this case, 91 // the copier has the lifetime of a function call, so it's fine. 92 ctx context.Context 93 cancel context.CancelFunc 94 95 // o contains our options for uploading. 96 o blockblob.UploadStreamOptions 97 98 // id provides the ids for each chunk. 99 id *id 100 101 // reader is the source to be written to storage. 102 reader io.Reader 103 // to is the location we are writing our chunks to. 104 to blockWriter 105 106 // errCh is used to hold the first error from our concurrent writers. 107 errCh chan error 108 // wg provides a count of how many writers we are waiting to finish. 109 wg sync.WaitGroup 110 111 // result holds the final result from blob storage after we have submitted all chunks. 112 result blockblob.CommitBlockListResponse 113 114 buffers bufferManager[mmb] 115 } 116 117 type copierChunk struct { 118 buffer []byte 119 id string 120 } 121 122 // getErr returns an error by priority. First, if a function set an error, it returns that error. Next, if the Context has an error 123 // it returns that error. Otherwise it is nil. getErr supports only returning an error once per copier. 124 func (c *copier) getErr() error { 125 select { 126 case err := <-c.errCh: 127 return err 128 default: 129 } 130 return c.ctx.Err() 131 } 132 133 // sendChunk reads data from out internal reader, creates a chunk, and sends it to be written via a channel. 134 // sendChunk returns io.EOF when the reader returns an io.EOF or io.ErrUnexpectedEOF. 135 func (c *copier) sendChunk(buffer []byte) error { 136 // TODO(niro): Need to find a solution to all the buffers.Release 137 if err := c.getErr(); err != nil { 138 c.buffers.Release(buffer) 139 return err 140 } 141 142 if len(buffer) == 0 { 143 c.buffers.Release(buffer) 144 return ErrEmptyBuffer 145 } 146 147 n, err := io.ReadFull(c.reader, buffer) 148 switch { 149 case err == nil && n == 0: 150 c.buffers.Release(buffer) 151 return nil 152 153 case err == nil: 154 nextID := c.id.next() 155 c.wg.Add(1) 156 // NOTE: we must pass id as an arg to our goroutine else 157 // it's captured by reference and can change underneath us! 158 go func(nextID string) { 159 // signal that the block has been staged. 160 // we MUST do this after attempting to write to errCh 161 // to avoid it racing with the reading goroutine. 162 defer c.wg.Done() 163 defer c.buffers.Release(buffer) 164 // Upload the outgoing block, matching the number of bytes read 165 c.write(copierChunk{buffer: buffer[0:n], id: nextID}) 166 }(nextID) 167 return nil 168 169 case err != nil && (errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF)) && n == 0: 170 c.buffers.Release(buffer) 171 return io.EOF 172 } 173 174 if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { 175 nextID := c.id.next() 176 c.wg.Add(1) 177 go func(nextID string) { 178 defer c.wg.Done() 179 defer c.buffers.Release(buffer) 180 // Upload the outgoing block, matching the number of bytes read 181 c.write(copierChunk{buffer: buffer[0:n], id: nextID}) 182 }(nextID) 183 return io.EOF 184 } 185 if err := c.getErr(); err != nil { 186 c.buffers.Release(buffer) 187 return err 188 } 189 c.buffers.Release(buffer) 190 return err 191 } 192 193 // write uploads a chunk to blob storage. 194 func (c *copier) write(chunk copierChunk) { 195 if err := c.ctx.Err(); err != nil { 196 return 197 } 198 _, err := c.to.StageBlock(c.ctx, chunk.id, streaming.NopCloser(bytes.NewReader(chunk.buffer)), &blockblob.StageBlockOptions{ 199 CPKInfo: c.o.CPKInfo, 200 CPKScopeInfo: c.o.CPKScopeInfo, 201 TransactionalValidation: c.o.TransactionalValidation, 202 }) 203 if err != nil { 204 c.errCh <- fmt.Errorf("write error: %w", err) 205 return 206 } 207 } 208 209 // close commits our blocks to blob storage and closes our writer. 210 func (c *copier) close() error { 211 if err := c.getErr(); err != nil { 212 return err 213 } 214 215 var err error 216 c.result, err = c.to.CommitBlockList(c.ctx, c.id.issued(), &blockblob.CommitBlockListOptions{ 217 Tags: c.o.Tags, 218 Metadata: c.o.Metadata, 219 Tier: c.o.AccessTier, 220 HTTPHeaders: c.o.HTTPHeaders, 221 CPKInfo: c.o.CPKInfo, 222 CPKScopeInfo: c.o.CPKScopeInfo, 223 AccessConditions: c.o.AccessConditions, 224 }) 225 return err 226 } 227 228 // id allows the creation of unique IDs based on UUID4 + an int32. This auto-increments. 229 type id struct { 230 u [64]byte 231 num uint32 232 all []string 233 } 234 235 // newID constructs a new id. 236 func newID() *id { 237 uu := guuid.New() 238 u := [64]byte{} 239 copy(u[:], uu[:]) 240 return &id{u: u} 241 } 242 243 // next returns the next ID. 244 func (id *id) next() string { 245 defer atomic.AddUint32(&id.num, 1) 246 247 binary.BigEndian.PutUint32(id.u[len(guuid.UUID{}):], atomic.LoadUint32(&id.num)) 248 str := base64.StdEncoding.EncodeToString(id.u[:]) 249 id.all = append(id.all, str) 250 251 return str 252 } 253 254 // issued returns all ids that have been issued. This returned value shares the internal slice, so it is not safe to modify the return. 255 // The value is only valid until the next time next() is called. 256 func (id *id) issued() []string { 257 return id.all 258 } 259 260 // Code taken from Azure SDK for go blockblob/chunkwriting.go 261 262 // bufferManager provides an abstraction for the management of buffers. 263 // this is mostly for testing purposes, but does allow for different implementations without changing the algorithm. 264 type bufferManager[T ~[]byte] interface { 265 // Acquire returns the channel that contains the pool of buffers. 266 Acquire() <-chan T 267 268 // Release releases the buffer back to the pool for reuse/cleanup. 269 Release(T) 270 271 // Grow grows the number of buffers, up to the predefined max. 272 // It returns the total number of buffers or an error. 273 // No error is returned if the number of buffers has reached max. 274 // This is called only from the reading goroutine. 275 Grow() int 276 277 // Free cleans up all buffers. 278 Free() 279 } 280 281 // mmb is a memory mapped buffer 282 type mmb []byte 283 284 // TODO (niro): consider implementation refactoring 285 // newMMB creates a new memory mapped buffer with the specified size 286 func newMMB(size int64) mmb { 287 return make(mmb, size) 288 } 289 290 // delete cleans up the memory mapped buffer 291 func (m *mmb) delete() { 292 } 293 294 // mmbPool implements the bufferManager interface. 295 // it uses anonymous memory mapped files for buffers. 296 // don't use this type directly, use newMMBPool() instead. 297 type mmbPool struct { 298 buffers chan mmb 299 count int 300 max int 301 size int64 302 } 303 304 func newMMBPool(maxBuffers int, bufferSize int64) bufferManager[mmb] { 305 return &mmbPool{ 306 buffers: make(chan mmb, maxBuffers), 307 max: maxBuffers, 308 size: bufferSize, 309 } 310 } 311 312 func (pool *mmbPool) Acquire() <-chan mmb { 313 return pool.buffers 314 } 315 316 func (pool *mmbPool) Grow() int { 317 if pool.count < pool.max { 318 buffer := newMMB(pool.size) 319 pool.buffers <- buffer 320 pool.count++ 321 } 322 return pool.count 323 } 324 325 func (pool *mmbPool) Release(buffer mmb) { 326 pool.buffers <- buffer 327 } 328 329 func (pool *mmbPool) Free() { 330 for i := 0; i < pool.count; i++ { 331 buffer := <-pool.buffers 332 buffer.delete() 333 } 334 pool.count = 0 335 }