github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/block/azure/chunkwriting.go

github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/block/azure/chunkwriting.go (about)

     1  package azure
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"encoding/base64"
     7  	"encoding/binary"
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"sync"
    12  	"sync/atomic"
    13  
    14  	"github.com/Azure/azure-sdk-for-go/sdk/azcore/streaming"
    15  	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob"
    16  	guuid "github.com/google/uuid"
    17  )
    18  
// ErrEmptyBuffer is returned by sendChunk when it is handed a zero-length buffer to read into.
var ErrEmptyBuffer = errors.New("BufferManager returned a 0 size buffer, this is a bug in the manager")
    20  
// This code is adapted from azblob chunkwriting.go.
// The reason is that the original code commits the data at the end of the copy.
// In order to support multipart upload we need to save the block IDs instead of committing them,
// and once complete multipart is called we commit all the block IDs.
    25  
    26  // blockWriter provides methods to upload blocks that represent a file to a server and commit them.
    27  // This allows us to provide a local implementation that fakes the server for hermetic testing.
    28  type blockWriter interface {
    29  	StageBlock(context.Context, string, io.ReadSeekCloser, *blockblob.StageBlockOptions) (blockblob.StageBlockResponse, error)
    30  	Upload(context.Context, io.ReadSeekCloser, *blockblob.UploadOptions) (blockblob.UploadResponse, error)
    31  	CommitBlockList(context.Context, []string, *blockblob.CommitBlockListOptions) (blockblob.CommitBlockListResponse, error)
    32  }
    33  
    34  // copyFromReader copies a source io.Reader to blob storage using concurrent uploads.
    35  func copyFromReader(ctx context.Context, from io.Reader, to blockWriter, o blockblob.UploadStreamOptions) (*blockblob.CommitBlockListResponse, error) {
    36  	ctx, cancel := context.WithCancel(ctx)
    37  	defer cancel()
    38  
    39  	buffers := newMMBPool(o.Concurrency, o.BlockSize)
    40  	defer buffers.Free()
    41  
    42  	cp := &copier{
    43  		ctx:     ctx,
    44  		cancel:  cancel,
    45  		reader:  from,
    46  		to:      to,
    47  		id:      newID(),
    48  		o:       o,
    49  		errCh:   make(chan error, 1),
    50  		buffers: buffers,
    51  	}
    52  
    53  	// Send all our chunks until we get an error.
    54  	var (
    55  		err    error
    56  		buffer []byte
    57  	)
    58  	for {
    59  		select {
    60  		case buffer = <-buffers.Acquire():
    61  			// got a buffer
    62  		default:
    63  			// no buffer available; allocate a new buffer if possible
    64  			buffers.Grow()
    65  			// either grab the newly allocated buffer or wait for one to become available
    66  			buffer = <-buffers.Acquire()
    67  		}
    68  		err = cp.sendChunk(buffer)
    69  		if err != nil {
    70  			break
    71  		}
    72  	}
    73  	cp.wg.Wait()
    74  	// If the error is not EOF, then we have a problem.
    75  	if err != nil && !errors.Is(err, io.EOF) {
    76  		return nil, err
    77  	}
    78  
    79  	// Close out our upload.
    80  	if err := cp.close(); err != nil {
    81  		return nil, err
    82  	}
    83  
    84  	return &cp.result, nil
    85  }
    86  
// copier streams a file via chunks in parallel from a reader representing a file.
// Do not use directly, instead use copyFromReader().
type copier struct {
	// ctx holds the context of a copier. This is normally a faux pas to store a Context in a struct. In this case,
	// the copier has the lifetime of a function call, so it's fine.
	// cancel is the CancelFunc created together with ctx by context.WithCancel in copyFromReader.
	ctx    context.Context
	cancel context.CancelFunc

	// o contains our options for uploading.
	o blockblob.UploadStreamOptions

	// id provides the ids for each chunk.
	id *id

	// reader is the source to be written to storage.
	reader io.Reader
	// to is the location we are writing our chunks to.
	to blockWriter

	// errCh is used to hold the first error from our concurrent writers.
	// copyFromReader creates it with a buffer of one.
	errCh chan error
	// wg provides a count of how many writers we are waiting to finish.
	wg sync.WaitGroup

	// result holds the final result from blob storage after we have submitted all chunks.
	result blockblob.CommitBlockListResponse

	// buffers is the pool that chunk buffers are released back to once they are no longer needed.
	buffers bufferManager[mmb]
}
   116  
// copierChunk is one unit of upload work handed to copier.write.
type copierChunk struct {
	buffer []byte // bytes to stage as a single block
	id     string // block ID passed to StageBlock for these bytes
}
   121  
   122  // getErr returns an error by priority. First, if a function set an error, it returns that error. Next, if the Context has an error
   123  // it returns that error. Otherwise it is nil. getErr supports only returning an error once per copier.
   124  func (c *copier) getErr() error {
   125  	select {
   126  	case err := <-c.errCh:
   127  		return err
   128  	default:
   129  	}
   130  	return c.ctx.Err()
   131  }
   132  
   133  // sendChunk reads data from out internal reader, creates a chunk, and sends it to be written via a channel.
   134  // sendChunk returns io.EOF when the reader returns an io.EOF or io.ErrUnexpectedEOF.
   135  func (c *copier) sendChunk(buffer []byte) error {
   136  	// TODO(niro): Need to find a solution to all the buffers.Release
   137  	if err := c.getErr(); err != nil {
   138  		c.buffers.Release(buffer)
   139  		return err
   140  	}
   141  
   142  	if len(buffer) == 0 {
   143  		c.buffers.Release(buffer)
   144  		return ErrEmptyBuffer
   145  	}
   146  
   147  	n, err := io.ReadFull(c.reader, buffer)
   148  	switch {
   149  	case err == nil && n == 0:
   150  		c.buffers.Release(buffer)
   151  		return nil
   152  
   153  	case err == nil:
   154  		nextID := c.id.next()
   155  		c.wg.Add(1)
   156  		// NOTE: we must pass id as an arg to our goroutine else
   157  		// it's captured by reference and can change underneath us!
   158  		go func(nextID string) {
   159  			// signal that the block has been staged.
   160  			// we MUST do this after attempting to write to errCh
   161  			// to avoid it racing with the reading goroutine.
   162  			defer c.wg.Done()
   163  			defer c.buffers.Release(buffer)
   164  			// Upload the outgoing block, matching the number of bytes read
   165  			c.write(copierChunk{buffer: buffer[0:n], id: nextID})
   166  		}(nextID)
   167  		return nil
   168  
   169  	case err != nil && (errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF)) && n == 0:
   170  		c.buffers.Release(buffer)
   171  		return io.EOF
   172  	}
   173  
   174  	if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
   175  		nextID := c.id.next()
   176  		c.wg.Add(1)
   177  		go func(nextID string) {
   178  			defer c.wg.Done()
   179  			defer c.buffers.Release(buffer)
   180  			// Upload the outgoing block, matching the number of bytes read
   181  			c.write(copierChunk{buffer: buffer[0:n], id: nextID})
   182  		}(nextID)
   183  		return io.EOF
   184  	}
   185  	if err := c.getErr(); err != nil {
   186  		c.buffers.Release(buffer)
   187  		return err
   188  	}
   189  	c.buffers.Release(buffer)
   190  	return err
   191  }
   192  
   193  // write uploads a chunk to blob storage.
   194  func (c *copier) write(chunk copierChunk) {
   195  	if err := c.ctx.Err(); err != nil {
   196  		return
   197  	}
   198  	_, err := c.to.StageBlock(c.ctx, chunk.id, streaming.NopCloser(bytes.NewReader(chunk.buffer)), &blockblob.StageBlockOptions{
   199  		CPKInfo:                 c.o.CPKInfo,
   200  		CPKScopeInfo:            c.o.CPKScopeInfo,
   201  		TransactionalValidation: c.o.TransactionalValidation,
   202  	})
   203  	if err != nil {
   204  		c.errCh <- fmt.Errorf("write error: %w", err)
   205  		return
   206  	}
   207  }
   208  
   209  // close commits our blocks to blob storage and closes our writer.
   210  func (c *copier) close() error {
   211  	if err := c.getErr(); err != nil {
   212  		return err
   213  	}
   214  
   215  	var err error
   216  	c.result, err = c.to.CommitBlockList(c.ctx, c.id.issued(), &blockblob.CommitBlockListOptions{
   217  		Tags:             c.o.Tags,
   218  		Metadata:         c.o.Metadata,
   219  		Tier:             c.o.AccessTier,
   220  		HTTPHeaders:      c.o.HTTPHeaders,
   221  		CPKInfo:          c.o.CPKInfo,
   222  		CPKScopeInfo:     c.o.CPKScopeInfo,
   223  		AccessConditions: c.o.AccessConditions,
   224  	})
   225  	return err
   226  }
   227  
   228  // id allows the creation of unique IDs based on UUID4 + an int32. This auto-increments.
   229  type id struct {
   230  	u   [64]byte
   231  	num uint32
   232  	all []string
   233  }
   234  
   235  // newID constructs a new id.
   236  func newID() *id {
   237  	uu := guuid.New()
   238  	u := [64]byte{}
   239  	copy(u[:], uu[:])
   240  	return &id{u: u}
   241  }
   242  
   243  // next returns the next ID.
   244  func (id *id) next() string {
   245  	defer atomic.AddUint32(&id.num, 1)
   246  
   247  	binary.BigEndian.PutUint32(id.u[len(guuid.UUID{}):], atomic.LoadUint32(&id.num))
   248  	str := base64.StdEncoding.EncodeToString(id.u[:])
   249  	id.all = append(id.all, str)
   250  
   251  	return str
   252  }
   253  
   254  // issued returns all ids that have been issued. This returned value shares the internal slice, so it is not safe to modify the return.
   255  // The value is only valid until the next time next() is called.
   256  func (id *id) issued() []string {
   257  	return id.all
   258  }
   259  
   260  // Code taken from Azure SDK for go blockblob/chunkwriting.go
   261  
// bufferManager provides an abstraction for the management of buffers.
// this is mostly for testing purposes, but does allow for different implementations without changing the algorithm.
type bufferManager[T ~[]byte] interface {
	// Acquire returns the channel that contains the pool of buffers.
	Acquire() <-chan T

	// Release releases the buffer back to the pool for reuse/cleanup.
	Release(T)

	// Grow grows the number of buffers, up to the predefined max.
	// It returns the total number of buffers; there is no error return.
	// Once the number of buffers has reached max, the call is expected to leave the pool unchanged.
	// This is called only from the reading goroutine.
	Grow() int

	// Free cleans up all buffers.
	Free()
}
   280  
   281  // mmb is a memory mapped buffer
   282  type mmb []byte
   283  
   284  // TODO (niro): consider implementation refactoring
   285  // newMMB creates a new memory mapped buffer with the specified size
   286  func newMMB(size int64) mmb {
   287  	return make(mmb, size)
   288  }
   289  
   290  // delete cleans up the memory mapped buffer
   291  func (m *mmb) delete() {
   292  }
   293  
   294  // mmbPool implements the bufferManager interface.
   295  // it uses anonymous memory mapped files for buffers.
   296  // don't use this type directly, use newMMBPool() instead.
   297  type mmbPool struct {
   298  	buffers chan mmb
   299  	count   int
   300  	max     int
   301  	size    int64
   302  }
   303  
   304  func newMMBPool(maxBuffers int, bufferSize int64) bufferManager[mmb] {
   305  	return &mmbPool{
   306  		buffers: make(chan mmb, maxBuffers),
   307  		max:     maxBuffers,
   308  		size:    bufferSize,
   309  	}
   310  }
   311  
   312  func (pool *mmbPool) Acquire() <-chan mmb {
   313  	return pool.buffers
   314  }
   315  
   316  func (pool *mmbPool) Grow() int {
   317  	if pool.count < pool.max {
   318  		buffer := newMMB(pool.size)
   319  		pool.buffers <- buffer
   320  		pool.count++
   321  	}
   322  	return pool.count
   323  }
   324  
   325  func (pool *mmbPool) Release(buffer mmb) {
   326  	pool.buffers <- buffer
   327  }
   328  
   329  func (pool *mmbPool) Free() {
   330  	for i := 0; i < pool.count; i++ {
   331  		buffer := <-pool.buffers
   332  		buffer.delete()
   333  	}
   334  	pool.count = 0
   335  }