github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/digest/digestwriter.go (about)

     1  // Copyright 2017 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package digest
     6  
     7  import (
     8  	"context"
     9  	"io"
    10  	"sync"
    11  )
    12  
    13  type write struct {
    14  	done chan struct{}
    15  	p    []byte
    16  	off  int64
    17  	next *write
    18  }
    19  
    20  // WriterAt can be used to calculate the digest of a file as it is
    21  // being written. It uses back pressure to stall writes when a block
    22  // is missing. WriterAt must be written to sequentially (otherwise a
    23  // deadlock is possible); but it accepts re-writes of past regions so
    24  // that the user can retry failures.
    25  //
    26  // In particular, this matches the semantics for the S3 download
    27  // manager from github.com/aws/aws-sdk-go; thus WriterAt can be used
    28  // to speed up simultaneous download and integrity checking for
    29  // objects stored in S3.
    30  type WriterAt struct {
    31  	ctx          context.Context
    32  	digestWriter Writer
    33  	target       io.WriterAt
    34  	loopOnce     sync.Once
    35  	writes       chan *write
    36  	done         chan struct{}
    37  }
    38  
    39  // NewWriterAt creates a new WriterAt. The provided context is used
    40  // to fail pending writes upon cancellation.
    41  func (d Digester) NewWriterAt(ctx context.Context, target io.WriterAt) *WriterAt {
    42  	return &WriterAt{
    43  		ctx:          ctx,
    44  		digestWriter: d.NewWriter(),
    45  		target:       target,
    46  		writes:       make(chan *write),
    47  		done:         make(chan struct{}),
    48  	}
    49  }
    50  
    51  func (w *WriterAt) loop() {
    52  	var (
    53  		q   *write
    54  		off int64
    55  	)
    56  	for {
    57  		var (
    58  			wr *write
    59  			ok bool
    60  		)
    61  		select {
    62  		case wr, ok = <-w.writes:
    63  		case <-w.ctx.Done():
    64  		}
    65  		if !ok {
    66  			break
    67  		}
    68  		switch {
    69  		// No overlap: waiting for preceding bytes.
    70  		// Insert ourselves into the waitq.
    71  		case off < wr.off:
    72  			p := &q
    73  			for *p != nil && (*p).off < wr.off {
    74  				p = &(*p).next
    75  			}
    76  			wr.next = *p
    77  			*p = wr
    78  		// No overlap: already written.
    79  		case off >= wr.off+int64(len(wr.p)):
    80  			close(wr.done)
    81  		// Unwritten overlap:
    82  		default:
    83  			wr.next = q
    84  			q = wr
    85  			for q != nil && q.off <= off {
    86  				p := q.p[off-q.off:]
    87  				w.digestWriter.Write(p)
    88  				close(q.done)
    89  				off += int64(len(p))
    90  				q = q.next
    91  			}
    92  		}
    93  	}
    94  	close(w.done)
    95  }
    96  
    97  func (w *WriterAt) goloop() { go w.loop() }
    98  
    99  // WriteAt implements the WriterAt interface.
   100  func (w *WriterAt) WriteAt(p []byte, off int64) (int, error) {
   101  	w.loopOnce.Do(w.goloop)
   102  	n, err := w.target.WriteAt(p, off)
   103  	if n > 0 {
   104  		wr := &write{done: make(chan struct{}), p: p[:n], off: off}
   105  		select {
   106  		case w.writes <- wr:
   107  		case <-w.ctx.Done():
   108  			return 0, w.ctx.Err()
   109  		}
   110  		select {
   111  		case <-wr.done:
   112  		case <-w.ctx.Done():
   113  			return 0, w.ctx.Err()
   114  		}
   115  	}
   116  	return n, err
   117  }
   118  
   119  // Digest returns the digest for the data that has been written. Digest
   120  // waits for writes to flush, and returns underlying context errors.
   121  func (w *WriterAt) Digest() (Digest, error) {
   122  	w.loopOnce.Do(w.goloop)
   123  	close(w.writes)
   124  	select {
   125  	case <-w.done:
   126  	case <-w.ctx.Done():
   127  		return Digest{}, w.ctx.Err()
   128  	}
   129  	return w.digestWriter.Digest(), nil
   130  }