github.com/grailbio/base@v0.0.11/digest/digestwriter.go (about) 1 // Copyright 2017 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache 2.0 3 // license that can be found in the LICENSE file. 4 5 package digest 6 7 import ( 8 "context" 9 "io" 10 "sync" 11 ) 12 13 type write struct { 14 done chan struct{} 15 p []byte 16 off int64 17 next *write 18 } 19 20 // WriterAt can be used to calculate the digest of a file as it is 21 // being written. It uses back pressure to stall writes when a block 22 // is missing. WriterAt must be written to sequentially (otherwise a 23 // deadlock is possible); but it accepts re-writes of past regions so 24 // that the user can retry failures. 25 // 26 // In particular, this matches the semantics for the S3 download 27 // manager from github.com/aws/aws-sdk-go; thus WriterAt can be used 28 // to speed up simultaneous download and integrity checking for 29 // objects stored in S3. 30 type WriterAt struct { 31 ctx context.Context 32 digestWriter Writer 33 target io.WriterAt 34 loopOnce sync.Once 35 writes chan *write 36 done chan struct{} 37 } 38 39 // NewWriterAt creates a new WriterAt. The provided context is used 40 // to fail pending writes upon cancellation. 41 func (d Digester) NewWriterAt(ctx context.Context, target io.WriterAt) *WriterAt { 42 return &WriterAt{ 43 ctx: ctx, 44 digestWriter: d.NewWriter(), 45 target: target, 46 writes: make(chan *write), 47 done: make(chan struct{}), 48 } 49 } 50 51 func (w *WriterAt) loop() { 52 var ( 53 q *write 54 off int64 55 ) 56 for { 57 var ( 58 wr *write 59 ok bool 60 ) 61 select { 62 case wr, ok = <-w.writes: 63 case <-w.ctx.Done(): 64 } 65 if !ok { 66 break 67 } 68 switch { 69 // No overlap: waiting for preceding bytes. 70 // Insert ourselves into the waitq. 71 case off < wr.off: 72 p := &q 73 for *p != nil && (*p).off < wr.off { 74 p = &(*p).next 75 } 76 wr.next = *p 77 *p = wr 78 // No overlap: already written. 79 case off >= wr.off+int64(len(wr.p)): 80 close(wr.done) 81 // Unwritten overlap: 82 default: 83 wr.next = q 84 q = wr 85 for q != nil && q.off <= off { 86 p := q.p[off-q.off:] 87 w.digestWriter.Write(p) 88 close(q.done) 89 off += int64(len(p)) 90 q = q.next 91 } 92 } 93 } 94 close(w.done) 95 } 96 97 func (w *WriterAt) goloop() { go w.loop() } 98 99 // WriteAt implements the WriterAt interface. 100 func (w *WriterAt) WriteAt(p []byte, off int64) (int, error) { 101 w.loopOnce.Do(w.goloop) 102 n, err := w.target.WriteAt(p, off) 103 if n > 0 { 104 wr := &write{done: make(chan struct{}), p: p[:n], off: off} 105 select { 106 case w.writes <- wr: 107 case <-w.ctx.Done(): 108 return 0, w.ctx.Err() 109 } 110 select { 111 case <-wr.done: 112 case <-w.ctx.Done(): 113 return 0, w.ctx.Err() 114 } 115 } 116 return n, err 117 } 118 119 // Digest returns the digest for the data that has been written. Digest 120 // waits for writes to flush, and returns underlying context errors. 121 func (w *WriterAt) Digest() (Digest, error) { 122 w.loopOnce.Do(w.goloop) 123 close(w.writes) 124 select { 125 case <-w.done: 126 case <-w.ctx.Done(): 127 return Digest{}, w.ctx.Err() 128 } 129 return w.digestWriter.Digest(), nil 130 }