github.com/grailbio/base@v0.0.11/digest/digestreader.go (about) 1 // Copyright 2017 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache 2.0 3 // license that can be found in the LICENSE file. 4 5 package digest 6 7 import ( 8 "fmt" 9 "io" 10 "sync" 11 "sync/atomic" 12 ) 13 14 // Reader can be used to calculate the digest of a file as it is being 15 // read. It uses back pressure to stall reads when a block is missing. 16 // This can cause deadlock if the application doesn't retry immediately. 17 // 18 // The s3manager uploader differentiates between two kinds of readers to 19 // improve upload performance: Simple Readers and "ReaderSeekers" for 20 // performance. This implementation creates either a simpleReaderAt or a 21 // readerAtSeeker depending on the underlying ReaderAt. 22 // 23 // Expects the reads to be complete and non-overlapping. 24 type Reader interface { 25 io.Reader 26 Digest() (Digest, error) 27 } 28 29 type readerWrap struct { 30 mu sync.Mutex // GUARDS reader. 31 err error 32 digestWriter Writer 33 source io.Reader 34 } 35 36 // Digest returns the digest for the data that has been read. 37 func (r *readerWrap) Digest() (Digest, error) { 38 r.mu.Lock() 39 defer r.mu.Unlock() 40 41 if r.err != nil { 42 return Digest{}, r.err 43 } 44 45 return r.digestWriter.Digest(), nil 46 } 47 48 // Read implements the io.Reader interface. It reads data from the file 49 // and places it in p, returning the number of bytes placed in the slice as 50 // well as any error. 51 func (r *readerWrap) Read(p []byte) (int, error) { 52 r.mu.Lock() 53 defer r.mu.Unlock() 54 55 n, err := r.source.Read(p) 56 r.err = err 57 58 if r.err != nil { 59 return n, r.err 60 } 61 62 q := p[:n] 63 // todo(ysiato, schandra, pknudsgaard) this looks like another intentional no-error-check like digest.go:407 64 r.digestWriter.Write(q) 65 66 return n, r.err 67 } 68 69 type readerAtSeeker interface { 70 io.ReaderAt 71 io.ReadSeeker 72 } 73 74 type readerAtSeekerWrap struct { 75 mu sync.Mutex 76 cond *sync.Cond 77 pending int64 78 err error 79 current int64 80 digestWriter Writer 81 source readerAtSeeker 82 } 83 84 // Read is present to fulfill the io.Reader API, but should not be called. 85 func (ras *readerAtSeekerWrap) Read(p []byte) (n int, err error) { 86 panic("Read should not be called on ReaderAt") 87 } 88 89 // ReadAt implements the ReaderAt interface. 90 func (ras *readerAtSeekerWrap) ReadAt(p []byte, off int64) (int, error) { 91 // pending should be incremented, but concurrency for the source.ReadAt 92 // should be maintained. Using atomic means that we don't have to 93 // acquire/release/read/acquire/release. 94 for { 95 n := atomic.LoadInt64(&ras.pending) 96 if n < 0 { 97 panic("digest already called") 98 } 99 if atomic.CompareAndSwapInt64(&ras.pending, n, n+1) { 100 break 101 } 102 } 103 defer atomic.AddInt64(&ras.pending, -1) 104 105 n, err := ras.source.ReadAt(p, off) 106 107 ras.mu.Lock() 108 defer ras.mu.Unlock() 109 110 if ras.err != nil { 111 return 0, ras.err 112 } 113 114 ras.err = err 115 116 for ras.current != off && ras.err == nil { 117 ras.cond.Wait() 118 } 119 120 if ras.err != nil { 121 return 0, ras.err 122 } 123 124 q := p[:n] 125 ras.digestWriter.Write(q) 126 127 ras.current += int64(n) 128 ras.cond.Broadcast() 129 130 return n, ras.err 131 } 132 133 func (ras *readerAtSeekerWrap) Seek(offset int64, whence int) (int64, error) { 134 return ras.source.Seek(offset, whence) 135 } 136 137 // Digest returns the digest for the data. Digest cannot be called with pending 138 // reads. 139 func (ras *readerAtSeekerWrap) Digest() (Digest, error) { 140 ras.mu.Lock() 141 defer ras.mu.Unlock() 142 143 for { 144 n := atomic.LoadInt64(&ras.pending) 145 if n > 0 { 146 panic(fmt.Sprintf("Digest() called before all writes have completed, %d pending", ras.pending)) 147 } 148 if n < 0 || atomic.CompareAndSwapInt64(&ras.pending, n, -1) { 149 break 150 } 151 } 152 153 if ras.err != nil { 154 return Digest{}, ras.err 155 } 156 157 return ras.digestWriter.Digest(), nil 158 } 159 160 // NewReader creates a new WriterAt. 161 func (d Digester) NewReader(source io.Reader) Reader { 162 ras, ok := source.(readerAtSeeker) 163 if ok { 164 result := &readerAtSeekerWrap{ 165 digestWriter: d.NewWriter(), 166 source: ras, 167 } 168 result.cond = sync.NewCond(&result.mu) 169 170 return result 171 } 172 173 result := &readerWrap{ 174 digestWriter: d.NewWriter(), 175 source: source, 176 } 177 178 return result 179 }