github.com/bazelbuild/remote-apis-sdks@v0.0.0-20240425170053-8a36686a6350/go/pkg/reader/reader.go

package reader

import (
	"bufio"
	"bytes"
	"errors"
	"fmt"
	"io"
	"os"
	"sync"

	log "github.com/golang/glog"
	"github.com/klauspost/compress/zstd"
)

// errNotInitialized is the error returned from Read() by a ReadSeeker that
// hasn't yet had Initialize() called.
//
// stylecheck is disabled because the error text starts with a capital letter,
// and changing the text would be an API change.
var errNotInitialized = errors.New("Not yet initialized") // nolint:stylecheck

// Initializable is an interface containing methods to initialize a ReadSeeker.
type Initializable interface {
	IsInitialized() bool
	Initialize() error
}

// ReadSeeker is an interface used to capture a file reader with seek functionality.
type ReadSeeker interface {
	io.Reader
	io.Closer
	Initializable
	SeekOffset(offset int64) error
}

type fileSeeker struct {
	reader *bufio.Reader

	f           *os.File
	path        string
	buffSize    int
	seekOffset  int64
	initialized bool
}

// NewFileReadSeeker wraps a buffered file reader with seeking functionality.
// Note that SeekOffset calls un-set the reader and require a subsequent
// Initialize call. This is to avoid potentially unnecessary disk IO.
func NewFileReadSeeker(path string, buffsize int) ReadSeeker {
	return &fileSeeker{
		f:           nil,
		path:        path,
		buffSize:    buffsize,
		seekOffset:  0,
		initialized: false,
	}
}

// Close closes the reader. It can still be reopened with Initialize().
func (fio *fileSeeker) Close() (err error) {
	fio.initialized = false
	if fio.f != nil {
		err = fio.f.Close()
	}
	fio.f = nil
	fio.reader = nil
	return err
}

// Read implements io.Reader.
func (fio *fileSeeker) Read(p []byte) (int, error) {
	if !fio.IsInitialized() {
		return 0, errNotInitialized
	}

	return fio.reader.Read(p)
}

// SeekOffset is a simplified version of io.Seeker's Seek. It only supports
// offsets from the beginning of the file, and it errors lazily at the next
// Initialize.
func (fio *fileSeeker) SeekOffset(offset int64) error {
	fio.seekOffset = offset
	fio.initialized = false
	fio.reader = nil
	return nil
}

// IsInitialized indicates whether this reader is ready. If false, Read calls
// will fail.
func (fio *fileSeeker) IsInitialized() bool {
	return fio.initialized
}

// Initialize does the required IO pre-work for Read calls to function.
func (fio *fileSeeker) Initialize() error {
	if fio.initialized {
		return errors.New("Already initialized")
	}

	if fio.f == nil {
		var err error
		fio.f, err = os.Open(fio.path)
		if err != nil {
			return err
		}
	}

	off, err := fio.f.Seek(fio.seekOffset, io.SeekStart)
	if err != nil {
		return err
	}
	if off != fio.seekOffset {
		return fmt.Errorf("file seeking ended at %d, expected %d", off, fio.seekOffset)
	}

	if fio.reader == nil {
		fio.reader = bufio.NewReaderSize(fio.f, fio.buffSize)
	} else {
		fio.reader.Reset(fio.f)
	}
	fio.initialized = true
	return nil
}
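
// readAllFrom is an illustrative usage sketch, not part of the original file:
// it shows the calling pattern the types above are designed for. SeekOffset
// only records the desired position, Initialize performs the actual disk IO,
// and Read can then be called until io.EOF. The helper name and the 4 KiB
// scratch buffer are assumptions made for this example only.
func readAllFrom(rs ReadSeeker, offset int64) ([]byte, error) {
	// Seeking is lazy: any error surfaces at the next Initialize call.
	if err := rs.SeekOffset(offset); err != nil {
		return nil, err
	}
	if err := rs.Initialize(); err != nil {
		return nil, err
	}
	defer rs.Close()

	var out []byte
	buf := make([]byte, 4*1024)
	for {
		n, err := rs.Read(buf)
		out = append(out, buf[:n]...)
		if err == io.EOF {
			return out, nil
		}
		if err != nil {
			return out, err
		}
	}
}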

// The zstd encoder lib writes to the buffer asynchronously, so access must be
// locked before actually checking for contents.
type syncedBuffer struct {
	mu  sync.Mutex
	buf *bytes.Buffer
}

func (sb *syncedBuffer) Read(p []byte) (int, error) {
	sb.mu.Lock()
	defer sb.mu.Unlock()
	return sb.buf.Read(p)
}

func (sb *syncedBuffer) Write(p []byte) (int, error) {
	sb.mu.Lock()
	defer sb.mu.Unlock()
	return sb.buf.Write(p)
}

func (sb *syncedBuffer) Len() int {
	sb.mu.Lock()
	defer sb.mu.Unlock()
	return sb.buf.Len()
}

func (sb *syncedBuffer) Reset() {
	sb.mu.Lock()
	defer sb.mu.Unlock()
	sb.buf.Reset()
}
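
// compressToSyncedBuffer is an illustrative sketch, not part of the original
// file: it shows the wiring that compressedSeeker (below) relies on, with a
// zstd encoder writing its compressed output into a syncedBuffer. The helper
// name is an assumption made for this example only.
func compressToSyncedBuffer(data []byte) (*syncedBuffer, error) {
	sb := &syncedBuffer{buf: bytes.NewBuffer(nil)}
	enc, err := zstd.NewWriter(sb, zstd.WithEncoderConcurrency(1))
	if err != nil {
		return nil, err
	}
	if _, err := enc.Write(data); err != nil {
		enc.Close()
		return nil, err
	}
	// Close flushes frames smaller than zstd's window size into sb; the
	// compressed bytes can then be read back from sb.
	if err := enc.Close(); err != nil {
		return nil, err
	}
	return sb, nil
}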

type compressedSeeker struct {
	fs    ReadSeeker
	encdW *zstd.Encoder
	// buf keeps the compressed data.
	buf *syncedBuffer
}

var encoderInit sync.Once
var encoders *sync.Pool

// NewCompressedFileSeeker creates a ReadSeeker based on a file path.
func NewCompressedFileSeeker(path string, buffsize int) (ReadSeeker, error) {
	return NewCompressedSeeker(NewFileReadSeeker(path, buffsize))
}

// NewCompressedSeeker wraps a ReadSeeker to compress its data on the fly.
func NewCompressedSeeker(fs ReadSeeker) (ReadSeeker, error) {
	if _, ok := fs.(*compressedSeeker); ok {
		return nil, errors.New("trying to double compress files")
	}

	encoderInit.Do(func() {
		encoders = &sync.Pool{
			New: func() interface{} {
				e, err := zstd.NewWriter(nil, zstd.WithEncoderConcurrency(1))
				if err != nil {
					log.Errorf("Error creating new encoder: %v", err)
					return nil
				}
				return e
			},
		}
	})

	buf := bytes.NewBuffer(nil)
	sb := &syncedBuffer{buf: buf}

	encdIntf := encoders.Get()
	encdW, ok := encdIntf.(*zstd.Encoder)
	if !ok || encdW == nil {
		return nil, errors.New("failed creating new encoder")
	}

	encdW.Reset(sb)
	return &compressedSeeker{
		fs:    fs,
		encdW: encdW,
		buf:   sb,
	}, nil
}

func (cfs *compressedSeeker) Read(p []byte) (int, error) {
	if !cfs.IsInitialized() {
		return 0, errNotInitialized
	}

	var err error
	// Repeatedly encode chunks of input data until there's enough compressed
	// data to fill the output buffer. It can't be known ahead of time how much
	// uncompressed data will correspond to the desired amount of output
	// compressed data, hence the need for a loop.
	//
	// err will be nil until the loop encounters an error. cfs.encdW will be nil
	// when entering the loop if a previous Read call encountered an error or
	// reached an EOF, in which case there's no more data to encode.
	for cfs.buf.Len() < len(p) && err == nil && cfs.encdW != nil {
		var n int
		// Read is allowed to use the entirety of p as a scratchpad.
		n, err = cfs.fs.Read(p)
		// errW must be non-nil if the number of written bytes is != n.
		_, errW := cfs.encdW.Write(p[:n])
		if errW != nil && (err == nil || err == io.EOF) {
			err = errW
		}
	}

	if err != nil {
		// When the buffer ends (EOF), or in case of an unexpected error,
		// compress the remaining available bytes. The encoder requires a Close
		// call to finish writing compressed data smaller than zstd's window
		// size.
		closeErr := cfs.encdW.Close()
		if err == io.EOF {
			err = closeErr
		}
		encoders.Put(cfs.encdW)
		cfs.encdW = nil
	}

	n, readErr := cfs.buf.Read(p)
	if err == nil {
		err = readErr
	}
	return n, err
}

func (cfs *compressedSeeker) SeekOffset(offset int64) error {
	cfs.buf.Reset()
	if cfs.encdW == nil {
		encdIntf := encoders.Get()
		var ok bool
		cfs.encdW, ok = encdIntf.(*zstd.Encoder)
		if !ok || cfs.encdW == nil {
			return errors.New("failed to get a new encoder")
		}
	} else if err := cfs.encdW.Close(); err != nil {
		encoders.Put(cfs.encdW)
		cfs.encdW = nil
		return err
	}

	cfs.buf.Reset()
	cfs.encdW.Reset(cfs.buf)
	return cfs.fs.SeekOffset(offset)
}

func (cfs *compressedSeeker) IsInitialized() bool { return cfs.fs.IsInitialized() }
func (cfs *compressedSeeker) Initialize() error   { return cfs.fs.Initialize() }

// Close does not need to close the encoder; that is handled by Read.
func (cfs *compressedSeeker) Close() error { return cfs.fs.Close() }
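
// compressFile is an illustrative usage sketch, not part of the original file:
// it wraps a file in a compressing ReadSeeker via NewCompressedFileSeeker and
// drains it, returning the zstd-compressed bytes. Read hands back compressed
// data and eventually io.EOF once the underlying file and the encoder's buffer
// are exhausted. The helper name and the 64 KiB buffer size are assumptions
// made for this example only.
func compressFile(path string) ([]byte, error) {
	rs, err := NewCompressedFileSeeker(path, 64*1024)
	if err != nil {
		return nil, err
	}
	if err := rs.Initialize(); err != nil {
		return nil, err
	}
	defer rs.Close()

	var compressed []byte
	buf := make([]byte, 64*1024)
	for {
		n, err := rs.Read(buf)
		compressed = append(compressed, buf[:n]...)
		if err == io.EOF {
			return compressed, nil
		}
		if err != nil {
			return compressed, err
		}
	}
}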