github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/byte_sink.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package nbs 16 17 import ( 18 "crypto/md5" 19 "errors" 20 "hash" 21 "io" 22 "os" 23 "sync" 24 25 "github.com/dolthub/dolt/go/store/util/tempfiles" 26 27 "github.com/dolthub/dolt/go/libraries/utils/iohelp" 28 "github.com/dolthub/dolt/go/store/atomicerr" 29 ) 30 31 func flushSinkToFile(sink ByteSink, path string) (err error) { 32 var f *os.File 33 f, err = os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.ModePerm) 34 35 if err != nil { 36 return err 37 } 38 39 defer func() { 40 closeErr := f.Close() 41 42 if err == nil { 43 err = closeErr 44 } 45 }() 46 47 err = sink.Flush(f) 48 return err 49 } 50 51 // A ByteSink is an interface for writing bytes which can later be flushed to a writer 52 type ByteSink interface { 53 io.Writer 54 55 // Flush writes all the data that was written to the ByteSink to the supplied writer 56 Flush(wr io.Writer) error 57 58 // FlushToFile writes all the data that was written to the ByteSink to a file at the given path 59 FlushToFile(path string) error 60 } 61 62 // ErrBuffFull used by the FixedBufferSink when the data written is larger than the buffer allocated. 63 var ErrBufferFull = errors.New("buffer full") 64 65 // FixedBufferByteSink is a ByteSink implementation with a buffer whose size will not change. Writing more 66 // data than the fixed buffer can hold will result in an error 67 type FixedBufferByteSink struct { 68 buff []byte 69 pos uint64 70 } 71 72 // NewFixedBufferTableSink creates a FixedBufferTableSink which will use the supplied buffer 73 func NewFixedBufferTableSink(buff []byte) *FixedBufferByteSink { 74 if len(buff) == 0 { 75 panic("must provide a buffer") 76 } 77 78 return &FixedBufferByteSink{buff: buff} 79 } 80 81 // Write writes a byte array to the sink. 82 func (sink *FixedBufferByteSink) Write(src []byte) (int, error) { 83 dest := sink.buff[sink.pos:] 84 destLen := len(dest) 85 srcLen := len(src) 86 87 if destLen < srcLen { 88 return 0, ErrBufferFull 89 } 90 91 copy(dest, src) 92 93 sink.pos += uint64(srcLen) 94 return srcLen, nil 95 } 96 97 // Flush writes all the data that was written to the ByteSink to the supplied writer 98 func (sink *FixedBufferByteSink) Flush(wr io.Writer) error { 99 return iohelp.WriteAll(wr, sink.buff[:sink.pos]) 100 } 101 102 // FlushToFile writes all the data that was written to the ByteSink to a file at the given path 103 func (sink *FixedBufferByteSink) FlushToFile(path string) (err error) { 104 return flushSinkToFile(sink, path) 105 } 106 107 // BlockBufferByteSink allocates blocks of data with a given block size to store the bytes written to the sink. New 108 // blocks are allocated as needed in order to handle all the data of the Write calls. 109 type BlockBufferByteSink struct { 110 blockSize int 111 pos uint64 112 blocks [][]byte 113 } 114 115 // NewBlockBufferTableSink creates a BlockBufferByteSink with the provided block size. 116 func NewBlockBufferTableSink(blockSize int) *BlockBufferByteSink { 117 block := make([]byte, 0, blockSize) 118 return &BlockBufferByteSink{blockSize, 0, [][]byte{block}} 119 } 120 121 // Write writes a byte array to the sink. 122 func (sink *BlockBufferByteSink) Write(src []byte) (int, error) { 123 srcLen := len(src) 124 currBlockIdx := len(sink.blocks) - 1 125 currBlock := sink.blocks[currBlockIdx] 126 remaining := cap(currBlock) - len(currBlock) 127 128 if remaining >= srcLen { 129 currBlock = append(currBlock, src...) 130 sink.blocks[currBlockIdx] = currBlock 131 } else { 132 if remaining > 0 { 133 currBlock = append(currBlock, src[:remaining]...) 134 sink.blocks[currBlockIdx] = currBlock 135 } 136 137 newBlock := make([]byte, 0, sink.blockSize) 138 newBlock = append(newBlock, src[remaining:]...) 139 sink.blocks = append(sink.blocks, newBlock) 140 } 141 142 sink.pos += uint64(srcLen) 143 return srcLen, nil 144 } 145 146 // Flush writes all the data that was written to the ByteSink to the supplied writer 147 func (sink *BlockBufferByteSink) Flush(wr io.Writer) (err error) { 148 return iohelp.WriteAll(wr, sink.blocks...) 149 } 150 151 // FlushToFile writes all the data that was written to the ByteSink to a file at the given path 152 func (sink *BlockBufferByteSink) FlushToFile(path string) (err error) { 153 return flushSinkToFile(sink, path) 154 } 155 156 // BufferedFileByteSink is a ByteSink implementation that buffers some amount of data before it passes it 157 // to a background writing thread to be flushed to a file. 158 type BufferedFileByteSink struct { 159 blockSize int 160 pos uint64 161 currentBlock []byte 162 163 writeCh chan []byte 164 ae *atomicerr.AtomicError 165 wg *sync.WaitGroup 166 167 wr io.WriteCloser 168 path string 169 } 170 171 // NewBufferedFileByteSink creates a BufferedFileByteSink 172 func NewBufferedFileByteSink(tempDir string, blockSize, chBufferSize int) (*BufferedFileByteSink, error) { 173 f, err := tempfiles.MovableTempFileProvider.NewFile(tempDir, "buffered_file_byte_sink_") 174 175 if err != nil { 176 return nil, err 177 } 178 179 sink := &BufferedFileByteSink{ 180 blockSize: blockSize, 181 currentBlock: make([]byte, blockSize), 182 writeCh: make(chan []byte, chBufferSize), 183 ae: atomicerr.New(), 184 wg: &sync.WaitGroup{}, 185 wr: f, 186 path: f.Name(), 187 } 188 189 sink.wg.Add(1) 190 go func() { 191 defer sink.wg.Done() 192 sink.backgroundWrite() 193 }() 194 195 return sink, nil 196 } 197 198 // Write writes a byte array to the sink. 199 func (sink *BufferedFileByteSink) Write(src []byte) (int, error) { 200 srcLen := len(src) 201 remaining := cap(sink.currentBlock) - len(sink.currentBlock) 202 203 if remaining >= srcLen { 204 sink.currentBlock = append(sink.currentBlock, src...) 205 206 if remaining == srcLen { 207 sink.writeCh <- sink.currentBlock 208 sink.currentBlock = nil 209 } 210 } else { 211 if remaining > 0 { 212 sink.currentBlock = append(sink.currentBlock, src[:remaining]...) 213 sink.writeCh <- sink.currentBlock 214 } 215 216 newBlock := make([]byte, 0, sink.blockSize) 217 newBlock = append(newBlock, src[remaining:]...) 218 sink.currentBlock = newBlock 219 } 220 221 sink.pos += uint64(srcLen) 222 return srcLen, nil 223 } 224 225 func (sink *BufferedFileByteSink) backgroundWrite() { 226 var err error 227 for buff := range sink.writeCh { 228 if err != nil { 229 continue // drain 230 } 231 232 err = iohelp.WriteAll(sink.wr, buff) 233 sink.ae.SetIfError(err) 234 } 235 236 err = sink.wr.Close() 237 sink.ae.SetIfError(err) 238 } 239 240 // Flush writes all the data that was written to the ByteSink to the supplied writer 241 func (sink *BufferedFileByteSink) Flush(wr io.Writer) (err error) { 242 toWrite := len(sink.currentBlock) 243 if toWrite > 0 { 244 sink.writeCh <- sink.currentBlock[:toWrite] 245 } 246 247 close(sink.writeCh) 248 sink.wg.Wait() 249 250 if err := sink.ae.Get(); err != nil { 251 return err 252 } 253 254 var f *os.File 255 f, err = os.Open(sink.path) 256 257 if err != nil { 258 return err 259 } 260 261 defer func() { 262 closeErr := f.Close() 263 264 if err == nil { 265 err = closeErr 266 } 267 }() 268 269 _, err = io.Copy(wr, f) 270 271 return err 272 } 273 274 // FlushToFile writes all the data that was written to the ByteSink to a file at the given path 275 func (sink *BufferedFileByteSink) FlushToFile(path string) (err error) { 276 toWrite := len(sink.currentBlock) 277 if toWrite > 0 { 278 sink.writeCh <- sink.currentBlock[:toWrite] 279 } 280 281 close(sink.writeCh) 282 sink.wg.Wait() 283 284 if err := sink.ae.Get(); err != nil { 285 return err 286 } 287 288 return os.Rename(sink.path, path) 289 } 290 291 // HashingByteSink is a ByteSink that keeps an md5 hash of all the data written to it. 292 type HashingByteSink struct { 293 backingSink ByteSink 294 hasher hash.Hash 295 size uint64 296 } 297 298 func NewHashingByteSink(backingSink ByteSink) *HashingByteSink { 299 return &HashingByteSink{backingSink: backingSink, hasher: md5.New(), size: 0} 300 } 301 302 // Write writes a byte array to the sink. 303 func (sink *HashingByteSink) Write(src []byte) (int, error) { 304 nWritten, err := sink.backingSink.Write(src) 305 306 if err != nil { 307 return 0, err 308 } 309 310 nHashed, err := sink.hasher.Write(src[:nWritten]) 311 312 if err != nil { 313 return 0, err 314 } else if nWritten != nHashed { 315 return 0, errors.New("failed to hash all the data that was written to the byte sink.") 316 } 317 318 sink.size += uint64(nWritten) 319 320 return nWritten, nil 321 } 322 323 // Flush writes all the data that was written to the ByteSink to the supplied writer 324 func (sink *HashingByteSink) Flush(wr io.Writer) error { 325 return sink.backingSink.Flush(wr) 326 } 327 328 // FlushToFile writes all the data that was written to the ByteSink to a file at the given path 329 func (sink *HashingByteSink) FlushToFile(path string) error { 330 return sink.backingSink.FlushToFile(path) 331 } 332 333 // GetMD5 gets the MD5 hash of all the bytes written to the sink 334 func (sink *HashingByteSink) GetMD5() []byte { 335 return sink.hasher.Sum(nil) 336 } 337 338 // Size gets the number of bytes written to the sink 339 func (sink *HashingByteSink) Size() uint64 { 340 return sink.size 341 }