github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/byte_sink.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package nbs
    16  
    17  import (
    18  	"crypto/md5"
    19  	"errors"
    20  	"hash"
    21  	"io"
    22  	"os"
    23  	"sync"
    24  
    25  	"github.com/dolthub/dolt/go/store/util/tempfiles"
    26  
    27  	"github.com/dolthub/dolt/go/libraries/utils/iohelp"
    28  	"github.com/dolthub/dolt/go/store/atomicerr"
    29  )
    30  
    31  func flushSinkToFile(sink ByteSink, path string) (err error) {
    32  	var f *os.File
    33  	f, err = os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.ModePerm)
    34  
    35  	if err != nil {
    36  		return err
    37  	}
    38  
    39  	defer func() {
    40  		closeErr := f.Close()
    41  
    42  		if err == nil {
    43  			err = closeErr
    44  		}
    45  	}()
    46  
    47  	err = sink.Flush(f)
    48  	return err
    49  }
    50  
// A ByteSink is an interface for writing bytes which can later be flushed to a writer or to a file.
type ByteSink interface {
	io.Writer

	// Flush writes all the data that was written to the ByteSink to the supplied writer.
	Flush(wr io.Writer) error

	// FlushToFile writes all the data that was written to the ByteSink to a file at the given path.
	FlushToFile(path string) error
}
    61  
// ErrBufferFull is returned by FixedBufferByteSink.Write when the data written is larger than the space left in the
// fixed buffer.
var ErrBufferFull = errors.New("buffer full")
    64  
// FixedBufferByteSink is a ByteSink implementation with a buffer whose size will not change.  Writing more
// data than the fixed buffer can hold will result in an error (ErrBufferFull).
type FixedBufferByteSink struct {
	buff []byte // caller-supplied backing storage that Write copies into
	pos  uint64 // number of bytes written so far; also the offset in buff where the next write lands
}
    71  
    72  // NewFixedBufferTableSink creates a FixedBufferTableSink which will use the supplied buffer
    73  func NewFixedBufferTableSink(buff []byte) *FixedBufferByteSink {
    74  	if len(buff) == 0 {
    75  		panic("must provide a buffer")
    76  	}
    77  
    78  	return &FixedBufferByteSink{buff: buff}
    79  }
    80  
    81  // Write writes a byte array to the sink.
    82  func (sink *FixedBufferByteSink) Write(src []byte) (int, error) {
    83  	dest := sink.buff[sink.pos:]
    84  	destLen := len(dest)
    85  	srcLen := len(src)
    86  
    87  	if destLen < srcLen {
    88  		return 0, ErrBufferFull
    89  	}
    90  
    91  	copy(dest, src)
    92  
    93  	sink.pos += uint64(srcLen)
    94  	return srcLen, nil
    95  }
    96  
    97  // Flush writes all the data that was written to the ByteSink to the supplied writer
    98  func (sink *FixedBufferByteSink) Flush(wr io.Writer) error {
    99  	return iohelp.WriteAll(wr, sink.buff[:sink.pos])
   100  }
   101  
   102  // FlushToFile writes all the data that was written to the ByteSink to a file at the given path
   103  func (sink *FixedBufferByteSink) FlushToFile(path string) (err error) {
   104  	return flushSinkToFile(sink, path)
   105  }
   106  
// BlockBufferByteSink allocates blocks of data with a given block size to store the bytes written to the sink. New
// blocks are allocated as needed in order to handle all the data of the Write calls.
type BlockBufferByteSink struct {
	blockSize int      // capacity requested for every newly allocated block
	pos       uint64   // total number of bytes accepted by Write
	blocks    [][]byte // the written data in order; only the last block is appended to
}
   114  
   115  // NewBlockBufferTableSink creates a BlockBufferByteSink with the provided block size.
   116  func NewBlockBufferTableSink(blockSize int) *BlockBufferByteSink {
   117  	block := make([]byte, 0, blockSize)
   118  	return &BlockBufferByteSink{blockSize, 0, [][]byte{block}}
   119  }
   120  
   121  // Write writes a byte array to the sink.
   122  func (sink *BlockBufferByteSink) Write(src []byte) (int, error) {
   123  	srcLen := len(src)
   124  	currBlockIdx := len(sink.blocks) - 1
   125  	currBlock := sink.blocks[currBlockIdx]
   126  	remaining := cap(currBlock) - len(currBlock)
   127  
   128  	if remaining >= srcLen {
   129  		currBlock = append(currBlock, src...)
   130  		sink.blocks[currBlockIdx] = currBlock
   131  	} else {
   132  		if remaining > 0 {
   133  			currBlock = append(currBlock, src[:remaining]...)
   134  			sink.blocks[currBlockIdx] = currBlock
   135  		}
   136  
   137  		newBlock := make([]byte, 0, sink.blockSize)
   138  		newBlock = append(newBlock, src[remaining:]...)
   139  		sink.blocks = append(sink.blocks, newBlock)
   140  	}
   141  
   142  	sink.pos += uint64(srcLen)
   143  	return srcLen, nil
   144  }
   145  
   146  // Flush writes all the data that was written to the ByteSink to the supplied writer
   147  func (sink *BlockBufferByteSink) Flush(wr io.Writer) (err error) {
   148  	return iohelp.WriteAll(wr, sink.blocks...)
   149  }
   150  
   151  // FlushToFile writes all the data that was written to the ByteSink to a file at the given path
   152  func (sink *BlockBufferByteSink) FlushToFile(path string) (err error) {
   153  	return flushSinkToFile(sink, path)
   154  }
   155  
// BufferedFileByteSink is a ByteSink implementation that buffers some amount of data before it passes it
// to a background writing thread to be flushed to a file.
type BufferedFileByteSink struct {
	blockSize    int    // capacity requested for every newly allocated block
	pos          uint64 // total number of bytes accepted by Write
	currentBlock []byte // block being filled; not yet handed to the background writer

	writeCh chan []byte            // blocks queued for the background writer; closed by Flush / FlushToFile
	ae      *atomicerr.AtomicError // receives the errors hit by the background writer
	wg      *sync.WaitGroup        // waited on by Flush / FlushToFile until the background writer has exited

	wr   io.WriteCloser // the temp file the background writer writes to and closes
	path string         // name of that temp file, as reported by its Name method
}
   170  
   171  // NewBufferedFileByteSink creates a BufferedFileByteSink
   172  func NewBufferedFileByteSink(tempDir string, blockSize, chBufferSize int) (*BufferedFileByteSink, error) {
   173  	f, err := tempfiles.MovableTempFileProvider.NewFile(tempDir, "buffered_file_byte_sink_")
   174  
   175  	if err != nil {
   176  		return nil, err
   177  	}
   178  
   179  	sink := &BufferedFileByteSink{
   180  		blockSize:    blockSize,
   181  		currentBlock: make([]byte, blockSize),
   182  		writeCh:      make(chan []byte, chBufferSize),
   183  		ae:           atomicerr.New(),
   184  		wg:           &sync.WaitGroup{},
   185  		wr:           f,
   186  		path:         f.Name(),
   187  	}
   188  
   189  	sink.wg.Add(1)
   190  	go func() {
   191  		defer sink.wg.Done()
   192  		sink.backgroundWrite()
   193  	}()
   194  
   195  	return sink, nil
   196  }
   197  
   198  // Write writes a byte array to the sink.
   199  func (sink *BufferedFileByteSink) Write(src []byte) (int, error) {
   200  	srcLen := len(src)
   201  	remaining := cap(sink.currentBlock) - len(sink.currentBlock)
   202  
   203  	if remaining >= srcLen {
   204  		sink.currentBlock = append(sink.currentBlock, src...)
   205  
   206  		if remaining == srcLen {
   207  			sink.writeCh <- sink.currentBlock
   208  			sink.currentBlock = nil
   209  		}
   210  	} else {
   211  		if remaining > 0 {
   212  			sink.currentBlock = append(sink.currentBlock, src[:remaining]...)
   213  			sink.writeCh <- sink.currentBlock
   214  		}
   215  
   216  		newBlock := make([]byte, 0, sink.blockSize)
   217  		newBlock = append(newBlock, src[remaining:]...)
   218  		sink.currentBlock = newBlock
   219  	}
   220  
   221  	sink.pos += uint64(srcLen)
   222  	return srcLen, nil
   223  }
   224  
   225  func (sink *BufferedFileByteSink) backgroundWrite() {
   226  	var err error
   227  	for buff := range sink.writeCh {
   228  		if err != nil {
   229  			continue // drain
   230  		}
   231  
   232  		err = iohelp.WriteAll(sink.wr, buff)
   233  		sink.ae.SetIfError(err)
   234  	}
   235  
   236  	err = sink.wr.Close()
   237  	sink.ae.SetIfError(err)
   238  }
   239  
   240  // Flush writes all the data that was written to the ByteSink to the supplied writer
   241  func (sink *BufferedFileByteSink) Flush(wr io.Writer) (err error) {
   242  	toWrite := len(sink.currentBlock)
   243  	if toWrite > 0 {
   244  		sink.writeCh <- sink.currentBlock[:toWrite]
   245  	}
   246  
   247  	close(sink.writeCh)
   248  	sink.wg.Wait()
   249  
   250  	if err := sink.ae.Get(); err != nil {
   251  		return err
   252  	}
   253  
   254  	var f *os.File
   255  	f, err = os.Open(sink.path)
   256  
   257  	if err != nil {
   258  		return err
   259  	}
   260  
   261  	defer func() {
   262  		closeErr := f.Close()
   263  
   264  		if err == nil {
   265  			err = closeErr
   266  		}
   267  	}()
   268  
   269  	_, err = io.Copy(wr, f)
   270  
   271  	return err
   272  }
   273  
   274  // FlushToFile writes all the data that was written to the ByteSink to a file at the given path
   275  func (sink *BufferedFileByteSink) FlushToFile(path string) (err error) {
   276  	toWrite := len(sink.currentBlock)
   277  	if toWrite > 0 {
   278  		sink.writeCh <- sink.currentBlock[:toWrite]
   279  	}
   280  
   281  	close(sink.writeCh)
   282  	sink.wg.Wait()
   283  
   284  	if err := sink.ae.Get(); err != nil {
   285  		return err
   286  	}
   287  
   288  	return os.Rename(sink.path, path)
   289  }
   290  
// HashingByteSink is a ByteSink that keeps an md5 hash of all the data written to it.
type HashingByteSink struct {
	backingSink ByteSink  // sink that actually stores the data; Flush and FlushToFile delegate to it
	hasher      hash.Hash // running md5 hash of the bytes accepted by the backing sink
	size        uint64    // number of bytes written and hashed so far
}
   297  
   298  func NewHashingByteSink(backingSink ByteSink) *HashingByteSink {
   299  	return &HashingByteSink{backingSink: backingSink, hasher: md5.New(), size: 0}
   300  }
   301  
   302  // Write writes a byte array to the sink.
   303  func (sink *HashingByteSink) Write(src []byte) (int, error) {
   304  	nWritten, err := sink.backingSink.Write(src)
   305  
   306  	if err != nil {
   307  		return 0, err
   308  	}
   309  
   310  	nHashed, err := sink.hasher.Write(src[:nWritten])
   311  
   312  	if err != nil {
   313  		return 0, err
   314  	} else if nWritten != nHashed {
   315  		return 0, errors.New("failed to hash all the data that was written to the byte sink.")
   316  	}
   317  
   318  	sink.size += uint64(nWritten)
   319  
   320  	return nWritten, nil
   321  }
   322  
   323  // Flush writes all the data that was written to the ByteSink to the supplied writer
   324  func (sink *HashingByteSink) Flush(wr io.Writer) error {
   325  	return sink.backingSink.Flush(wr)
   326  }
   327  
   328  // FlushToFile writes all the data that was written to the ByteSink to a file at the given path
   329  func (sink *HashingByteSink) FlushToFile(path string) error {
   330  	return sink.backingSink.FlushToFile(path)
   331  }
   332  
   333  // GetMD5 gets the MD5 hash of all the bytes written to the sink
   334  func (sink *HashingByteSink) GetMD5() []byte {
   335  	return sink.hasher.Sum(nil)
   336  }
   337  
   338  // Size gets the number of bytes written to the sink
   339  func (sink *HashingByteSink) Size() uint64 {
   340  	return sink.size
   341  }