github.com/m3db/m3@v1.5.0/src/dbnode/persist/fs/commitlog/writer.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package commitlog
    22  
    23  import (
    24  	"bufio"
    25  	"encoding/binary"
    26  	"errors"
    27  	"io"
    28  	"os"
    29  
    30  	"github.com/m3db/bitset"
    31  	"github.com/m3db/m3/src/dbnode/digest"
    32  	"github.com/m3db/m3/src/dbnode/persist"
    33  	"github.com/m3db/m3/src/dbnode/persist/fs"
    34  	"github.com/m3db/m3/src/dbnode/persist/fs/msgpack"
    35  	"github.com/m3db/m3/src/dbnode/persist/schema"
    36  	"github.com/m3db/m3/src/dbnode/ts"
    37  	"github.com/m3db/m3/src/x/clock"
    38  	xos "github.com/m3db/m3/src/x/os"
    39  	xtime "github.com/m3db/m3/src/x/time"
    40  )
    41  
const (
	// The lengths to reserve for a chunk header:
	// - size uint32
	// - checksumSize uint32
	// - checksumData uint32
	chunkHeaderSizeLen         = 4
	chunkHeaderChecksumSizeLen = 4
	chunkHeaderChecksumDataLen = 4
	chunkHeaderLen             = chunkHeaderSizeLen +
		chunkHeaderChecksumSizeLen +
		chunkHeaderChecksumDataLen

	// Initial capacity (in bits) of the bitset that tracks which series
	// have already had their metadata written to the current commit log.
	defaultBitSetLength = 65536

	// Initial capacity of the reusable msgpack encode buffers; Open shrinks
	// them back to this size if an exceptionally large write grew them.
	defaultEncoderBuffSize = 16384
)
    58  
var (
	// errCommitLogWriterAlreadyOpen is returned by Open on an already-open writer.
	errCommitLogWriterAlreadyOpen = errors.New("commit log writer already open")
	// NOTE(review): not referenced anywhere in this file — presumably used
	// elsewhere in the package; verify before removing.
	errTagEncoderDataNotAvailable = errors.New("tag iterator data not available")

	// Byte order used for all fixed-width chunk header fields.
	endianness = binary.LittleEndian
)
    65  
// commitLogWriter appends series entries to a single commit log file.
type commitLogWriter interface {
	// Open opens the commit log for writing data
	Open() (persist.CommitLogFile, error)

	// Write will write an entry in the commit log for a given series
	Write(
		series ts.Series,
		datapoint ts.Datapoint,
		unit xtime.Unit,
		annotation ts.Annotation,
	) error

	// Flush will flush any data in the writers buffer to the chunkWriter, essentially forcing
	// a new chunk to be created. Optionally forces the data to be FSync'd to disk.
	Flush(sync bool) error

	// setOnFlush will provide/override a callback that will be called after successful flush calls.
	// Implementors MAY choose to not implement such a callback mechanism, however if
	// such a mechanism is implemented they SHOULD properly implement this method.
	setOnFlush(func(err error))

	// Close the writer, flushing and syncing any buffered data first.
	Close() error
}
    90  
// chunkWriter writes whole chunks (header plus payload) to an underlying
// file; each Write call emits exactly one chunk.
type chunkWriter interface {
	io.Writer

	// reset retargets the writer at a newly-opened file.
	reset(f xos.File)
	// setOnFlush overrides the callback fired after each chunk write attempt.
	setOnFlush(func(err error))
	// close closes the underlying file and marks the writer closed.
	close() error
	// isOpen reports whether a file is currently attached.
	isOpen() bool
	// sync fsyncs the underlying file.
	sync() error
}
   100  
// flushFn is invoked after each chunk write attempt with the result
// (nil on success) of writing/syncing the chunk to the underlying file.
type flushFn func(err error)
   102  
// writer implements commitLogWriter: entries are msgpack-encoded into
// reusable buffers, uvarint length-prefixed, accumulated in a bufio.Writer
// and flushed to disk as checksummed chunks via chunkWriter.
type writer struct {
	filePathPrefix      string
	newFileMode         os.FileMode
	newDirectoryMode    os.FileMode
	nowFn               clock.NowFn
	chunkWriter         chunkWriter
	chunkReserveHeader  []byte // NOTE(review): allocated in newCommitLogWriter but not read in this file — confirm use elsewhere
	buffer              *bufio.Writer // frames buffered here; each Flush becomes one chunk
	sizeBuffer          []byte        // scratch for uvarint length prefixes
	seen                *bitset.BitSet // unique indexes of series whose metadata was already logged
	logEncoder          *msgpack.Encoder
	logEncoderBuff      []byte // reusable buffer for encoding log entries
	metadataEncoderBuff []byte // reusable buffer for encoding series metadata
	opts                Options
}
   118  
   119  func newCommitLogWriter(
   120  	flushFn flushFn,
   121  	opts Options,
   122  ) commitLogWriter {
   123  	shouldFsync := opts.Strategy() == StrategyWriteWait
   124  
   125  	return &writer{
   126  		filePathPrefix:      opts.FilesystemOptions().FilePathPrefix(),
   127  		newFileMode:         opts.FilesystemOptions().NewFileMode(),
   128  		newDirectoryMode:    opts.FilesystemOptions().NewDirectoryMode(),
   129  		nowFn:               opts.ClockOptions().NowFn(),
   130  		chunkWriter:         newChunkWriter(flushFn, shouldFsync),
   131  		chunkReserveHeader:  make([]byte, chunkHeaderLen),
   132  		buffer:              bufio.NewWriterSize(nil, opts.FlushSize()),
   133  		sizeBuffer:          make([]byte, binary.MaxVarintLen64),
   134  		seen:                bitset.NewBitSet(defaultBitSetLength),
   135  		logEncoder:          msgpack.NewEncoder(),
   136  		logEncoderBuff:      make([]byte, 0, defaultEncoderBuffSize),
   137  		metadataEncoderBuff: make([]byte, 0, defaultEncoderBuffSize),
   138  		opts:                opts,
   139  	}
   140  }
   141  
// Open creates the next commit log file on disk, writes the log info header
// (carrying the file's index) as the first entry and leaves the writer ready
// for Write calls. Opening an already-open writer is an error.
func (w *writer) Open() (persist.CommitLogFile, error) {
	if w.isOpen() {
		return persist.CommitLogFile{}, errCommitLogWriterAlreadyOpen
	}

	// Reset buffers since they will grow 2x on demand so we want to make sure that
	// one exceptionally large write does not cause them to remain oversized forever.
	if cap(w.logEncoderBuff) != defaultEncoderBuffSize {
		w.logEncoderBuff = make([]byte, 0, defaultEncoderBuffSize)
	}
	if cap(w.metadataEncoderBuff) != defaultEncoderBuffSize {
		w.metadataEncoderBuff = make([]byte, 0, defaultEncoderBuffSize)
	}

	commitLogsDir := fs.CommitLogsDirPath(w.filePathPrefix)
	if err := os.MkdirAll(commitLogsDir, w.newDirectoryMode); err != nil {
		return persist.CommitLogFile{}, err
	}

	// Determine the path and index of the next commit log file to create.
	filePath, index, err := NextFile(w.opts)
	if err != nil {
		return persist.CommitLogFile{}, err
	}
	logInfo := schema.LogInfo{
		Index: int64(index),
	}
	// Encode the log info header before touching the filesystem further so
	// an encoding failure leaves no file descriptor behind.
	w.logEncoder.Reset()
	if err := w.logEncoder.EncodeLogInfo(logInfo); err != nil {
		return persist.CommitLogFile{}, err
	}
	fd, err := fs.OpenWritable(filePath, w.newFileMode)
	if err != nil {
		return persist.CommitLogFile{}, err
	}

	w.chunkWriter.reset(fd)
	w.buffer.Reset(w.chunkWriter)
	// The log info blob is the first entry in every commit log file. On
	// failure, best-effort close to release the fd (Close error discarded).
	if err := w.write(w.logEncoder.Bytes()); err != nil {
		w.Close()
		return persist.CommitLogFile{}, err
	}

	return persist.CommitLogFile{
		FilePath: filePath,
		Index:    int64(index),
	}, nil
}
   189  
   190  func (w *writer) isOpen() bool {
   191  	return w.chunkWriter.isOpen()
   192  }
   193  
   194  func (w *writer) Write(
   195  	series ts.Series,
   196  	datapoint ts.Datapoint,
   197  	unit xtime.Unit,
   198  	annotation ts.Annotation,
   199  ) error {
   200  	var logEntry schema.LogEntry
   201  	logEntry.Create = w.nowFn().UnixNano()
   202  	logEntry.Index = series.UniqueIndex
   203  
   204  	seen := w.seen.Test(uint(series.UniqueIndex))
   205  	if !seen {
   206  		// If "idx" likely hasn't been written to commit log
   207  		// yet we need to include series metadata
   208  		var metadata schema.LogMetadata
   209  		metadata.ID = series.ID.Bytes()
   210  		metadata.Namespace = series.Namespace.Bytes()
   211  		metadata.Shard = series.Shard
   212  		metadata.EncodedTags = series.EncodedTags
   213  
   214  		var err error
   215  		w.metadataEncoderBuff, err = msgpack.EncodeLogMetadataFast(w.metadataEncoderBuff[:0], metadata)
   216  		if err != nil {
   217  			return err
   218  		}
   219  		logEntry.Metadata = w.metadataEncoderBuff
   220  	}
   221  
   222  	logEntry.Timestamp = int64(datapoint.TimestampNanos)
   223  	logEntry.Value = datapoint.Value
   224  	logEntry.Unit = uint32(unit)
   225  	logEntry.Annotation = annotation
   226  
   227  	var err error
   228  	w.logEncoderBuff, err = msgpack.EncodeLogEntryFast(w.logEncoderBuff[:0], logEntry)
   229  	if err != nil {
   230  		return err
   231  	}
   232  
   233  	if err := w.write(w.logEncoderBuff); err != nil {
   234  		return err
   235  	}
   236  
   237  	if !seen {
   238  		// Record we have written this series and metadata to this commit log
   239  		w.seen.Set(uint(series.UniqueIndex))
   240  	}
   241  	return nil
   242  }
   243  
   244  func (w *writer) Flush(sync bool) error {
   245  	err := w.buffer.Flush()
   246  	if err != nil {
   247  		return err
   248  	}
   249  
   250  	if !sync {
   251  		return nil
   252  	}
   253  
   254  	return w.sync()
   255  }
   256  
   257  func (w *writer) setOnFlush(f func(err error)) {
   258  	w.chunkWriter.setOnFlush(f)
   259  }
   260  
   261  func (w *writer) sync() error {
   262  	return w.chunkWriter.sync()
   263  }
   264  
   265  func (w *writer) Close() error {
   266  	if !w.isOpen() {
   267  		return nil
   268  	}
   269  
   270  	if err := w.Flush(true); err != nil {
   271  		return err
   272  	}
   273  	if err := w.chunkWriter.close(); err != nil {
   274  		return err
   275  	}
   276  
   277  	w.seen.ClearAll()
   278  	return nil
   279  }
   280  
   281  func (w *writer) write(data []byte) error {
   282  	dataLen := len(data)
   283  	sizeLen := binary.PutUvarint(w.sizeBuffer, uint64(dataLen))
   284  	totalLen := sizeLen + dataLen
   285  
   286  	// Avoid writing across the checksum boundary if we can avoid it
   287  	if w.buffer.Buffered() > 0 && totalLen > w.buffer.Available() {
   288  		if err := w.buffer.Flush(); err != nil {
   289  			return err
   290  		}
   291  		return w.write(data)
   292  	}
   293  
   294  	// Write size and then data
   295  	if _, err := w.buffer.Write(w.sizeBuffer[:sizeLen]); err != nil {
   296  		return err
   297  	}
   298  	_, err := w.buffer.Write(data)
   299  	return err
   300  }
   301  
// fsChunkWriter writes checksummed chunks directly to a file, optionally
// fsync'ing after every chunk, and notifies flushFn after each attempt.
type fsChunkWriter struct {
	fd      xos.File
	flushFn flushFn
	buff    []byte // reusable write buffer; first chunkHeaderLen bytes hold the header
	fsync   bool
}
   308  
   309  func newChunkWriter(flushFn flushFn, fsync bool) chunkWriter {
   310  	return &fsChunkWriter{
   311  		flushFn: flushFn,
   312  		buff:    make([]byte, chunkHeaderLen),
   313  		fsync:   fsync,
   314  	}
   315  }
   316  
   317  func (w *fsChunkWriter) reset(f xos.File) {
   318  	w.fd = f
   319  }
   320  
   321  func (w *fsChunkWriter) setOnFlush(f func(err error)) {
   322  	w.flushFn = f
   323  }
   324  
   325  func (w *fsChunkWriter) close() error {
   326  	err := w.fd.Close()
   327  	w.fd = nil
   328  	return err
   329  }
   330  
   331  func (w *fsChunkWriter) isOpen() bool {
   332  	return w.fd != nil
   333  }
   334  
   335  func (w *fsChunkWriter) sync() error {
   336  	return w.fd.Sync()
   337  }
   338  
   339  // Writes a custom header in front of p to a file and returns number of bytes of p successfully written to the file.
   340  // If the header or p is not fully written to the file, then this method returns number of bytes of p actually written
   341  // to the file and an error explaining the reason of failure to write fully to the file.
   342  func (w *fsChunkWriter) Write(p []byte) (int, error) {
   343  	size := len(p)
   344  
   345  	sizeStart, sizeEnd :=
   346  		0, chunkHeaderSizeLen
   347  	checksumSizeStart, checksumSizeEnd :=
   348  		sizeEnd, sizeEnd+chunkHeaderSizeLen
   349  	checksumDataStart, checksumDataEnd :=
   350  		checksumSizeEnd, checksumSizeEnd+chunkHeaderChecksumDataLen
   351  
   352  	// Write size
   353  	endianness.PutUint32(w.buff[sizeStart:sizeEnd], uint32(size))
   354  
   355  	// Calculate checksums
   356  	checksumSize := digest.Checksum(w.buff[sizeStart:sizeEnd])
   357  	checksumData := digest.Checksum(p)
   358  
   359  	// Write checksums
   360  	digest.
   361  		Buffer(w.buff[checksumSizeStart:checksumSizeEnd]).
   362  		WriteDigest(checksumSize)
   363  	digest.
   364  		Buffer(w.buff[checksumDataStart:checksumDataEnd]).
   365  		WriteDigest(checksumData)
   366  
   367  	// Combine buffers to reduce to a single syscall
   368  	w.buff = append(w.buff[:chunkHeaderLen], p...)
   369  
   370  	// Write contents to file descriptor
   371  	n, err := w.fd.Write(w.buff)
   372  	// Count bytes successfully written from slice p
   373  	pBytesWritten := n - chunkHeaderLen
   374  	if pBytesWritten < 0 {
   375  		pBytesWritten = 0
   376  	}
   377  
   378  	if err != nil {
   379  		w.flushFn(err)
   380  		return pBytesWritten, err
   381  	}
   382  
   383  	// Fsync if required to
   384  	if w.fsync {
   385  		err = w.sync()
   386  	}
   387  
   388  	// Fire flush callback
   389  	w.flushFn(err)
   390  	return pBytesWritten, err
   391  }