github.com/m3db/m3@v1.5.0/src/dbnode/persist/fs/streaming_write.go

github.com/m3db/m3@v1.5.0/src/dbnode/persist/fs/streaming_write.go (about)

     1  // Copyright (c) 2020 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package fs
    22  
    23  import (
    24  	"bytes"
    25  	"fmt"
    26  	"io"
    27  	"math"
    28  	"time"
    29  
    30  	"github.com/m3db/m3/src/dbnode/persist"
    31  	"github.com/m3db/m3/src/dbnode/ts"
    32  	"github.com/m3db/m3/src/x/ident"
    33  	xtime "github.com/m3db/m3/src/x/time"
    34  
    35  	"github.com/m3db/bloom/v4"
    36  )
    37  
// StreamingWriter writes into data fileset without intermediate buffering.
// Writes must be lexicographically ordered by the id.
//
// The streamingWriter implementation in this file sets up its per-fileset
// state (bloom filter, summary interval, counters) in Open, so Open is
// expected to be called before WriteAll.
type StreamingWriter interface {
	// Close comes from io.Closer. In the streamingWriter implementation in
	// this file it writes the bloom filter and info file contents, closes the
	// underlying files, and finally writes the checkpoint file.
	io.Closer

	// Open opens the files for writing data to the given shard in the given namespace.
	Open(opts StreamingWriterOpenOptions) error

	// WriteAll will write the id and all byte slices and returns an error on a write error.
	// Callers should call this method with strictly lexicographically increasing ID values.
	// The streamingWriter implementation in this file returns an error for an
	// id that compares less than or equal to the previously recorded one, and
	// writes nothing (returning nil) when the data slices are all empty.
	WriteAll(id ident.BytesID, encodedTags ts.EncodedTags, data [][]byte, dataChecksum uint32) error

	// Abort closes the file descriptors without writing out a checkpoint file.
	Abort() error
}
    53  
// StreamingWriterOpenOptions is the options for the StreamingWriter.
type StreamingWriterOpenOptions struct {
	// NamespaceID, ShardID, BlockStart and VolumeIndex are copied by
	// streamingWriter.Open into the FileSetFileIdentifier of the fileset to
	// write. BlockSize is passed through to the underlying writer's open
	// options unchanged.
	NamespaceID ident.ID
	ShardID     uint32
	BlockStart  xtime.UnixNano
	BlockSize   time.Duration
	VolumeIndex int

	// PlannedRecordsCount is an estimate of the number of series to be written.
	// Must be greater than 0.
	// streamingWriter.Open uses it to size the bloom filter and to derive how
	// often an index summary entry is written, and rejects a value of zero.
	PlannedRecordsCount uint
}
    66  
// streamingWriter is the StreamingWriter implementation in this file. It
// drives an underlying *writer directly and keeps the bookkeeping needed to
// emit index, summary and bloom filter data as entries arrive.
//
// Fields:
//   - writer: the underlying fileset writer all file output goes through.
//   - options: source of the bloom filter false positive percent and the
//     index summaries percent read in Open.
//   - currIdx: index assigned to the next entry that carries data; it is
//     incremented after each such entry and passed to the info file in Close.
//   - prevIDBytes: copy of the most recent ID given to WriteAll, used for the
//     ordering check.
//   - summaryEvery: a summary entry is written for entries whose index is a
//     multiple of this value.
//   - bloomFilter: created in Open, receives every indexed ID, and is set to
//     nil by Close after being written out.
//   - indexOffset: running total of the lengths returned when writing index
//     entries; recorded as the index file offset of summarized entries.
//   - summaries: number of summary entries written so far.
type streamingWriter struct {
	writer       *writer
	options      Options
	currIdx      int64
	prevIDBytes  []byte
	summaryEvery int64
	bloomFilter  *bloom.BloomFilter
	indexOffset  int64
	summaries    int
}
    77  
    78  // NewStreamingWriter creates a new streaming writer that writes into the data
    79  // fileset without buffering.
    80  func NewStreamingWriter(opts Options) (StreamingWriter, error) {
    81  	w, err := NewWriter(opts)
    82  	if err != nil {
    83  		return nil, err
    84  	}
    85  
    86  	return &streamingWriter{writer: w.(*writer), options: opts}, nil
    87  }
    88  
    89  func (w *streamingWriter) Open(opts StreamingWriterOpenOptions) error {
    90  	if opts.PlannedRecordsCount <= 0 {
    91  		return fmt.Errorf(
    92  			"PlannedRecordsCount must be positive, got %d", opts.PlannedRecordsCount)
    93  	}
    94  
    95  	writerOpts := DataWriterOpenOptions{
    96  		BlockSize: opts.BlockSize,
    97  		Identifier: FileSetFileIdentifier{
    98  			Namespace:   opts.NamespaceID,
    99  			Shard:       opts.ShardID,
   100  			BlockStart:  opts.BlockStart,
   101  			VolumeIndex: opts.VolumeIndex,
   102  		},
   103  		FileSetType: persist.FileSetFlushType,
   104  	}
   105  
   106  	plannedRecordsCount := opts.PlannedRecordsCount
   107  	if plannedRecordsCount == 0 {
   108  		plannedRecordsCount = 1
   109  	}
   110  	m, k := bloom.EstimateFalsePositiveRate(
   111  		plannedRecordsCount,
   112  		w.options.IndexBloomFilterFalsePositivePercent(),
   113  	)
   114  	w.bloomFilter = bloom.NewBloomFilter(m, k)
   115  
   116  	summariesApprox := float64(opts.PlannedRecordsCount) * w.options.IndexSummariesPercent()
   117  	w.summaryEvery = 1
   118  	if summariesApprox > 0 {
   119  		w.summaryEvery = int64(math.Max(1,
   120  			math.Floor(float64(opts.PlannedRecordsCount)/summariesApprox)))
   121  	}
   122  
   123  	if err := w.writer.Open(writerOpts); err != nil {
   124  		return err
   125  	}
   126  
   127  	w.currIdx = 0
   128  	w.indexOffset = 0
   129  	w.summaries = 0
   130  	w.prevIDBytes = nil
   131  
   132  	return nil
   133  }
   134  
   135  func (w *streamingWriter) WriteAll(
   136  	id ident.BytesID,
   137  	encodedTags ts.EncodedTags,
   138  	data [][]byte,
   139  	dataChecksum uint32,
   140  ) error {
   141  	// Need to check if w.prevIDBytes != nil, otherwise we can never write an empty string ID
   142  	if w.prevIDBytes != nil && bytes.Compare(id, w.prevIDBytes) <= 0 {
   143  		return fmt.Errorf("ids must be written in lexicographic order, no duplicates, but got %s followed by %s", w.prevIDBytes, id)
   144  	}
   145  	w.prevIDBytes = append(w.prevIDBytes[:0], id...)
   146  
   147  	entry, ok, err := w.writeData(data, dataChecksum)
   148  	if err != nil {
   149  		return err
   150  	}
   151  
   152  	if ok {
   153  		return w.writeIndexRelated(id, encodedTags, entry)
   154  	}
   155  
   156  	return nil
   157  }
   158  
   159  func (w *streamingWriter) writeData(
   160  	data [][]byte,
   161  	dataChecksum uint32,
   162  ) (indexEntry, bool, error) {
   163  	var size int64
   164  	for _, d := range data {
   165  		size += int64(len(d))
   166  	}
   167  	if size == 0 {
   168  		return indexEntry{}, false, nil
   169  	}
   170  
   171  	entry := indexEntry{
   172  		index:          w.currIdx,
   173  		dataFileOffset: w.writer.currOffset,
   174  		size:           uint32(size),
   175  		dataChecksum:   dataChecksum,
   176  	}
   177  	for _, d := range data {
   178  		if err := w.writer.writeData(d); err != nil {
   179  			return indexEntry{}, false, err
   180  		}
   181  	}
   182  
   183  	w.currIdx++
   184  
   185  	return entry, true, nil
   186  }
   187  
   188  func (w *streamingWriter) writeIndexRelated(
   189  	id ident.BytesID,
   190  	encodedTags ts.EncodedTags,
   191  	entry indexEntry,
   192  ) error {
   193  	// Add to the bloom filter, note this must be zero alloc or else this will
   194  	// cause heavy GC churn as we flush millions of series at end of each
   195  	// time window
   196  	w.bloomFilter.Add(id)
   197  
   198  	writeSummary := w.summaryEvery == 0 || entry.index%w.summaryEvery == 0
   199  	if writeSummary {
   200  		// Capture the offset for when we write this summary back, only capture
   201  		// for every summary we'll actually write to avoid a few memcopies
   202  		entry.indexFileOffset = w.indexOffset
   203  	}
   204  
   205  	length, err := w.writer.writeIndexWithEncodedTags(id, encodedTags, entry)
   206  	if err != nil {
   207  		return err
   208  	}
   209  	w.indexOffset += length
   210  
   211  	if writeSummary {
   212  		err = w.writer.writeSummariesEntry(id, entry)
   213  		if err != nil {
   214  			return err
   215  		}
   216  		w.summaries++
   217  	}
   218  
   219  	return nil
   220  }
   221  
   222  func (w *streamingWriter) Close() error {
   223  	// Write the bloom filter bitset out
   224  	if err := w.writer.writeBloomFilterFileContents(w.bloomFilter); err != nil {
   225  		return err
   226  	}
   227  
   228  	if err := w.writer.writeInfoFileContents(w.bloomFilter, w.summaries, w.currIdx); err != nil {
   229  		return err
   230  	}
   231  
   232  	w.bloomFilter = nil
   233  
   234  	err := w.writer.closeWOIndex()
   235  	if err != nil {
   236  		w.writer.err = err
   237  		return err
   238  	}
   239  
   240  	// NB(xichen): only write out the checkpoint file if there are no errors
   241  	// encountered between calling writer.Open() and writer.Close().
   242  	if err := writeCheckpointFile(
   243  		w.writer.checkpointFilePath,
   244  		w.writer.digestFdWithDigestContents.Digest().Sum32(),
   245  		w.writer.digestBuf,
   246  		w.writer.newFileMode,
   247  	); err != nil {
   248  		w.writer.err = err
   249  		return err
   250  	}
   251  
   252  	return nil
   253  }
   254  
   255  func (w *streamingWriter) Abort() error {
   256  	err := w.writer.closeWOIndex()
   257  	if err != nil {
   258  		w.writer.err = err
   259  		return err
   260  	}
   261  
   262  	return nil
   263  }