github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/persist/types.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package persist
    22  
    23  import (
    24  	"errors"
    25  	"fmt"
    26  
    27  	"github.com/m3db/m3/src/dbnode/namespace"
    28  	"github.com/m3db/m3/src/dbnode/ts"
    29  	"github.com/m3db/m3/src/m3ninx/doc"
    30  	"github.com/m3db/m3/src/m3ninx/index/segment"
    31  	idxpersist "github.com/m3db/m3/src/m3ninx/persist"
    32  	"github.com/m3db/m3/src/x/ident"
    33  	xtime "github.com/m3db/m3/src/x/time"
    34  
    35  	"github.com/pborman/uuid"
    36  )
    37  
// errReusableTagIteratorRequired is returned by
// Metadata.ResetOrReturnProvidedTagIterator when the caller passes a nil
// reusable tags iterator.
var errReusableTagIteratorRequired = errors.New("reusable tags iterator is required")
    39  
// Metadata is metadata for a time series. It can have several underlying
// sources: a doc.Metadata document (see NewMetadata), an ID with tags (see
// NewMetadataFromIDAndTags), or an ID with a tag iterator (see
// NewMetadataFromIDAndTagIterator). The opts field records which of the ID,
// tags and tag iterator should be released when Finalize is called.
type Metadata struct {
	metadata doc.Metadata
	id       ident.ID
	tags     ident.Tags
	tagsIter ident.TagIterator
	opts     MetadataOptions
}
    49  
// MetadataOptions is options to use when creating metadata.
// Each flag opts in to Metadata.Finalize releasing the corresponding
// resource: FinalizeID finalizes the ID, FinalizeTags finalizes the tags and
// FinalizeTagIterator closes the tag iterator.
type MetadataOptions struct {
	FinalizeID          bool
	FinalizeTags        bool
	FinalizeTagIterator bool
}
    56  
    57  // NewMetadata returns a new metadata struct from series metadata.
    58  // Note: because doc.Metadata has no pools for finalization we do not
    59  // take MetadataOptions here, in future if we have pools or
    60  // some other shared options that Metadata needs we will add it to this
    61  // constructor as well.
    62  func NewMetadata(metadata doc.Metadata) Metadata {
    63  	return Metadata{metadata: metadata}
    64  }
    65  
    66  // NewMetadataFromIDAndTags returns a new metadata struct from
    67  // explicit ID and tags.
    68  func NewMetadataFromIDAndTags(
    69  	id ident.ID,
    70  	tags ident.Tags,
    71  	opts MetadataOptions,
    72  ) Metadata {
    73  	return Metadata{
    74  		id:   id,
    75  		tags: tags,
    76  		opts: opts,
    77  	}
    78  }
    79  
    80  // NewMetadataFromIDAndTagIterator returns a new metadata struct from
    81  // explicit ID and tag iterator.
    82  func NewMetadataFromIDAndTagIterator(
    83  	id ident.ID,
    84  	tagsIter ident.TagIterator,
    85  	opts MetadataOptions,
    86  ) Metadata {
    87  	return Metadata{
    88  		id:       id,
    89  		tagsIter: tagsIter,
    90  		opts:     opts,
    91  	}
    92  }
    93  
    94  // BytesID returns the bytes ID of the series.
    95  func (m Metadata) BytesID() []byte {
    96  	if m.id != nil {
    97  		return m.id.Bytes()
    98  	}
    99  	return m.metadata.ID
   100  }
   101  
   102  // ResetOrReturnProvidedTagIterator returns a tag iterator
   103  // for the series, returning a direct ref to a provided tag
   104  // iterator or using the reusable tag iterator provided by the
   105  // callsite if it needs to iterate over tags or fields.
   106  func (m Metadata) ResetOrReturnProvidedTagIterator(
   107  	reusableTagsIterator ident.TagsIterator,
   108  ) (ident.TagIterator, error) {
   109  	if reusableTagsIterator == nil {
   110  		// Always check to make sure callsites won't
   111  		// get a bad allocation pattern of having
   112  		// to create one here inline if the metadata
   113  		// they are passing in suddenly changes from
   114  		// tagsIter to tags or fields with metadata.
   115  		return nil, errReusableTagIteratorRequired
   116  	}
   117  	if m.tagsIter != nil {
   118  		return m.tagsIter, nil
   119  	}
   120  
   121  	if len(m.tags.Values()) > 0 {
   122  		reusableTagsIterator.Reset(m.tags)
   123  		return reusableTagsIterator, reusableTagsIterator.Err()
   124  	}
   125  
   126  	reusableTagsIterator.ResetFields(m.metadata.Fields)
   127  	return reusableTagsIterator, reusableTagsIterator.Err()
   128  }
   129  
   130  // Finalize will finalize any resources that requested
   131  // to be finalized.
   132  func (m Metadata) Finalize() {
   133  	if m.opts.FinalizeID && m.id != nil {
   134  		m.id.Finalize()
   135  	}
   136  	if m.opts.FinalizeTags && m.tags.Values() != nil {
   137  		m.tags.Finalize()
   138  	}
   139  	if m.opts.FinalizeTagIterator && m.tagsIter != nil {
   140  		m.tagsIter.Close()
   141  	}
   142  }
   143  
// DataFn is a function that persists an M3DB segment, together with its
// checksum, for the series described by the given metadata.
type DataFn func(metadata Metadata, segment ts.Segment, checksum uint32) error
   146  
// DataCloser is a function that performs cleanup after persisting the data
// blocks for a (shard, blockStart) combination, returning any error
// encountered while doing so.
type DataCloser func() error
   150  
// DeferCloser is a function that returns a DataCloser which persists the
// data checkpoint file when called.
type DeferCloser func() (DataCloser, error)
   153  
// PreparedDataPersist is an object that holds a data persist function
// together with its closer and deferred closer.
type PreparedDataPersist struct {
	Persist    DataFn
	Close      DataCloser
	DeferClose DeferCloser
}
   160  
   161  // CommitLogFiles represents a slice of commitlog files.
   162  type CommitLogFiles []CommitLogFile
   163  
   164  // Contains returns a boolean indicating whether the CommitLogFiles slice
   165  // contains the provided CommitlogFile based on its path.
   166  func (c CommitLogFiles) Contains(path string) bool {
   167  	for _, f := range c {
   168  		if f.FilePath == path {
   169  			return true
   170  		}
   171  	}
   172  	return false
   173  }
   174  
   175  // CommitLogFile represents a commit log file and its associated metadata.
   176  type CommitLogFile struct {
   177  	FilePath string
   178  	Index    int64
   179  }
   180  
// IndexFn is a function that persists an m3ninx segment, supplied as a
// segment.Builder.
type IndexFn func(segment.Builder) error
   183  
// IndexCloser is a function that performs cleanup after persisting the index data
// block for a (namespace, blockStart) combination and returns the corresponding
// immutable segments.
type IndexCloser func() ([]segment.Segment, error)
   188  
// PreparedIndexPersist is an object that holds an index persist function and
// its closer.
type PreparedIndexPersist struct {
	Persist IndexFn
	Close   IndexCloser
}
   194  
// Manager manages the internals of persisting data onto the storage layer.
type Manager interface {
	// StartFlushPersist begins a data flush for a set of shards.
	StartFlushPersist() (FlushPreparer, error)

	// StartSnapshotPersist begins a snapshot for a set of shards.
	StartSnapshotPersist(snapshotID uuid.UUID) (SnapshotPreparer, error)

	// StartIndexPersist begins a flush for index data.
	StartIndexPersist() (IndexFlush, error)

	// Close closes the manager.
	// NOTE(review): the exact cleanup performed is defined by the
	// implementation and is not visible from this interface — confirm.
	Close()
}
   208  
// Preparer can generate a PreparedDataPersist object for writing data for
// a given (shard, blockStart) combination.
type Preparer interface {
	// PrepareData prepares writing data for a given (shard, blockStart) combination,
	// returning a PreparedDataPersist object and any error encountered during
	// preparation if any.
	PrepareData(opts DataPrepareOptions) (PreparedDataPersist, error)
}
   217  
// FlushPreparer is a persist flush cycle. Each shard and block start
// permutation needs to be explicitly prepared via the embedded Preparer.
type FlushPreparer interface {
	Preparer

	// DoneFlush marks the data flush as complete.
	DoneFlush() error
}
   226  
// SnapshotPreparer is a persist snapshot cycle. Each shard and block start
// permutation needs to be explicitly prepared via the embedded Preparer.
type SnapshotPreparer interface {
	Preparer

	// DoneSnapshot marks the snapshot identified by snapshotUUID as complete,
	// taking the commit log file associated with it.
	DoneSnapshot(snapshotUUID uuid.UUID, commitLogIdentifier CommitLogFile) error
}
   235  
// IndexFlush is a persist flush cycle. Each (namespace, blockStart)
// combination needs to be explicitly prepared.
type IndexFlush interface {
	// PrepareIndex prepares writing data for a given ns/blockStart, returning a
	// PreparedIndexPersist object and any error encountered during
	// preparation if any.
	PrepareIndex(opts IndexPrepareOptions) (PreparedIndexPersist, error)

	// DoneIndex marks the index flush as complete.
	DoneIndex() error
}
   247  
// DataPrepareOptions is the options struct for the Preparer's PrepareData method.
// nolint: maligned
type DataPrepareOptions struct {
	NamespaceMetadata namespace.Metadata
	BlockStart        xtime.UnixNano
	Shard             uint32
	// This volume index is only used when preparing for a flush fileset type.
	// When opening a snapshot, the new volume index is determined by looking
	// at what files exist on disk.
	VolumeIndex    int
	FileSetType    FileSetType
	DeleteIfExists bool
	// Snapshot options are applicable to snapshots (index yes, data yes).
	Snapshot DataPrepareSnapshotOptions
}
   263  
// IndexPrepareOptions is the options struct for the IndexFlush's PrepareIndex method.
// nolint: maligned
type IndexPrepareOptions struct {
	NamespaceMetadata namespace.Metadata
	BlockStart        xtime.UnixNano
	FileSetType       FileSetType
	Shards            map[uint32]struct{}
	IndexVolumeType   idxpersist.IndexVolumeType
	VolumeIndex       int
}
   274  
// DataPrepareSnapshotOptions is the options struct, carried in the Snapshot
// field of DataPrepareOptions, that contains information specific to
// reading/writing snapshot files.
type DataPrepareSnapshotOptions struct {
	SnapshotTime xtime.UnixNano
	SnapshotID   uuid.UUID
}
   281  
   282  // FileSetType is an enum that indicates what type of files a fileset contains
   283  type FileSetType int
   284  
   285  func (f FileSetType) String() string {
   286  	switch f {
   287  	case FileSetFlushType:
   288  		return "flush"
   289  	case FileSetSnapshotType:
   290  		return "snapshot"
   291  	}
   292  
   293  	return fmt.Sprintf("unknown: %d", f)
   294  }
   295  
   296  const (
   297  	// FileSetFlushType indicates that the fileset files contain a complete flush
   298  	FileSetFlushType FileSetType = iota
   299  	// FileSetSnapshotType indicates that the fileset files contain a snapshot
   300  	FileSetSnapshotType
   301  )
   302  
   303  // FileSetContentType is an enum that indicates what the contents of files a fileset contains
   304  type FileSetContentType int
   305  
   306  func (f FileSetContentType) String() string {
   307  	switch f {
   308  	case FileSetDataContentType:
   309  		return "data"
   310  	case FileSetIndexContentType:
   311  		return "index"
   312  	}
   313  	return fmt.Sprintf("unknown: %d", f)
   314  }
   315  
   316  const (
   317  	// FileSetDataContentType indicates that the fileset files contents is time series data
   318  	FileSetDataContentType FileSetContentType = iota
   319  	// FileSetIndexContentType indicates that the fileset files contain time series index metadata
   320  	FileSetIndexContentType
   321  )
   322  
// SeriesMetadataLifeTime describes the life time of the memory backing
// series metadata.
type SeriesMetadataLifeTime uint8

const (
	// SeriesLifeTimeLong means the underlying memory's life time is long lived and exceeds
	// the execution duration of the series metadata receiver.
	SeriesLifeTimeLong SeriesMetadataLifeTime = iota
	// SeriesLifeTimeShort means that the underlying memory is only valid for the duration
	// of the OnFlushNewSeries call. The receiver must clone the underlying bytes in order
	// to extend the life time.
	SeriesLifeTimeShort
)
   334  
// SeriesMetadataType describes the representation used for series metadata.
type SeriesMetadataType uint8

const (
	// SeriesDocumentType means the metadata is in doc.Metadata form.
	SeriesDocumentType SeriesMetadataType = iota
	// SeriesIDAndEncodedTagsType means the metadata is in IDAndEncodedTags form.
	SeriesIDAndEncodedTagsType
)
   344  
// IDAndEncodedTags contains a series ID (as bytes) and the series' encoded tags.
type IDAndEncodedTags struct {
	ID          ident.BytesID
	EncodedTags ts.EncodedTags
}
   350  
// SeriesMetadata captures different representations of series metadata and
// the ownership status of the underlying memory.
// NOTE(review): presumably Type selects which of Document or
// IDAndEncodedTags is populated, and LifeTime describes how long their
// backing memory remains valid — confirm against producers of this struct.
type SeriesMetadata struct {
	Document         doc.Metadata
	IDAndEncodedTags IDAndEncodedTags
	Type             SeriesMetadataType
	LifeTime         SeriesMetadataLifeTime
}
   359  
// OnFlushNewSeriesEvent holds the fields related to a flush of a new series:
// the shard and block start it belongs to, the time of its first write and
// the series metadata.
type OnFlushNewSeriesEvent struct {
	Shard          uint32
	BlockStart     xtime.UnixNano
	FirstWrite     xtime.UnixNano
	SeriesMetadata SeriesMetadata
}
   367  
// OnFlushSeries performs work on a per series level.
// Also exposes a checkpoint fn for maybe compacting multiple index segments based on size.
type OnFlushSeries interface {
	// OnFlushNewSeries receives the event describing the flush of a new
	// series and returns any error encountered while handling it.
	OnFlushNewSeries(OnFlushNewSeriesEvent) error

	// CheckpointAndMaybeCompact checks to see if we're at maximum cardinality
	// for any index segments we're currently building and compact if we are.
	CheckpointAndMaybeCompact() error
}
   377  
   378  // NoOpColdFlushNamespace is a no-op impl of OnFlushSeries.
   379  type NoOpColdFlushNamespace struct{}
   380  
   381  // CheckpointAndMaybeCompact is a no-op.
   382  func (n *NoOpColdFlushNamespace) CheckpointAndMaybeCompact() error { return nil }
   383  
   384  // OnFlushNewSeries is a no-op.
   385  func (n *NoOpColdFlushNamespace) OnFlushNewSeries(event OnFlushNewSeriesEvent) error {
   386  	return nil
   387  }
   388  
   389  // Abort is a no-op.
   390  func (n *NoOpColdFlushNamespace) Abort() error { return nil }
   391  
   392  // Done is a no-op.
   393  func (n *NoOpColdFlushNamespace) Done() error { return nil }