github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/m3ninx/index/segment/types.go (about)

     1  // Copyright (c) 2017 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package segment
    22  
    23  import (
    24  	"errors"
    25  
    26  	"github.com/m3db/m3/src/m3ninx/doc"
    27  	"github.com/m3db/m3/src/m3ninx/index"
    28  	"github.com/m3db/m3/src/m3ninx/postings"
    29  )
    30  
    31  // ErrClosed is the sentinel error returned when an operation is attempted on a
    32  // segment that has already been closed.
    33  var ErrClosed = errors.New("segment has been closed")
    34  
    35  // Segment is a sub-collection of documents within an index.
    36  type Segment interface {
    37  	// FieldsIterable returns a FieldsIterable. The returned value is not
    38  	// safe for concurrent use; for concurrent use, call FieldsIterable
    39  	// multiple times.
    40  	FieldsIterable() FieldsIterable
    41  
    42  	// TermsIterable returns a TermsIterable. The returned value is not
    43  	// safe for concurrent use; for concurrent use, call TermsIterable
    44  	// multiple times.
    45  	TermsIterable() TermsIterable
    46  
    47  	// Size returns the number of documents within the Segment. It returns
    48  	// 0 if the Segment has been closed.
    49  	Size() int64
    50  
    51  	// ContainsID returns a bool indicating if the Segment contains the provided ID.
    52  	ContainsID(docID []byte) (bool, error)
    53  
    54  	// ContainsField returns a bool indicating if the Segment contains the provided field.
    55  	ContainsField(field []byte) (bool, error)
    56  
    57  	// Reader returns a point-in-time accessor to search the segment.
    58  	Reader() (Reader, error)
    59  
    60  	// Close closes the segment and releases any internal resources.
    61  	Close() error
    62  }
    63  
    64  // Reader extends the index.Reader interface to allow for reading
    65  // of fields, terms and per-field postings lists.
    66  type Reader interface {
    67  	index.Reader
    68  	FieldsIterable
    69  	TermsIterable
    70  	FieldsPostingsListIterable
    71  
    72  	// ContainsField returns a bool indicating if the Segment contains the provided field.
    73  	ContainsField(field []byte) (bool, error)
    74  }
    75  
    76  // FieldsIterable can iterate over segment fields. It is not concurrency
    77  // safe by default.
    78  type FieldsIterable interface {
    79  	// Fields returns an iterator over the list of known fields, in order
    80  	// by name. The iterator is not valid for reading after mutating the
    81  	// builder by inserting more documents.
    82  	Fields() (FieldsIterator, error)
    83  }
    84  
    85  // FieldsPostingsListIterable can iterate over segment fields/postings lists. It is not
    86  // concurrency safe by default.
    87  type FieldsPostingsListIterable interface {
    88  	// FieldsPostingsList returns an iterator over the list of known fields, in order
    89  	// by name. The iterator is not valid for reading after mutating the
    90  	// builder by inserting more documents.
    91  	FieldsPostingsList() (FieldsPostingsListIterator, error)
    92  }
    93  
    94  // TermsIterable can iterate over segment terms. It is not concurrency
    95  // safe by default.
    96  type TermsIterable interface {
    97  	// Terms returns an iterator over the known terms values for the given
    98  	// field, in order by name. The iterator is not valid for reading after
    99  	// mutating the builder by inserting more documents.
   100  	Terms(field []byte) (TermsIterator, error)
   101  }
   102  
   103  // OrderedBytesIterator iterates over a collection of []byte values in lexicographical order.
   104  type OrderedBytesIterator interface {
   105  	// Next returns a bool indicating if there are any more elements.
   106  	Next() bool
   107  
   108  	// Current returns the current element.
   109  	// NB: the element returned is only valid until the subsequent call to Next().
   110  	Current() []byte
   111  
   112  	// Err returns any errors encountered during iteration.
   113  	Err() error
   114  
   115  	// Close releases any resources held by the iterator.
   116  	Close() error
   117  }
   118  
   119  // FieldsPostingsListIterator iterates over all known fields and their associated postings lists.
   120  type FieldsPostingsListIterator interface {
   121  	Iterator
   122  
   123  	// Current returns the current field and associated postings list.
   124  	// NB: the field returned is only valid until the subsequent call to Next().
   125  	Current() ([]byte, postings.List)
   126  }
   127  
   128  // FieldsIterator iterates over all known fields.
   129  type FieldsIterator interface {
   130  	Iterator
   131  
   132  	// Current returns the current field.
   133  	// NB: the field returned is only valid until the subsequent call to Next().
   134  	Current() []byte
   135  
   136  	// Empty returns true if there are no fields in the iterator.
   137  	Empty() bool
   138  }
   139  
   140  // TermsIterator iterates over all known terms for the provided field.
   141  type TermsIterator interface {
   142  	Iterator
   143  
   144  	// Current returns the current term and its postings list.
   145  	// NB: the element returned is only valid until the subsequent call to Next().
   146  	Current() (term []byte, postings postings.List)
   147  
   148  	// Empty returns true if there are no terms in the iterator.
   149  	Empty() bool
   150  }
   151  
   152  // Iterator holds the methods common to the iterators in this package.
   153  type Iterator interface {
   154  	// Next returns a bool indicating if there are any more elements.
   155  	Next() bool
   156  
   157  	// Err returns any errors encountered during iteration.
   158  	Err() error
   159  
   160  	// Close releases any resources held by the iterator.
   161  	Close() error
   162  }
   163  
   164  // MutableSegment is a segment which can be updated.
   165  type MutableSegment interface {
   166  	Segment
   167  	DocumentsBuilder
   168  
   169  	// Fields returns an iterator over the list of known fields, in order
   170  	// by name. The iterator is not valid for reading after mutating the
   171  	// builder by inserting more documents.
   172  	Fields() (FieldsIterator, error)
   173  
   174  	// Seal marks the MutableSegment immutable.
   175  	Seal() error
   176  
   177  	// IsSealed returns true iff the segment is open and sealed. NOTE(review): the original text said "un-sealed", which contradicts the method name; confirm against implementations.
   178  	IsSealed() bool
   179  }
   180  
   181  // ImmutableSegment is a segment that has been written to disk.
   182  type ImmutableSegment interface {
   183  	Segment
   184  	// FreeMmap: NOTE(review): undocumented in the original; presumably releases the segment's memory-mapped data, confirm against implementations.
   185  	FreeMmap() error
   186  }
   187  
   188  // Builder is a builder that can be used to construct segments.
   189  type Builder interface {
   190  	FieldsPostingsListIterable
   191  	TermsIterable
   192  
   193  	// Reset resets the builder for reuse.
   194  	Reset()
   195  
   196  	// Docs returns the current docs slice. The slice is not safe to modify
   197  	// and is invalidated on a call to Reset.
   198  	Docs() []doc.Metadata
   199  
   200  	// AllDocs returns an iterator over the documents known to the Builder.
   201  	AllDocs() (index.IDDocIterator, error)
   202  }
   203  
   204  // DocumentsBuilder is a Builder that has documents written to it via index.Writer.
   205  type DocumentsBuilder interface {
   206  	Builder
   207  	index.Writer
   208  
   209  	// SetIndexConcurrency sets the concurrency used for building the segment.
   210  	SetIndexConcurrency(value int)
   211  
   212  	// IndexConcurrency returns the concurrency used for building the segment.
   213  	IndexConcurrency() int
   214  }
   215  
   216  // CloseableDocumentsBuilder is a DocumentsBuilder that also has freeable resources.
   217  type CloseableDocumentsBuilder interface {
   218  	DocumentsBuilder
   219  	// Close: NOTE(review): undocumented in the original; presumably frees the builder's resources, confirm against implementations.
   220  	Close() error
   221  }
   222  
   223  // SegmentsBuilder is a Builder that is built from existing segments.
   224  type SegmentsBuilder interface {
   225  	Builder
   226  
   227  	// SetFilter sets a filter that selects which documents to retain
   228  	// when building the segment.
   229  	SetFilter(keep DocumentsFilter)
   230  
   231  	// AddSegments adds segments to build from.
   232  	AddSegments(segments []Segment) error
   233  
   234  	// SegmentMetadatas returns the metadata for the segments added to the builder.
   235  	SegmentMetadatas() ([]SegmentsBuilderSegmentMetadata, error)
   236  }
   237  
   238  // SegmentsBuilderSegmentMetadata is a set of metadata about a segment
   239  // that was used to build a compacted segment.
   240  type SegmentsBuilderSegmentMetadata struct {
   241  	Segment Segment
   242  	Offset  postings.ID
   243  	// NegativeOffsets is a lookup of document IDs that are duplicates or should
   244  	// be skipped, that is documents that are already contained by other segments
   245  	// or should not be included in the output segment and hence should not be
   246  	// returned when looking up documents. If this is the case the offset is -1.
   247  	// If a document ID is not a duplicate or skipped then the offset is
   248  	// the shift that should be applied when translating this postings ID
   249  	// to the result postings ID.
   250  	NegativeOffsets []int64
   251  	Skips           int64
   252  }
   253  
   254  // DocumentsFilter is a documents filter.
   255  type DocumentsFilter interface {
   256  	// ContainsDoc returns true if the document passes the filter.
   257  	ContainsDoc(d doc.Metadata) bool
   258  	// OnDuplicateDoc is a callback for when a duplicate document is
   259  	// encountered which is then removed from the resulting segment.
   260  	OnDuplicateDoc(d doc.Metadata)
   261  }