github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/m3ninx/index/segment/fst/encoding/docs/index.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package docs
    22  
    23  import (
    24  	"fmt"
    25  	"io"
    26  	"math"
    27  
    28  	"github.com/m3db/m3/src/m3ninx/index"
    29  	"github.com/m3db/m3/src/m3ninx/index/segment/fst/encoding"
    30  	"github.com/m3db/m3/src/m3ninx/postings"
    31  )
    32  
// emptyID is the placeholder value IndexWriter stores in the slot of every
// postings ID that was skipped between two consecutive writes.
const emptyID = math.MaxUint64

const (
	indexMetadataSize = 8 // Base postings ID as a uint64.

	// initialIndexEncoderLen is the size handed to encoding.NewEncoder when an
	// IndexWriter is constructed.
	initialIndexEncoderLen = 256
)
    40  
// IndexWriter is a writer for the index file for documents.
//
// The output starts with the first postings ID written (the base), followed by
// one uint64 slot per postings ID from the base onwards. Slots for IDs that were
// skipped hold emptyID.
type IndexWriter struct {
	// writer is the destination that encoded bytes are flushed to.
	writer io.Writer
	// enc buffers the bytes produced by a single Write call before they are flushed.
	enc *encoding.Encoder
	// ready reports whether the base postings ID metadata has already been encoded.
	ready bool
	// prev is the postings ID passed to the most recent Write call; it is used to
	// enforce ordering and to work out how many slots need padding.
	prev postings.ID
}
    48  
    49  // NewIndexWriter returns a new IndexWriter.
    50  func NewIndexWriter(w io.Writer) *IndexWriter {
    51  	iw := &IndexWriter{
    52  		writer: w,
    53  		enc:    encoding.NewEncoder(initialIndexEncoderLen),
    54  	}
    55  	return iw
    56  }
    57  
    58  // Write writes the offset for an id. IDs must be written in increasing order but can be
    59  // non-contiguous.
    60  func (w *IndexWriter) Write(id postings.ID, offset uint64) error {
    61  	if !w.ready {
    62  		w.writeMetadata(id)
    63  		w.ready = true
    64  	} else {
    65  		if id <= w.prev {
    66  			return fmt.Errorf("postings IDs must be monotonically increasing: received %v but previous ID was %v", id, w.prev)
    67  		}
    68  		for i := 0; i < int(id-w.prev)-1; i++ {
    69  			w.enc.PutUint64(emptyID)
    70  		}
    71  	}
    72  
    73  	w.enc.PutUint64(offset)
    74  	w.prev = id
    75  
    76  	return w.write()
    77  }
    78  
    79  func (w *IndexWriter) writeMetadata(id postings.ID) {
    80  	w.enc.PutUint64(uint64(id))
    81  }
    82  
    83  func (w *IndexWriter) write() error {
    84  	b := w.enc.Bytes()
    85  	n, err := w.writer.Write(b)
    86  	if err != nil {
    87  		return err
    88  	}
    89  	if n < len(b) {
    90  		return io.ErrShortWrite
    91  	}
    92  	w.enc.Reset()
    93  	return nil
    94  }
    95  
    96  // Reset resets the IndexWriter.
    97  func (w *IndexWriter) Reset(wr io.Writer) {
    98  	w.writer = wr
    99  	w.enc.Reset()
   100  	w.ready = false
   101  }
   102  
// IndexReader is a reader for the index file for documents.
type IndexReader struct {
	// data is the raw index file contents, including the leading metadata.
	data []byte
	// base is the postings ID decoded from the metadata; it owns the first slot.
	base postings.ID
	// limit is base plus the number of slots, i.e. the exclusive upper bound on
	// postings IDs that have a slot in data.
	limit postings.ID
	// len is the number of 8-byte slots that follow the metadata.
	len int
}
   110  
   111  // NewIndexReader returns a new IndexReader.
   112  func NewIndexReader(data []byte) (*IndexReader, error) {
   113  	if len(data) == 0 {
   114  		return &IndexReader{}, nil
   115  	}
   116  
   117  	if len(data) < indexMetadataSize {
   118  		return nil, io.ErrShortBuffer
   119  	}
   120  
   121  	payloadLen := len(data) - indexMetadataSize
   122  	if payloadLen%8 != 0 {
   123  		return nil, fmt.Errorf("stored fields index payload should be a multiple of 8, found %v", payloadLen%8)
   124  	}
   125  	count := payloadLen / 8
   126  
   127  	r := &IndexReader{
   128  		data: data,
   129  	}
   130  
   131  	dec := encoding.NewDecoder(data[:8])
   132  	base, err := dec.Uint64()
   133  	if err != nil {
   134  		return nil, fmt.Errorf("could not read base postings ID: %v", err)
   135  	}
   136  	r.base = postings.ID(base)
   137  	r.limit = r.base + postings.ID(count)
   138  	r.len = count
   139  	return r, nil
   140  }
   141  
   142  func (r *IndexReader) Read(id postings.ID) (uint64, error) {
   143  	if id < r.base || id >= r.limit {
   144  		return 0, index.ErrDocNotFound
   145  	}
   146  
   147  	idx := r.index(id)
   148  	dec := encoding.NewDecoder(r.data[idx:])
   149  	offset, err := dec.Uint64()
   150  	if err != nil {
   151  		return 0, err
   152  	}
   153  
   154  	return offset, nil
   155  }
   156  
// Base returns the base postings ID, i.e. the ID decoded from the index
// metadata. A reader created from empty data reports zero.
func (r *IndexReader) Base() postings.ID {
	return r.base
}
   161  
// Len returns the number of postings ID slots held by the index. The count is
// derived from the payload size, so slots padded for skipped IDs are included.
func (r *IndexReader) Len() int {
	return r.len
}
   166  
   167  func (r *IndexReader) index(id postings.ID) int {
   168  	return (int(id-r.base) * 8) + indexMetadataSize
   169  }