github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/m3ninx/index/segment/fst/docs_writer.go (about)

     1  // Copyright (c) 2020 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package fst
    22  
    23  import (
    24  	"io"
    25  
    26  	"github.com/m3db/m3/src/m3ninx/index"
    27  	"github.com/m3db/m3/src/m3ninx/index/segment/fst/encoding/docs"
    28  )
    29  
    30  // DocumentsWriter writes out documents data given a doc iterator.
    31  type DocumentsWriter struct {
    32  	iter           index.IDDocIterator
    33  	sizeHint       int
    34  	docDataWriter  *docs.DataWriter
    35  	docIndexWriter *docs.IndexWriter
    36  	docOffsets     []docOffset
    37  }
    38  
    39  // NewDocumentsWriter creates a new documents writer.
    40  func NewDocumentsWriter() (*DocumentsWriter, error) {
    41  	return &DocumentsWriter{
    42  		docDataWriter:  docs.NewDataWriter(nil),
    43  		docIndexWriter: docs.NewIndexWriter(nil),
    44  		docOffsets:     make([]docOffset, 0, defaultInitialDocOffsetsSize),
    45  	}, nil
    46  }
    47  
    48  // DocumentsWriterOptions is a set of options to pass to the documents writer.
    49  type DocumentsWriterOptions struct {
    50  	// Iter is the ID and document iterator, required.
    51  	Iter index.IDDocIterator
    52  	// SizeHint is the size hint, optional.
    53  	SizeHint int
    54  }
    55  
    56  // Reset the documents writer for writing out.
    57  func (w *DocumentsWriter) Reset(opts DocumentsWriterOptions) {
    58  	w.iter = opts.Iter
    59  	w.sizeHint = opts.SizeHint
    60  	w.docDataWriter.Reset(nil)
    61  	w.docIndexWriter.Reset(nil)
    62  	w.docOffsets = w.docOffsets[:0]
    63  }
    64  
    65  // WriteDocumentsData writes out the documents data.
    66  func (w *DocumentsWriter) WriteDocumentsData(iow io.Writer) error {
    67  	w.docDataWriter.Reset(iow)
    68  
    69  	var currOffset uint64
    70  	if cap(w.docOffsets) < w.sizeHint {
    71  		w.docOffsets = make([]docOffset, 0, w.sizeHint)
    72  	}
    73  	for w.iter.Next() {
    74  		id, doc := w.iter.PostingsID(), w.iter.Current()
    75  		n, err := w.docDataWriter.Write(doc)
    76  		if err != nil {
    77  			return err
    78  		}
    79  		w.docOffsets = append(w.docOffsets, docOffset{ID: id, offset: currOffset})
    80  		currOffset += uint64(n)
    81  	}
    82  
    83  	return nil
    84  }
    85  
    86  // WriteDocumentsIndex writes out the documents index data.
    87  func (w *DocumentsWriter) WriteDocumentsIndex(iow io.Writer) error {
    88  	w.docIndexWriter.Reset(iow)
    89  	for _, do := range w.docOffsets {
    90  		if err := w.docIndexWriter.Write(do.ID, do.offset); err != nil {
    91  			return err
    92  		}
    93  	}
    94  	return nil
    95  }