github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/m3ninx/index/segment/fst/docs_writer.go (about) 1 // Copyright (c) 2020 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package fst 22 23 import ( 24 "io" 25 26 "github.com/m3db/m3/src/m3ninx/index" 27 "github.com/m3db/m3/src/m3ninx/index/segment/fst/encoding/docs" 28 ) 29 30 // DocumentsWriter writes out documents data given a doc iterator. 31 type DocumentsWriter struct { 32 iter index.IDDocIterator 33 sizeHint int 34 docDataWriter *docs.DataWriter 35 docIndexWriter *docs.IndexWriter 36 docOffsets []docOffset 37 } 38 39 // NewDocumentsWriter creates a new documents writer. 40 func NewDocumentsWriter() (*DocumentsWriter, error) { 41 return &DocumentsWriter{ 42 docDataWriter: docs.NewDataWriter(nil), 43 docIndexWriter: docs.NewIndexWriter(nil), 44 docOffsets: make([]docOffset, 0, defaultInitialDocOffsetsSize), 45 }, nil 46 } 47 48 // DocumentsWriterOptions is a set of options to pass to the documents writer. 49 type DocumentsWriterOptions struct { 50 // Iter is the ID and document iterator, required. 51 Iter index.IDDocIterator 52 // SizeHint is the size hint, optional. 53 SizeHint int 54 } 55 56 // Reset the documents writer for writing out. 57 func (w *DocumentsWriter) Reset(opts DocumentsWriterOptions) { 58 w.iter = opts.Iter 59 w.sizeHint = opts.SizeHint 60 w.docDataWriter.Reset(nil) 61 w.docIndexWriter.Reset(nil) 62 w.docOffsets = w.docOffsets[:0] 63 } 64 65 // WriteDocumentsData writes out the documents data. 66 func (w *DocumentsWriter) WriteDocumentsData(iow io.Writer) error { 67 w.docDataWriter.Reset(iow) 68 69 var currOffset uint64 70 if cap(w.docOffsets) < w.sizeHint { 71 w.docOffsets = make([]docOffset, 0, w.sizeHint) 72 } 73 for w.iter.Next() { 74 id, doc := w.iter.PostingsID(), w.iter.Current() 75 n, err := w.docDataWriter.Write(doc) 76 if err != nil { 77 return err 78 } 79 w.docOffsets = append(w.docOffsets, docOffset{ID: id, offset: currOffset}) 80 currOffset += uint64(n) 81 } 82 83 return nil 84 } 85 86 // WriteDocumentsIndex writes out the documents index data. 87 func (w *DocumentsWriter) WriteDocumentsIndex(iow io.Writer) error { 88 w.docIndexWriter.Reset(iow) 89 for _, do := range w.docOffsets { 90 if err := w.docIndexWriter.Write(do.ID, do.offset); err != nil { 91 return err 92 } 93 } 94 return nil 95 }