github.com/m3db/m3@v1.5.0/src/m3ninx/index/segment/fst/encoding/docs/index.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package docs 22 23 import ( 24 "fmt" 25 "io" 26 "math" 27 28 "github.com/m3db/m3/src/m3ninx/index" 29 "github.com/m3db/m3/src/m3ninx/index/segment/fst/encoding" 30 "github.com/m3db/m3/src/m3ninx/postings" 31 ) 32 33 const emptyID = math.MaxUint64 34 35 const ( 36 indexMetadataSize = 8 // Base postings ID as a uint64. 37 38 initialIndexEncoderLen = 256 39 ) 40 41 // IndexWriter is a writer for the index file for documents. 42 type IndexWriter struct { 43 writer io.Writer 44 enc *encoding.Encoder 45 ready bool 46 prev postings.ID 47 } 48 49 // NewIndexWriter returns a new IndexWriter. 50 func NewIndexWriter(w io.Writer) *IndexWriter { 51 iw := &IndexWriter{ 52 writer: w, 53 enc: encoding.NewEncoder(initialIndexEncoderLen), 54 } 55 return iw 56 } 57 58 // Write writes the offset for an id. IDs must be written in increasing order but can be 59 // non-contiguous. 60 func (w *IndexWriter) Write(id postings.ID, offset uint64) error { 61 if !w.ready { 62 w.writeMetadata(id) 63 w.ready = true 64 } else { 65 if id <= w.prev { 66 return fmt.Errorf("postings IDs must be monotonically increasing: received %v but previous ID was %v", id, w.prev) 67 } 68 for i := 0; i < int(id-w.prev)-1; i++ { 69 w.enc.PutUint64(emptyID) 70 } 71 } 72 73 w.enc.PutUint64(offset) 74 w.prev = id 75 76 return w.write() 77 } 78 79 func (w *IndexWriter) writeMetadata(id postings.ID) { 80 w.enc.PutUint64(uint64(id)) 81 } 82 83 func (w *IndexWriter) write() error { 84 b := w.enc.Bytes() 85 n, err := w.writer.Write(b) 86 if err != nil { 87 return err 88 } 89 if n < len(b) { 90 return io.ErrShortWrite 91 } 92 w.enc.Reset() 93 return nil 94 } 95 96 // Reset resets the IndexWriter. 97 func (w *IndexWriter) Reset(wr io.Writer) { 98 w.writer = wr 99 w.enc.Reset() 100 w.ready = false 101 } 102 103 // IndexReader is a reader for the index file for documents. 104 type IndexReader struct { 105 data []byte 106 base postings.ID 107 limit postings.ID 108 len int 109 } 110 111 // NewIndexReader returns a new IndexReader. 112 func NewIndexReader(data []byte) (*IndexReader, error) { 113 if len(data) == 0 { 114 return &IndexReader{}, nil 115 } 116 117 if len(data) < indexMetadataSize { 118 return nil, io.ErrShortBuffer 119 } 120 121 payloadLen := len(data) - indexMetadataSize 122 if payloadLen%8 != 0 { 123 return nil, fmt.Errorf("stored fields index payload should be a multiple of 8, found %v", payloadLen%8) 124 } 125 count := payloadLen / 8 126 127 r := &IndexReader{ 128 data: data, 129 } 130 131 dec := encoding.NewDecoder(data[:8]) 132 base, err := dec.Uint64() 133 if err != nil { 134 return nil, fmt.Errorf("could not read base postings ID: %v", err) 135 } 136 r.base = postings.ID(base) 137 r.limit = r.base + postings.ID(count) 138 r.len = count 139 return r, nil 140 } 141 142 func (r *IndexReader) Read(id postings.ID) (uint64, error) { 143 if id < r.base || id >= r.limit { 144 return 0, index.ErrDocNotFound 145 } 146 147 idx := r.index(id) 148 dec := encoding.NewDecoder(r.data[idx:]) 149 offset, err := dec.Uint64() 150 if err != nil { 151 return 0, err 152 } 153 154 return offset, nil 155 } 156 157 // Base returns the base postings ID. 158 func (r *IndexReader) Base() postings.ID { 159 return r.base 160 } 161 162 // Len returns the number of postings IDs. 163 func (r *IndexReader) Len() int { 164 return r.len 165 } 166 167 func (r *IndexReader) index(id postings.ID) int { 168 return (int(id-r.base) * 8) + indexMetadataSize 169 }