github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/m3ninx/index/segment/fst/encoding/docs/data.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Softwarw. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package docs 22 23 import ( 24 "errors" 25 "fmt" 26 "io" 27 28 "github.com/m3db/m3/src/m3ninx/doc" 29 "github.com/m3db/m3/src/m3ninx/index/segment/fst/encoding" 30 ) 31 32 const initialDataEncoderLen = 1024 33 34 // DataWriter writes the data file for documents. 35 type DataWriter struct { 36 writer io.Writer 37 enc *encoding.Encoder 38 } 39 40 // NewDataWriter returns a new DataWriter. 41 func NewDataWriter(w io.Writer) *DataWriter { 42 return &DataWriter{ 43 writer: w, 44 enc: encoding.NewEncoder(initialDataEncoderLen), 45 } 46 } 47 48 func (w *DataWriter) Write(d doc.Metadata) (int, error) { 49 n := w.enc.PutBytes(d.ID) 50 n += w.enc.PutUvarint(uint64(len(d.Fields))) 51 for _, f := range d.Fields { 52 n += w.enc.PutBytes(f.Name) 53 n += w.enc.PutBytes(f.Value) 54 } 55 56 if err := w.write(); err != nil { 57 return 0, err 58 } 59 60 return n, nil 61 } 62 63 func (w *DataWriter) write() error { 64 b := w.enc.Bytes() 65 n, err := w.writer.Write(b) 66 if err != nil { 67 return err 68 } 69 if n < len(b) { 70 return io.ErrShortWrite 71 } 72 w.enc.Reset() 73 return nil 74 } 75 76 // Reset resets the DataWriter. 77 func (w *DataWriter) Reset(wr io.Writer) { 78 w.writer = wr 79 w.enc.Reset() 80 } 81 82 // DataReader is a reader for the data file for documents. 83 type DataReader struct { 84 data []byte 85 } 86 87 // NewDataReader returns a new DataReader. 88 func NewDataReader(data []byte) *DataReader { 89 return &DataReader{ 90 data: data, 91 } 92 } 93 94 func (r *DataReader) Read(offset uint64) (doc.Metadata, error) { 95 if offset >= uint64(len(r.data)) { 96 return doc.Metadata{}, fmt.Errorf("invalid offset: %v is past the end of the data file", offset) 97 } 98 dec := encoding.NewDecoder(r.data[int(offset):]) 99 id, err := dec.Bytes() 100 if err != nil { 101 return doc.Metadata{}, err 102 } 103 104 x, err := dec.Uvarint() 105 if err != nil { 106 return doc.Metadata{}, err 107 } 108 n := int(x) 109 110 d := doc.Metadata{ 111 ID: id, 112 Fields: make([]doc.Field, n), 113 } 114 115 for i := 0; i < n; i++ { 116 name, err := dec.Bytes() 117 if err != nil { 118 return doc.Metadata{}, err 119 } 120 val, err := dec.Bytes() 121 if err != nil { 122 return doc.Metadata{}, err 123 } 124 d.Fields[i] = doc.Field{ 125 Name: name, 126 Value: val, 127 } 128 } 129 130 return d, nil 131 } 132 133 // EncodedDataReader is a reader for the data file for encoded document metadata. 134 type EncodedDataReader struct { 135 data []byte 136 } 137 138 // NewEncodedDataReader returns a new EncodedDataReader. 139 func NewEncodedDataReader(data []byte) *EncodedDataReader { 140 return &EncodedDataReader{ 141 data: data, 142 } 143 } 144 145 // Read reads a doc.Encoded from a data stream starting at the specified offset. 146 func (e *EncodedDataReader) Read(offset uint64) (doc.Encoded, error) { 147 if offset >= uint64(len(e.data)) { 148 return doc.Encoded{}, fmt.Errorf( 149 "invalid offset: %v is past the end of the data file", offset, 150 ) 151 } 152 153 return doc.Encoded{ 154 Bytes: e.data[int(offset):], 155 }, nil 156 } 157 158 // EncodedDocumentReader is a reader for reading documents from encoded metadata. 159 type EncodedDocumentReader struct { 160 currFields []doc.Field 161 } 162 163 // NewEncodedDocumentReader returns a new EncodedDocumentReader. 164 func NewEncodedDocumentReader() *EncodedDocumentReader { 165 return &EncodedDocumentReader{} 166 } 167 168 // Read reads a doc.Metadata from a doc.Encoded. Returned doc.Metadata should be 169 // processed before calling Read again as the underlying array pointed to by the Fields 170 // slice will be updated. This approach avoids allocating a new slice with a new backing 171 // array for every document processed, unlike (*DataReader).Read 172 func (r *EncodedDocumentReader) Read(encoded doc.Encoded) (doc.Metadata, error) { 173 for i := range r.currFields { 174 r.currFields[i] = doc.Field{} 175 } 176 r.currFields = r.currFields[:0] 177 id, buf, err := encoding.ReadBytes(encoded.Bytes) 178 if err != nil { 179 return doc.Metadata{}, err 180 } 181 182 x, buf, err := encoding.ReadUvarint(buf) 183 if err != nil { 184 return doc.Metadata{}, err 185 } 186 n := int(x) 187 188 var name, val []byte 189 for i := 0; i < n; i++ { 190 name, buf, err = encoding.ReadBytes(buf) 191 if err != nil { 192 return doc.Metadata{}, err 193 } 194 val, buf, err = encoding.ReadBytes(buf) 195 if err != nil { 196 return doc.Metadata{}, err 197 } 198 r.currFields = append(r.currFields, doc.Field{ 199 Name: name, 200 Value: val, 201 }) 202 } 203 204 return doc.Metadata{ 205 ID: id, 206 Fields: r.currFields, 207 }, nil 208 } 209 210 // ReadEncodedDocumentID reads the document ID from the encoded document metadata. 211 func ReadEncodedDocumentID(encoded doc.Encoded) ([]byte, error) { 212 id, _, err := encoding.ReadBytes(encoded.Bytes) 213 return id, err 214 } 215 216 // MetadataFromDocument retrieves a doc.Metadata from a doc.Document. 217 func MetadataFromDocument(document doc.Document, reader *EncodedDocumentReader) (doc.Metadata, error) { 218 if d, ok := document.Metadata(); ok { 219 return d, nil 220 } 221 222 if e, ok := document.Encoded(); ok { 223 return reader.Read(e) 224 } 225 226 return doc.Metadata{}, errors.New("document does not contain metadata or encoded metadata") 227 } 228 229 // ReadIDFromDocument reads the document ID from the document. 230 func ReadIDFromDocument(document doc.Document) ([]byte, error) { 231 if d, ok := document.Metadata(); ok { 232 return d.ID, nil 233 } 234 235 if e, ok := document.Encoded(); ok { 236 return ReadEncodedDocumentID(e) 237 } 238 239 return nil, errors.New("document does not contain metadata or encoded metadata") 240 }