github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/m3ninx/doc/document.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package doc 22 23 import ( 24 "bytes" 25 "errors" 26 "fmt" 27 "sort" 28 "unicode/utf8" 29 ) 30 31 var ( 32 errReservedFieldName = fmt.Errorf("'%s' is a reserved field name", IDReservedFieldName) 33 // ErrEmptyDocument is an error for an empty document. 34 ErrEmptyDocument = errors.New("document cannot be empty") 35 ) 36 37 // IDReservedFieldName is the field name reserved for IDs. 38 var IDReservedFieldName = []byte("_m3ninx_id") 39 40 // Field represents a field in a document. It is composed of a name and a value. 41 type Field struct { 42 Name []byte 43 Value []byte 44 } 45 46 // Fields is a list of fields. 47 type Fields []Field 48 49 func (f Fields) Len() int { 50 return len(f) 51 } 52 53 func (f Fields) Less(i, j int) bool { 54 l, r := f[i], f[j] 55 56 c := bytes.Compare(l.Name, r.Name) 57 switch { 58 case c < 0: 59 return true 60 case c > 0: 61 return false 62 } 63 64 c = bytes.Compare(l.Value, r.Value) 65 switch { 66 case c < 0: 67 return true 68 case c > 0: 69 return false 70 } 71 72 return true 73 } 74 75 func (f Fields) Swap(i, j int) { 76 f[i], f[j] = f[j], f[i] 77 } 78 79 func (f Fields) shallowCopy() Fields { 80 cp := make([]Field, 0, len(f)) 81 for _, fld := range f { 82 cp = append(cp, Field{ 83 Name: fld.Name, 84 Value: fld.Value, 85 }) 86 } 87 return cp 88 } 89 90 // Metadata represents a document to be indexed. 91 type Metadata struct { 92 ID []byte 93 Fields []Field 94 OnIndexSeries OnIndexSeries 95 } 96 97 // Get returns the value of the specified field name in the document if it exists. 98 func (m Metadata) Get(fieldName []byte) ([]byte, bool) { 99 for _, f := range m.Fields { // nolint:gocritic 100 if bytes.Equal(fieldName, f.Name) { 101 return f.Value, true 102 } 103 } 104 return nil, false 105 } 106 107 // Compare returns an integer comparing two documents. The result will be 0 if the documents 108 // are equal, -1 if d is ordered before other, and 1 if d is ordered aftered other. 109 func (m Metadata) Compare(other Metadata) int { 110 if c := bytes.Compare(m.ID, other.ID); c != 0 { 111 return c 112 } 113 114 l, r := Fields(m.Fields), Fields(other.Fields) 115 116 // Make a shallow copy of the Fields so we don't mutate the document. 117 if !sort.IsSorted(l) { 118 l = l.shallowCopy() 119 sort.Sort(l) 120 } 121 if !sort.IsSorted(r) { 122 r = r.shallowCopy() 123 sort.Sort(r) 124 } 125 126 min := len(l) 127 if len(r) < min { 128 min = len(r) 129 } 130 131 for i := 0; i < min; i++ { 132 if c := bytes.Compare(l[i].Name, r[i].Name); c != 0 { 133 return c 134 } 135 if c := bytes.Compare(l[i].Value, r[i].Value); c != 0 { 136 return c 137 } 138 } 139 140 if len(l) < len(r) { 141 return -1 142 } else if len(l) > len(r) { 143 return 1 144 } 145 146 return 0 147 } 148 149 // Equal returns a bool indicating whether d is equal to other. 150 func (m Metadata) Equal(other Metadata) bool { 151 return m.Compare(other) == 0 152 } 153 154 // Validate returns a bool indicating whether the document is valid. 155 func (m Metadata) Validate() error { 156 if len(m.Fields) == 0 && !m.HasID() { 157 return ErrEmptyDocument 158 } 159 160 if !utf8.Valid(m.ID) { 161 return fmt.Errorf("document has invalid ID: id=%v, id_hex=%x", m.ID, m.ID) 162 } 163 164 for _, f := range m.Fields { // nolint:gocritic 165 // TODO: Should we enforce uniqueness of field names? 166 if !utf8.Valid(f.Name) { 167 return fmt.Errorf("document has invalid field name: name=%v, name_hex=%x", 168 f.Name, f.Name) 169 } 170 171 if bytes.Equal(f.Name, IDReservedFieldName) { 172 return errReservedFieldName 173 } 174 175 if !utf8.Valid(f.Value) { 176 return fmt.Errorf("document has invalid field value: value=%v, value_hex=%x", 177 f.Value, f.Value) 178 } 179 } 180 181 return nil 182 } 183 184 // HasID returns a bool indicating whether the document has an ID or not. 185 func (m Metadata) HasID() bool { 186 return len(m.ID) > 0 187 } 188 189 func (m Metadata) String() string { 190 var buf bytes.Buffer 191 for i, f := range m.Fields { // nolint:gocritic 192 buf.WriteString(fmt.Sprintf("%s: %s", f.Name, f.Value)) 193 if i != len(m.Fields)-1 { 194 buf.WriteString(", ") 195 } 196 } 197 return fmt.Sprintf("{id: %s, fields: {%s}}", m.ID, buf.String()) 198 } 199 200 // Documents is a list of documents. 201 type Documents []Metadata 202 203 func (ds Documents) Len() int { 204 return len(ds) 205 } 206 207 func (ds Documents) Less(i, j int) bool { 208 l, r := ds[i], ds[j] 209 210 return l.Compare(r) < 1 211 } 212 213 func (ds Documents) Swap(i, j int) { 214 ds[i], ds[j] = ds[j], ds[i] 215 } 216 217 // Encoded is a serialized document metadata. 218 type Encoded struct { 219 Bytes []byte 220 } 221 222 // Document contains either metadata or an encoded metadata 223 // but never both. 224 type Document struct { 225 encoded Encoded 226 metadata Metadata 227 228 hasEncoded bool 229 hasMetadata bool 230 } 231 232 // NewDocumentFromMetadata creates a Document from a Metadata. 233 func NewDocumentFromMetadata(m Metadata) Document { 234 return Document{metadata: m, hasMetadata: true} 235 } 236 237 // NewDocumentFromEncoded creates a Document from an Encoded. 238 func NewDocumentFromEncoded(e Encoded) Document { 239 return Document{encoded: e, hasEncoded: true} 240 } 241 242 // Metadata returns the metadata it contains, if it has one. Otherwise returns an empty metadata 243 // and false. 244 func (d *Document) Metadata() (Metadata, bool) { 245 if d.hasMetadata { 246 return d.metadata, true 247 } 248 249 return Metadata{}, false 250 } 251 252 // Encoded returns the encoded metadata it contains, if it has one. Otherwise returns an 253 // empty encoded metadata and false. 254 func (d *Document) Encoded() (Encoded, bool) { 255 if d.hasEncoded { 256 return d.encoded, true 257 } 258 259 return Encoded{}, false 260 }