github.com/m3db/m3@v1.5.0/src/dbnode/storage/index/convert/convert.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 // Package convert contains various conversions. 
22 package convert 23 24 import ( 25 "bytes" 26 "errors" 27 "fmt" 28 "unicode/utf8" 29 30 "github.com/m3db/m3/src/dbnode/ts" 31 "github.com/m3db/m3/src/m3ninx/doc" 32 "github.com/m3db/m3/src/query/graphite/graphite" 33 "github.com/m3db/m3/src/x/ident" 34 "github.com/m3db/m3/src/x/pool" 35 "github.com/m3db/m3/src/x/serialize" 36 ) 37 38 const ( 39 // NB: this assumes that series ID has a format: 40 // {tag1="value1",tag2="value2",...} 41 // 42 // Thus firstTagBytesPosition points to the 't' immediately after curly brace '{' 43 firstTagBytesPosition int = 1 44 // distanceBetweenTagNameAndValue corresponds to '="' in series ID that separates tag name from 45 // its value 46 distanceBetweenTagNameAndValue int = 2 47 // distanceBetweenTagValueAndNextName corresponds to '",' in series ID that separates 48 // tag's value from the following tag name 49 distanceBetweenTagValueAndNextName int = 2 50 ) 51 52 var ( 53 // ReservedFieldNameID is the field name used to index the ID in the 54 // m3ninx subsystem. 55 ReservedFieldNameID = doc.IDReservedFieldName 56 57 // ErrUsingReservedFieldName is the error returned when a metric 58 // cannot be parsed due to using a reserved field name. 59 ErrUsingReservedFieldName = errors.New( 60 "unable to parse metric using reserved field name: " + 61 string(ReservedFieldNameID)) 62 63 errInvalidResultMissingID = errors.New( 64 "corrupt data, unable to extract id") 65 ) 66 67 // Validate returns an error describing why the document is invalid, or nil if it is valid. 
68 func Validate(d doc.Metadata) error { 69 if !utf8.Valid(d.ID) { 70 return fmt.Errorf("document has invalid non-UTF8 ID: id=%v, id_hex=%x", 71 d.ID, d.ID) 72 } 73 74 for _, f := range d.Fields { 75 if !utf8.Valid(f.Name) { 76 return fmt.Errorf("document has invalid non-UTF8 field name: name=%v, name_hex=%x", 77 f.Name, f.Name) 78 } 79 80 if bytes.Equal(f.Name, ReservedFieldNameID) { 81 return ErrUsingReservedFieldName 82 } 83 84 if !utf8.Valid(f.Value) { 85 return fmt.Errorf("document has invalid non-UTF8 field value: value=%v, value_hex=%x", 86 f.Value, f.Value) 87 } 88 } 89 90 return nil 91 } 92 93 // ValidateSeries will validate a series for use with m3ninx. 94 func ValidateSeries(id ident.ID, tags ident.Tags) error { 95 if idBytes := id.Bytes(); !utf8.Valid(idBytes) { 96 return fmt.Errorf("series has invalid non-UTF8 ID: id=%s, id_hex=%x", 97 idBytes, idBytes) 98 } 99 for _, tag := range tags.Values() { 100 if err := ValidateSeriesTag(tag); err != nil { 101 return err 102 } 103 } 104 return nil 105 } 106 107 // ValidateSeriesTag validates a series tag for use with m3ninx. 108 func ValidateSeriesTag(tag ident.Tag) error { 109 tagName := tag.Name.Bytes() 110 tagValue := tag.Value.Bytes() 111 if bytes.Equal(ReservedFieldNameID, tagName) { 112 return ErrUsingReservedFieldName 113 } 114 if !utf8.Valid(tagName) { 115 return fmt.Errorf("series contains invalid non-UTF8 field name: "+ 116 "field=%s, field_hex=%v", tagName, tagName) 117 } 118 if !utf8.Valid(tagValue) { 119 return fmt.Errorf("series contains invalid non-UTF8 field value: "+ 120 "field=%s, field_value=%s, field_value_hex=%x", 121 tagName, tagValue, tagValue) 122 } 123 return nil 124 } 125 126 // FromSeriesIDAndTags converts the provided series id+tags into a document. 
127 func FromSeriesIDAndTags(id ident.ID, tags ident.Tags) (doc.Metadata, error) { 128 var ( 129 clonedID = clone(id.Bytes()) 130 fields = make([]doc.Field, 0, len(tags.Values())) 131 expectedStart = firstTagBytesPosition 132 ) 133 for _, tag := range tags.Values() { 134 nameBytes, valueBytes := tag.Name.Bytes(), tag.Value.Bytes() 135 136 var clonedName, clonedValue []byte 137 clonedName, expectedStart = findSliceOrClone(clonedID, nameBytes, expectedStart, 138 distanceBetweenTagNameAndValue) 139 clonedValue, expectedStart = findSliceOrClone(clonedID, valueBytes, expectedStart, 140 distanceBetweenTagValueAndNextName) 141 142 fields = append(fields, doc.Field{ 143 Name: clonedName, 144 Value: clonedValue, 145 }) 146 } 147 148 d := doc.Metadata{ 149 ID: clonedID, 150 Fields: fields, 151 } 152 if err := Validate(d); err != nil { 153 return doc.Metadata{}, err 154 } 155 return d, nil 156 } 157 158 // FromSeriesIDAndTagIter converts the provided series id+tags into a document. 159 func FromSeriesIDAndTagIter(id ident.ID, tags ident.TagIterator) (doc.Metadata, error) { 160 var ( 161 clonedID = clone(id.Bytes()) 162 fields = make([]doc.Field, 0, tags.Remaining()) 163 expectedStart = firstTagBytesPosition 164 ) 165 for tags.Next() { 166 tag := tags.Current() 167 nameBytes, valueBytes := tag.Name.Bytes(), tag.Value.Bytes() 168 169 var clonedName, clonedValue []byte 170 clonedName, expectedStart = findSliceOrClone(clonedID, nameBytes, expectedStart, 171 distanceBetweenTagNameAndValue) 172 clonedValue, expectedStart = findSliceOrClone(clonedID, valueBytes, expectedStart, 173 distanceBetweenTagValueAndNextName) 174 175 fields = append(fields, doc.Field{ 176 Name: clonedName, 177 Value: clonedValue, 178 }) 179 } 180 if err := tags.Err(); err != nil { 181 return doc.Metadata{}, err 182 } 183 184 d := doc.Metadata{ 185 ID: clonedID, 186 Fields: fields, 187 } 188 if err := Validate(d); err != nil { 189 return doc.Metadata{}, err 190 } 191 return d, nil 192 } 193 194 // 
FromSeriesIDAndEncodedTags converts the provided series id and encoded tags into a doc.Metadata. 195 func FromSeriesIDAndEncodedTags(id ident.BytesID, encodedTags ts.EncodedTags) (doc.Metadata, error) { 196 var ( 197 byteOrder = serialize.ByteOrder 198 total = len(encodedTags) 199 ) 200 if total == 0 { 201 // No tags set for this series 202 return doc.Metadata{ 203 ID: clone(id.Bytes()), 204 Fields: nil, 205 }, nil 206 } 207 208 if total < 4 { 209 return doc.Metadata{}, fmt.Errorf("encoded tags too short: size=%d, need=%d", total, 4) 210 } 211 212 header := byteOrder.Uint16(encodedTags[:2]) 213 encodedTags = encodedTags[2:] 214 if header != serialize.HeaderMagicNumber { 215 return doc.Metadata{}, serialize.ErrIncorrectHeader 216 } 217 218 length := int(byteOrder.Uint16(encodedTags[:2])) 219 encodedTags = encodedTags[2:] 220 221 var ( 222 clonedID = clone(id.Bytes()) 223 fields = make([]doc.Field, 0, length) 224 expectedStart = firstTagBytesPosition 225 ) 226 227 for i := 0; i < length; i++ { 228 if len(encodedTags) < 2 { 229 return doc.Metadata{}, fmt.Errorf("missing size for tag name: index=%d", i) 230 } 231 numBytesName := int(byteOrder.Uint16(encodedTags[:2])) 232 if numBytesName == 0 { 233 return doc.Metadata{}, serialize.ErrEmptyTagNameLiteral 234 } 235 encodedTags = encodedTags[2:] 236 237 bytesName := encodedTags[:numBytesName] 238 encodedTags = encodedTags[numBytesName:] 239 240 if len(encodedTags) < 2 { 241 return doc.Metadata{}, fmt.Errorf("missing size for tag value: index=%d", i) 242 } 243 244 numBytesValue := int(byteOrder.Uint16(encodedTags[:2])) 245 encodedTags = encodedTags[2:] 246 247 bytesValue := encodedTags[:numBytesValue] 248 encodedTags = encodedTags[numBytesValue:] 249 250 var clonedName, clonedValue []byte 251 clonedName, expectedStart = findSliceOrClone(clonedID, bytesName, expectedStart, 252 distanceBetweenTagNameAndValue) 253 clonedValue, expectedStart = findSliceOrClone(clonedID, bytesValue, expectedStart, 254 
distanceBetweenTagValueAndNextName) 255 256 fields = append(fields, doc.Field{ 257 Name: clonedName, 258 Value: clonedValue, 259 }) 260 } 261 262 d := doc.Metadata{ 263 ID: clonedID, 264 Fields: fields, 265 } 266 if err := Validate(d); err != nil { 267 return doc.Metadata{}, err 268 } 269 return d, nil 270 } 271 272 func findSliceOrClone(id, tag []byte, expectedStart, nextPositionDistance int) ([]byte, int) { //nolint:unparam 273 n := len(tag) 274 expectedEnd := expectedStart + n 275 if expectedStart != -1 && expectedEnd <= len(id) && 276 bytes.Equal(id[expectedStart:expectedEnd], tag) { 277 return id[expectedStart:expectedEnd], expectedEnd + nextPositionDistance 278 } else if idx := bytes.Index(id, tag); idx != -1 { 279 return id[idx : idx+n], expectedEnd + nextPositionDistance 280 } else { 281 return clone(tag), -1 282 } 283 } 284 285 // TagsFromTagsIter returns an ident.Tags from a TagIterator. It also tries 286 // to re-use bytes from the seriesID if they're also present in the tags 287 // instead of re-allocating them. This requires that the ident.Tags that is 288 // returned will have the same (or shorter) life time as the seriesID, 289 // otherwise the operation is unsafe. 290 func TagsFromTagsIter( 291 seriesID ident.ID, 292 iter ident.TagIterator, 293 idPool ident.Pool, 294 ) (ident.Tags, error) { 295 var tags ident.Tags 296 if idPool != nil { 297 tags = idPool.Tags() 298 } else { 299 tagSlice := make([]ident.Tag, 0, iter.Len()) 300 tags = ident.NewTags(tagSlice...) 
301 } 302 303 seriesIDBytes := ident.BytesID(seriesID.Bytes()) 304 for iter.Next() { 305 curr := iter.Current() 306 307 var ( 308 nameBytes, valueBytes = curr.Name.Bytes(), curr.Value.Bytes() 309 tag ident.Tag 310 idRef bool 311 ) 312 if idx := bytes.Index(seriesIDBytes, nameBytes); idx != -1 { 313 tag.Name = seriesIDBytes[idx : idx+len(nameBytes)] 314 idRef = true 315 } else { 316 if idPool != nil { 317 // NB(r): Fast path for if a graphite tag name to save 318 // a lot of space is to reuse a preallocated tag name. 319 if idx, ok := graphite.TagIndex(nameBytes); ok { 320 tag.Name = graphite.TagNameID(idx) 321 } else { 322 tag.Name = idPool.Clone(curr.Name) 323 } 324 } else { 325 copiedBytes := append([]byte(nil), curr.Name.Bytes()...) 326 tag.Name = ident.BytesID(copiedBytes) 327 } 328 } 329 if idx := bytes.Index(seriesIDBytes, valueBytes); idx != -1 { 330 tag.Value = seriesIDBytes[idx : idx+len(valueBytes)] 331 idRef = true 332 } else { 333 if idPool != nil { 334 tag.Value = idPool.Clone(curr.Value) 335 } else { 336 copiedBytes := append([]byte(nil), curr.Value.Bytes()...) 337 tag.Value = ident.BytesID(copiedBytes) 338 } 339 } 340 341 if idRef { 342 tag.NoFinalize() // Taken ref, cannot finalize this. 343 } 344 345 tags.Append(tag) 346 } 347 348 if err := iter.Err(); err != nil { 349 return ident.Tags{}, err 350 } 351 return tags, nil 352 } 353 354 // NB(prateek): we take an independent copy of the bytes underlying 355 // any ids provided, as we need to maintain the lifecycle of the indexed 356 // bytes separately from the rest of the storage subsystem. 357 func clone(original []byte) []byte { 358 clone := make([]byte, len(original)) 359 copy(clone, original) 360 return clone 361 } 362 363 // Opts are the pools required for conversions. 
364 type Opts struct { 365 IdentPool ident.Pool 366 CheckedBytesPool pool.CheckedBytesPool 367 NoClone bool 368 } 369 370 // wrapBytes wraps the provided bytes into an ident.ID backed by pooled types, 371 // such that calling Finalize() on the returned type returns the resources to 372 // the pools. 373 func (o Opts) wrapBytes(b []byte) ident.ID { 374 if o.NoClone { 375 return ident.BytesID(b) 376 } 377 cb := o.CheckedBytesPool.Get(len(b)) 378 cb.IncRef() 379 cb.AppendAll(b) 380 id := o.IdentPool.BinaryID(cb) 381 // release held reference so now the only reference to the bytes is owned by `id` 382 cb.DecRef() 383 return id 384 } 385 386 // ToSeries converts the provided doc to metric id+tags. 387 func ToSeries(d doc.Metadata, opts Opts) (ident.ID, ident.TagIterator, error) { 388 if len(d.ID) == 0 { 389 return nil, nil, errInvalidResultMissingID 390 } 391 return opts.wrapBytes(d.ID), ToSeriesTags(d, opts), nil 392 } 393 394 // ToSeriesTags converts the provided doc to metric tags. 395 func ToSeriesTags(d doc.Metadata, opts Opts) ident.TagIterator { 396 return newTagIter(d, opts) 397 } 398 399 // tagIter exposes an ident.TagIterator interface over a doc.Metadata. 400 type tagIter struct { 401 docFields doc.Fields 402 403 err error 404 done bool 405 currentIdx int 406 currentTag ident.Tag 407 408 opts Opts 409 } 410 411 // NB: force tagIter to implement the ident.TagIterator interface. 
412 var _ ident.TagIterator = &tagIter{} 413 414 func newTagIter(d doc.Metadata, opts Opts) ident.TagIterator { 415 return &tagIter{ 416 docFields: d.Fields, 417 currentIdx: -1, 418 opts: opts, 419 } 420 } 421 422 func (t *tagIter) Next() bool { 423 if t.err != nil || t.done { 424 return false 425 } 426 hasNext := t.parseNext() 427 if !hasNext { 428 t.done = true 429 } 430 return hasNext 431 } 432 433 func (t *tagIter) parseNext() (hasNext bool) { 434 t.releaseCurrent() 435 t.currentIdx++ 436 // early terminate if we know there's no more fields 437 if t.currentIdx >= len(t.docFields) { 438 return false 439 } 440 // if there are fields, we have to ensure the next field 441 // is not using the reserved ID fieldname 442 next := t.docFields[t.currentIdx] 443 if bytes.Equal(ReservedFieldNameID, next.Name) { 444 t.err = ErrUsingReservedFieldName 445 return false 446 } 447 // otherwise, we're good. 448 t.currentTag = ident.Tag{ 449 Name: t.opts.wrapBytes(next.Name), 450 Value: t.opts.wrapBytes(next.Value), 451 } 452 return true 453 } 454 455 func (t *tagIter) releaseCurrent() { 456 if t.currentTag.Name != nil { 457 t.currentTag.Name.Finalize() 458 t.currentTag.Name = nil 459 } 460 if t.currentTag.Value != nil { 461 t.currentTag.Value.Finalize() 462 t.currentTag.Value = nil 463 } 464 } 465 466 func (t *tagIter) Current() ident.Tag { 467 return t.currentTag 468 } 469 470 func (t *tagIter) CurrentIndex() int { 471 if t.currentIdx >= 0 { 472 return t.currentIdx 473 } 474 return 0 475 } 476 477 func (t *tagIter) Err() error { 478 return t.err 479 } 480 481 func (t *tagIter) Close() { 482 t.releaseCurrent() 483 t.done = true 484 } 485 486 func (t *tagIter) Len() int { 487 return len(t.docFields) 488 } 489 490 func (t *tagIter) Remaining() int { 491 l := len(t.docFields) - (t.currentIdx + 1) 492 return l 493 } 494 495 func (t *tagIter) Duplicate() ident.TagIterator { 496 var dupe = *t 497 if t.currentTag.Name != nil { 498 dupe.currentTag = t.opts.IdentPool.CloneTag(t.currentTag) 
499 } 500 return &dupe 501 } 502 503 func (t *tagIter) Rewind() { 504 t.releaseCurrent() 505 t.currentIdx = -1 506 t.done = false 507 }