github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/m3ninx/doc/document.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package doc
    22  
    23  import (
    24  	"bytes"
    25  	"errors"
    26  	"fmt"
    27  	"sort"
    28  	"unicode/utf8"
    29  )
    30  
var (
	// errReservedFieldName is returned by Metadata.Validate when a field is
	// named IDReservedFieldName.
	errReservedFieldName = fmt.Errorf("'%s' is a reserved field name", IDReservedFieldName)
	// ErrEmptyDocument is an error for an empty document, i.e. one that has
	// neither an ID nor any fields.
	ErrEmptyDocument = errors.New("document cannot be empty")
)
    36  
// IDReservedFieldName is the field name reserved for IDs. Metadata.Validate
// rejects any document that has a field with this name.
var IDReservedFieldName = []byte("_m3ninx_id")
    39  
// Field represents a field in a document. It is composed of a name and a value,
// both held as raw bytes.
type Field struct {
	// Name is the name of the field.
	Name  []byte
	// Value is the value stored under Name.
	Value []byte
}
    45  
// Fields is a list of fields. Its Len, Less and Swap methods implement
// sort.Interface, ordering fields by name and then by value.
type Fields []Field
    48  
// Len returns the number of fields in the list.
func (f Fields) Len() int {
	return len(f)
}
    52  
    53  func (f Fields) Less(i, j int) bool {
    54  	l, r := f[i], f[j]
    55  
    56  	c := bytes.Compare(l.Name, r.Name)
    57  	switch {
    58  	case c < 0:
    59  		return true
    60  	case c > 0:
    61  		return false
    62  	}
    63  
    64  	c = bytes.Compare(l.Value, r.Value)
    65  	switch {
    66  	case c < 0:
    67  		return true
    68  	case c > 0:
    69  		return false
    70  	}
    71  
    72  	return true
    73  }
    74  
    75  func (f Fields) Swap(i, j int) {
    76  	f[i], f[j] = f[j], f[i]
    77  }
    78  
    79  func (f Fields) shallowCopy() Fields {
    80  	cp := make([]Field, 0, len(f))
    81  	for _, fld := range f {
    82  		cp = append(cp, Field{
    83  			Name:  fld.Name,
    84  			Value: fld.Value,
    85  		})
    86  	}
    87  	return cp
    88  }
    89  
// Metadata represents a document to be indexed.
type Metadata struct {
	// ID is the document's identifier; HasID reports whether it is set.
	ID            []byte
	// Fields are the name/value pairs that make up the document.
	Fields        []Field
	// OnIndexSeries is not consulted by Compare, Equal or Validate.
	// NOTE(review): the type is declared elsewhere in the package; presumably
	// a hook tied to indexing the series — confirm.
	OnIndexSeries OnIndexSeries
}
    96  
    97  // Get returns the value of the specified field name in the document if it exists.
    98  func (m Metadata) Get(fieldName []byte) ([]byte, bool) {
    99  	for _, f := range m.Fields { // nolint:gocritic
   100  		if bytes.Equal(fieldName, f.Name) {
   101  			return f.Value, true
   102  		}
   103  	}
   104  	return nil, false
   105  }
   106  
   107  // Compare returns an integer comparing two documents. The result will be 0 if the documents
   108  // are equal, -1 if d is ordered before other, and 1 if d is ordered aftered other.
   109  func (m Metadata) Compare(other Metadata) int {
   110  	if c := bytes.Compare(m.ID, other.ID); c != 0 {
   111  		return c
   112  	}
   113  
   114  	l, r := Fields(m.Fields), Fields(other.Fields)
   115  
   116  	// Make a shallow copy of the Fields so we don't mutate the document.
   117  	if !sort.IsSorted(l) {
   118  		l = l.shallowCopy()
   119  		sort.Sort(l)
   120  	}
   121  	if !sort.IsSorted(r) {
   122  		r = r.shallowCopy()
   123  		sort.Sort(r)
   124  	}
   125  
   126  	min := len(l)
   127  	if len(r) < min {
   128  		min = len(r)
   129  	}
   130  
   131  	for i := 0; i < min; i++ {
   132  		if c := bytes.Compare(l[i].Name, r[i].Name); c != 0 {
   133  			return c
   134  		}
   135  		if c := bytes.Compare(l[i].Value, r[i].Value); c != 0 {
   136  			return c
   137  		}
   138  	}
   139  
   140  	if len(l) < len(r) {
   141  		return -1
   142  	} else if len(l) > len(r) {
   143  		return 1
   144  	}
   145  
   146  	return 0
   147  }
   148  
// Equal returns a bool indicating whether m is equal to other, i.e. whether
// m.Compare(other) is 0.
func (m Metadata) Equal(other Metadata) bool {
	return m.Compare(other) == 0
}
   153  
   154  // Validate returns a bool indicating whether the document is valid.
   155  func (m Metadata) Validate() error {
   156  	if len(m.Fields) == 0 && !m.HasID() {
   157  		return ErrEmptyDocument
   158  	}
   159  
   160  	if !utf8.Valid(m.ID) {
   161  		return fmt.Errorf("document has invalid ID: id=%v, id_hex=%x", m.ID, m.ID)
   162  	}
   163  
   164  	for _, f := range m.Fields { // nolint:gocritic
   165  		// TODO: Should we enforce uniqueness of field names?
   166  		if !utf8.Valid(f.Name) {
   167  			return fmt.Errorf("document has invalid field name: name=%v, name_hex=%x",
   168  				f.Name, f.Name)
   169  		}
   170  
   171  		if bytes.Equal(f.Name, IDReservedFieldName) {
   172  			return errReservedFieldName
   173  		}
   174  
   175  		if !utf8.Valid(f.Value) {
   176  			return fmt.Errorf("document has invalid field value: value=%v, value_hex=%x",
   177  				f.Value, f.Value)
   178  		}
   179  	}
   180  
   181  	return nil
   182  }
   183  
   184  // HasID returns a bool indicating whether the document has an ID or not.
   185  func (m Metadata) HasID() bool {
   186  	return len(m.ID) > 0
   187  }
   188  
   189  func (m Metadata) String() string {
   190  	var buf bytes.Buffer
   191  	for i, f := range m.Fields { // nolint:gocritic
   192  		buf.WriteString(fmt.Sprintf("%s: %s", f.Name, f.Value))
   193  		if i != len(m.Fields)-1 {
   194  			buf.WriteString(", ")
   195  		}
   196  	}
   197  	return fmt.Sprintf("{id: %s, fields: {%s}}", m.ID, buf.String())
   198  }
   199  
// Documents is a list of documents. Its Len, Less and Swap methods implement
// sort.Interface, ordering documents with Metadata.Compare.
type Documents []Metadata
   202  
// Len returns the number of documents in the list.
func (ds Documents) Len() int {
	return len(ds)
}
   206  
   207  func (ds Documents) Less(i, j int) bool {
   208  	l, r := ds[i], ds[j]
   209  
   210  	return l.Compare(r) < 1
   211  }
   212  
   213  func (ds Documents) Swap(i, j int) {
   214  	ds[i], ds[j] = ds[j], ds[i]
   215  }
   216  
// Encoded is a serialized document metadata.
type Encoded struct {
	// Bytes holds the serialized form.
	Bytes []byte
}
   221  
// Document contains either metadata or an encoded metadata
// but never both. Build one with NewDocumentFromMetadata or
// NewDocumentFromEncoded; the zero value contains neither.
type Document struct {
	// encoded is the serialized form, returned by Encoded when hasEncoded is set.
	encoded  Encoded
	// metadata is the decoded form, returned by Metadata when hasMetadata is set.
	metadata Metadata

	// hasEncoded and hasMetadata record which of the two fields above the
	// constructor populated.
	hasEncoded  bool
	hasMetadata bool
}
   231  
   232  // NewDocumentFromMetadata creates a Document from a Metadata.
   233  func NewDocumentFromMetadata(m Metadata) Document {
   234  	return Document{metadata: m, hasMetadata: true}
   235  }
   236  
   237  // NewDocumentFromEncoded creates a Document from an Encoded.
   238  func NewDocumentFromEncoded(e Encoded) Document {
   239  	return Document{encoded: e, hasEncoded: true}
   240  }
   241  
   242  // Metadata returns the metadata it contains, if it has one. Otherwise returns an empty metadata
   243  // and false.
   244  func (d *Document) Metadata() (Metadata, bool) {
   245  	if d.hasMetadata {
   246  		return d.metadata, true
   247  	}
   248  
   249  	return Metadata{}, false
   250  }
   251  
   252  // Encoded returns the encoded metadata it contains, if it has one. Otherwise returns an
   253  // empty encoded metadata and false.
   254  func (d *Document) Encoded() (Encoded, bool) {
   255  	if d.hasEncoded {
   256  		return d.encoded, true
   257  	}
   258  
   259  	return Encoded{}, false
   260  }