github.com/weaviate/weaviate@v1.24.6/entities/storobj/storage_object.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package storobj
    13  
    14  import (
    15  	"bytes"
    16  	"encoding/binary"
    17  	"encoding/json"
    18  	"fmt"
    19  	"math"
    20  
    21  	"github.com/buger/jsonparser"
    22  
    23  	"github.com/go-openapi/strfmt"
    24  	"github.com/google/uuid"
    25  	"github.com/pkg/errors"
    26  	"github.com/vmihailenco/msgpack/v5"
    27  	"github.com/weaviate/weaviate/entities/additional"
    28  	"github.com/weaviate/weaviate/entities/models"
    29  	"github.com/weaviate/weaviate/entities/schema"
    30  	"github.com/weaviate/weaviate/entities/search"
    31  	"github.com/weaviate/weaviate/usecases/byteops"
    32  )
    33  
    34  var bufPool *bufferPool
    35  
    36  type Vectors map[string][]float32
    37  
    38  func init() {
    39  	// a 10kB buffer should be large enough for typical cases, it can fit a
    40  	// 1536d uncompressed vector and about 3kB of object payload. If the
    41  	// initial size is not large enoug, the caller can always allocate a larger
    42  	// buffer and return that to the pool instead.
    43  	bufPool = newBufferPool(10 * 1024)
    44  }
    45  
// Object is the storage-level wrapper around a models.Object. It carries the
// object itself plus the data needed to (un)marshal it to the binary format
// described on MarshalBinary.
type Object struct {
	// MarshallerVersion is the binary format version; the marshalling code in
	// this file only accepts version 1.
	MarshallerVersion uint8
	// Object is the wrapped API-level object (id, class, properties, ...).
	Object            models.Object `json:"object"`
	// Vector is the object's vector. It can be nil when the vector was
	// skipped during a partial unmarshal (see FromBinaryOptional).
	Vector            []float32     `json:"vector"`
	// VectorLen is the number of vector dimensions. FromBinaryOptional sets it
	// even when the vector itself is not read.
	VectorLen         int           `json:"-"`
	// BelongsToNode and BelongsToShard are not part of the binary format.
	// NOTE(review): presumably filled in by callers to record where the
	// object was read from - confirm against callers.
	BelongsToNode     string        `json:"-"`
	BelongsToShard    string        `json:"-"`
	// IsConsistent is copied verbatim into search.Result by SearchResult.
	IsConsistent      bool          `json:"-"`
	// DocID is the index id written right after the version byte.
	DocID             uint64
	// Vectors holds the named (target) vectors of the object.
	Vectors           map[string][]float32 `json:"vectors"`
}
    57  
    58  func New(docID uint64) *Object {
    59  	return &Object{
    60  		MarshallerVersion: 1,
    61  		DocID:             docID,
    62  	}
    63  }
    64  
    65  func FromObject(object *models.Object, vector []float32, vectors models.Vectors) *Object {
    66  	// clear out nil entries of properties to make sure leaving a property out and setting it nil is identical
    67  	properties, ok := object.Properties.(map[string]interface{})
    68  	if ok {
    69  		for key, prop := range properties {
    70  			if prop == nil {
    71  				delete(properties, key)
    72  			}
    73  		}
    74  		object.Properties = properties
    75  	}
    76  
    77  	var vecs map[string][]float32
    78  	if vectors != nil {
    79  		vecs = make(map[string][]float32)
    80  		for targetVector, vector := range vectors {
    81  			vecs[targetVector] = vector
    82  		}
    83  	}
    84  
    85  	return &Object{
    86  		Object:            *object,
    87  		Vector:            vector,
    88  		MarshallerVersion: 1,
    89  		VectorLen:         len(vector),
    90  		Vectors:           vecs,
    91  	}
    92  }
    93  
    94  func FromBinary(data []byte) (*Object, error) {
    95  	ko := &Object{}
    96  	if err := ko.UnmarshalBinary(data); err != nil {
    97  		return nil, err
    98  	}
    99  
   100  	return ko, nil
   101  }
   102  
   103  func FromBinaryUUIDOnly(data []byte) (*Object, error) {
   104  	ko := &Object{}
   105  
   106  	rw := byteops.NewReadWriter(data)
   107  	version := rw.ReadUint8()
   108  	if version != 1 {
   109  		return nil, errors.Errorf("unsupported binary marshaller version %d", version)
   110  	}
   111  
   112  	ko.MarshallerVersion = version
   113  
   114  	ko.DocID = rw.ReadUint64()
   115  	rw.MoveBufferPositionForward(1) // ignore kind-byte
   116  	uuidObj, err := uuid.FromBytes(rw.ReadBytesFromBuffer(16))
   117  	if err != nil {
   118  		return nil, fmt.Errorf("parse uuid: %w", err)
   119  	}
   120  	ko.Object.ID = strfmt.UUID(uuidObj.String())
   121  
   122  	rw.MoveBufferPositionForward(16)
   123  
   124  	vecLen := rw.ReadUint16()
   125  	rw.MoveBufferPositionForward(uint64(vecLen * 4))
   126  	classNameLen := rw.ReadUint16()
   127  
   128  	ko.Object.Class = string(rw.ReadBytesFromBuffer(uint64(classNameLen)))
   129  
   130  	return ko, nil
   131  }
   132  
// FromBinaryOptional unmarshals a version 1 binary object, but only
// materialises the parts requested through addProp; everything else is skipped
// over in the buffer. See MarshalBinary for the binary layout.
//
//   - the vector is only decoded when addProp.Vector is set (VectorLen is
//     always populated)
//   - the properties are skipped when addProp.NoProps is set
//   - the meta (additional properties) JSON is only read when
//     addProp.Classification is set or addProp.ModuleParams is non-empty
//   - target vectors are only decoded when addProp.Vectors is non-empty
//
// An error is returned for an unsupported marshaller version, an unparsable
// UUID, undecodable target vectors or when parseObject fails.
func FromBinaryOptional(data []byte,
	addProp additional.Properties,
) (*Object, error) {
	ko := &Object{}

	rw := byteops.NewReadWriter(data)
	ko.MarshallerVersion = rw.ReadUint8()
	if ko.MarshallerVersion != 1 {
		return nil, errors.Errorf("unsupported binary marshaller version %d", ko.MarshallerVersion)
	}
	ko.DocID = rw.ReadUint64()
	rw.MoveBufferPositionForward(1) // ignore kind-byte
	uuidObj, err := uuid.FromBytes(rw.ReadBytesFromBuffer(16))
	if err != nil {
		return nil, fmt.Errorf("parse uuid: %w", err)
	}
	uuidParsed := strfmt.UUID(uuidObj.String())

	createTime := int64(rw.ReadUint64())
	updateTime := int64(rw.ReadUint64())
	vectorLength := rw.ReadUint16()
	// The vector length should always be returned (for usage metrics purposes) even if the vector itself is skipped
	ko.VectorLen = int(vectorLength)
	if addProp.Vector {
		// decode the little-endian float32 values one by one
		ko.Object.Vector = make([]float32, vectorLength)
		vectorBytes := rw.ReadBytesFromBuffer(uint64(vectorLength) * 4)
		for i := 0; i < int(vectorLength); i++ {
			bits := binary.LittleEndian.Uint32(vectorBytes[i*4 : (i+1)*4])
			ko.Object.Vector[i] = math.Float32frombits(bits)
		}
	} else {
		// vector not requested: jump over its bytes without decoding
		rw.MoveBufferPositionForward(uint64(vectorLength) * 4)
		ko.Object.Vector = nil
	}
	// keep the wrapper's vector in sync with the wrapped object's vector
	ko.Vector = ko.Object.Vector

	classNameLen := rw.ReadUint16()
	className := string(rw.ReadBytesFromBuffer(uint64(classNameLen)))

	// props stays nil when the caller asked for no properties
	propLength := rw.ReadUint32()
	var props []byte
	if addProp.NoProps {
		rw.MoveBufferPositionForward(uint64(propLength))
	} else {
		props = rw.ReadBytesFromBuffer(uint64(propLength))
	}

	// meta stays nil unless classification or module params were requested
	var meta []byte
	metaLength := rw.ReadUint32()
	if addProp.Classification || len(addProp.ModuleParams) > 0 {
		meta = rw.ReadBytesFromBuffer(uint64(metaLength))
	} else {
		rw.MoveBufferPositionForward(uint64(metaLength))
	}

	// vector weights are always read, regardless of addProp
	vectorWeightsLength := rw.ReadUint32()
	vectorWeights := rw.ReadBytesFromBuffer(uint64(vectorWeightsLength))

	if len(addProp.Vectors) > 0 {
		// all stored target vectors are decoded here; no filtering by the
		// names listed in addProp.Vectors happens in this function
		vectors, err := unmarshalTargetVectors(&rw)
		if err != nil {
			return nil, err
		}
		ko.Vectors = vectors

		if vectors != nil {
			ko.Object.Vectors = make(models.Vectors)
			for vecName, vec := range vectors {
				ko.Object.Vectors[vecName] = vec
			}
		}
	}

	// some object members need additional "enrichment". Only do this if necessary, ie if they are actually present
	if len(props) > 0 ||
		len(meta) > 0 ||
		vectorWeightsLength > 0 &&
			!( // if the length is 4 and the encoded value is "null" (in ascii), vectorweights are not actually present
			vectorWeightsLength == 4 &&
				vectorWeights[0] == 110 && // n
				vectorWeights[1] == 117 && // u
				vectorWeights[2] == 108 && // l
				vectorWeights[3] == 108) { // l

		if err := ko.parseObject(
			uuidParsed,
			createTime,
			updateTime,
			className,
			props,
			meta,
			vectorWeights,
		); err != nil {
			return nil, errors.Wrap(err, "parse")
		}
	} else {
		// nothing to parse: only fill in the fixed header fields
		ko.Object.ID = uuidParsed
		ko.Object.CreationTimeUnix = createTime
		ko.Object.LastUpdateTimeUnix = updateTime
		ko.Object.Class = className
	}

	return ko, nil
}
   237  
// bucket is the subset of the object store that this package needs in order
// to look up marshalled objects through a secondary index.
type bucket interface {
	// GetBySecondary is not called in this part of the file.
	// NOTE(review): presumably the non-buffered variant of
	// GetBySecondaryWithBuffer (index position, key) - confirm against the
	// implementation.
	GetBySecondary(int, []byte) ([]byte, error)
	// GetBySecondaryWithBuffer is called by ObjectsByDocID with the secondary
	// index position, the key and a reusable buffer. It returns the value (nil
	// is treated as "not found" by ObjectsByDocID), the buffer to use for the
	// next call (it may differ from the one passed in) and an error.
	GetBySecondaryWithBuffer(int, []byte, []byte) ([]byte, []byte, error)
}
   242  
   243  func ObjectsByDocID(bucket bucket, ids []uint64,
   244  	additional additional.Properties,
   245  ) ([]*Object, error) {
   246  	if bucket == nil {
   247  		return nil, fmt.Errorf("objects bucket not found")
   248  	}
   249  
   250  	var (
   251  		docIDBuf = make([]byte, 8)
   252  		out      = make([]*Object, len(ids))
   253  		i        = 0
   254  		lsmBuf   = bufPool.Get()
   255  	)
   256  
   257  	defer func() {
   258  		bufPool.Put(lsmBuf)
   259  	}()
   260  
   261  	for _, id := range ids {
   262  		binary.LittleEndian.PutUint64(docIDBuf, id)
   263  		res, newBuf, err := bucket.GetBySecondaryWithBuffer(0, docIDBuf, lsmBuf)
   264  		if err != nil {
   265  			return nil, err
   266  		}
   267  
   268  		lsmBuf = newBuf // may have changed, e.g. because it was grown
   269  
   270  		if res == nil {
   271  			continue
   272  		}
   273  
   274  		unmarshalled, err := FromBinaryOptional(res, additional)
   275  		if err != nil {
   276  			return nil, errors.Wrapf(err, "unmarshal data object at position %d", i)
   277  		}
   278  
   279  		out[i] = unmarshalled
   280  		i++
   281  	}
   282  
   283  	return out[:i], nil
   284  }
   285  
// Class returns the class name of the wrapped object as a schema.ClassName.
func (ko *Object) Class() schema.ClassName {
	return schema.ClassName(ko.Object.Class)
}
   289  
// SetDocID overwrites the doc id of the object.
func (ko *Object) SetDocID(id uint64) {
	ko.DocID = id
}
   293  
// GetDocID returns the doc id of the object.
func (ko *Object) GetDocID() uint64 {
	return ko.DocID
}
   297  
// CreationTimeUnix returns the creation timestamp exactly as stored in the
// wrapped object's CreationTimeUnix field.
func (ko *Object) CreationTimeUnix() int64 {
	return ko.Object.CreationTimeUnix
}
   301  
   302  func (ko *Object) ExplainScore() string {
   303  	props := ko.AdditionalProperties()
   304  	if props != nil {
   305  		iface := props["explainScore"]
   306  		if iface != nil {
   307  			return iface.(string)
   308  		}
   309  	}
   310  	return ""
   311  }
   312  
// ID returns the UUID of the wrapped object.
func (ko *Object) ID() strfmt.UUID {
	return ko.Object.ID
}
   316  
// SetID overwrites the UUID of the wrapped object.
func (ko *Object) SetID(id strfmt.UUID) {
	ko.Object.ID = id
}
   320  
// SetClass overwrites the class name of the wrapped object.
func (ko *Object) SetClass(class string) {
	ko.Object.Class = class
}
   324  
// LastUpdateTimeUnix returns the last-update timestamp exactly as stored in
// the wrapped object's LastUpdateTimeUnix field.
func (ko *Object) LastUpdateTimeUnix() int64 {
	return ko.Object.LastUpdateTimeUnix
}
   328  
// AdditionalProperties groups all properties which are stored with the
// object and not generated at runtime. The returned map is the one held by
// the wrapped object (no copy is made) and may be nil.
func (ko *Object) AdditionalProperties() models.AdditionalProperties {
	return ko.Object.Additional
}
   334  
// Properties returns the property schema of the wrapped object as is (no copy
// is made).
func (ko *Object) Properties() models.PropertySchema {
	return ko.Object.Properties
}
   338  
   339  func (ko *Object) PropertiesWithAdditional(
   340  	additional additional.Properties,
   341  ) models.PropertySchema {
   342  	properties := ko.Properties()
   343  
   344  	if additional.RefMeta {
   345  		// nothing to remove
   346  		return properties
   347  	}
   348  
   349  	asMap, ok := properties.(map[string]interface{})
   350  	if !ok || asMap == nil {
   351  		return properties
   352  	}
   353  
   354  	for propName, value := range asMap {
   355  		asRefs, ok := value.(models.MultipleRef)
   356  		if !ok {
   357  			// not a ref, we can skip
   358  			continue
   359  		}
   360  
   361  		for i := range asRefs {
   362  			asRefs[i].Classification = nil
   363  		}
   364  
   365  		asMap[propName] = asRefs
   366  	}
   367  
   368  	return asMap
   369  }
   370  
// SetProperties replaces the property schema of the wrapped object.
func (ko *Object) SetProperties(schema models.PropertySchema) {
	ko.Object.Properties = schema
}
   374  
// VectorWeights returns the vector weights of the wrapped object as is.
func (ko *Object) VectorWeights() models.VectorWeights {
	return ko.Object.VectorWeights
}
   378  
   379  func (ko *Object) SearchResult(additional additional.Properties, tenant string) *search.Result {
   380  	propertiesMap, ok := ko.PropertiesWithAdditional(additional).(map[string]interface{})
   381  	if !ok || propertiesMap == nil {
   382  		propertiesMap = map[string]interface{}{}
   383  	}
   384  	propertiesMap["id"] = ko.ID()
   385  	ko.SetProperties(propertiesMap)
   386  
   387  	additionalProperties := models.AdditionalProperties{}
   388  	if ko.AdditionalProperties() != nil {
   389  		if interpretation, ok := additional.ModuleParams["interpretation"]; ok {
   390  			if interpretationValue, ok := interpretation.(bool); ok && interpretationValue {
   391  				additionalProperties["interpretation"] = ko.AdditionalProperties()["interpretation"]
   392  			}
   393  		}
   394  		if additional.Classification {
   395  			additionalProperties["classification"] = ko.AdditionalProperties()["classification"]
   396  		}
   397  		if additional.Group {
   398  			additionalProperties["group"] = ko.AdditionalProperties()["group"]
   399  		}
   400  	}
   401  	if ko.ExplainScore() != "" {
   402  		additionalProperties["explainScore"] = ko.ExplainScore()
   403  	}
   404  
   405  	return &search.Result{
   406  		ID:        ko.ID(),
   407  		DocID:     &ko.DocID,
   408  		ClassName: ko.Class().String(),
   409  		Schema:    ko.Properties(),
   410  		Vector:    ko.Vector,
   411  		Vectors:   ko.asVectors(ko.Vectors),
   412  		Dims:      ko.VectorLen,
   413  		// VectorWeights: ko.VectorWeights(), // TODO: add vector weights
   414  		Created:              ko.CreationTimeUnix(),
   415  		Updated:              ko.LastUpdateTimeUnix(),
   416  		AdditionalProperties: additionalProperties,
   417  		// Score is filled in later
   418  		ExplainScore: ko.ExplainScore(),
   419  		IsConsistent: ko.IsConsistent,
   420  		Tenant:       tenant, // not part of the binary
   421  		// TODO: Beacon?
   422  	}
   423  }
   424  
   425  func (ko *Object) asVectors(in map[string][]float32) models.Vectors {
   426  	if len(in) > 0 {
   427  		out := make(models.Vectors)
   428  		for targetVector, vector := range in {
   429  			out[targetVector] = vector
   430  		}
   431  		return out
   432  	}
   433  	return nil
   434  }
   435  
   436  func (ko *Object) SearchResultWithDist(addl additional.Properties, dist float32) search.Result {
   437  	res := ko.SearchResult(addl, "")
   438  	res.Dist = dist
   439  	res.Certainty = float32(additional.DistToCertainty(float64(dist)))
   440  	return *res
   441  }
   442  
   443  func (ko *Object) SearchResultWithScore(addl additional.Properties, score float32) search.Result {
   444  	res := ko.SearchResult(addl, "")
   445  	res.Score = score
   446  	return *res
   447  }
   448  
   449  func (ko *Object) SearchResultWithScoreAndTenant(addl additional.Properties, score float32, tenant string) search.Result {
   450  	res := ko.SearchResult(addl, tenant)
   451  	res.Score = score
   452  	return *res
   453  }
   454  
   455  func (ko *Object) Valid() bool {
   456  	return ko.ID() != "" &&
   457  		ko.Class().String() != ""
   458  }
   459  
   460  func SearchResults(in []*Object, additional additional.Properties, tenant string) search.Results {
   461  	out := make(search.Results, len(in))
   462  
   463  	for i, elem := range in {
   464  		out[i] = *(elem.SearchResult(additional, tenant))
   465  	}
   466  
   467  	return out
   468  }
   469  
   470  func SearchResultsWithScore(in []*Object, scores []float32, additional additional.Properties, tenant string) search.Results {
   471  	out := make(search.Results, len(in))
   472  
   473  	for i, elem := range in {
   474  		score := scores[i]
   475  		out[i] = elem.SearchResultWithScoreAndTenant(additional, score, tenant)
   476  	}
   477  
   478  	return out
   479  }
   480  
   481  func SearchResultsWithDists(in []*Object, addl additional.Properties,
   482  	dists []float32,
   483  ) search.Results {
   484  	out := make(search.Results, len(in))
   485  
   486  	for i, elem := range in {
   487  		out[i] = elem.SearchResultWithDist(addl, dists[i])
   488  	}
   489  
   490  	return out
   491  }
   492  
   493  func DocIDFromBinary(in []byte) (uint64, error) {
   494  	var version uint8
   495  	r := bytes.NewReader(in)
   496  	le := binary.LittleEndian
   497  	if err := binary.Read(r, le, &version); err != nil {
   498  		return 0, err
   499  	}
   500  
   501  	if version != 1 {
   502  		return 0, errors.Errorf("unsupported binary marshaller version %d", version)
   503  	}
   504  
   505  	var docID uint64
   506  	err := binary.Read(r, le, &docID)
   507  	return docID, err
   508  }
   509  
   510  // MarshalBinary creates the binary representation of a kind object. Regardless
   511  // of the marshaller version the first byte is a uint8 indicating the version
   512  // followed by the payload which depends on the specific version
   513  //
   514  // Version 1
   515  // No. of B   | Type          | Content
   516  // ------------------------------------------------
   517  // 1          | uint8         | MarshallerVersion = 1
   518  // 8          | uint64        | index id, keep early so id-only lookups are maximum efficient
   519  // 1          | uint8         | kind, 0=action, 1=thing - deprecated
   520  // 16         | uint128       | uuid
   521  // 8          | int64         | create time
   522  // 8          | int64         | update time
   523  // 2          | uint16        | VectorLength
   524  // n*4        | []float32     | vector of length n
   525  // 2          | uint16        | length of class name
   526  // n          | []byte        | className
   527  // 4          | uint32        | length of schema json
   528  // n          | []byte        | schema as json
   529  // 4          | uint32        | length of meta json
   530  // n          | []byte        | meta as json
   531  // 4          | uint32        | length of vectorweights json
   532  // n          | []byte        | vectorweights as json
   533  // 4          | uint32        | length of packed target vectors offsets (in bytes)
   534  // n          | []byte        | packed target vectors offsets map { name : offset_in_bytes }
   535  // 4          | uint32        | length of target vectors segment (in bytes)
   536  // n          | uint16+[]byte | target vectors segment: sequence of vec_length + vec (uint16 + []byte), (uint16 + []byte) ...
   537  
   538  func (ko *Object) MarshalBinary() ([]byte, error) {
   539  	if ko.MarshallerVersion != 1 {
   540  		return nil, errors.Errorf("unsupported marshaller version %d", ko.MarshallerVersion)
   541  	}
   542  
   543  	kindByte := uint8(0)
   544  	// Deprecated Kind field
   545  	kindByte = 1
   546  
   547  	idParsed, err := uuid.Parse(ko.ID().String())
   548  	if err != nil {
   549  		return nil, err
   550  	}
   551  	idBytes, err := idParsed.MarshalBinary()
   552  	if err != nil {
   553  		return nil, err
   554  	}
   555  	vectorLength := uint32(len(ko.Vector))
   556  	className := []byte(ko.Class())
   557  	classNameLength := uint32(len(className))
   558  	schema, err := json.Marshal(ko.Properties())
   559  	if err != nil {
   560  		return nil, err
   561  	}
   562  	schemaLength := uint32(len(schema))
   563  	meta, err := json.Marshal(ko.AdditionalProperties())
   564  	if err != nil {
   565  		return nil, err
   566  	}
   567  	metaLength := uint32(len(meta))
   568  	vectorWeights, err := json.Marshal(ko.VectorWeights())
   569  	if err != nil {
   570  		return nil, err
   571  	}
   572  	vectorWeightsLength := uint32(len(vectorWeights))
   573  
   574  	var targetVectorsOffsets []byte
   575  	targetVectorsOffsetsLength := uint32(0)
   576  	targetVectorsSegmentLength := uint32(0)
   577  
   578  	targetVectorsOffsetOrder := make([]string, 0, len(ko.Vectors))
   579  	if len(ko.Vectors) > 0 {
   580  		offsetsMap := map[string]uint32{}
   581  		for name, vec := range ko.Vectors {
   582  			offsetsMap[name] = targetVectorsSegmentLength
   583  			targetVectorsSegmentLength += 2 + 4*uint32(len(vec)) // 2 for vec length + vec bytes
   584  			targetVectorsOffsetOrder = append(targetVectorsOffsetOrder, name)
   585  		}
   586  
   587  		targetVectorsOffsets, err = msgpack.Marshal(offsetsMap)
   588  		if err != nil {
   589  			return nil, fmt.Errorf("Could not marshal target vectors offsets: %w", err)
   590  		}
   591  		targetVectorsOffsetsLength = uint32(len(targetVectorsOffsets))
   592  	}
   593  
   594  	totalBufferLength := 1 + 8 + 1 + 16 + 8 + 8 +
   595  		2 + vectorLength*4 +
   596  		2 + classNameLength +
   597  		4 + schemaLength +
   598  		4 + metaLength +
   599  		4 + vectorWeightsLength +
   600  		4 + targetVectorsOffsetsLength +
   601  		4 + targetVectorsSegmentLength
   602  
   603  	byteBuffer := make([]byte, totalBufferLength)
   604  	rw := byteops.NewReadWriter(byteBuffer)
   605  	rw.WriteByte(ko.MarshallerVersion)
   606  	rw.WriteUint64(ko.DocID)
   607  	rw.WriteByte(kindByte)
   608  
   609  	rw.CopyBytesToBuffer(idBytes)
   610  
   611  	rw.WriteUint64(uint64(ko.CreationTimeUnix()))
   612  	rw.WriteUint64(uint64(ko.LastUpdateTimeUnix()))
   613  	rw.WriteUint16(uint16(vectorLength))
   614  
   615  	for j := uint32(0); j < vectorLength; j++ {
   616  		rw.WriteUint32(math.Float32bits(ko.Vector[j]))
   617  	}
   618  
   619  	rw.WriteUint16(uint16(classNameLength))
   620  	err = rw.CopyBytesToBuffer(className)
   621  	if err != nil {
   622  		return byteBuffer, errors.Wrap(err, "Could not copy className")
   623  	}
   624  
   625  	rw.WriteUint32(schemaLength)
   626  	err = rw.CopyBytesToBuffer(schema)
   627  	if err != nil {
   628  		return byteBuffer, errors.Wrap(err, "Could not copy schema")
   629  	}
   630  
   631  	rw.WriteUint32(metaLength)
   632  	err = rw.CopyBytesToBuffer(meta)
   633  	if err != nil {
   634  		return byteBuffer, errors.Wrap(err, "Could not copy meta")
   635  	}
   636  
   637  	rw.WriteUint32(vectorWeightsLength)
   638  	err = rw.CopyBytesToBuffer(vectorWeights)
   639  	if err != nil {
   640  		return byteBuffer, errors.Wrap(err, "Could not copy vectorWeights")
   641  	}
   642  
   643  	rw.WriteUint32(targetVectorsOffsetsLength)
   644  	if targetVectorsOffsetsLength > 0 {
   645  		err = rw.CopyBytesToBuffer(targetVectorsOffsets)
   646  		if err != nil {
   647  			return byteBuffer, errors.Wrap(err, "Could not copy targetVectorsOffsets")
   648  		}
   649  	}
   650  
   651  	rw.WriteUint32(targetVectorsSegmentLength)
   652  	for _, name := range targetVectorsOffsetOrder {
   653  		vec := ko.Vectors[name]
   654  		vecLen := len(vec)
   655  
   656  		rw.WriteUint16(uint16(vecLen))
   657  		for j := 0; j < vecLen; j++ {
   658  			rw.WriteUint32(math.Float32bits(vec[j]))
   659  		}
   660  	}
   661  
   662  	return byteBuffer, nil
   663  }
   664  
// UnmarshalPropertiesFromObject only unmarshals and returns the properties part of the object
//
// Check MarshalBinary for the order of elements in the input array
//
// The passed-in properties map is emptied first and then filled with the
// values found for the key paths in propStrings; the map key used for a path
// is aggregationProperties[idx], where idx is the index jsonparser.EachKey
// hands to the callback (presumably the index of the matched path in
// propStrings - confirm against the jsonparser documentation).
//
// Numbers are stored as float64, strings as string, booleans as bool. Arrays
// are stored as []interface{}; an array in which a "beacon" key can be found
// is stored as a single-element slice holding a map with just that beacon.
//
// The only error returned is for an unsupported marshaller version. Parse
// errors and unexpected JSON types cause a panic instead. Empty data panics on
// the version check.
func UnmarshalPropertiesFromObject(data []byte, properties *map[string]interface{}, aggregationProperties []string, propStrings [][]string) error {
	if data[0] != uint8(1) {
		return errors.Errorf("unsupported binary marshaller version %d", data[0])
	}

	// clear out old values in case an object misses values. This should NOT shrink the capacity of the map, eg there
	// are no allocations when adding the properties of the next object again
	for k := range *properties {
		delete(*properties, k)
	}

	// skip version, doc id, kind, uuid, create time and update time
	startPos := uint64(1 + 8 + 1 + 16 + 8 + 8) // elements at the start
	rw := byteops.NewReadWriter(data, byteops.WithPosition(startPos))
	// get the length of the vector, each element is a float32 (4 bytes)
	vectorLength := uint64(rw.ReadUint16())
	rw.MoveBufferPositionForward(vectorLength * 4)

	// the class name is not needed here, skip over it
	classnameLength := uint64(rw.ReadUint16())
	rw.MoveBufferPositionForward(classnameLength)
	propertyLength := uint64(rw.ReadUint32())

	// only the properties JSON is handed to the parser, nothing is copied
	jsonparser.EachKey(data[rw.Position:rw.Position+propertyLength], func(idx int, value []byte, dataType jsonparser.ValueType, err error) {
		var errParse error
		switch dataType {
		case jsonparser.Number, jsonparser.String, jsonparser.Boolean:
			// note: this err shadows the callback's err parameter
			val, err := parseValues(dataType, value)
			errParse = err
			(*properties)[aggregationProperties[idx]] = val
		case jsonparser.Array: // can be a beacon or an actual array
			arrayEntries := value[1 : len(value)-1] // without leading and trailing []
			beaconVal, errBeacon := jsonparser.GetUnsafeString(arrayEntries, "beacon")
			if errBeacon == nil {
				(*properties)[aggregationProperties[idx]] = []interface{}{map[string]interface{}{"beacon": beaconVal}}
			} else {
				// check how many entries there are in the array by counting the ",". This allows us to allocate an
				// array with the right size without extending it with every append.
				// The size can be too large for string arrays, when they contain "," as part of their content.
				entryCount := 0
				for _, b := range arrayEntries {
					if b == uint8(44) { // ',' as byte
						entryCount++
					}
				}

				array := make([]interface{}, 0, entryCount)
				jsonparser.ArrayEach(value, func(innerValue []byte, innerDataType jsonparser.ValueType, offset int, innerErr error) {
					var val interface{}

					switch innerDataType {
					case jsonparser.Number, jsonparser.String, jsonparser.Boolean:
						// errParse is overwritten for every element, so only
						// the result for the last element is checked below
						val, errParse = parseValues(innerDataType, innerValue)
					default:
						panic("Unknown data type ArrayEach") // returning an error would be better
					}
					array = append(array, val)
				})
				(*properties)[aggregationProperties[idx]] = array

			}
		default:
			panic("Unknown data type EachKey") // returning an error would be better
		}
		if errParse != nil {
			panic(errParse)
		}
	}, propStrings...)

	return nil
}
   737  
   738  func parseValues(dt jsonparser.ValueType, value []byte) (interface{}, error) {
   739  	switch dt {
   740  	case jsonparser.Number:
   741  		return jsonparser.ParseFloat(value)
   742  	case jsonparser.String:
   743  		return jsonparser.ParseString(value)
   744  	case jsonparser.Boolean:
   745  		return jsonparser.ParseBoolean(value)
   746  	default:
   747  		panic("Unknown data type") // returning an error would be better
   748  	}
   749  }
   750  
   751  // UnmarshalBinary is the versioned way to unmarshal a kind object from binary,
   752  // see MarshalBinary for the exact contents of each version
   753  func (ko *Object) UnmarshalBinary(data []byte) error {
   754  	version := data[0]
   755  	if version != 1 {
   756  		return errors.Errorf("unsupported binary marshaller version %d", version)
   757  	}
   758  	ko.MarshallerVersion = version
   759  
   760  	rw := byteops.NewReadWriter(data, byteops.WithPosition(1))
   761  	ko.DocID = rw.ReadUint64()
   762  	rw.MoveBufferPositionForward(1) // kind-byte
   763  
   764  	uuidParsed, err := uuid.FromBytes(data[rw.Position : rw.Position+16])
   765  	if err != nil {
   766  		return err
   767  	}
   768  	rw.MoveBufferPositionForward(16)
   769  
   770  	createTime := int64(rw.ReadUint64())
   771  	updateTime := int64(rw.ReadUint64())
   772  
   773  	vectorLength := rw.ReadUint16()
   774  	ko.VectorLen = int(vectorLength)
   775  	ko.Vector = make([]float32, vectorLength)
   776  	for j := 0; j < int(vectorLength); j++ {
   777  		ko.Vector[j] = math.Float32frombits(rw.ReadUint32())
   778  	}
   779  
   780  	classNameLength := uint64(rw.ReadUint16())
   781  	className, err := rw.CopyBytesFromBuffer(classNameLength, nil)
   782  	if err != nil {
   783  		return errors.Wrap(err, "Could not copy class name")
   784  	}
   785  
   786  	schemaLength := uint64(rw.ReadUint32())
   787  	schema, err := rw.CopyBytesFromBuffer(schemaLength, nil)
   788  	if err != nil {
   789  		return errors.Wrap(err, "Could not copy schema")
   790  	}
   791  
   792  	metaLength := uint64(rw.ReadUint32())
   793  	meta, err := rw.CopyBytesFromBuffer(metaLength, nil)
   794  	if err != nil {
   795  		return errors.Wrap(err, "Could not copy meta")
   796  	}
   797  
   798  	vectorWeightsLength := uint64(rw.ReadUint32())
   799  	vectorWeights, err := rw.CopyBytesFromBuffer(vectorWeightsLength, nil)
   800  	if err != nil {
   801  		return errors.Wrap(err, "Could not copy vectorWeights")
   802  	}
   803  
   804  	vectors, err := unmarshalTargetVectors(&rw)
   805  	if err != nil {
   806  		return err
   807  	}
   808  	ko.Vectors = vectors
   809  
   810  	return ko.parseObject(
   811  		strfmt.UUID(uuidParsed.String()),
   812  		createTime,
   813  		updateTime,
   814  		string(className),
   815  		schema,
   816  		meta,
   817  		vectorWeights,
   818  	)
   819  }
   820  
   821  func unmarshalTargetVectors(rw *byteops.ReadWriter) (map[string][]float32, error) {
   822  	// This check prevents from panic when somebody is upgrading from version that
   823  	// didn't have multi vector support. This check is needed bc with named vectors
   824  	// feature storage object can have vectors data appended at the end of the file
   825  	if rw.Position < uint64(len(rw.Buffer)) {
   826  		targetVectorsOffsets := rw.ReadBytesFromBufferWithUint32LengthIndicator()
   827  		targetVectorsSegmentLength := rw.ReadUint32()
   828  		pos := rw.Position
   829  
   830  		if len(targetVectorsOffsets) > 0 {
   831  			var tvOffsets map[string]uint32
   832  			if err := msgpack.Unmarshal(targetVectorsOffsets, &tvOffsets); err != nil {
   833  				return nil, fmt.Errorf("Could not unmarshal target vectors offset: %w", err)
   834  			}
   835  
   836  			targetVectors := map[string][]float32{}
   837  			for name, offset := range tvOffsets {
   838  				rw.MoveBufferToAbsolutePosition(pos + uint64(offset))
   839  				vecLen := rw.ReadUint16()
   840  				vec := make([]float32, vecLen)
   841  				for j := uint16(0); j < vecLen; j++ {
   842  					vec[j] = math.Float32frombits(rw.ReadUint32())
   843  				}
   844  				targetVectors[name] = vec
   845  			}
   846  
   847  			rw.MoveBufferToAbsolutePosition(pos + uint64(targetVectorsSegmentLength))
   848  			return targetVectors, nil
   849  		}
   850  	}
   851  	return nil, nil
   852  }
   853  
   854  func VectorFromBinary(in []byte, buffer []float32) ([]float32, error) {
   855  	if len(in) == 0 {
   856  		return nil, nil
   857  	}
   858  
   859  	version := in[0]
   860  	if version != 1 {
   861  		return nil, errors.Errorf("unsupported marshaller version %d", version)
   862  	}
   863  
   864  	// since we know the version and know that the blob is not len(0), we can
   865  	// assume that we can directly access the vector length field. The only
   866  	// situation where this is not accessible would be on corrupted data - where
   867  	// it would be acceptable to panic
   868  	vecLen := binary.LittleEndian.Uint16(in[42:44])
   869  
   870  	var out []float32
   871  	if cap(buffer) >= int(vecLen) {
   872  		out = buffer[:vecLen]
   873  	} else {
   874  		out = make([]float32, vecLen)
   875  	}
   876  	vecStart := 44
   877  	vecEnd := vecStart + int(vecLen*4)
   878  
   879  	i := 0
   880  	for start := vecStart; start < vecEnd; start += 4 {
   881  		asUint := binary.LittleEndian.Uint32(in[start : start+4])
   882  		out[i] = math.Float32frombits(asUint)
   883  		i++
   884  	}
   885  
   886  	return out, nil
   887  }
   888  
   889  func (ko *Object) parseObject(uuid strfmt.UUID, create, update int64, className string,
   890  	propsB []byte, additionalB []byte, vectorWeightsB []byte,
   891  ) error {
   892  	var props map[string]interface{}
   893  	if err := json.Unmarshal(propsB, &props); err != nil {
   894  		return err
   895  	}
   896  
   897  	if err := enrichSchemaTypes(props, false); err != nil {
   898  		return errors.Wrap(err, "enrich schema datatypes")
   899  	}
   900  
   901  	var additionalProperties models.AdditionalProperties
   902  	if len(additionalB) > 0 {
   903  		if err := json.Unmarshal(additionalB, &additionalProperties); err != nil {
   904  			return err
   905  		}
   906  
   907  		if prop, ok := additionalProperties["classification"]; ok {
   908  			if classificationMap, ok := prop.(map[string]interface{}); ok {
   909  				marshalled, err := json.Marshal(classificationMap)
   910  				if err != nil {
   911  					return err
   912  				}
   913  				var classification additional.Classification
   914  				err = json.Unmarshal(marshalled, &classification)
   915  				if err != nil {
   916  					return err
   917  				}
   918  				additionalProperties["classification"] = &classification
   919  			}
   920  		}
   921  
   922  		if prop, ok := additionalProperties["group"]; ok {
   923  			if groupMap, ok := prop.(map[string]interface{}); ok {
   924  				marshalled, err := json.Marshal(groupMap)
   925  				if err != nil {
   926  					return err
   927  				}
   928  				var group additional.Group
   929  				err = json.Unmarshal(marshalled, &group)
   930  				if err != nil {
   931  					return err
   932  				}
   933  
   934  				for i, hit := range group.Hits {
   935  					if groupHitAdditionalMap, ok := hit["_additional"].(map[string]interface{}); ok {
   936  						marshalled, err := json.Marshal(groupHitAdditionalMap)
   937  						if err != nil {
   938  							return err
   939  						}
   940  						var groupHitsAdditional additional.GroupHitAdditional
   941  						err = json.Unmarshal(marshalled, &groupHitsAdditional)
   942  						if err != nil {
   943  							return err
   944  						}
   945  						group.Hits[i]["_additional"] = &groupHitsAdditional
   946  					}
   947  				}
   948  
   949  				additionalProperties["group"] = &group
   950  			}
   951  		}
   952  	}
   953  
   954  	var vectorWeights interface{}
   955  	if err := json.Unmarshal(vectorWeightsB, &vectorWeights); err != nil {
   956  		return err
   957  	}
   958  
   959  	ko.Object = models.Object{
   960  		Class:              className,
   961  		CreationTimeUnix:   create,
   962  		LastUpdateTimeUnix: update,
   963  		ID:                 uuid,
   964  		Properties:         props,
   965  		VectorWeights:      vectorWeights,
   966  		Additional:         additionalProperties,
   967  	}
   968  
   969  	return nil
   970  }
   971  
   972  // DeepCopyDangerous creates a deep copy of the underlying Object
   973  // WARNING: This was purpose built for the batch ref usecase and only covers
   974  // the situations that are required there. This means that cases which aren't
   975  // reflected in that usecase may still contain references. Thus the suffix
   976  // "Dangerous". If needed, make sure everything is copied and remove the
   977  // suffix.
   978  func (ko *Object) DeepCopyDangerous() *Object {
   979  	o := &Object{
   980  		MarshallerVersion: ko.MarshallerVersion,
   981  		DocID:             ko.DocID,
   982  		Object:            deepCopyObject(ko.Object),
   983  		Vector:            deepCopyVector(ko.Vector),
   984  		Vectors:           deepCopyVectors(ko.Vectors),
   985  	}
   986  
   987  	return o
   988  }
   989  
   990  func AddOwnership(objs []*Object, node, shard string) {
   991  	for i := range objs {
   992  		objs[i].BelongsToNode = node
   993  		objs[i].BelongsToShard = shard
   994  	}
   995  }
   996  
   997  func deepCopyVector(orig []float32) []float32 {
   998  	out := make([]float32, len(orig))
   999  	copy(out, orig)
  1000  	return out
  1001  }
  1002  
  1003  func deepCopyVectors[V []float32 | models.Vector](orig map[string]V) map[string]V {
  1004  	out := make(map[string]V, len(orig))
  1005  	for key, vec := range orig {
  1006  		out[key] = deepCopyVector(vec)
  1007  	}
  1008  	return out
  1009  }
  1010  
  1011  func deepCopyObject(orig models.Object) models.Object {
  1012  	return models.Object{
  1013  		Class:              orig.Class,
  1014  		ID:                 orig.ID,
  1015  		CreationTimeUnix:   orig.CreationTimeUnix,
  1016  		LastUpdateTimeUnix: orig.LastUpdateTimeUnix,
  1017  		Vector:             deepCopyVector(orig.Vector),
  1018  		VectorWeights:      orig.VectorWeights,
  1019  		Additional:         orig.Additional, // WARNING: not a deep copy!!
  1020  		Properties:         deepCopyProperties(orig.Properties),
  1021  		Vectors:            deepCopyVectors(orig.Vectors),
  1022  	}
  1023  }
  1024  
  1025  func deepCopyProperties(orig models.PropertySchema) models.PropertySchema {
  1026  	if orig == nil {
  1027  		return nil
  1028  	}
  1029  
  1030  	asMap, ok := orig.(map[string]interface{})
  1031  	if !ok {
  1032  		// not a map, don't know what to do with this
  1033  		return nil
  1034  	}
  1035  
  1036  	out := map[string]interface{}{}
  1037  
  1038  	for key, value := range asMap {
  1039  		if mref, ok := value.(models.MultipleRef); ok {
  1040  			out[key] = deepCopyMRef(mref)
  1041  			continue
  1042  		}
  1043  
  1044  		// Note: This is not a true deep copy, value could still be a pointer type,
  1045  		// such as *models.GeoCoordinates, thus leading to passing a reference
  1046  		// instead of actually making a copy. However, for the purposes we need
  1047  		// this method for this is acceptable based on our current knowledge
  1048  		out[key] = value
  1049  	}
  1050  
  1051  	return out
  1052  }
  1053  
  1054  func deepCopyMRef(orig models.MultipleRef) models.MultipleRef {
  1055  	if orig == nil {
  1056  		return nil
  1057  	}
  1058  
  1059  	out := make(models.MultipleRef, len(orig))
  1060  	for i, ref := range orig {
  1061  		// models.SingleRef contains only pass-by-value props, so a simple deref as
  1062  		// the struct creates a copy
  1063  		copiedRef := *ref
  1064  		out[i] = &copiedRef
  1065  	}
  1066  
  1067  	return out
  1068  }