github.com/weaviate/weaviate@v1.24.6/entities/storobj/storage_object.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package storobj 13 14 import ( 15 "bytes" 16 "encoding/binary" 17 "encoding/json" 18 "fmt" 19 "math" 20 21 "github.com/buger/jsonparser" 22 23 "github.com/go-openapi/strfmt" 24 "github.com/google/uuid" 25 "github.com/pkg/errors" 26 "github.com/vmihailenco/msgpack/v5" 27 "github.com/weaviate/weaviate/entities/additional" 28 "github.com/weaviate/weaviate/entities/models" 29 "github.com/weaviate/weaviate/entities/schema" 30 "github.com/weaviate/weaviate/entities/search" 31 "github.com/weaviate/weaviate/usecases/byteops" 32 ) 33 34 var bufPool *bufferPool 35 36 type Vectors map[string][]float32 37 38 func init() { 39 // a 10kB buffer should be large enough for typical cases, it can fit a 40 // 1536d uncompressed vector and about 3kB of object payload. If the 41 // initial size is not large enoug, the caller can always allocate a larger 42 // buffer and return that to the pool instead. 43 bufPool = newBufferPool(10 * 1024) 44 } 45 46 type Object struct { 47 MarshallerVersion uint8 48 Object models.Object `json:"object"` 49 Vector []float32 `json:"vector"` 50 VectorLen int `json:"-"` 51 BelongsToNode string `json:"-"` 52 BelongsToShard string `json:"-"` 53 IsConsistent bool `json:"-"` 54 DocID uint64 55 Vectors map[string][]float32 `json:"vectors"` 56 } 57 58 func New(docID uint64) *Object { 59 return &Object{ 60 MarshallerVersion: 1, 61 DocID: docID, 62 } 63 } 64 65 func FromObject(object *models.Object, vector []float32, vectors models.Vectors) *Object { 66 // clear out nil entries of properties to make sure leaving a property out and setting it nil is identical 67 properties, ok := object.Properties.(map[string]interface{}) 68 if ok { 69 for key, prop := range properties { 70 if prop == nil { 71 delete(properties, key) 72 } 73 } 74 object.Properties = properties 75 } 76 77 var vecs map[string][]float32 78 if vectors != nil { 79 vecs = make(map[string][]float32) 80 for targetVector, vector := range vectors { 81 vecs[targetVector] = vector 82 } 83 } 84 85 return &Object{ 86 Object: *object, 87 Vector: vector, 88 MarshallerVersion: 1, 89 VectorLen: len(vector), 90 Vectors: vecs, 91 } 92 } 93 94 func FromBinary(data []byte) (*Object, error) { 95 ko := &Object{} 96 if err := ko.UnmarshalBinary(data); err != nil { 97 return nil, err 98 } 99 100 return ko, nil 101 } 102 103 func FromBinaryUUIDOnly(data []byte) (*Object, error) { 104 ko := &Object{} 105 106 rw := byteops.NewReadWriter(data) 107 version := rw.ReadUint8() 108 if version != 1 { 109 return nil, errors.Errorf("unsupported binary marshaller version %d", version) 110 } 111 112 ko.MarshallerVersion = version 113 114 ko.DocID = rw.ReadUint64() 115 rw.MoveBufferPositionForward(1) // ignore kind-byte 116 uuidObj, err := uuid.FromBytes(rw.ReadBytesFromBuffer(16)) 117 if err != nil { 118 return nil, fmt.Errorf("parse uuid: %w", err) 119 } 120 ko.Object.ID = strfmt.UUID(uuidObj.String()) 121 122 rw.MoveBufferPositionForward(16) 123 124 vecLen := rw.ReadUint16() 125 rw.MoveBufferPositionForward(uint64(vecLen * 4)) 126 classNameLen := rw.ReadUint16() 127 128 ko.Object.Class = string(rw.ReadBytesFromBuffer(uint64(classNameLen))) 129 130 return ko, nil 131 } 132 133 func FromBinaryOptional(data []byte, 134 addProp additional.Properties, 135 ) (*Object, error) { 136 ko := &Object{} 137 138 rw := byteops.NewReadWriter(data) 139 ko.MarshallerVersion = rw.ReadUint8() 140 if ko.MarshallerVersion != 1 { 141 return nil, errors.Errorf("unsupported binary marshaller version %d", ko.MarshallerVersion) 142 } 143 ko.DocID = rw.ReadUint64() 144 rw.MoveBufferPositionForward(1) // ignore kind-byte 145 uuidObj, err := uuid.FromBytes(rw.ReadBytesFromBuffer(16)) 146 if err != nil { 147 return nil, fmt.Errorf("parse uuid: %w", err) 148 } 149 uuidParsed := strfmt.UUID(uuidObj.String()) 150 151 createTime := int64(rw.ReadUint64()) 152 updateTime := int64(rw.ReadUint64()) 153 vectorLength := rw.ReadUint16() 154 // The vector length should always be returned (for usage metrics purposes) even if the vector itself is skipped 155 ko.VectorLen = int(vectorLength) 156 if addProp.Vector { 157 ko.Object.Vector = make([]float32, vectorLength) 158 vectorBytes := rw.ReadBytesFromBuffer(uint64(vectorLength) * 4) 159 for i := 0; i < int(vectorLength); i++ { 160 bits := binary.LittleEndian.Uint32(vectorBytes[i*4 : (i+1)*4]) 161 ko.Object.Vector[i] = math.Float32frombits(bits) 162 } 163 } else { 164 rw.MoveBufferPositionForward(uint64(vectorLength) * 4) 165 ko.Object.Vector = nil 166 } 167 ko.Vector = ko.Object.Vector 168 169 classNameLen := rw.ReadUint16() 170 className := string(rw.ReadBytesFromBuffer(uint64(classNameLen))) 171 172 propLength := rw.ReadUint32() 173 var props []byte 174 if addProp.NoProps { 175 rw.MoveBufferPositionForward(uint64(propLength)) 176 } else { 177 props = rw.ReadBytesFromBuffer(uint64(propLength)) 178 } 179 180 var meta []byte 181 metaLength := rw.ReadUint32() 182 if addProp.Classification || len(addProp.ModuleParams) > 0 { 183 meta = rw.ReadBytesFromBuffer(uint64(metaLength)) 184 } else { 185 rw.MoveBufferPositionForward(uint64(metaLength)) 186 } 187 188 vectorWeightsLength := rw.ReadUint32() 189 vectorWeights := rw.ReadBytesFromBuffer(uint64(vectorWeightsLength)) 190 191 if len(addProp.Vectors) > 0 { 192 vectors, err := unmarshalTargetVectors(&rw) 193 if err != nil { 194 return nil, err 195 } 196 ko.Vectors = vectors 197 198 if vectors != nil { 199 ko.Object.Vectors = make(models.Vectors) 200 for vecName, vec := range vectors { 201 ko.Object.Vectors[vecName] = vec 202 } 203 } 204 } 205 206 // some object members need additional "enrichment". Only do this if necessary, ie if they are actually present 207 if len(props) > 0 || 208 len(meta) > 0 || 209 vectorWeightsLength > 0 && 210 !( // if the length is 4 and the encoded value is "null" (in ascii), vectorweights are not actually present 211 vectorWeightsLength == 4 && 212 vectorWeights[0] == 110 && // n 213 vectorWeights[1] == 117 && // u 214 vectorWeights[2] == 108 && // l 215 vectorWeights[3] == 108) { // l 216 217 if err := ko.parseObject( 218 uuidParsed, 219 createTime, 220 updateTime, 221 className, 222 props, 223 meta, 224 vectorWeights, 225 ); err != nil { 226 return nil, errors.Wrap(err, "parse") 227 } 228 } else { 229 ko.Object.ID = uuidParsed 230 ko.Object.CreationTimeUnix = createTime 231 ko.Object.LastUpdateTimeUnix = updateTime 232 ko.Object.Class = className 233 } 234 235 return ko, nil 236 } 237 238 type bucket interface { 239 GetBySecondary(int, []byte) ([]byte, error) 240 GetBySecondaryWithBuffer(int, []byte, []byte) ([]byte, []byte, error) 241 } 242 243 func ObjectsByDocID(bucket bucket, ids []uint64, 244 additional additional.Properties, 245 ) ([]*Object, error) { 246 if bucket == nil { 247 return nil, fmt.Errorf("objects bucket not found") 248 } 249 250 var ( 251 docIDBuf = make([]byte, 8) 252 out = make([]*Object, len(ids)) 253 i = 0 254 lsmBuf = bufPool.Get() 255 ) 256 257 defer func() { 258 bufPool.Put(lsmBuf) 259 }() 260 261 for _, id := range ids { 262 binary.LittleEndian.PutUint64(docIDBuf, id) 263 res, newBuf, err := bucket.GetBySecondaryWithBuffer(0, docIDBuf, lsmBuf) 264 if err != nil { 265 return nil, err 266 } 267 268 lsmBuf = newBuf // may have changed, e.g. because it was grown 269 270 if res == nil { 271 continue 272 } 273 274 unmarshalled, err := FromBinaryOptional(res, additional) 275 if err != nil { 276 return nil, errors.Wrapf(err, "unmarshal data object at position %d", i) 277 } 278 279 out[i] = unmarshalled 280 i++ 281 } 282 283 return out[:i], nil 284 } 285 286 func (ko *Object) Class() schema.ClassName { 287 return schema.ClassName(ko.Object.Class) 288 } 289 290 func (ko *Object) SetDocID(id uint64) { 291 ko.DocID = id 292 } 293 294 func (ko *Object) GetDocID() uint64 { 295 return ko.DocID 296 } 297 298 func (ko *Object) CreationTimeUnix() int64 { 299 return ko.Object.CreationTimeUnix 300 } 301 302 func (ko *Object) ExplainScore() string { 303 props := ko.AdditionalProperties() 304 if props != nil { 305 iface := props["explainScore"] 306 if iface != nil { 307 return iface.(string) 308 } 309 } 310 return "" 311 } 312 313 func (ko *Object) ID() strfmt.UUID { 314 return ko.Object.ID 315 } 316 317 func (ko *Object) SetID(id strfmt.UUID) { 318 ko.Object.ID = id 319 } 320 321 func (ko *Object) SetClass(class string) { 322 ko.Object.Class = class 323 } 324 325 func (ko *Object) LastUpdateTimeUnix() int64 { 326 return ko.Object.LastUpdateTimeUnix 327 } 328 329 // AdditionalProperties groups all properties which are stored with the 330 // object and not generated at runtime 331 func (ko *Object) AdditionalProperties() models.AdditionalProperties { 332 return ko.Object.Additional 333 } 334 335 func (ko *Object) Properties() models.PropertySchema { 336 return ko.Object.Properties 337 } 338 339 func (ko *Object) PropertiesWithAdditional( 340 additional additional.Properties, 341 ) models.PropertySchema { 342 properties := ko.Properties() 343 344 if additional.RefMeta { 345 // nothing to remove 346 return properties 347 } 348 349 asMap, ok := properties.(map[string]interface{}) 350 if !ok || asMap == nil { 351 return properties 352 } 353 354 for propName, value := range asMap { 355 asRefs, ok := value.(models.MultipleRef) 356 if !ok { 357 // not a ref, we can skip 358 continue 359 } 360 361 for i := range asRefs { 362 asRefs[i].Classification = nil 363 } 364 365 asMap[propName] = asRefs 366 } 367 368 return asMap 369 } 370 371 func (ko *Object) SetProperties(schema models.PropertySchema) { 372 ko.Object.Properties = schema 373 } 374 375 func (ko *Object) VectorWeights() models.VectorWeights { 376 return ko.Object.VectorWeights 377 } 378 379 func (ko *Object) SearchResult(additional additional.Properties, tenant string) *search.Result { 380 propertiesMap, ok := ko.PropertiesWithAdditional(additional).(map[string]interface{}) 381 if !ok || propertiesMap == nil { 382 propertiesMap = map[string]interface{}{} 383 } 384 propertiesMap["id"] = ko.ID() 385 ko.SetProperties(propertiesMap) 386 387 additionalProperties := models.AdditionalProperties{} 388 if ko.AdditionalProperties() != nil { 389 if interpretation, ok := additional.ModuleParams["interpretation"]; ok { 390 if interpretationValue, ok := interpretation.(bool); ok && interpretationValue { 391 additionalProperties["interpretation"] = ko.AdditionalProperties()["interpretation"] 392 } 393 } 394 if additional.Classification { 395 additionalProperties["classification"] = ko.AdditionalProperties()["classification"] 396 } 397 if additional.Group { 398 additionalProperties["group"] = ko.AdditionalProperties()["group"] 399 } 400 } 401 if ko.ExplainScore() != "" { 402 additionalProperties["explainScore"] = ko.ExplainScore() 403 } 404 405 return &search.Result{ 406 ID: ko.ID(), 407 DocID: &ko.DocID, 408 ClassName: ko.Class().String(), 409 Schema: ko.Properties(), 410 Vector: ko.Vector, 411 Vectors: ko.asVectors(ko.Vectors), 412 Dims: ko.VectorLen, 413 // VectorWeights: ko.VectorWeights(), // TODO: add vector weights 414 Created: ko.CreationTimeUnix(), 415 Updated: ko.LastUpdateTimeUnix(), 416 AdditionalProperties: additionalProperties, 417 // Score is filled in later 418 ExplainScore: ko.ExplainScore(), 419 IsConsistent: ko.IsConsistent, 420 Tenant: tenant, // not part of the binary 421 // TODO: Beacon? 422 } 423 } 424 425 func (ko *Object) asVectors(in map[string][]float32) models.Vectors { 426 if len(in) > 0 { 427 out := make(models.Vectors) 428 for targetVector, vector := range in { 429 out[targetVector] = vector 430 } 431 return out 432 } 433 return nil 434 } 435 436 func (ko *Object) SearchResultWithDist(addl additional.Properties, dist float32) search.Result { 437 res := ko.SearchResult(addl, "") 438 res.Dist = dist 439 res.Certainty = float32(additional.DistToCertainty(float64(dist))) 440 return *res 441 } 442 443 func (ko *Object) SearchResultWithScore(addl additional.Properties, score float32) search.Result { 444 res := ko.SearchResult(addl, "") 445 res.Score = score 446 return *res 447 } 448 449 func (ko *Object) SearchResultWithScoreAndTenant(addl additional.Properties, score float32, tenant string) search.Result { 450 res := ko.SearchResult(addl, tenant) 451 res.Score = score 452 return *res 453 } 454 455 func (ko *Object) Valid() bool { 456 return ko.ID() != "" && 457 ko.Class().String() != "" 458 } 459 460 func SearchResults(in []*Object, additional additional.Properties, tenant string) search.Results { 461 out := make(search.Results, len(in)) 462 463 for i, elem := range in { 464 out[i] = *(elem.SearchResult(additional, tenant)) 465 } 466 467 return out 468 } 469 470 func SearchResultsWithScore(in []*Object, scores []float32, additional additional.Properties, tenant string) search.Results { 471 out := make(search.Results, len(in)) 472 473 for i, elem := range in { 474 score := scores[i] 475 out[i] = elem.SearchResultWithScoreAndTenant(additional, score, tenant) 476 } 477 478 return out 479 } 480 481 func SearchResultsWithDists(in []*Object, addl additional.Properties, 482 dists []float32, 483 ) search.Results { 484 out := make(search.Results, len(in)) 485 486 for i, elem := range in { 487 out[i] = elem.SearchResultWithDist(addl, dists[i]) 488 } 489 490 return out 491 } 492 493 func DocIDFromBinary(in []byte) (uint64, error) { 494 var version uint8 495 r := bytes.NewReader(in) 496 le := binary.LittleEndian 497 if err := binary.Read(r, le, &version); err != nil { 498 return 0, err 499 } 500 501 if version != 1 { 502 return 0, errors.Errorf("unsupported binary marshaller version %d", version) 503 } 504 505 var docID uint64 506 err := binary.Read(r, le, &docID) 507 return docID, err 508 } 509 510 // MarshalBinary creates the binary representation of a kind object. Regardless 511 // of the marshaller version the first byte is a uint8 indicating the version 512 // followed by the payload which depends on the specific version 513 // 514 // Version 1 515 // No. of B | Type | Content 516 // ------------------------------------------------ 517 // 1 | uint8 | MarshallerVersion = 1 518 // 8 | uint64 | index id, keep early so id-only lookups are maximum efficient 519 // 1 | uint8 | kind, 0=action, 1=thing - deprecated 520 // 16 | uint128 | uuid 521 // 8 | int64 | create time 522 // 8 | int64 | update time 523 // 2 | uint16 | VectorLength 524 // n*4 | []float32 | vector of length n 525 // 2 | uint16 | length of class name 526 // n | []byte | className 527 // 4 | uint32 | length of schema json 528 // n | []byte | schema as json 529 // 4 | uint32 | length of meta json 530 // n | []byte | meta as json 531 // 4 | uint32 | length of vectorweights json 532 // n | []byte | vectorweights as json 533 // 4 | uint32 | length of packed target vectors offsets (in bytes) 534 // n | []byte | packed target vectors offsets map { name : offset_in_bytes } 535 // 4 | uint32 | length of target vectors segment (in bytes) 536 // n | uint16+[]byte | target vectors segment: sequence of vec_length + vec (uint16 + []byte), (uint16 + []byte) ... 537 538 func (ko *Object) MarshalBinary() ([]byte, error) { 539 if ko.MarshallerVersion != 1 { 540 return nil, errors.Errorf("unsupported marshaller version %d", ko.MarshallerVersion) 541 } 542 543 kindByte := uint8(0) 544 // Deprecated Kind field 545 kindByte = 1 546 547 idParsed, err := uuid.Parse(ko.ID().String()) 548 if err != nil { 549 return nil, err 550 } 551 idBytes, err := idParsed.MarshalBinary() 552 if err != nil { 553 return nil, err 554 } 555 vectorLength := uint32(len(ko.Vector)) 556 className := []byte(ko.Class()) 557 classNameLength := uint32(len(className)) 558 schema, err := json.Marshal(ko.Properties()) 559 if err != nil { 560 return nil, err 561 } 562 schemaLength := uint32(len(schema)) 563 meta, err := json.Marshal(ko.AdditionalProperties()) 564 if err != nil { 565 return nil, err 566 } 567 metaLength := uint32(len(meta)) 568 vectorWeights, err := json.Marshal(ko.VectorWeights()) 569 if err != nil { 570 return nil, err 571 } 572 vectorWeightsLength := uint32(len(vectorWeights)) 573 574 var targetVectorsOffsets []byte 575 targetVectorsOffsetsLength := uint32(0) 576 targetVectorsSegmentLength := uint32(0) 577 578 targetVectorsOffsetOrder := make([]string, 0, len(ko.Vectors)) 579 if len(ko.Vectors) > 0 { 580 offsetsMap := map[string]uint32{} 581 for name, vec := range ko.Vectors { 582 offsetsMap[name] = targetVectorsSegmentLength 583 targetVectorsSegmentLength += 2 + 4*uint32(len(vec)) // 2 for vec length + vec bytes 584 targetVectorsOffsetOrder = append(targetVectorsOffsetOrder, name) 585 } 586 587 targetVectorsOffsets, err = msgpack.Marshal(offsetsMap) 588 if err != nil { 589 return nil, fmt.Errorf("Could not marshal target vectors offsets: %w", err) 590 } 591 targetVectorsOffsetsLength = uint32(len(targetVectorsOffsets)) 592 } 593 594 totalBufferLength := 1 + 8 + 1 + 16 + 8 + 8 + 595 2 + vectorLength*4 + 596 2 + classNameLength + 597 4 + schemaLength + 598 4 + metaLength + 599 4 + vectorWeightsLength + 600 4 + targetVectorsOffsetsLength + 601 4 + targetVectorsSegmentLength 602 603 byteBuffer := make([]byte, totalBufferLength) 604 rw := byteops.NewReadWriter(byteBuffer) 605 rw.WriteByte(ko.MarshallerVersion) 606 rw.WriteUint64(ko.DocID) 607 rw.WriteByte(kindByte) 608 609 rw.CopyBytesToBuffer(idBytes) 610 611 rw.WriteUint64(uint64(ko.CreationTimeUnix())) 612 rw.WriteUint64(uint64(ko.LastUpdateTimeUnix())) 613 rw.WriteUint16(uint16(vectorLength)) 614 615 for j := uint32(0); j < vectorLength; j++ { 616 rw.WriteUint32(math.Float32bits(ko.Vector[j])) 617 } 618 619 rw.WriteUint16(uint16(classNameLength)) 620 err = rw.CopyBytesToBuffer(className) 621 if err != nil { 622 return byteBuffer, errors.Wrap(err, "Could not copy className") 623 } 624 625 rw.WriteUint32(schemaLength) 626 err = rw.CopyBytesToBuffer(schema) 627 if err != nil { 628 return byteBuffer, errors.Wrap(err, "Could not copy schema") 629 } 630 631 rw.WriteUint32(metaLength) 632 err = rw.CopyBytesToBuffer(meta) 633 if err != nil { 634 return byteBuffer, errors.Wrap(err, "Could not copy meta") 635 } 636 637 rw.WriteUint32(vectorWeightsLength) 638 err = rw.CopyBytesToBuffer(vectorWeights) 639 if err != nil { 640 return byteBuffer, errors.Wrap(err, "Could not copy vectorWeights") 641 } 642 643 rw.WriteUint32(targetVectorsOffsetsLength) 644 if targetVectorsOffsetsLength > 0 { 645 err = rw.CopyBytesToBuffer(targetVectorsOffsets) 646 if err != nil { 647 return byteBuffer, errors.Wrap(err, "Could not copy targetVectorsOffsets") 648 } 649 } 650 651 rw.WriteUint32(targetVectorsSegmentLength) 652 for _, name := range targetVectorsOffsetOrder { 653 vec := ko.Vectors[name] 654 vecLen := len(vec) 655 656 rw.WriteUint16(uint16(vecLen)) 657 for j := 0; j < vecLen; j++ { 658 rw.WriteUint32(math.Float32bits(vec[j])) 659 } 660 } 661 662 return byteBuffer, nil 663 } 664 665 // UnmarshalPropertiesFromObject only unmarshals and returns the properties part of the object 666 // 667 // Check MarshalBinary for the order of elements in the input array 668 func UnmarshalPropertiesFromObject(data []byte, properties *map[string]interface{}, aggregationProperties []string, propStrings [][]string) error { 669 if data[0] != uint8(1) { 670 return errors.Errorf("unsupported binary marshaller version %d", data[0]) 671 } 672 673 // clear out old values in case an object misses values. This should NOT shrink the capacity of the map, eg there 674 // are no allocations when adding the properties of the next object again 675 for k := range *properties { 676 delete(*properties, k) 677 } 678 679 startPos := uint64(1 + 8 + 1 + 16 + 8 + 8) // elements at the start 680 rw := byteops.NewReadWriter(data, byteops.WithPosition(startPos)) 681 // get the length of the vector, each element is a float32 (4 bytes) 682 vectorLength := uint64(rw.ReadUint16()) 683 rw.MoveBufferPositionForward(vectorLength * 4) 684 685 classnameLength := uint64(rw.ReadUint16()) 686 rw.MoveBufferPositionForward(classnameLength) 687 propertyLength := uint64(rw.ReadUint32()) 688 689 jsonparser.EachKey(data[rw.Position:rw.Position+propertyLength], func(idx int, value []byte, dataType jsonparser.ValueType, err error) { 690 var errParse error 691 switch dataType { 692 case jsonparser.Number, jsonparser.String, jsonparser.Boolean: 693 val, err := parseValues(dataType, value) 694 errParse = err 695 (*properties)[aggregationProperties[idx]] = val 696 case jsonparser.Array: // can be a beacon or an actual array 697 arrayEntries := value[1 : len(value)-1] // without leading and trailing [] 698 beaconVal, errBeacon := jsonparser.GetUnsafeString(arrayEntries, "beacon") 699 if errBeacon == nil { 700 (*properties)[aggregationProperties[idx]] = []interface{}{map[string]interface{}{"beacon": beaconVal}} 701 } else { 702 // check how many entries there are in the array by counting the ",". This allows us to allocate an 703 // array with the right size without extending it with every append. 704 // The size can be too large for string arrays, when they contain "," as part of their content. 705 entryCount := 0 706 for _, b := range arrayEntries { 707 if b == uint8(44) { // ',' as byte 708 entryCount++ 709 } 710 } 711 712 array := make([]interface{}, 0, entryCount) 713 jsonparser.ArrayEach(value, func(innerValue []byte, innerDataType jsonparser.ValueType, offset int, innerErr error) { 714 var val interface{} 715 716 switch innerDataType { 717 case jsonparser.Number, jsonparser.String, jsonparser.Boolean: 718 val, errParse = parseValues(innerDataType, innerValue) 719 default: 720 panic("Unknown data type ArrayEach") // returning an error would be better 721 } 722 array = append(array, val) 723 }) 724 (*properties)[aggregationProperties[idx]] = array 725 726 } 727 default: 728 panic("Unknown data type EachKey") // returning an error would be better 729 } 730 if errParse != nil { 731 panic(errParse) 732 } 733 }, propStrings...) 734 735 return nil 736 } 737 738 func parseValues(dt jsonparser.ValueType, value []byte) (interface{}, error) { 739 switch dt { 740 case jsonparser.Number: 741 return jsonparser.ParseFloat(value) 742 case jsonparser.String: 743 return jsonparser.ParseString(value) 744 case jsonparser.Boolean: 745 return jsonparser.ParseBoolean(value) 746 default: 747 panic("Unknown data type") // returning an error would be better 748 } 749 } 750 751 // UnmarshalBinary is the versioned way to unmarshal a kind object from binary, 752 // see MarshalBinary for the exact contents of each version 753 func (ko *Object) UnmarshalBinary(data []byte) error { 754 version := data[0] 755 if version != 1 { 756 return errors.Errorf("unsupported binary marshaller version %d", version) 757 } 758 ko.MarshallerVersion = version 759 760 rw := byteops.NewReadWriter(data, byteops.WithPosition(1)) 761 ko.DocID = rw.ReadUint64() 762 rw.MoveBufferPositionForward(1) // kind-byte 763 764 uuidParsed, err := uuid.FromBytes(data[rw.Position : rw.Position+16]) 765 if err != nil { 766 return err 767 } 768 rw.MoveBufferPositionForward(16) 769 770 createTime := int64(rw.ReadUint64()) 771 updateTime := int64(rw.ReadUint64()) 772 773 vectorLength := rw.ReadUint16() 774 ko.VectorLen = int(vectorLength) 775 ko.Vector = make([]float32, vectorLength) 776 for j := 0; j < int(vectorLength); j++ { 777 ko.Vector[j] = math.Float32frombits(rw.ReadUint32()) 778 } 779 780 classNameLength := uint64(rw.ReadUint16()) 781 className, err := rw.CopyBytesFromBuffer(classNameLength, nil) 782 if err != nil { 783 return errors.Wrap(err, "Could not copy class name") 784 } 785 786 schemaLength := uint64(rw.ReadUint32()) 787 schema, err := rw.CopyBytesFromBuffer(schemaLength, nil) 788 if err != nil { 789 return errors.Wrap(err, "Could not copy schema") 790 } 791 792 metaLength := uint64(rw.ReadUint32()) 793 meta, err := rw.CopyBytesFromBuffer(metaLength, nil) 794 if err != nil { 795 return errors.Wrap(err, "Could not copy meta") 796 } 797 798 vectorWeightsLength := uint64(rw.ReadUint32()) 799 vectorWeights, err := rw.CopyBytesFromBuffer(vectorWeightsLength, nil) 800 if err != nil { 801 return errors.Wrap(err, "Could not copy vectorWeights") 802 } 803 804 vectors, err := unmarshalTargetVectors(&rw) 805 if err != nil { 806 return err 807 } 808 ko.Vectors = vectors 809 810 return ko.parseObject( 811 strfmt.UUID(uuidParsed.String()), 812 createTime, 813 updateTime, 814 string(className), 815 schema, 816 meta, 817 vectorWeights, 818 ) 819 } 820 821 func unmarshalTargetVectors(rw *byteops.ReadWriter) (map[string][]float32, error) { 822 // This check prevents from panic when somebody is upgrading from version that 823 // didn't have multi vector support. This check is needed bc with named vectors 824 // feature storage object can have vectors data appended at the end of the file 825 if rw.Position < uint64(len(rw.Buffer)) { 826 targetVectorsOffsets := rw.ReadBytesFromBufferWithUint32LengthIndicator() 827 targetVectorsSegmentLength := rw.ReadUint32() 828 pos := rw.Position 829 830 if len(targetVectorsOffsets) > 0 { 831 var tvOffsets map[string]uint32 832 if err := msgpack.Unmarshal(targetVectorsOffsets, &tvOffsets); err != nil { 833 return nil, fmt.Errorf("Could not unmarshal target vectors offset: %w", err) 834 } 835 836 targetVectors := map[string][]float32{} 837 for name, offset := range tvOffsets { 838 rw.MoveBufferToAbsolutePosition(pos + uint64(offset)) 839 vecLen := rw.ReadUint16() 840 vec := make([]float32, vecLen) 841 for j := uint16(0); j < vecLen; j++ { 842 vec[j] = math.Float32frombits(rw.ReadUint32()) 843 } 844 targetVectors[name] = vec 845 } 846 847 rw.MoveBufferToAbsolutePosition(pos + uint64(targetVectorsSegmentLength)) 848 return targetVectors, nil 849 } 850 } 851 return nil, nil 852 } 853 854 func VectorFromBinary(in []byte, buffer []float32) ([]float32, error) { 855 if len(in) == 0 { 856 return nil, nil 857 } 858 859 version := in[0] 860 if version != 1 { 861 return nil, errors.Errorf("unsupported marshaller version %d", version) 862 } 863 864 // since we know the version and know that the blob is not len(0), we can 865 // assume that we can directly access the vector length field. The only 866 // situation where this is not accessible would be on corrupted data - where 867 // it would be acceptable to panic 868 vecLen := binary.LittleEndian.Uint16(in[42:44]) 869 870 var out []float32 871 if cap(buffer) >= int(vecLen) { 872 out = buffer[:vecLen] 873 } else { 874 out = make([]float32, vecLen) 875 } 876 vecStart := 44 877 vecEnd := vecStart + int(vecLen*4) 878 879 i := 0 880 for start := vecStart; start < vecEnd; start += 4 { 881 asUint := binary.LittleEndian.Uint32(in[start : start+4]) 882 out[i] = math.Float32frombits(asUint) 883 i++ 884 } 885 886 return out, nil 887 } 888 889 func (ko *Object) parseObject(uuid strfmt.UUID, create, update int64, className string, 890 propsB []byte, additionalB []byte, vectorWeightsB []byte, 891 ) error { 892 var props map[string]interface{} 893 if err := json.Unmarshal(propsB, &props); err != nil { 894 return err 895 } 896 897 if err := enrichSchemaTypes(props, false); err != nil { 898 return errors.Wrap(err, "enrich schema datatypes") 899 } 900 901 var additionalProperties models.AdditionalProperties 902 if len(additionalB) > 0 { 903 if err := json.Unmarshal(additionalB, &additionalProperties); err != nil { 904 return err 905 } 906 907 if prop, ok := additionalProperties["classification"]; ok { 908 if classificationMap, ok := prop.(map[string]interface{}); ok { 909 marshalled, err := json.Marshal(classificationMap) 910 if err != nil { 911 return err 912 } 913 var classification additional.Classification 914 err = json.Unmarshal(marshalled, &classification) 915 if err != nil { 916 return err 917 } 918 additionalProperties["classification"] = &classification 919 } 920 } 921 922 if prop, ok := additionalProperties["group"]; ok { 923 if groupMap, ok := prop.(map[string]interface{}); ok { 924 marshalled, err := json.Marshal(groupMap) 925 if err != nil { 926 return err 927 } 928 var group additional.Group 929 err = json.Unmarshal(marshalled, &group) 930 if err != nil { 931 return err 932 } 933 934 for i, hit := range group.Hits { 935 if groupHitAdditionalMap, ok := hit["_additional"].(map[string]interface{}); ok { 936 marshalled, err := json.Marshal(groupHitAdditionalMap) 937 if err != nil { 938 return err 939 } 940 var groupHitsAdditional additional.GroupHitAdditional 941 err = json.Unmarshal(marshalled, &groupHitsAdditional) 942 if err != nil { 943 return err 944 } 945 group.Hits[i]["_additional"] = &groupHitsAdditional 946 } 947 } 948 949 additionalProperties["group"] = &group 950 } 951 } 952 } 953 954 var vectorWeights interface{} 955 if err := json.Unmarshal(vectorWeightsB, &vectorWeights); err != nil { 956 return err 957 } 958 959 ko.Object = models.Object{ 960 Class: className, 961 CreationTimeUnix: create, 962 LastUpdateTimeUnix: update, 963 ID: uuid, 964 Properties: props, 965 VectorWeights: vectorWeights, 966 Additional: additionalProperties, 967 } 968 969 return nil 970 } 971 972 // DeepCopyDangerous creates a deep copy of the underlying Object 973 // WARNING: This was purpose built for the batch ref usecase and only covers 974 // the situations that are required there. This means that cases which aren't 975 // reflected in that usecase may still contain references. Thus the suffix 976 // "Dangerous". If needed, make sure everything is copied and remove the 977 // suffix. 978 func (ko *Object) DeepCopyDangerous() *Object { 979 o := &Object{ 980 MarshallerVersion: ko.MarshallerVersion, 981 DocID: ko.DocID, 982 Object: deepCopyObject(ko.Object), 983 Vector: deepCopyVector(ko.Vector), 984 Vectors: deepCopyVectors(ko.Vectors), 985 } 986 987 return o 988 } 989 990 func AddOwnership(objs []*Object, node, shard string) { 991 for i := range objs { 992 objs[i].BelongsToNode = node 993 objs[i].BelongsToShard = shard 994 } 995 } 996 997 func deepCopyVector(orig []float32) []float32 { 998 out := make([]float32, len(orig)) 999 copy(out, orig) 1000 return out 1001 } 1002 1003 func deepCopyVectors[V []float32 | models.Vector](orig map[string]V) map[string]V { 1004 out := make(map[string]V, len(orig)) 1005 for key, vec := range orig { 1006 out[key] = deepCopyVector(vec) 1007 } 1008 return out 1009 } 1010 1011 func deepCopyObject(orig models.Object) models.Object { 1012 return models.Object{ 1013 Class: orig.Class, 1014 ID: orig.ID, 1015 CreationTimeUnix: orig.CreationTimeUnix, 1016 LastUpdateTimeUnix: orig.LastUpdateTimeUnix, 1017 Vector: deepCopyVector(orig.Vector), 1018 VectorWeights: orig.VectorWeights, 1019 Additional: orig.Additional, // WARNING: not a deep copy!! 1020 Properties: deepCopyProperties(orig.Properties), 1021 Vectors: deepCopyVectors(orig.Vectors), 1022 } 1023 } 1024 1025 func deepCopyProperties(orig models.PropertySchema) models.PropertySchema { 1026 if orig == nil { 1027 return nil 1028 } 1029 1030 asMap, ok := orig.(map[string]interface{}) 1031 if !ok { 1032 // not a map, don't know what to do with this 1033 return nil 1034 } 1035 1036 out := map[string]interface{}{} 1037 1038 for key, value := range asMap { 1039 if mref, ok := value.(models.MultipleRef); ok { 1040 out[key] = deepCopyMRef(mref) 1041 continue 1042 } 1043 1044 // Note: This is not a true deep copy, value could still be a pointer type, 1045 // such as *models.GeoCoordinates, thus leading to passing a reference 1046 // instead of actually making a copy. However, for the purposes we need 1047 // this method for this is acceptable based on our current knowledge 1048 out[key] = value 1049 } 1050 1051 return out 1052 } 1053 1054 func deepCopyMRef(orig models.MultipleRef) models.MultipleRef { 1055 if orig == nil { 1056 return nil 1057 } 1058 1059 out := make(models.MultipleRef, len(orig)) 1060 for i, ref := range orig { 1061 // models.SingleRef contains only pass-by-value props, so a simple deref as 1062 // the struct creates a copy 1063 copiedRef := *ref 1064 out[i] = &copiedRef 1065 } 1066 1067 return out 1068 }