github.com/milvus-io/milvus-sdk-go/v2@v2.4.1/entity/columns.go (about) 1 // Copyright (C) 2019-2021 Zilliz. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance 4 // with the License. You may obtain a copy of the License at 5 // 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software distributed under the License 9 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 10 // or implied. See the License for the specific language governing permissions and limitations under the License. 11 12 package entity 13 14 import ( 15 "encoding/binary" 16 "fmt" 17 "math" 18 19 "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" 20 21 "github.com/cockroachdb/errors" 22 ) 23 24 //go:generate go run gen/gen.go 25 26 // Column interface field type for column-based data frame 27 type Column interface { 28 Name() string 29 Type() FieldType 30 Len() int 31 Slice(int, int) Column 32 FieldData() *schemapb.FieldData 33 AppendValue(interface{}) error 34 Get(int) (interface{}, error) 35 GetAsInt64(int) (int64, error) 36 GetAsString(int) (string, error) 37 GetAsDouble(int) (float64, error) 38 GetAsBool(int) (bool, error) 39 } 40 41 // ColumnBase adds conversion methods support for fixed-type columns. 42 type ColumnBase struct{} 43 44 func (b ColumnBase) GetAsInt64(_ int) (int64, error) { 45 return 0, errors.New("conversion between fixed-type column not support") 46 } 47 48 func (b ColumnBase) GetAsString(_ int) (string, error) { 49 return "", errors.New("conversion between fixed-type column not support") 50 } 51 52 func (b ColumnBase) GetAsDouble(_ int) (float64, error) { 53 return 0, errors.New("conversion between fixed-type column not support") 54 } 55 56 func (b ColumnBase) GetAsBool(_ int) (bool, error) { 57 return false, errors.New("conversion between fixed-type column not support") 58 } 59 60 // Vector interface vector used int search 61 type Vector interface { 62 Dim() int 63 Serialize() []byte 64 FieldType() FieldType 65 } 66 67 // FloatVector float32 vector wrapper. 68 type FloatVector []float32 69 70 // Dim returns vector dimension. 71 func (fv FloatVector) Dim() int { 72 return len(fv) 73 } 74 75 // FieldType returns coresponding field type. 76 func (fv FloatVector) FieldType() FieldType { 77 return FieldTypeFloatVector 78 } 79 80 // Serialize serializes vector into byte slice, used in search placeholder 81 // LittleEndian is used for convention 82 func (fv FloatVector) Serialize() []byte { 83 data := make([]byte, 0, 4*len(fv)) // float32 occupies 4 bytes 84 buf := make([]byte, 4) 85 for _, f := range fv { 86 binary.LittleEndian.PutUint32(buf, math.Float32bits(f)) 87 data = append(data, buf...) 88 } 89 return data 90 } 91 92 // FloatVector float32 vector wrapper. 93 type Float16Vector []byte 94 95 // Dim returns vector dimension. 96 func (fv Float16Vector) Dim() int { 97 return len(fv) / 2 98 } 99 100 // FieldType returns coresponding field type. 101 func (fv Float16Vector) FieldType() FieldType { 102 return FieldTypeFloat16Vector 103 } 104 105 func (fv Float16Vector) Serialize() []byte { 106 return fv 107 } 108 109 // FloatVector float32 vector wrapper. 110 type BFloat16Vector []byte 111 112 // Dim returns vector dimension. 113 func (fv BFloat16Vector) Dim() int { 114 return len(fv) / 2 115 } 116 117 // FieldType returns coresponding field type. 118 func (fv BFloat16Vector) FieldType() FieldType { 119 return FieldTypeBFloat16Vector 120 } 121 122 func (fv BFloat16Vector) Serialize() []byte { 123 return fv 124 } 125 126 // BinaryVector []byte vector wrapper 127 type BinaryVector []byte 128 129 // Dim return vector dimension, note that binary vector is bits count 130 func (bv BinaryVector) Dim() int { 131 return 8 * len(bv) 132 } 133 134 // Serialize just return bytes 135 func (bv BinaryVector) Serialize() []byte { 136 return bv 137 } 138 139 // FieldType returns coresponding field type. 140 func (bv BinaryVector) FieldType() FieldType { 141 return FieldTypeBinaryVector 142 } 143 144 var errFieldDataTypeNotMatch = errors.New("FieldData type not matched") 145 146 // IDColumns converts schemapb.IDs to corresponding column 147 // currently Int64 / string may be in IDs 148 func IDColumns(schema *Schema, idField *schemapb.IDs, begin, end int) (Column, error) { 149 var idColumn Column 150 151 pkField := schema.PKField() 152 if pkField == nil { 153 return nil, errors.New("PK Field not found") 154 } 155 switch pkField.DataType { 156 case FieldTypeInt64: 157 data := idField.GetIntId().GetData() 158 if data == nil { 159 return NewColumnInt64(pkField.Name, nil), nil 160 } 161 if end >= 0 { 162 idColumn = NewColumnInt64(pkField.Name, data[begin:end]) 163 } else { 164 idColumn = NewColumnInt64(pkField.Name, data[begin:]) 165 } 166 case FieldTypeVarChar, FieldTypeString: 167 data := idField.GetStrId().GetData() 168 if data == nil { 169 return NewColumnVarChar(pkField.Name, nil), nil 170 } 171 if end >= 0 { 172 idColumn = NewColumnVarChar(pkField.Name, data[begin:end]) 173 } else { 174 idColumn = NewColumnVarChar(pkField.Name, data[begin:]) 175 } 176 default: 177 return nil, fmt.Errorf("unsupported id type %v", pkField.DataType) 178 } 179 if idField == nil { 180 return nil, errors.New("nil Ids from response") 181 } 182 return idColumn, nil 183 } 184 185 // FieldDataColumn converts schemapb.FieldData to Column, used int search result conversion logic 186 // begin, end specifies the start and end positions 187 func FieldDataColumn(fd *schemapb.FieldData, begin, end int) (Column, error) { 188 switch fd.GetType() { 189 case schemapb.DataType_Bool: 190 data, ok := fd.GetScalars().GetData().(*schemapb.ScalarField_BoolData) 191 if !ok { 192 return nil, errFieldDataTypeNotMatch 193 } 194 if end < 0 { 195 return NewColumnBool(fd.GetFieldName(), data.BoolData.GetData()[begin:]), nil 196 } 197 return NewColumnBool(fd.GetFieldName(), data.BoolData.GetData()[begin:end]), nil 198 199 case schemapb.DataType_Int8: 200 data, ok := getIntData(fd) 201 if !ok { 202 return nil, errFieldDataTypeNotMatch 203 } 204 values := make([]int8, 0, len(data.IntData.GetData())) 205 for _, v := range data.IntData.GetData() { 206 values = append(values, int8(v)) 207 } 208 209 if end < 0 { 210 return NewColumnInt8(fd.GetFieldName(), values[begin:]), nil 211 } 212 213 return NewColumnInt8(fd.GetFieldName(), values[begin:end]), nil 214 215 case schemapb.DataType_Int16: 216 data, ok := getIntData(fd) 217 if !ok { 218 return nil, errFieldDataTypeNotMatch 219 } 220 values := make([]int16, 0, len(data.IntData.GetData())) 221 for _, v := range data.IntData.GetData() { 222 values = append(values, int16(v)) 223 } 224 if end < 0 { 225 return NewColumnInt16(fd.GetFieldName(), values[begin:]), nil 226 } 227 228 return NewColumnInt16(fd.GetFieldName(), values[begin:end]), nil 229 230 case schemapb.DataType_Int32: 231 data, ok := getIntData(fd) 232 if !ok { 233 return nil, errFieldDataTypeNotMatch 234 } 235 if end < 0 { 236 return NewColumnInt32(fd.GetFieldName(), data.IntData.GetData()[begin:]), nil 237 } 238 return NewColumnInt32(fd.GetFieldName(), data.IntData.GetData()[begin:end]), nil 239 240 case schemapb.DataType_Int64: 241 data, ok := fd.GetScalars().GetData().(*schemapb.ScalarField_LongData) 242 if !ok { 243 return nil, errFieldDataTypeNotMatch 244 } 245 if end < 0 { 246 return NewColumnInt64(fd.GetFieldName(), data.LongData.GetData()[begin:]), nil 247 } 248 return NewColumnInt64(fd.GetFieldName(), data.LongData.GetData()[begin:end]), nil 249 250 case schemapb.DataType_Float: 251 data, ok := fd.GetScalars().GetData().(*schemapb.ScalarField_FloatData) 252 if !ok { 253 return nil, errFieldDataTypeNotMatch 254 } 255 if end < 0 { 256 return NewColumnFloat(fd.GetFieldName(), data.FloatData.GetData()[begin:]), nil 257 } 258 return NewColumnFloat(fd.GetFieldName(), data.FloatData.GetData()[begin:end]), nil 259 260 case schemapb.DataType_Double: 261 data, ok := fd.GetScalars().GetData().(*schemapb.ScalarField_DoubleData) 262 if !ok { 263 return nil, errFieldDataTypeNotMatch 264 } 265 if end < 0 { 266 return NewColumnDouble(fd.GetFieldName(), data.DoubleData.GetData()[begin:]), nil 267 } 268 return NewColumnDouble(fd.GetFieldName(), data.DoubleData.GetData()[begin:end]), nil 269 270 case schemapb.DataType_String: 271 data, ok := fd.GetScalars().GetData().(*schemapb.ScalarField_StringData) 272 if !ok { 273 return nil, errFieldDataTypeNotMatch 274 } 275 if end < 0 { 276 return NewColumnString(fd.GetFieldName(), data.StringData.GetData()[begin:]), nil 277 } 278 return NewColumnString(fd.GetFieldName(), data.StringData.GetData()[begin:end]), nil 279 280 case schemapb.DataType_VarChar: 281 data, ok := fd.GetScalars().GetData().(*schemapb.ScalarField_StringData) 282 if !ok { 283 return nil, errFieldDataTypeNotMatch 284 } 285 if end < 0 { 286 return NewColumnVarChar(fd.GetFieldName(), data.StringData.GetData()[begin:]), nil 287 } 288 return NewColumnVarChar(fd.GetFieldName(), data.StringData.GetData()[begin:end]), nil 289 290 case schemapb.DataType_Array: 291 data := fd.GetScalars().GetArrayData() 292 if data == nil { 293 return nil, errFieldDataTypeNotMatch 294 } 295 var arrayData []*schemapb.ScalarField 296 if end < 0 { 297 arrayData = data.GetData()[begin:] 298 } else { 299 arrayData = data.GetData()[begin:end] 300 } 301 302 return parseArrayData(fd.GetFieldName(), data.GetElementType(), arrayData) 303 304 case schemapb.DataType_JSON: 305 data, ok := fd.GetScalars().GetData().(*schemapb.ScalarField_JsonData) 306 isDynamic := fd.GetIsDynamic() 307 if !ok { 308 return nil, errFieldDataTypeNotMatch 309 } 310 if end < 0 { 311 return NewColumnJSONBytes(fd.GetFieldName(), data.JsonData.GetData()[begin:]).WithIsDynamic(isDynamic), nil 312 } 313 return NewColumnJSONBytes(fd.GetFieldName(), data.JsonData.GetData()[begin:end]).WithIsDynamic(isDynamic), nil 314 315 case schemapb.DataType_FloatVector: 316 vectors := fd.GetVectors() 317 x, ok := vectors.GetData().(*schemapb.VectorField_FloatVector) 318 if !ok { 319 return nil, errFieldDataTypeNotMatch 320 } 321 data := x.FloatVector.GetData() 322 dim := int(vectors.GetDim()) 323 if end < 0 { 324 end = int(len(data) / dim) 325 } 326 vector := make([][]float32, 0, end-begin) // shall not have remanunt 327 for i := begin; i < end; i++ { 328 v := make([]float32, dim) 329 copy(v, data[i*dim:(i+1)*dim]) 330 vector = append(vector, v) 331 } 332 return NewColumnFloatVector(fd.GetFieldName(), dim, vector), nil 333 334 case schemapb.DataType_BinaryVector: 335 vectors := fd.GetVectors() 336 x, ok := vectors.GetData().(*schemapb.VectorField_BinaryVector) 337 if !ok { 338 return nil, errFieldDataTypeNotMatch 339 } 340 data := x.BinaryVector 341 if data == nil { 342 return nil, errFieldDataTypeNotMatch 343 } 344 dim := int(vectors.GetDim()) 345 blen := dim / 8 346 if end < 0 { 347 end = int(len(data) / blen) 348 } 349 vector := make([][]byte, 0, end-begin) 350 for i := begin; i < end; i++ { 351 v := make([]byte, blen) 352 copy(v, data[i*blen:(i+1)*blen]) 353 vector = append(vector, v) 354 } 355 return NewColumnBinaryVector(fd.GetFieldName(), dim, vector), nil 356 357 case schemapb.DataType_Float16Vector: 358 vectors := fd.GetVectors() 359 x, ok := vectors.GetData().(*schemapb.VectorField_Float16Vector) 360 if !ok { 361 return nil, errFieldDataTypeNotMatch 362 } 363 data := x.Float16Vector 364 dim := int(vectors.GetDim()) 365 if end < 0 { 366 end = int(len(data) / dim / 2) 367 } 368 vector := make([][]byte, 0, end-begin) 369 for i := begin; i < end; i++ { 370 v := make([]byte, dim*2) 371 copy(v, data[i*dim*2:(i+1)*dim*2]) 372 vector = append(vector, v) 373 } 374 return NewColumnFloat16Vector(fd.GetFieldName(), dim, vector), nil 375 376 case schemapb.DataType_BFloat16Vector: 377 vectors := fd.GetVectors() 378 x, ok := vectors.GetData().(*schemapb.VectorField_Bfloat16Vector) 379 if !ok { 380 return nil, errFieldDataTypeNotMatch 381 } 382 data := x.Bfloat16Vector 383 dim := int(vectors.GetDim()) 384 385 if end < 0 { 386 end = int(len(data) / dim / 2) 387 } 388 vector := make([][]byte, 0, end-begin) // shall not have remanunt 389 for i := begin; i < end; i++ { 390 v := make([]byte, dim*2) 391 copy(v, data[i*dim*2:(i+1)*dim*2]) 392 vector = append(vector, v) 393 } 394 return NewColumnBFloat16Vector(fd.GetFieldName(), dim, vector), nil 395 case schemapb.DataType_SparseFloatVector: 396 sparseVectors := fd.GetVectors().GetSparseFloatVector() 397 if sparseVectors == nil { 398 return nil, errFieldDataTypeNotMatch 399 } 400 data := sparseVectors.Contents 401 if end < 0 { 402 end = len(data) 403 } 404 data = data[begin:end] 405 vectors := make([]SparseEmbedding, 0, len(data)) 406 for _, bs := range data { 407 vector, err := deserializeSliceSparceEmbedding(bs) 408 if err != nil { 409 return nil, err 410 } 411 vectors = append(vectors, vector) 412 } 413 return NewColumnSparseVectors(fd.GetFieldName(), vectors), nil 414 default: 415 return nil, fmt.Errorf("unsupported data type %s", fd.GetType()) 416 } 417 } 418 419 func parseArrayData(fieldName string, elementType schemapb.DataType, fieldDataList []*schemapb.ScalarField) (Column, error) { 420 421 switch elementType { 422 case schemapb.DataType_Bool: 423 var data [][]bool 424 for _, fd := range fieldDataList { 425 data = append(data, fd.GetBoolData().GetData()) 426 } 427 return NewColumnBoolArray(fieldName, data), nil 428 429 case schemapb.DataType_Int8: 430 var data [][]int8 431 for _, fd := range fieldDataList { 432 raw := fd.GetIntData().GetData() 433 row := make([]int8, 0, len(raw)) 434 for _, item := range raw { 435 row = append(row, int8(item)) 436 } 437 data = append(data, row) 438 } 439 return NewColumnInt8Array(fieldName, data), nil 440 441 case schemapb.DataType_Int16: 442 var data [][]int16 443 for _, fd := range fieldDataList { 444 raw := fd.GetIntData().GetData() 445 row := make([]int16, 0, len(raw)) 446 for _, item := range raw { 447 row = append(row, int16(item)) 448 } 449 data = append(data, row) 450 } 451 return NewColumnInt16Array(fieldName, data), nil 452 453 case schemapb.DataType_Int32: 454 var data [][]int32 455 for _, fd := range fieldDataList { 456 data = append(data, fd.GetIntData().GetData()) 457 } 458 return NewColumnInt32Array(fieldName, data), nil 459 460 case schemapb.DataType_Int64: 461 var data [][]int64 462 for _, fd := range fieldDataList { 463 data = append(data, fd.GetLongData().GetData()) 464 } 465 return NewColumnInt64Array(fieldName, data), nil 466 467 case schemapb.DataType_Float: 468 var data [][]float32 469 for _, fd := range fieldDataList { 470 data = append(data, fd.GetFloatData().GetData()) 471 } 472 return NewColumnFloatArray(fieldName, data), nil 473 474 case schemapb.DataType_Double: 475 var data [][]float64 476 for _, fd := range fieldDataList { 477 data = append(data, fd.GetDoubleData().GetData()) 478 } 479 return NewColumnDoubleArray(fieldName, data), nil 480 481 case schemapb.DataType_VarChar, schemapb.DataType_String: 482 var data [][][]byte 483 for _, fd := range fieldDataList { 484 strs := fd.GetStringData().GetData() 485 bytesData := make([][]byte, 0, len(strs)) 486 for _, str := range strs { 487 bytesData = append(bytesData, []byte(str)) 488 } 489 data = append(data, bytesData) 490 } 491 492 return NewColumnVarCharArray(fieldName, data), nil 493 494 default: 495 return nil, fmt.Errorf("unsupported element type %s", elementType) 496 } 497 } 498 499 // getIntData get int32 slice from result field data 500 // also handles LongData bug (see also https://github.com/milvus-io/milvus/issues/23850) 501 func getIntData(fd *schemapb.FieldData) (*schemapb.ScalarField_IntData, bool) { 502 switch data := fd.GetScalars().GetData().(type) { 503 case *schemapb.ScalarField_IntData: 504 return data, true 505 case *schemapb.ScalarField_LongData: 506 // only alway empty LongData for backward compatibility 507 if len(data.LongData.GetData()) == 0 { 508 return &schemapb.ScalarField_IntData{ 509 IntData: &schemapb.IntArray{}, 510 }, true 511 } 512 return nil, false 513 default: 514 return nil, false 515 } 516 } 517 518 // FieldDataColumn converts schemapb.FieldData to vector Column 519 func FieldDataVector(fd *schemapb.FieldData) (Column, error) { 520 switch fd.GetType() { 521 case schemapb.DataType_FloatVector: 522 vectors := fd.GetVectors() 523 x, ok := vectors.GetData().(*schemapb.VectorField_FloatVector) 524 if !ok { 525 return nil, errFieldDataTypeNotMatch 526 } 527 data := x.FloatVector.GetData() 528 dim := int(vectors.GetDim()) 529 vector := make([][]float32, 0, len(data)/dim) // shall not have remanunt 530 for i := 0; i < len(data)/dim; i++ { 531 v := make([]float32, dim) 532 copy(v, data[i*dim:(i+1)*dim]) 533 vector = append(vector, v) 534 } 535 return NewColumnFloatVector(fd.GetFieldName(), dim, vector), nil 536 case schemapb.DataType_BinaryVector: 537 vectors := fd.GetVectors() 538 x, ok := vectors.GetData().(*schemapb.VectorField_BinaryVector) 539 if !ok { 540 return nil, errFieldDataTypeNotMatch 541 } 542 data := x.BinaryVector 543 if data == nil { 544 return nil, errFieldDataTypeNotMatch 545 } 546 dim := int(vectors.GetDim()) 547 blen := dim / 8 548 vector := make([][]byte, 0, len(data)/blen) 549 for i := 0; i < len(data)/blen; i++ { 550 v := make([]byte, blen) 551 copy(v, data[i*blen:(i+1)*blen]) 552 vector = append(vector, v) 553 } 554 return NewColumnBinaryVector(fd.GetFieldName(), dim, vector), nil 555 case schemapb.DataType_Float16Vector: 556 vectors := fd.GetVectors() 557 x, ok := vectors.GetData().(*schemapb.VectorField_Float16Vector) 558 if !ok { 559 return nil, errFieldDataTypeNotMatch 560 } 561 data := x.Float16Vector 562 dim := int(vectors.GetDim()) 563 vector := make([][]byte, 0, len(data)/dim) // shall not have remanunt 564 for i := 0; i < len(data)/dim; i++ { 565 v := make([]byte, dim) 566 copy(v, data[i*dim:(i+1)*dim]) 567 vector = append(vector, v) 568 } 569 return NewColumnFloat16Vector(fd.GetFieldName(), dim, vector), nil 570 case schemapb.DataType_BFloat16Vector: 571 vectors := fd.GetVectors() 572 x, ok := vectors.GetData().(*schemapb.VectorField_Bfloat16Vector) 573 if !ok { 574 return nil, errFieldDataTypeNotMatch 575 } 576 data := x.Bfloat16Vector 577 dim := int(vectors.GetDim()) 578 vector := make([][]byte, 0, len(data)/dim) // shall not have remanunt 579 for i := 0; i < len(data)/dim; i++ { 580 v := make([]byte, dim) 581 copy(v, data[i*dim:(i+1)*dim]) 582 vector = append(vector, v) 583 } 584 return NewColumnBFloat16Vector(fd.GetFieldName(), dim, vector), nil 585 default: 586 return nil, errors.New("unsupported data type") 587 } 588 } 589 590 // defaultValueColumn will return the empty scalars column which will be fill with default value 591 func DefaultValueColumn(name string, dataType FieldType) (Column, error) { 592 switch dataType { 593 case FieldTypeBool: 594 return NewColumnBool(name, nil), nil 595 case FieldTypeInt8: 596 return NewColumnInt8(name, nil), nil 597 case FieldTypeInt16: 598 return NewColumnInt16(name, nil), nil 599 case FieldTypeInt32: 600 return NewColumnInt32(name, nil), nil 601 case FieldTypeInt64: 602 return NewColumnInt64(name, nil), nil 603 case FieldTypeFloat: 604 return NewColumnFloat(name, nil), nil 605 case FieldTypeDouble: 606 return NewColumnDouble(name, nil), nil 607 case FieldTypeString: 608 return NewColumnString(name, nil), nil 609 case FieldTypeVarChar: 610 return NewColumnVarChar(name, nil), nil 611 case FieldTypeJSON: 612 return NewColumnJSONBytes(name, nil), nil 613 614 default: 615 return nil, fmt.Errorf("default value unsupported data type %s", dataType) 616 } 617 }