// Source: github.com/milvus-io/milvus-sdk-go/v2@v2.4.1/entity/rows.go

// Copyright (C) 2019-2021 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

package entity

import (
	"encoding/json"
	"fmt"
	"go/ast"
	"reflect"
	"strconv"
	"strings"

	"github.com/cockroachdb/errors"
)

const (
	// MilvusTag is the struct tag key used by milvus row based structs.
	MilvusTag = `milvus`

	// MilvusSkipTagValue is the struct tag value marking a field to be skipped.
	MilvusSkipTagValue = `-`

	// MilvusTagSep is the separator between attributes inside a struct tag.
	MilvusTagSep = `;`

	// MilvusTagName is the struct tag attribute carrying the field name.
	MilvusTagName = `NAME`

	// VectorDimTag is the struct tag attribute carrying the vector dimension.
	VectorDimTag = `DIM`

	// VectorTypeTag is the struct tag attribute carrying the binary vector type.
	VectorTypeTag = `VECTOR_TYPE`

	// MilvusPrimaryKey is the struct tag attribute marking the primary key.
	MilvusPrimaryKey = `PRIMARY_KEY`

	// MilvusAutoID is the struct tag attribute marking an auto id field.
	MilvusAutoID = `AUTO_ID`

	// DimMax is the dimension max value.
	DimMax = 65535
)

// Row is the interface for milvus row based data.
type Row interface {
	Collection() string
	Partition() string
	Description() string
}

// MapRow is the alias type for map[string]interface{} implementing the `Row` interface with empty methods.
62 type MapRow map[string]interface{} 63 64 func (mr MapRow) Collection() string { 65 return "" 66 } 67 68 func (mr MapRow) Partition() string { 69 return "" 70 } 71 72 func (mr MapRow) Description() string { 73 return "" 74 } 75 76 // RowBase row base, returns default collection, partition name which is empty string 77 type RowBase struct{} 78 79 // Collection row base default collection name, which is empty string 80 // when empty string is passed, the parent struct type name is used 81 func (b RowBase) Collection() string { 82 return "" 83 } 84 85 // Partition row base default partition name, which is empty string 86 // when empty string is passed, the default partition is used, which currently is named `_default` 87 func (b RowBase) Partition() string { 88 return "" 89 } 90 91 // Description implement Row interface, default value is empty string 92 func (b RowBase) Description() string { 93 return "" 94 } 95 96 // ParseSchemaAny parses schema from interface{}. 97 func ParseSchemaAny(r interface{}) (*Schema, error) { 98 sch := &Schema{} 99 t := reflect.TypeOf(r) 100 if t.Kind() == reflect.Array || t.Kind() == reflect.Slice || t.Kind() == reflect.Ptr { 101 t = t.Elem() 102 } 103 104 // MapRow is not supported for schema definition 105 // TODO add PrimaryKey() interface later 106 if t.Kind() == reflect.Map { 107 return nil, fmt.Errorf("map row is not supported for schema definition") 108 } 109 110 if t.Kind() != reflect.Struct { 111 return nil, fmt.Errorf("unsupported data type: %+v", r) 112 } 113 114 // Collection method not overwrited, try use Row type name 115 if sch.CollectionName == "" { 116 sch.CollectionName = t.Name() 117 if sch.CollectionName == "" { 118 return nil, errors.New("collection name not provided") 119 } 120 } 121 sch.Fields = make([]*Field, 0, t.NumField()) 122 for i := 0; i < t.NumField(); i++ { 123 f := t.Field(i) 124 // ignore anonymous field for now 125 if f.Anonymous || !ast.IsExported(f.Name) { 126 continue 127 } 128 129 field := &Field{ 
130 Name: f.Name, 131 } 132 ft := f.Type 133 if f.Type.Kind() == reflect.Ptr { 134 ft = ft.Elem() 135 } 136 fv := reflect.New(ft) 137 tag := f.Tag.Get(MilvusTag) 138 if tag == MilvusSkipTagValue { 139 continue 140 } 141 tagSettings := ParseTagSetting(tag, MilvusTagSep) 142 if _, has := tagSettings[MilvusPrimaryKey]; has { 143 field.PrimaryKey = true 144 } 145 if _, has := tagSettings[MilvusAutoID]; has { 146 field.AutoID = true 147 } 148 if name, has := tagSettings[MilvusTagName]; has { 149 field.Name = name 150 } 151 switch reflect.Indirect(fv).Kind() { 152 case reflect.Bool: 153 field.DataType = FieldTypeBool 154 case reflect.Int8: 155 field.DataType = FieldTypeInt8 156 case reflect.Int16: 157 field.DataType = FieldTypeInt16 158 case reflect.Int32: 159 field.DataType = FieldTypeInt32 160 case reflect.Int64: 161 field.DataType = FieldTypeInt64 162 case reflect.Float32: 163 field.DataType = FieldTypeFloat 164 case reflect.Float64: 165 field.DataType = FieldTypeDouble 166 case reflect.String: 167 field.DataType = FieldTypeString 168 case reflect.Array: 169 arrayLen := ft.Len() 170 elemType := ft.Elem() 171 switch elemType.Kind() { 172 case reflect.Uint8: 173 field.DataType = FieldTypeBinaryVector 174 //TODO maybe override by tag settings, when dim is not multiplier of 8 175 field.TypeParams = map[string]string{ 176 TypeParamDim: strconv.FormatInt(int64(arrayLen*8), 10), 177 } 178 case reflect.Float32: 179 field.DataType = FieldTypeFloatVector 180 field.TypeParams = map[string]string{ 181 TypeParamDim: strconv.FormatInt(int64(arrayLen), 10), 182 } 183 default: 184 return nil, fmt.Errorf("field %s is array of %v, which is not supported", f.Name, elemType) 185 } 186 case reflect.Slice: 187 dimStr, has := tagSettings[VectorDimTag] 188 if !has { 189 return nil, fmt.Errorf("field %s is slice but dim not provided", f.Name) 190 } 191 dim, err := strconv.ParseInt(dimStr, 10, 64) 192 if err != nil { 193 return nil, fmt.Errorf("dim value %s is not valid", dimStr) 194 } 195 if 
dim < 1 || dim > DimMax { 196 return nil, fmt.Errorf("dim value %d is out of range", dim) 197 } 198 field.TypeParams = map[string]string{ 199 TypeParamDim: dimStr, 200 } 201 elemType := ft.Elem() 202 switch elemType.Kind() { 203 case reflect.Uint8: // []byte, could be BinaryVector, fp16, bf 6 204 switch tagSettings[VectorTypeTag] { 205 case "fp16": 206 field.DataType = FieldTypeFloat16Vector 207 case "bf16": 208 field.DataType = FieldTypeBFloat16Vector 209 default: 210 field.DataType = FieldTypeBinaryVector 211 } 212 case reflect.Float32: 213 field.DataType = FieldTypeFloatVector 214 default: 215 return nil, fmt.Errorf("field %s is slice of %v, which is not supported", f.Name, elemType) 216 } 217 default: 218 return nil, fmt.Errorf("field %s is %v, which is not supported", field.Name, ft) 219 } 220 sch.Fields = append(sch.Fields, field) 221 } 222 223 return sch, nil 224 } 225 226 // ParseSchema parse Schema from row interface 227 func ParseSchema(r Row) (*Schema, error) { 228 schema, err := ParseSchemaAny(r) 229 if err != nil { 230 return nil, err 231 } 232 if r.Collection() != "" { 233 schema.CollectionName = r.Collection() 234 } 235 if schema.Description != "" { 236 schema.Description = r.Description() 237 } 238 return schema, nil 239 } 240 241 // ParseTagSetting parses struct tag into map settings 242 func ParseTagSetting(str string, sep string) map[string]string { 243 settings := map[string]string{} 244 names := strings.Split(str, sep) 245 246 for i := 0; i < len(names); i++ { 247 j := i 248 if len(names[j]) > 0 { 249 for { 250 if names[j][len(names[j])-1] == '\\' { 251 i++ 252 names[j] = names[j][0:len(names[j])-1] + sep + names[i] 253 names[i] = "" 254 } else { 255 break 256 } 257 } 258 } 259 260 values := strings.Split(names[j], ":") 261 k := strings.TrimSpace(strings.ToUpper(values[0])) 262 263 if len(values) >= 2 { 264 settings[k] = strings.Join(values[1:], ":") 265 } else if k != "" { 266 settings[k] = k 267 } 268 } 269 270 return settings 271 } 272 273 
func AnyToColumns(rows []interface{}, schemas ...*Schema) ([]Column, error) { 274 rowsLen := len(rows) 275 if rowsLen == 0 { 276 return []Column{}, errors.New("0 length column") 277 } 278 279 var sch *Schema 280 var err error 281 // if schema not provided, try to parse from row 282 if len(schemas) == 0 { 283 sch, err = ParseSchemaAny(rows[0]) 284 if err != nil { 285 return []Column{}, err 286 } 287 } else { 288 // use first schema provided 289 sch = schemas[0] 290 } 291 292 isDynamic := sch.EnableDynamicField 293 var dynamicCol *ColumnJSONBytes 294 295 nameColumns := make(map[string]Column) 296 for _, field := range sch.Fields { 297 // skip auto id pk field 298 if field.PrimaryKey && field.AutoID { 299 continue 300 } 301 switch field.DataType { 302 case FieldTypeBool: 303 data := make([]bool, 0, rowsLen) 304 col := NewColumnBool(field.Name, data) 305 nameColumns[field.Name] = col 306 case FieldTypeInt8: 307 data := make([]int8, 0, rowsLen) 308 col := NewColumnInt8(field.Name, data) 309 nameColumns[field.Name] = col 310 case FieldTypeInt16: 311 data := make([]int16, 0, rowsLen) 312 col := NewColumnInt16(field.Name, data) 313 nameColumns[field.Name] = col 314 case FieldTypeInt32: 315 data := make([]int32, 0, rowsLen) 316 col := NewColumnInt32(field.Name, data) 317 nameColumns[field.Name] = col 318 case FieldTypeInt64: 319 data := make([]int64, 0, rowsLen) 320 col := NewColumnInt64(field.Name, data) 321 nameColumns[field.Name] = col 322 case FieldTypeFloat: 323 data := make([]float32, 0, rowsLen) 324 col := NewColumnFloat(field.Name, data) 325 nameColumns[field.Name] = col 326 case FieldTypeDouble: 327 data := make([]float64, 0, rowsLen) 328 col := NewColumnDouble(field.Name, data) 329 nameColumns[field.Name] = col 330 case FieldTypeString, FieldTypeVarChar: 331 data := make([]string, 0, rowsLen) 332 col := NewColumnString(field.Name, data) 333 nameColumns[field.Name] = col 334 case FieldTypeJSON: 335 data := make([][]byte, 0, rowsLen) 336 col := 
NewColumnJSONBytes(field.Name, data) 337 nameColumns[field.Name] = col 338 case FieldTypeArray: 339 col := NewArrayColumn(field) 340 if col == nil { 341 return nil, errors.Errorf("unsupported element type %s for Array", field.ElementType.String()) 342 } 343 nameColumns[field.Name] = col 344 case FieldTypeFloatVector: 345 data := make([][]float32, 0, rowsLen) 346 dimStr, has := field.TypeParams[TypeParamDim] 347 if !has { 348 return []Column{}, errors.New("vector field with no dim") 349 } 350 dim, err := strconv.ParseInt(dimStr, 10, 64) 351 if err != nil { 352 return []Column{}, fmt.Errorf("vector field with bad format dim: %s", err.Error()) 353 } 354 col := NewColumnFloatVector(field.Name, int(dim), data) 355 nameColumns[field.Name] = col 356 case FieldTypeBinaryVector: 357 data := make([][]byte, 0, rowsLen) 358 dimStr, has := field.TypeParams[TypeParamDim] 359 if !has { 360 return []Column{}, errors.New("vector field with no dim") 361 } 362 dim, err := strconv.ParseInt(dimStr, 10, 64) 363 if err != nil { 364 return []Column{}, fmt.Errorf("vector field with bad format dim: %s", err.Error()) 365 } 366 col := NewColumnBinaryVector(field.Name, int(dim), data) 367 nameColumns[field.Name] = col 368 case FieldTypeFloat16Vector: 369 data := make([][]byte, 0, rowsLen) 370 dimStr, has := field.TypeParams[TypeParamDim] 371 if !has { 372 return []Column{}, errors.New("vector field with no dim") 373 } 374 dim, err := strconv.ParseInt(dimStr, 10, 64) 375 if err != nil { 376 return []Column{}, fmt.Errorf("vector field with bad format dim: %s", err.Error()) 377 } 378 col := NewColumnFloat16Vector(field.Name, int(dim), data) 379 nameColumns[field.Name] = col 380 case FieldTypeBFloat16Vector: 381 data := make([][]byte, 0, rowsLen) 382 dimStr, has := field.TypeParams[TypeParamDim] 383 if !has { 384 return []Column{}, errors.New("vector field with no dim") 385 } 386 dim, err := strconv.ParseInt(dimStr, 10, 64) 387 if err != nil { 388 return []Column{}, fmt.Errorf("vector field with 
bad format dim: %s", err.Error()) 389 } 390 col := NewColumnBFloat16Vector(field.Name, int(dim), data) 391 nameColumns[field.Name] = col 392 case FieldTypeSparseVector: 393 data := make([]SparseEmbedding, 0, rowsLen) 394 col := NewColumnSparseVectors(field.Name, data) 395 nameColumns[field.Name] = col 396 } 397 } 398 399 if isDynamic { 400 dynamicCol = NewColumnJSONBytes("", make([][]byte, 0, rowsLen)).WithIsDynamic(true) 401 } 402 403 for _, row := range rows { 404 // collection schema name need not to be same, since receiver could has other names 405 v := reflect.ValueOf(row) 406 set, err := reflectValueCandi(v) 407 if err != nil { 408 return nil, err 409 } 410 411 for idx, field := range sch.Fields { 412 // skip dynamic field if visible 413 if isDynamic && field.IsDynamic { 414 continue 415 } 416 // skip auto id pk field 417 if field.PrimaryKey && field.AutoID { 418 // remove pk field from candidates set, avoid adding it into dynamic column 419 delete(set, field.Name) 420 continue 421 } 422 column, ok := nameColumns[field.Name] 423 if !ok { 424 return nil, fmt.Errorf("expected unhandled field %s", field.Name) 425 } 426 427 candi, ok := set[field.Name] 428 if !ok { 429 return nil, fmt.Errorf("row %d does not has field %s", idx, field.Name) 430 } 431 err := column.AppendValue(candi.v.Interface()) 432 if err != nil { 433 return nil, err 434 } 435 delete(set, field.Name) 436 } 437 438 if isDynamic { 439 m := make(map[string]interface{}) 440 for name, candi := range set { 441 m[name] = candi.v.Interface() 442 } 443 bs, err := json.Marshal(m) 444 if err != nil { 445 return nil, fmt.Errorf("failed to marshal dynamic field %w", err) 446 } 447 err = dynamicCol.AppendValue(bs) 448 if err != nil { 449 return nil, fmt.Errorf("failed to append value to dynamic field %w", err) 450 } 451 } 452 } 453 columns := make([]Column, 0, len(nameColumns)) 454 for _, column := range nameColumns { 455 columns = append(columns, column) 456 } 457 if isDynamic { 458 columns = append(columns, 
dynamicCol) 459 } 460 return columns, nil 461 } 462 463 func NewArrayColumn(f *Field) Column { 464 switch f.ElementType { 465 case FieldTypeBool: 466 return NewColumnBoolArray(f.Name, nil) 467 468 case FieldTypeInt8: 469 return NewColumnInt8Array(f.Name, nil) 470 471 case FieldTypeInt16: 472 return NewColumnInt16Array(f.Name, nil) 473 474 case FieldTypeInt32: 475 return NewColumnInt32Array(f.Name, nil) 476 477 case FieldTypeInt64: 478 return NewColumnInt64Array(f.Name, nil) 479 480 case FieldTypeFloat: 481 return NewColumnFloatArray(f.Name, nil) 482 483 case FieldTypeDouble: 484 return NewColumnDoubleArray(f.Name, nil) 485 486 case FieldTypeVarChar: 487 return NewColumnVarCharArray(f.Name, nil) 488 489 default: 490 return nil 491 } 492 } 493 494 // RowsToColumns rows to columns 495 func RowsToColumns(rows []Row, schemas ...*Schema) ([]Column, error) { 496 anys := make([]interface{}, 0, len(rows)) 497 for _, row := range rows { 498 anys = append(anys, row) 499 } 500 return AnyToColumns(anys, schemas...) 
501 } 502 503 type fieldCandi struct { 504 name string 505 v reflect.Value 506 options map[string]string 507 } 508 509 func reflectValueCandi(v reflect.Value) (map[string]fieldCandi, error) { 510 if v.Kind() == reflect.Ptr { 511 v = v.Elem() 512 } 513 514 result := make(map[string]fieldCandi) 515 switch v.Kind() { 516 case reflect.Map: // map[string]interface{} 517 iter := v.MapRange() 518 for iter.Next() { 519 key := iter.Key().String() 520 result[key] = fieldCandi{ 521 name: key, 522 v: iter.Value(), 523 } 524 } 525 return result, nil 526 case reflect.Struct: 527 for i := 0; i < v.NumField(); i++ { 528 ft := v.Type().Field(i) 529 name := ft.Name 530 tag, ok := ft.Tag.Lookup(MilvusTag) 531 532 settings := make(map[string]string) 533 if ok { 534 if tag == MilvusSkipTagValue { 535 continue 536 } 537 settings = ParseTagSetting(tag, MilvusTagSep) 538 fn, has := settings[MilvusTagName] 539 if has { 540 // overwrite column to tag name 541 name = fn 542 } 543 } 544 _, ok = result[name] 545 // duplicated 546 if ok { 547 return nil, fmt.Errorf("column has duplicated name: %s when parsing field: %s", name, ft.Name) 548 } 549 550 v := v.Field(i) 551 if v.Kind() == reflect.Array { 552 v = v.Slice(0, v.Len()) 553 } 554 555 result[name] = fieldCandi{ 556 name: name, 557 v: v, 558 options: settings, 559 } 560 } 561 562 return result, nil 563 default: 564 return nil, fmt.Errorf("unsupport row type: %s", v.Kind().String()) 565 } 566 }