github.com/milvus-io/milvus-sdk-go/v2@v2.4.1/entity/rows.go (about)

     1  // Copyright (C) 2019-2021 Zilliz. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
     4  // with the License. You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software distributed under the License
     9  // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
    10  // or implied. See the License for the specific language governing permissions and limitations under the License.
    11  
    12  package entity
    13  
    14  import (
    15  	"encoding/json"
    16  	"fmt"
    17  	"go/ast"
    18  	"reflect"
    19  	"strconv"
    20  	"strings"
    21  
    22  	"github.com/cockroachdb/errors"
    23  )
    24  
    25  const (
    26  	// MilvusTag struct tag const for milvus row based struct
    27  	MilvusTag = `milvus`
    28  
    29  	// MilvusSkipTagValue struct tag const for skip this field.
    30  	MilvusSkipTagValue = `-`
    31  
    32  	// MilvusTagSep struct tag const for attribute separator
    33  	MilvusTagSep = `;`
    34  
    35  	//MilvusTagName struct tag const for field name
    36  	MilvusTagName = `NAME`
    37  
    38  	// VectorDimTag struct tag const for vector dimension
    39  	VectorDimTag = `DIM`
    40  
    41  	// VectorTypeTag struct tag const for binary vector type
    42  	VectorTypeTag = `VECTOR_TYPE`
    43  
    44  	// MilvusPrimaryKey struct tag const for primary key indicator
    45  	MilvusPrimaryKey = `PRIMARY_KEY`
    46  
    47  	// MilvusAutoID struct tag const for auto id indicator
    48  	MilvusAutoID = `AUTO_ID`
    49  
    50  	// DimMax dimension max value
    51  	DimMax = 65535
    52  )
    53  
    54  // Row is the interface for milvus row based data
    55  type Row interface {
    56  	Collection() string
    57  	Partition() string
    58  	Description() string
    59  }
    60  
    61  // MapRow is the alias type for map[string]interface{} implementing `Row` inteface with empty methods.
    62  type MapRow map[string]interface{}
    63  
    64  func (mr MapRow) Collection() string {
    65  	return ""
    66  }
    67  
    68  func (mr MapRow) Partition() string {
    69  	return ""
    70  }
    71  
    72  func (mr MapRow) Description() string {
    73  	return ""
    74  }
    75  
    76  // RowBase row base, returns default collection, partition name which is empty string
    77  type RowBase struct{}
    78  
    79  // Collection row base default collection name, which is empty string
    80  // when empty string is passed, the parent struct type name is used
    81  func (b RowBase) Collection() string {
    82  	return ""
    83  }
    84  
    85  // Partition row base default partition name, which is empty string
    86  // when empty string is passed, the default partition is used, which currently is named `_default`
    87  func (b RowBase) Partition() string {
    88  	return ""
    89  }
    90  
    91  // Description implement Row interface, default value is empty string
    92  func (b RowBase) Description() string {
    93  	return ""
    94  }
    95  
    96  // ParseSchemaAny parses schema from interface{}.
    97  func ParseSchemaAny(r interface{}) (*Schema, error) {
    98  	sch := &Schema{}
    99  	t := reflect.TypeOf(r)
   100  	if t.Kind() == reflect.Array || t.Kind() == reflect.Slice || t.Kind() == reflect.Ptr {
   101  		t = t.Elem()
   102  	}
   103  
   104  	// MapRow is not supported for schema definition
   105  	// TODO add PrimaryKey() interface later
   106  	if t.Kind() == reflect.Map {
   107  		return nil, fmt.Errorf("map row is not supported for schema definition")
   108  	}
   109  
   110  	if t.Kind() != reflect.Struct {
   111  		return nil, fmt.Errorf("unsupported data type: %+v", r)
   112  	}
   113  
   114  	// Collection method not overwrited, try use Row type name
   115  	if sch.CollectionName == "" {
   116  		sch.CollectionName = t.Name()
   117  		if sch.CollectionName == "" {
   118  			return nil, errors.New("collection name not provided")
   119  		}
   120  	}
   121  	sch.Fields = make([]*Field, 0, t.NumField())
   122  	for i := 0; i < t.NumField(); i++ {
   123  		f := t.Field(i)
   124  		// ignore anonymous field for now
   125  		if f.Anonymous || !ast.IsExported(f.Name) {
   126  			continue
   127  		}
   128  
   129  		field := &Field{
   130  			Name: f.Name,
   131  		}
   132  		ft := f.Type
   133  		if f.Type.Kind() == reflect.Ptr {
   134  			ft = ft.Elem()
   135  		}
   136  		fv := reflect.New(ft)
   137  		tag := f.Tag.Get(MilvusTag)
   138  		if tag == MilvusSkipTagValue {
   139  			continue
   140  		}
   141  		tagSettings := ParseTagSetting(tag, MilvusTagSep)
   142  		if _, has := tagSettings[MilvusPrimaryKey]; has {
   143  			field.PrimaryKey = true
   144  		}
   145  		if _, has := tagSettings[MilvusAutoID]; has {
   146  			field.AutoID = true
   147  		}
   148  		if name, has := tagSettings[MilvusTagName]; has {
   149  			field.Name = name
   150  		}
   151  		switch reflect.Indirect(fv).Kind() {
   152  		case reflect.Bool:
   153  			field.DataType = FieldTypeBool
   154  		case reflect.Int8:
   155  			field.DataType = FieldTypeInt8
   156  		case reflect.Int16:
   157  			field.DataType = FieldTypeInt16
   158  		case reflect.Int32:
   159  			field.DataType = FieldTypeInt32
   160  		case reflect.Int64:
   161  			field.DataType = FieldTypeInt64
   162  		case reflect.Float32:
   163  			field.DataType = FieldTypeFloat
   164  		case reflect.Float64:
   165  			field.DataType = FieldTypeDouble
   166  		case reflect.String:
   167  			field.DataType = FieldTypeString
   168  		case reflect.Array:
   169  			arrayLen := ft.Len()
   170  			elemType := ft.Elem()
   171  			switch elemType.Kind() {
   172  			case reflect.Uint8:
   173  				field.DataType = FieldTypeBinaryVector
   174  				//TODO maybe override by tag settings, when dim is not multiplier of 8
   175  				field.TypeParams = map[string]string{
   176  					TypeParamDim: strconv.FormatInt(int64(arrayLen*8), 10),
   177  				}
   178  			case reflect.Float32:
   179  				field.DataType = FieldTypeFloatVector
   180  				field.TypeParams = map[string]string{
   181  					TypeParamDim: strconv.FormatInt(int64(arrayLen), 10),
   182  				}
   183  			default:
   184  				return nil, fmt.Errorf("field %s is array of %v, which is not supported", f.Name, elemType)
   185  			}
   186  		case reflect.Slice:
   187  			dimStr, has := tagSettings[VectorDimTag]
   188  			if !has {
   189  				return nil, fmt.Errorf("field %s is slice but dim not provided", f.Name)
   190  			}
   191  			dim, err := strconv.ParseInt(dimStr, 10, 64)
   192  			if err != nil {
   193  				return nil, fmt.Errorf("dim value %s is not valid", dimStr)
   194  			}
   195  			if dim < 1 || dim > DimMax {
   196  				return nil, fmt.Errorf("dim value %d is out of range", dim)
   197  			}
   198  			field.TypeParams = map[string]string{
   199  				TypeParamDim: dimStr,
   200  			}
   201  			elemType := ft.Elem()
   202  			switch elemType.Kind() {
   203  			case reflect.Uint8: // []byte, could be BinaryVector, fp16, bf 6
   204  				switch tagSettings[VectorTypeTag] {
   205  				case "fp16":
   206  					field.DataType = FieldTypeFloat16Vector
   207  				case "bf16":
   208  					field.DataType = FieldTypeBFloat16Vector
   209  				default:
   210  					field.DataType = FieldTypeBinaryVector
   211  				}
   212  			case reflect.Float32:
   213  				field.DataType = FieldTypeFloatVector
   214  			default:
   215  				return nil, fmt.Errorf("field %s is slice of %v, which is not supported", f.Name, elemType)
   216  			}
   217  		default:
   218  			return nil, fmt.Errorf("field %s is %v, which is not supported", field.Name, ft)
   219  		}
   220  		sch.Fields = append(sch.Fields, field)
   221  	}
   222  
   223  	return sch, nil
   224  }
   225  
   226  // ParseSchema parse Schema from row interface
   227  func ParseSchema(r Row) (*Schema, error) {
   228  	schema, err := ParseSchemaAny(r)
   229  	if err != nil {
   230  		return nil, err
   231  	}
   232  	if r.Collection() != "" {
   233  		schema.CollectionName = r.Collection()
   234  	}
   235  	if schema.Description != "" {
   236  		schema.Description = r.Description()
   237  	}
   238  	return schema, nil
   239  }
   240  
   241  // ParseTagSetting parses struct tag into map settings
   242  func ParseTagSetting(str string, sep string) map[string]string {
   243  	settings := map[string]string{}
   244  	names := strings.Split(str, sep)
   245  
   246  	for i := 0; i < len(names); i++ {
   247  		j := i
   248  		if len(names[j]) > 0 {
   249  			for {
   250  				if names[j][len(names[j])-1] == '\\' {
   251  					i++
   252  					names[j] = names[j][0:len(names[j])-1] + sep + names[i]
   253  					names[i] = ""
   254  				} else {
   255  					break
   256  				}
   257  			}
   258  		}
   259  
   260  		values := strings.Split(names[j], ":")
   261  		k := strings.TrimSpace(strings.ToUpper(values[0]))
   262  
   263  		if len(values) >= 2 {
   264  			settings[k] = strings.Join(values[1:], ":")
   265  		} else if k != "" {
   266  			settings[k] = k
   267  		}
   268  	}
   269  
   270  	return settings
   271  }
   272  
   273  func AnyToColumns(rows []interface{}, schemas ...*Schema) ([]Column, error) {
   274  	rowsLen := len(rows)
   275  	if rowsLen == 0 {
   276  		return []Column{}, errors.New("0 length column")
   277  	}
   278  
   279  	var sch *Schema
   280  	var err error
   281  	// if schema not provided, try to parse from row
   282  	if len(schemas) == 0 {
   283  		sch, err = ParseSchemaAny(rows[0])
   284  		if err != nil {
   285  			return []Column{}, err
   286  		}
   287  	} else {
   288  		// use first schema provided
   289  		sch = schemas[0]
   290  	}
   291  
   292  	isDynamic := sch.EnableDynamicField
   293  	var dynamicCol *ColumnJSONBytes
   294  
   295  	nameColumns := make(map[string]Column)
   296  	for _, field := range sch.Fields {
   297  		// skip auto id pk field
   298  		if field.PrimaryKey && field.AutoID {
   299  			continue
   300  		}
   301  		switch field.DataType {
   302  		case FieldTypeBool:
   303  			data := make([]bool, 0, rowsLen)
   304  			col := NewColumnBool(field.Name, data)
   305  			nameColumns[field.Name] = col
   306  		case FieldTypeInt8:
   307  			data := make([]int8, 0, rowsLen)
   308  			col := NewColumnInt8(field.Name, data)
   309  			nameColumns[field.Name] = col
   310  		case FieldTypeInt16:
   311  			data := make([]int16, 0, rowsLen)
   312  			col := NewColumnInt16(field.Name, data)
   313  			nameColumns[field.Name] = col
   314  		case FieldTypeInt32:
   315  			data := make([]int32, 0, rowsLen)
   316  			col := NewColumnInt32(field.Name, data)
   317  			nameColumns[field.Name] = col
   318  		case FieldTypeInt64:
   319  			data := make([]int64, 0, rowsLen)
   320  			col := NewColumnInt64(field.Name, data)
   321  			nameColumns[field.Name] = col
   322  		case FieldTypeFloat:
   323  			data := make([]float32, 0, rowsLen)
   324  			col := NewColumnFloat(field.Name, data)
   325  			nameColumns[field.Name] = col
   326  		case FieldTypeDouble:
   327  			data := make([]float64, 0, rowsLen)
   328  			col := NewColumnDouble(field.Name, data)
   329  			nameColumns[field.Name] = col
   330  		case FieldTypeString, FieldTypeVarChar:
   331  			data := make([]string, 0, rowsLen)
   332  			col := NewColumnString(field.Name, data)
   333  			nameColumns[field.Name] = col
   334  		case FieldTypeJSON:
   335  			data := make([][]byte, 0, rowsLen)
   336  			col := NewColumnJSONBytes(field.Name, data)
   337  			nameColumns[field.Name] = col
   338  		case FieldTypeArray:
   339  			col := NewArrayColumn(field)
   340  			if col == nil {
   341  				return nil, errors.Errorf("unsupported element type %s for Array", field.ElementType.String())
   342  			}
   343  			nameColumns[field.Name] = col
   344  		case FieldTypeFloatVector:
   345  			data := make([][]float32, 0, rowsLen)
   346  			dimStr, has := field.TypeParams[TypeParamDim]
   347  			if !has {
   348  				return []Column{}, errors.New("vector field with no dim")
   349  			}
   350  			dim, err := strconv.ParseInt(dimStr, 10, 64)
   351  			if err != nil {
   352  				return []Column{}, fmt.Errorf("vector field with bad format dim: %s", err.Error())
   353  			}
   354  			col := NewColumnFloatVector(field.Name, int(dim), data)
   355  			nameColumns[field.Name] = col
   356  		case FieldTypeBinaryVector:
   357  			data := make([][]byte, 0, rowsLen)
   358  			dimStr, has := field.TypeParams[TypeParamDim]
   359  			if !has {
   360  				return []Column{}, errors.New("vector field with no dim")
   361  			}
   362  			dim, err := strconv.ParseInt(dimStr, 10, 64)
   363  			if err != nil {
   364  				return []Column{}, fmt.Errorf("vector field with bad format dim: %s", err.Error())
   365  			}
   366  			col := NewColumnBinaryVector(field.Name, int(dim), data)
   367  			nameColumns[field.Name] = col
   368  		case FieldTypeFloat16Vector:
   369  			data := make([][]byte, 0, rowsLen)
   370  			dimStr, has := field.TypeParams[TypeParamDim]
   371  			if !has {
   372  				return []Column{}, errors.New("vector field with no dim")
   373  			}
   374  			dim, err := strconv.ParseInt(dimStr, 10, 64)
   375  			if err != nil {
   376  				return []Column{}, fmt.Errorf("vector field with bad format dim: %s", err.Error())
   377  			}
   378  			col := NewColumnFloat16Vector(field.Name, int(dim), data)
   379  			nameColumns[field.Name] = col
   380  		case FieldTypeBFloat16Vector:
   381  			data := make([][]byte, 0, rowsLen)
   382  			dimStr, has := field.TypeParams[TypeParamDim]
   383  			if !has {
   384  				return []Column{}, errors.New("vector field with no dim")
   385  			}
   386  			dim, err := strconv.ParseInt(dimStr, 10, 64)
   387  			if err != nil {
   388  				return []Column{}, fmt.Errorf("vector field with bad format dim: %s", err.Error())
   389  			}
   390  			col := NewColumnBFloat16Vector(field.Name, int(dim), data)
   391  			nameColumns[field.Name] = col
   392  		case FieldTypeSparseVector:
   393  			data := make([]SparseEmbedding, 0, rowsLen)
   394  			col := NewColumnSparseVectors(field.Name, data)
   395  			nameColumns[field.Name] = col
   396  		}
   397  	}
   398  
   399  	if isDynamic {
   400  		dynamicCol = NewColumnJSONBytes("", make([][]byte, 0, rowsLen)).WithIsDynamic(true)
   401  	}
   402  
   403  	for _, row := range rows {
   404  		// collection schema name need not to be same, since receiver could has other names
   405  		v := reflect.ValueOf(row)
   406  		set, err := reflectValueCandi(v)
   407  		if err != nil {
   408  			return nil, err
   409  		}
   410  
   411  		for idx, field := range sch.Fields {
   412  			// skip dynamic field if visible
   413  			if isDynamic && field.IsDynamic {
   414  				continue
   415  			}
   416  			// skip auto id pk field
   417  			if field.PrimaryKey && field.AutoID {
   418  				// remove pk field from candidates set, avoid adding it into dynamic column
   419  				delete(set, field.Name)
   420  				continue
   421  			}
   422  			column, ok := nameColumns[field.Name]
   423  			if !ok {
   424  				return nil, fmt.Errorf("expected unhandled field %s", field.Name)
   425  			}
   426  
   427  			candi, ok := set[field.Name]
   428  			if !ok {
   429  				return nil, fmt.Errorf("row %d does not has field %s", idx, field.Name)
   430  			}
   431  			err := column.AppendValue(candi.v.Interface())
   432  			if err != nil {
   433  				return nil, err
   434  			}
   435  			delete(set, field.Name)
   436  		}
   437  
   438  		if isDynamic {
   439  			m := make(map[string]interface{})
   440  			for name, candi := range set {
   441  				m[name] = candi.v.Interface()
   442  			}
   443  			bs, err := json.Marshal(m)
   444  			if err != nil {
   445  				return nil, fmt.Errorf("failed to marshal dynamic field %w", err)
   446  			}
   447  			err = dynamicCol.AppendValue(bs)
   448  			if err != nil {
   449  				return nil, fmt.Errorf("failed to append value to dynamic field %w", err)
   450  			}
   451  		}
   452  	}
   453  	columns := make([]Column, 0, len(nameColumns))
   454  	for _, column := range nameColumns {
   455  		columns = append(columns, column)
   456  	}
   457  	if isDynamic {
   458  		columns = append(columns, dynamicCol)
   459  	}
   460  	return columns, nil
   461  }
   462  
   463  func NewArrayColumn(f *Field) Column {
   464  	switch f.ElementType {
   465  	case FieldTypeBool:
   466  		return NewColumnBoolArray(f.Name, nil)
   467  
   468  	case FieldTypeInt8:
   469  		return NewColumnInt8Array(f.Name, nil)
   470  
   471  	case FieldTypeInt16:
   472  		return NewColumnInt16Array(f.Name, nil)
   473  
   474  	case FieldTypeInt32:
   475  		return NewColumnInt32Array(f.Name, nil)
   476  
   477  	case FieldTypeInt64:
   478  		return NewColumnInt64Array(f.Name, nil)
   479  
   480  	case FieldTypeFloat:
   481  		return NewColumnFloatArray(f.Name, nil)
   482  
   483  	case FieldTypeDouble:
   484  		return NewColumnDoubleArray(f.Name, nil)
   485  
   486  	case FieldTypeVarChar:
   487  		return NewColumnVarCharArray(f.Name, nil)
   488  
   489  	default:
   490  		return nil
   491  	}
   492  }
   493  
   494  // RowsToColumns rows to columns
   495  func RowsToColumns(rows []Row, schemas ...*Schema) ([]Column, error) {
   496  	anys := make([]interface{}, 0, len(rows))
   497  	for _, row := range rows {
   498  		anys = append(anys, row)
   499  	}
   500  	return AnyToColumns(anys, schemas...)
   501  }
   502  
   503  type fieldCandi struct {
   504  	name    string
   505  	v       reflect.Value
   506  	options map[string]string
   507  }
   508  
   509  func reflectValueCandi(v reflect.Value) (map[string]fieldCandi, error) {
   510  	if v.Kind() == reflect.Ptr {
   511  		v = v.Elem()
   512  	}
   513  
   514  	result := make(map[string]fieldCandi)
   515  	switch v.Kind() {
   516  	case reflect.Map: // map[string]interface{}
   517  		iter := v.MapRange()
   518  		for iter.Next() {
   519  			key := iter.Key().String()
   520  			result[key] = fieldCandi{
   521  				name: key,
   522  				v:    iter.Value(),
   523  			}
   524  		}
   525  		return result, nil
   526  	case reflect.Struct:
   527  		for i := 0; i < v.NumField(); i++ {
   528  			ft := v.Type().Field(i)
   529  			name := ft.Name
   530  			tag, ok := ft.Tag.Lookup(MilvusTag)
   531  
   532  			settings := make(map[string]string)
   533  			if ok {
   534  				if tag == MilvusSkipTagValue {
   535  					continue
   536  				}
   537  				settings = ParseTagSetting(tag, MilvusTagSep)
   538  				fn, has := settings[MilvusTagName]
   539  				if has {
   540  					// overwrite column to tag name
   541  					name = fn
   542  				}
   543  			}
   544  			_, ok = result[name]
   545  			// duplicated
   546  			if ok {
   547  				return nil, fmt.Errorf("column has duplicated name: %s when parsing field: %s", name, ft.Name)
   548  			}
   549  
   550  			v := v.Field(i)
   551  			if v.Kind() == reflect.Array {
   552  				v = v.Slice(0, v.Len())
   553  			}
   554  
   555  			result[name] = fieldCandi{
   556  				name:    name,
   557  				v:       v,
   558  				options: settings,
   559  			}
   560  		}
   561  
   562  		return result, nil
   563  	default:
   564  		return nil, fmt.Errorf("unsupport row type: %s", v.Kind().String())
   565  	}
   566  }