github.com/milvus-io/milvus-sdk-go/v2@v2.4.1/client/insert.go (about)

     1  // Copyright (C) 2019-2021 Zilliz. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
     4  // with the License. You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software distributed under the License
     9  // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
    10  // or implied. See the License for the specific language governing permissions and limitations under the License.
    11  
    12  package client
    13  
    14  import (
    15  	"context"
    16  	"encoding/json"
    17  	"fmt"
    18  	"time"
    19  
    20  	"github.com/cockroachdb/errors"
    21  	"github.com/golang/protobuf/proto"
    22  	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
    23  	"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
    24  	"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
    25  	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
    26  
    27  	"github.com/milvus-io/milvus-sdk-go/v2/entity"
    28  )
    29  
    30  // Insert Index  into collection with column-based format
    31  // collName is the collection name
    32  // partitionName is the partition to insert, if not specified(empty), default partition will be used
    33  // columns are slice of the column-based data
    34  func (c *GrpcClient) Insert(ctx context.Context, collName string, partitionName string, columns ...entity.Column) (entity.Column, error) {
    35  	if c.Service == nil {
    36  		return nil, ErrClientNotReady
    37  	}
    38  	var schema *entity.Schema
    39  	collInfo, ok := MetaCache.getCollectionInfo(collName)
    40  	if !ok {
    41  		coll, err := c.DescribeCollection(ctx, collName)
    42  		if err != nil {
    43  			return nil, err
    44  		}
    45  		schema = coll.Schema
    46  	} else {
    47  		schema = collInfo.Schema
    48  	}
    49  
    50  	// convert columns to field data
    51  	fieldsData, rowSize, err := c.processInsertColumns(schema, columns...)
    52  	if err != nil {
    53  		return nil, err
    54  	}
    55  
    56  	// 2. do insert request
    57  	req := &milvuspb.InsertRequest{
    58  		DbName:         "", // reserved
    59  		CollectionName: collName,
    60  		PartitionName:  partitionName,
    61  		FieldsData:     fieldsData,
    62  	}
    63  
    64  	req.NumRows = uint32(rowSize)
    65  
    66  	resp, err := c.Service.Insert(ctx, req)
    67  	if err != nil {
    68  		return nil, err
    69  	}
    70  	if err := handleRespStatus(resp.GetStatus()); err != nil {
    71  		return nil, err
    72  	}
    73  	MetaCache.setSessionTs(collName, resp.Timestamp)
    74  	// 3. parse id column
    75  	return entity.IDColumns(schema, resp.GetIDs(), 0, -1)
    76  }
    77  
    78  func (c *GrpcClient) processInsertColumns(colSchema *entity.Schema, columns ...entity.Column) ([]*schemapb.FieldData, int, error) {
    79  	// setup dynamic related var
    80  	isDynamic := colSchema.EnableDynamicField
    81  
    82  	// check columns and field matches
    83  	var rowSize int
    84  	mNameField := make(map[string]*entity.Field)
    85  	for _, field := range colSchema.Fields {
    86  		mNameField[field.Name] = field
    87  	}
    88  	mNameColumn := make(map[string]entity.Column)
    89  	var dynamicColumns []entity.Column
    90  	for _, column := range columns {
    91  		_, dup := mNameColumn[column.Name()]
    92  		if dup {
    93  			return nil, 0, fmt.Errorf("duplicated column %s found", column.Name())
    94  		}
    95  		l := column.Len()
    96  		if rowSize == 0 {
    97  			rowSize = l
    98  		} else {
    99  			if rowSize != l {
   100  				return nil, 0, errors.New("column size not match")
   101  			}
   102  		}
   103  		field, has := mNameField[column.Name()]
   104  		if !has {
   105  			if !isDynamic {
   106  				return nil, 0, fmt.Errorf("field %s does not exist in collection %s", column.Name(), colSchema.CollectionName)
   107  			}
   108  			// add to dynamic column list for further processing
   109  			dynamicColumns = append(dynamicColumns, column)
   110  			continue
   111  		}
   112  
   113  		mNameColumn[column.Name()] = column
   114  		if column.Type() != field.DataType {
   115  			return nil, 0, fmt.Errorf("param column %s has type %v but collection field definition is %v", column.Name(), column.FieldData(), field.DataType)
   116  		}
   117  		if field.DataType == entity.FieldTypeFloatVector || field.DataType == entity.FieldTypeBinaryVector {
   118  			dim := 0
   119  			switch column := column.(type) {
   120  			case *entity.ColumnFloatVector:
   121  				dim = column.Dim()
   122  			case *entity.ColumnBinaryVector:
   123  				dim = column.Dim()
   124  			}
   125  			if fmt.Sprintf("%d", dim) != field.TypeParams[entity.TypeParamDim] {
   126  				return nil, 0, fmt.Errorf("params column %s vector dim %d not match collection definition, which has dim of %s", field.Name, dim, field.TypeParams[entity.TypeParamDim])
   127  			}
   128  		}
   129  	}
   130  
   131  	// check all fixed field pass value
   132  	for _, field := range colSchema.Fields {
   133  		_, has := mNameColumn[field.Name]
   134  		if !has &&
   135  			!field.AutoID && !field.IsDynamic {
   136  			return nil, 0, fmt.Errorf("field %s not passed", field.Name)
   137  		}
   138  	}
   139  
   140  	fieldsData := make([]*schemapb.FieldData, 0, len(mNameColumn)+1)
   141  	for _, fixedColumn := range mNameColumn {
   142  		fieldsData = append(fieldsData, fixedColumn.FieldData())
   143  	}
   144  	if len(dynamicColumns) > 0 {
   145  		// use empty column name here
   146  		col, err := c.mergeDynamicColumns("", rowSize, dynamicColumns)
   147  		if err != nil {
   148  			return nil, 0, err
   149  		}
   150  		fieldsData = append(fieldsData, col)
   151  	}
   152  
   153  	return fieldsData, rowSize, nil
   154  }
   155  
   156  func (c *GrpcClient) mergeDynamicColumns(dynamicName string, rowSize int, columns []entity.Column) (*schemapb.FieldData, error) {
   157  	values := make([][]byte, 0, rowSize)
   158  	for i := 0; i < rowSize; i++ {
   159  		m := make(map[string]interface{})
   160  		for _, column := range columns {
   161  			// range guaranteed
   162  			m[column.Name()], _ = column.Get(i)
   163  		}
   164  		bs, err := json.Marshal(m)
   165  		if err != nil {
   166  			return nil, err
   167  		}
   168  		values = append(values, bs)
   169  	}
   170  	return &schemapb.FieldData{
   171  		Type:      schemapb.DataType_JSON,
   172  		FieldName: dynamicName,
   173  		Field: &schemapb.FieldData_Scalars{
   174  			Scalars: &schemapb.ScalarField{
   175  				Data: &schemapb.ScalarField_JsonData{
   176  					JsonData: &schemapb.JSONArray{
   177  						Data: values,
   178  					},
   179  				},
   180  			},
   181  		},
   182  		IsDynamic: true,
   183  	}, nil
   184  }
   185  
   186  // Flush force collection to flush memory records into storage
   187  // in sync mode, flush will wait all segments to be flushed
   188  func (c *GrpcClient) Flush(ctx context.Context, collName string, async bool, opts ...FlushOption) error {
   189  	_, _, _, _, err := c.FlushV2(ctx, collName, async, opts...)
   190  	return err
   191  }
   192  
   193  // Flush force collection to flush memory records into storage
   194  // in sync mode, flush will wait all segments to be flushed
   195  func (c *GrpcClient) FlushV2(ctx context.Context, collName string, async bool, opts ...FlushOption) ([]int64, []int64, int64, map[string]msgpb.MsgPosition, error) {
   196  	if c.Service == nil {
   197  		return nil, nil, 0, nil, ErrClientNotReady
   198  	}
   199  	if err := c.checkCollectionExists(ctx, collName); err != nil {
   200  		return nil, nil, 0, nil, err
   201  	}
   202  	req := &milvuspb.FlushRequest{
   203  		DbName:          "", // reserved,
   204  		CollectionNames: []string{collName},
   205  	}
   206  	for _, opt := range opts {
   207  		opt(req)
   208  	}
   209  	resp, err := c.Service.Flush(ctx, req)
   210  	if err != nil {
   211  		return nil, nil, 0, nil, err
   212  	}
   213  	if err := handleRespStatus(resp.GetStatus()); err != nil {
   214  		return nil, nil, 0, nil, err
   215  	}
   216  	channelCPs := resp.GetChannelCps()
   217  	if !async {
   218  		segmentIDs, has := resp.GetCollSegIDs()[collName]
   219  		ids := segmentIDs.GetData()
   220  		if has && len(ids) > 0 {
   221  			flushed := func() bool {
   222  				resp, err := c.Service.GetFlushState(ctx, &milvuspb.GetFlushStateRequest{
   223  					SegmentIDs:     ids,
   224  					FlushTs:        resp.GetCollFlushTs()[collName],
   225  					CollectionName: collName,
   226  				})
   227  				if err != nil {
   228  					// TODO max retry
   229  					return false
   230  				}
   231  				return resp.GetFlushed()
   232  			}
   233  			for !flushed() {
   234  				// respect context deadline/cancel
   235  				select {
   236  				case <-ctx.Done():
   237  					return nil, nil, 0, nil, errors.New("deadline exceeded")
   238  				default:
   239  				}
   240  				time.Sleep(200 * time.Millisecond)
   241  			}
   242  		}
   243  	}
   244  	channelCPEntities := make(map[string]msgpb.MsgPosition, len(channelCPs))
   245  	for k, v := range channelCPs {
   246  		channelCPEntities[k] = msgpb.MsgPosition{
   247  			ChannelName: v.GetChannelName(),
   248  			MsgID:       v.GetMsgID(),
   249  			MsgGroup:    v.GetMsgGroup(),
   250  			Timestamp:   v.GetTimestamp(),
   251  		}
   252  	}
   253  	return resp.GetCollSegIDs()[collName].GetData(), resp.GetFlushCollSegIDs()[collName].GetData(), resp.GetCollSealTimes()[collName], channelCPEntities, nil
   254  }
   255  
   256  // DeleteByPks deletes entries related to provided primary keys
   257  func (c *GrpcClient) DeleteByPks(ctx context.Context, collName string, partitionName string, ids entity.Column) error {
   258  	if c.Service == nil {
   259  		return ErrClientNotReady
   260  	}
   261  
   262  	// check collection name
   263  	if err := c.checkCollectionExists(ctx, collName); err != nil {
   264  		return err
   265  	}
   266  	coll, err := c.DescribeCollection(ctx, collName)
   267  	if err != nil {
   268  		return err
   269  	}
   270  	// check partition name
   271  	if partitionName != "" {
   272  		err := c.checkPartitionExists(ctx, collName, partitionName)
   273  		if err != nil {
   274  			return err
   275  		}
   276  	}
   277  	// check primary keys
   278  	if ids.Len() == 0 {
   279  		return errors.New("ids len must not be zero")
   280  	}
   281  	if ids.Type() != entity.FieldTypeInt64 && ids.Type() != entity.FieldTypeVarChar { // string key not supported yet
   282  		return errors.New("only int64 and varchar column can be primary key for now")
   283  	}
   284  
   285  	pkf := getPKField(coll.Schema)
   286  	// pkf shall not be nil since is returned from milvus
   287  	if ids.Name() != "" && pkf.Name != ids.Name() {
   288  		return errors.New("only delete by primary key is supported now")
   289  	}
   290  
   291  	expr := PKs2Expr(pkf.Name, ids)
   292  
   293  	req := &milvuspb.DeleteRequest{
   294  		DbName:         "",
   295  		CollectionName: collName,
   296  		PartitionName:  partitionName,
   297  		Expr:           expr,
   298  	}
   299  
   300  	resp, err := c.Service.Delete(ctx, req)
   301  	if err != nil {
   302  		return err
   303  	}
   304  	err = handleRespStatus(resp.GetStatus())
   305  	if err != nil {
   306  		return err
   307  	}
   308  	MetaCache.setSessionTs(collName, resp.Timestamp)
   309  	return nil
   310  }
   311  
   312  // Delete deletes entries match expression
   313  func (c *GrpcClient) Delete(ctx context.Context, collName string, partitionName string, expr string) error {
   314  	if c.Service == nil {
   315  		return ErrClientNotReady
   316  	}
   317  
   318  	// check collection name
   319  	if err := c.checkCollectionExists(ctx, collName); err != nil {
   320  		return err
   321  	}
   322  
   323  	// check partition name
   324  	if partitionName != "" {
   325  		err := c.checkPartitionExists(ctx, collName, partitionName)
   326  		if err != nil {
   327  			return err
   328  		}
   329  	}
   330  
   331  	req := &milvuspb.DeleteRequest{
   332  		DbName:         "",
   333  		CollectionName: collName,
   334  		PartitionName:  partitionName,
   335  		Expr:           expr,
   336  	}
   337  
   338  	resp, err := c.Service.Delete(ctx, req)
   339  	if err != nil {
   340  		return err
   341  	}
   342  	err = handleRespStatus(resp.GetStatus())
   343  	if err != nil {
   344  		return err
   345  	}
   346  	MetaCache.setSessionTs(collName, resp.Timestamp)
   347  	return nil
   348  }
   349  
   350  // Upsert Index into collection with column-based format
   351  // collName is the collection name
   352  // partitionName is the partition to upsert, if not specified(empty), default partition will be used
   353  // columns are slice of the column-based data
   354  func (c *GrpcClient) Upsert(ctx context.Context, collName string, partitionName string, columns ...entity.Column) (entity.Column, error) {
   355  	if c.Service == nil {
   356  		return nil, ErrClientNotReady
   357  	}
   358  	var schema *entity.Schema
   359  	collInfo, ok := MetaCache.getCollectionInfo(collName)
   360  	if !ok {
   361  		coll, err := c.DescribeCollection(ctx, collName)
   362  		if err != nil {
   363  			return nil, err
   364  		}
   365  		schema = coll.Schema
   366  	} else {
   367  		schema = collInfo.Schema
   368  	}
   369  
   370  	fieldsData, rowSize, err := c.processInsertColumns(schema, columns...)
   371  	if err != nil {
   372  		return nil, err
   373  	}
   374  
   375  	// 2. do upsert request
   376  	req := &milvuspb.UpsertRequest{
   377  		DbName:         "", // reserved
   378  		CollectionName: collName,
   379  		PartitionName:  partitionName,
   380  		FieldsData:     fieldsData,
   381  	}
   382  
   383  	req.NumRows = uint32(rowSize)
   384  
   385  	resp, err := c.Service.Upsert(ctx, req)
   386  	if err != nil {
   387  		return nil, err
   388  	}
   389  	if err := handleRespStatus(resp.GetStatus()); err != nil {
   390  		return nil, err
   391  	}
   392  	MetaCache.setSessionTs(collName, resp.Timestamp)
   393  	// 3. parse id column
   394  	return entity.IDColumns(schema, resp.GetIDs(), 0, -1)
   395  }
   396  
   397  // BulkInsert data files(json, numpy, etc.) on MinIO/S3 storage, read and parse them into sealed segments
   398  func (c *GrpcClient) BulkInsert(ctx context.Context, collName string, partitionName string, files []string, opts ...BulkInsertOption) (int64, error) {
   399  	if c.Service == nil {
   400  		return 0, ErrClientNotReady
   401  	}
   402  	req := &milvuspb.ImportRequest{
   403  		CollectionName: collName,
   404  		PartitionName:  partitionName,
   405  		Files:          files,
   406  	}
   407  
   408  	for _, opt := range opts {
   409  		opt(req)
   410  	}
   411  
   412  	resp, err := c.Service.Import(ctx, req)
   413  	if err != nil {
   414  		return 0, err
   415  	}
   416  	if err := handleRespStatus(resp.GetStatus()); err != nil {
   417  		return 0, err
   418  	}
   419  
   420  	return resp.Tasks[0], nil
   421  }
   422  
   423  // GetBulkInsertState checks import task state
   424  func (c *GrpcClient) GetBulkInsertState(ctx context.Context, taskID int64) (*entity.BulkInsertTaskState, error) {
   425  	if c.Service == nil {
   426  		return nil, ErrClientNotReady
   427  	}
   428  	req := &milvuspb.GetImportStateRequest{
   429  		Task: taskID,
   430  	}
   431  	resp, err := c.Service.GetImportState(ctx, req)
   432  	if err != nil {
   433  		return nil, err
   434  	}
   435  	if err := handleRespStatus(resp.GetStatus()); err != nil {
   436  		return nil, err
   437  	}
   438  
   439  	return &entity.BulkInsertTaskState{
   440  		ID:           resp.GetId(),
   441  		State:        entity.BulkInsertState(resp.GetState()),
   442  		RowCount:     resp.GetRowCount(),
   443  		IDList:       resp.GetIdList(),
   444  		Infos:        entity.KvPairsMap(resp.GetInfos()),
   445  		CollectionID: resp.GetCollectionId(),
   446  		SegmentIDs:   resp.GetSegmentIds(),
   447  		CreateTs:     resp.GetCreateTs(),
   448  	}, nil
   449  }
   450  
   451  // ListBulkInsertTasks list state of all import tasks
   452  func (c *GrpcClient) ListBulkInsertTasks(ctx context.Context, collName string, limit int64) ([]*entity.BulkInsertTaskState, error) {
   453  	if c.Service == nil {
   454  		return nil, ErrClientNotReady
   455  	}
   456  	req := &milvuspb.ListImportTasksRequest{
   457  		CollectionName: collName,
   458  		Limit:          limit,
   459  	}
   460  	resp, err := c.Service.ListImportTasks(ctx, req)
   461  	if err != nil {
   462  		return nil, err
   463  	}
   464  	if err := handleRespStatus(resp.GetStatus()); err != nil {
   465  		return nil, err
   466  	}
   467  
   468  	tasks := make([]*entity.BulkInsertTaskState, 0)
   469  	for _, task := range resp.GetTasks() {
   470  		tasks = append(tasks, &entity.BulkInsertTaskState{
   471  			ID:           task.GetId(),
   472  			State:        entity.BulkInsertState(task.GetState()),
   473  			RowCount:     task.GetRowCount(),
   474  			IDList:       task.GetIdList(),
   475  			Infos:        entity.KvPairsMap(task.GetInfos()),
   476  			CollectionID: task.GetCollectionId(),
   477  			SegmentIDs:   task.GetSegmentIds(),
   478  			CreateTs:     task.GetCreateTs(),
   479  		})
   480  	}
   481  
   482  	return tasks, nil
   483  }
   484  
   485  func vector2PlaceholderGroupBytes(vectors []entity.Vector) []byte {
   486  	phg := &commonpb.PlaceholderGroup{
   487  		Placeholders: []*commonpb.PlaceholderValue{
   488  			vector2Placeholder(vectors),
   489  		},
   490  	}
   491  
   492  	bs, _ := proto.Marshal(phg)
   493  	return bs
   494  }
   495  
   496  func vector2Placeholder(vectors []entity.Vector) *commonpb.PlaceholderValue {
   497  	var placeHolderType commonpb.PlaceholderType
   498  	ph := &commonpb.PlaceholderValue{
   499  		Tag:    "$0",
   500  		Values: make([][]byte, 0, len(vectors)),
   501  	}
   502  	if len(vectors) == 0 {
   503  		return ph
   504  	}
   505  	switch vectors[0].(type) {
   506  	case entity.FloatVector:
   507  		placeHolderType = commonpb.PlaceholderType_FloatVector
   508  	case entity.BinaryVector:
   509  		placeHolderType = commonpb.PlaceholderType_BinaryVector
   510  	case entity.BFloat16Vector:
   511  		placeHolderType = commonpb.PlaceholderType_BFloat16Vector
   512  	case entity.Float16Vector:
   513  		placeHolderType = commonpb.PlaceholderType_Float16Vector
   514  	case entity.SparseEmbedding:
   515  		placeHolderType = commonpb.PlaceholderType_SparseFloatVector
   516  	}
   517  	ph.Type = placeHolderType
   518  	for _, vector := range vectors {
   519  		ph.Values = append(ph.Values, vector.Serialize())
   520  	}
   521  	return ph
   522  }