github.com/milvus-io/milvus-sdk-go/v2@v2.4.1/client/insert.go

// Copyright (C) 2019-2021 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

package client

import (
    "context"
    "encoding/json"
    "fmt"
    "time"

    "github.com/cockroachdb/errors"
    "github.com/golang/protobuf/proto"
    "github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
    "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
    "github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
    "github.com/milvus-io/milvus-proto/go-api/v2/schemapb"

    "github.com/milvus-io/milvus-sdk-go/v2/entity"
)

// Insert inserts data into collection with column-based format
// collName is the collection name
// partitionName is the partition to insert into; if not specified (empty), the default partition is used
// columns are slices of the column-based data
func (c *GrpcClient) Insert(ctx context.Context, collName string, partitionName string, columns ...entity.Column) (entity.Column, error) {
    if c.Service == nil {
        return nil, ErrClientNotReady
    }
    var schema *entity.Schema
    collInfo, ok := MetaCache.getCollectionInfo(collName)
    if !ok {
        coll, err := c.DescribeCollection(ctx, collName)
        if err != nil {
            return nil, err
        }
        schema = coll.Schema
    } else {
        schema = collInfo.Schema
    }

    // 1. convert columns to field data
    fieldsData, rowSize, err := c.processInsertColumns(schema, columns...)
    if err != nil {
        return nil, err
    }

    // 2. do insert request
    req := &milvuspb.InsertRequest{
        DbName:         "", // reserved
        CollectionName: collName,
        PartitionName:  partitionName,
        FieldsData:     fieldsData,
    }

    req.NumRows = uint32(rowSize)

    resp, err := c.Service.Insert(ctx, req)
    if err != nil {
        return nil, err
    }
    if err := handleRespStatus(resp.GetStatus()); err != nil {
        return nil, err
    }
    MetaCache.setSessionTs(collName, resp.Timestamp)
    // 3. parse id column
    return entity.IDColumns(schema, resp.GetIDs(), 0, -1)
}
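// Usage sketch (illustrative, not part of this file): inserting column-based
// data. The collection name "films", field names "id"/"embedding", and dim 8
// are assumptions for the example.
//
//    vectors := [][]float32{
//        {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8},
//        {0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9},
//        {0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0},
//    }
//    idCol, err := cli.Insert(ctx, "films", "", // empty partition name -> default partition
//        entity.NewColumnInt64("id", []int64{1, 2, 3}),
//        entity.NewColumnFloatVector("embedding", 8, vectors),
//    )
//    if err != nil {
//        // handle insert failure
//    }
//    _ = idCol // primary keys of the inserted rows, see entity.IDColumns above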
func (c *GrpcClient) processInsertColumns(colSchema *entity.Schema, columns ...entity.Column) ([]*schemapb.FieldData, int, error) {
    // setup dynamic related var
    isDynamic := colSchema.EnableDynamicField

    // check columns and field matches
    var rowSize int
    mNameField := make(map[string]*entity.Field)
    for _, field := range colSchema.Fields {
        mNameField[field.Name] = field
    }
    mNameColumn := make(map[string]entity.Column)
    var dynamicColumns []entity.Column
    for _, column := range columns {
        _, dup := mNameColumn[column.Name()]
        if dup {
            return nil, 0, fmt.Errorf("duplicated column %s found", column.Name())
        }
        l := column.Len()
        if rowSize == 0 {
            rowSize = l
        } else {
            if rowSize != l {
                return nil, 0, errors.New("column size does not match")
            }
        }
        field, has := mNameField[column.Name()]
        if !has {
            if !isDynamic {
                return nil, 0, fmt.Errorf("field %s does not exist in collection %s", column.Name(), colSchema.CollectionName)
            }
            // add to dynamic column list for further processing
            dynamicColumns = append(dynamicColumns, column)
            continue
        }

        mNameColumn[column.Name()] = column
        if column.Type() != field.DataType {
            return nil, 0, fmt.Errorf("param column %s has type %v but collection field definition is %v", column.Name(), column.Type(), field.DataType)
        }
        if field.DataType == entity.FieldTypeFloatVector || field.DataType == entity.FieldTypeBinaryVector {
            dim := 0
            switch column := column.(type) {
            case *entity.ColumnFloatVector:
                dim = column.Dim()
            case *entity.ColumnBinaryVector:
                dim = column.Dim()
            }
            if fmt.Sprintf("%d", dim) != field.TypeParams[entity.TypeParamDim] {
                return nil, 0, fmt.Errorf("param column %s vector dim %d does not match collection definition, which has dim of %s", field.Name, dim, field.TypeParams[entity.TypeParamDim])
            }
        }
    }

    // check that all fixed fields are passed a value
    for _, field := range colSchema.Fields {
        _, has := mNameColumn[field.Name]
        if !has &&
            !field.AutoID && !field.IsDynamic {
            return nil, 0, fmt.Errorf("field %s not passed", field.Name)
        }
    }

    fieldsData := make([]*schemapb.FieldData, 0, len(mNameColumn)+1)
    for _, fixedColumn := range mNameColumn {
        fieldsData = append(fieldsData, fixedColumn.FieldData())
    }
    if len(dynamicColumns) > 0 {
        // use empty column name here
        col, err := c.mergeDynamicColumns("", rowSize, dynamicColumns)
        if err != nil {
            return nil, 0, err
        }
        fieldsData = append(fieldsData, col)
    }

    return fieldsData, rowSize, nil
}

func (c *GrpcClient) mergeDynamicColumns(dynamicName string, rowSize int, columns []entity.Column) (*schemapb.FieldData, error) {
    values := make([][]byte, 0, rowSize)
    for i := 0; i < rowSize; i++ {
        m := make(map[string]interface{})
        for _, column := range columns {
            // index i is within range, guaranteed by the rowSize check above
            m[column.Name()], _ = column.Get(i)
        }
        bs, err := json.Marshal(m)
        if err != nil {
            return nil, err
        }
        values = append(values, bs)
    }
    return &schemapb.FieldData{
        Type:      schemapb.DataType_JSON,
        FieldName: dynamicName,
        Field: &schemapb.FieldData_Scalars{
            Scalars: &schemapb.ScalarField{
                Data: &schemapb.ScalarField_JsonData{
                    JsonData: &schemapb.JSONArray{
                        Data: values,
                    },
                },
            },
        },
        IsDynamic: true,
    }, nil
}
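// Usage sketch (illustrative): when the schema has EnableDynamicField enabled,
// columns matching no fixed field are merged row-by-row into one JSON field by
// mergeDynamicColumns above. The "tag" column here is an assumption.
//
//    // row i of the dynamic field becomes {"tag": tags[i]}
//    tags := []string{"action", "drama", "comedy"}
//    _, err := cli.Insert(ctx, "films", "",
//        entity.NewColumnInt64("id", []int64{1, 2, 3}),
//        entity.NewColumnFloatVector("embedding", 8, vectors),
//        entity.NewColumnVarChar("tag", tags), // not in the schema -> dynamic column
//    )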
// Flush forces the collection to flush memory records into storage
// in sync mode, flush waits until all segments are flushed
func (c *GrpcClient) Flush(ctx context.Context, collName string, async bool, opts ...FlushOption) error {
    _, _, _, _, err := c.FlushV2(ctx, collName, async, opts...)
    return err
}

// FlushV2 forces the collection to flush memory records into storage
// in sync mode, flush waits until all segments are flushed
func (c *GrpcClient) FlushV2(ctx context.Context, collName string, async bool, opts ...FlushOption) ([]int64, []int64, int64, map[string]msgpb.MsgPosition, error) {
    if c.Service == nil {
        return nil, nil, 0, nil, ErrClientNotReady
    }
    if err := c.checkCollectionExists(ctx, collName); err != nil {
        return nil, nil, 0, nil, err
    }
    req := &milvuspb.FlushRequest{
        DbName:          "", // reserved
        CollectionNames: []string{collName},
    }
    for _, opt := range opts {
        opt(req)
    }
    resp, err := c.Service.Flush(ctx, req)
    if err != nil {
        return nil, nil, 0, nil, err
    }
    if err := handleRespStatus(resp.GetStatus()); err != nil {
        return nil, nil, 0, nil, err
    }
    channelCPs := resp.GetChannelCps()
    if !async {
        segmentIDs, has := resp.GetCollSegIDs()[collName]
        ids := segmentIDs.GetData()
        if has && len(ids) > 0 {
            flushed := func() bool {
                resp, err := c.Service.GetFlushState(ctx, &milvuspb.GetFlushStateRequest{
                    SegmentIDs:     ids,
                    FlushTs:        resp.GetCollFlushTs()[collName],
                    CollectionName: collName,
                })
                if err != nil {
                    // TODO max retry
                    return false
                }
                return resp.GetFlushed()
            }
            for !flushed() {
                // respect context deadline/cancel
                select {
                case <-ctx.Done():
                    return nil, nil, 0, nil, errors.New("deadline exceeded")
                default:
                }
                time.Sleep(200 * time.Millisecond)
            }
        }
    }
    channelCPEntities := make(map[string]msgpb.MsgPosition, len(channelCPs))
    for k, v := range channelCPs {
        channelCPEntities[k] = msgpb.MsgPosition{
            ChannelName: v.GetChannelName(),
            MsgID:       v.GetMsgID(),
            MsgGroup:    v.GetMsgGroup(),
            Timestamp:   v.GetTimestamp(),
        }
    }
    return resp.GetCollSegIDs()[collName].GetData(), resp.GetFlushCollSegIDs()[collName].GetData(), resp.GetCollSealTimes()[collName], channelCPEntities, nil
}
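// Usage sketch (illustrative): a synchronous flush bounded by a context
// timeout, since the sync path above polls GetFlushState every 200ms until all
// segments report flushed or ctx is done.
//
//    flushCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
//    defer cancel()
//    if err := cli.Flush(flushCtx, "films", false); err != nil {
//        // flush failed or the 30s budget was exceeded
//    }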
// DeleteByPks deletes entries related to the provided primary keys
func (c *GrpcClient) DeleteByPks(ctx context.Context, collName string, partitionName string, ids entity.Column) error {
    if c.Service == nil {
        return ErrClientNotReady
    }

    // check collection name
    if err := c.checkCollectionExists(ctx, collName); err != nil {
        return err
    }
    coll, err := c.DescribeCollection(ctx, collName)
    if err != nil {
        return err
    }
    // check partition name
    if partitionName != "" {
        err := c.checkPartitionExists(ctx, collName, partitionName)
        if err != nil {
            return err
        }
    }
    // check primary keys
    if ids.Len() == 0 {
        return errors.New("ids len must not be zero")
    }
    if ids.Type() != entity.FieldTypeInt64 && ids.Type() != entity.FieldTypeVarChar { // string key not supported yet
        return errors.New("only int64 and varchar column can be primary key for now")
    }

    pkf := getPKField(coll.Schema)
    // pkf shall not be nil since it is returned from milvus
    if ids.Name() != "" && pkf.Name != ids.Name() {
        return errors.New("only delete by primary key is supported now")
    }

    expr := PKs2Expr(pkf.Name, ids)

    req := &milvuspb.DeleteRequest{
        DbName:         "",
        CollectionName: collName,
        PartitionName:  partitionName,
        Expr:           expr,
    }

    resp, err := c.Service.Delete(ctx, req)
    if err != nil {
        return err
    }
    err = handleRespStatus(resp.GetStatus())
    if err != nil {
        return err
    }
    MetaCache.setSessionTs(collName, resp.Timestamp)
    return nil
}

// Delete deletes entries matching the provided expression
func (c *GrpcClient) Delete(ctx context.Context, collName string, partitionName string, expr string) error {
    if c.Service == nil {
        return ErrClientNotReady
    }

    // check collection name
    if err := c.checkCollectionExists(ctx, collName); err != nil {
        return err
    }

    // check partition name
    if partitionName != "" {
        err := c.checkPartitionExists(ctx, collName, partitionName)
        if err != nil {
            return err
        }
    }

    req := &milvuspb.DeleteRequest{
        DbName:         "",
        CollectionName: collName,
        PartitionName:  partitionName,
        Expr:           expr,
    }

    resp, err := c.Service.Delete(ctx, req)
    if err != nil {
        return err
    }
    err = handleRespStatus(resp.GetStatus())
    if err != nil {
        return err
    }
    MetaCache.setSessionTs(collName, resp.Timestamp)
    return nil
}
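// Usage sketch (illustrative): deleting by primary keys versus by a boolean
// expression. The primary key field name "id" is an assumption.
//
//    // delete the rows whose primary key is 1 or 2
//    err := cli.DeleteByPks(ctx, "films", "", entity.NewColumnInt64("id", []int64{1, 2}))
//
//    // the equivalent expression-based delete
//    err = cli.Delete(ctx, "films", "", "id in [1, 2]")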
// Upsert upserts data into collection with column-based format
// collName is the collection name
// partitionName is the partition to upsert into; if not specified (empty), the default partition is used
// columns are slices of the column-based data
func (c *GrpcClient) Upsert(ctx context.Context, collName string, partitionName string, columns ...entity.Column) (entity.Column, error) {
    if c.Service == nil {
        return nil, ErrClientNotReady
    }
    var schema *entity.Schema
    collInfo, ok := MetaCache.getCollectionInfo(collName)
    if !ok {
        coll, err := c.DescribeCollection(ctx, collName)
        if err != nil {
            return nil, err
        }
        schema = coll.Schema
    } else {
        schema = collInfo.Schema
    }

    // 1. convert columns to field data
    fieldsData, rowSize, err := c.processInsertColumns(schema, columns...)
    if err != nil {
        return nil, err
    }

    // 2. do upsert request
    req := &milvuspb.UpsertRequest{
        DbName:         "", // reserved
        CollectionName: collName,
        PartitionName:  partitionName,
        FieldsData:     fieldsData,
    }

    req.NumRows = uint32(rowSize)

    resp, err := c.Service.Upsert(ctx, req)
    if err != nil {
        return nil, err
    }
    if err := handleRespStatus(resp.GetStatus()); err != nil {
        return nil, err
    }
    MetaCache.setSessionTs(collName, resp.Timestamp)
    // 3. parse id column
    return entity.IDColumns(schema, resp.GetIDs(), 0, -1)
}

// BulkInsert imports data files (JSON, Numpy, etc.) stored on MinIO/S3, reading and parsing them into sealed segments
func (c *GrpcClient) BulkInsert(ctx context.Context, collName string, partitionName string, files []string, opts ...BulkInsertOption) (int64, error) {
    if c.Service == nil {
        return 0, ErrClientNotReady
    }
    req := &milvuspb.ImportRequest{
        CollectionName: collName,
        PartitionName:  partitionName,
        Files:          files,
    }

    for _, opt := range opts {
        opt(req)
    }

    resp, err := c.Service.Import(ctx, req)
    if err != nil {
        return 0, err
    }
    if err := handleRespStatus(resp.GetStatus()); err != nil {
        return 0, err
    }
    // guard against an empty task list to avoid an index-out-of-range panic
    if len(resp.GetTasks()) == 0 {
        return 0, errors.New("import response contains no task id")
    }

    return resp.GetTasks()[0], nil
}

// GetBulkInsertState checks the state of an import task
func (c *GrpcClient) GetBulkInsertState(ctx context.Context, taskID int64) (*entity.BulkInsertTaskState, error) {
    if c.Service == nil {
        return nil, ErrClientNotReady
    }
    req := &milvuspb.GetImportStateRequest{
        Task: taskID,
    }
    resp, err := c.Service.GetImportState(ctx, req)
    if err != nil {
        return nil, err
    }
    if err := handleRespStatus(resp.GetStatus()); err != nil {
        return nil, err
    }

    return &entity.BulkInsertTaskState{
        ID:           resp.GetId(),
        State:        entity.BulkInsertState(resp.GetState()),
        RowCount:     resp.GetRowCount(),
        IDList:       resp.GetIdList(),
        Infos:        entity.KvPairsMap(resp.GetInfos()),
        CollectionID: resp.GetCollectionId(),
        SegmentIDs:   resp.GetSegmentIds(),
        CreateTs:     resp.GetCreateTs(),
    }, nil
}

// ListBulkInsertTasks lists the state of all import tasks
func (c *GrpcClient) ListBulkInsertTasks(ctx context.Context, collName string, limit int64) ([]*entity.BulkInsertTaskState, error) {
    if c.Service == nil {
        return nil, ErrClientNotReady
    }
    req := &milvuspb.ListImportTasksRequest{
        CollectionName: collName,
        Limit:          limit,
    }
    resp, err := c.Service.ListImportTasks(ctx, req)
    if err != nil {
        return nil, err
    }
    if err := handleRespStatus(resp.GetStatus()); err != nil {
        return nil, err
    }

    tasks := make([]*entity.BulkInsertTaskState, 0)
    for _, task := range resp.GetTasks() {
        tasks = append(tasks, &entity.BulkInsertTaskState{
            ID:           task.GetId(),
            State:        entity.BulkInsertState(task.GetState()),
            RowCount:     task.GetRowCount(),
            IDList:       task.GetIdList(),
            Infos:        entity.KvPairsMap(task.GetInfos()),
            CollectionID: task.GetCollectionId(),
            SegmentIDs:   task.GetSegmentIds(),
            CreateTs:     task.GetCreateTs(),
        })
    }

    return tasks, nil
}
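// Usage sketch (illustrative): submitting a bulk insert and polling the task
// state. The file path is an assumption; the completed-state check casts the
// commonpb import state, matching how GetBulkInsertState builds its result.
//
//    taskID, err := cli.BulkInsert(ctx, "films", "", []string{"data/films.json"})
//    if err != nil {
//        // handle import submission failure
//    }
//    for {
//        state, err := cli.GetBulkInsertState(ctx, taskID)
//        if err != nil {
//            break
//        }
//        if state.State == entity.BulkInsertState(commonpb.ImportState_ImportCompleted) {
//            break // files parsed and persisted into sealed segments
//        }
//        time.Sleep(time.Second)
//    }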
func vector2PlaceholderGroupBytes(vectors []entity.Vector) []byte {
    phg := &commonpb.PlaceholderGroup{
        Placeholders: []*commonpb.PlaceholderValue{
            vector2Placeholder(vectors),
        },
    }

    bs, _ := proto.Marshal(phg)
    return bs
}

func vector2Placeholder(vectors []entity.Vector) *commonpb.PlaceholderValue {
    var placeHolderType commonpb.PlaceholderType
    ph := &commonpb.PlaceholderValue{
        Tag:    "$0",
        Values: make([][]byte, 0, len(vectors)),
    }
    if len(vectors) == 0 {
        return ph
    }
    switch vectors[0].(type) {
    case entity.FloatVector:
        placeHolderType = commonpb.PlaceholderType_FloatVector
    case entity.BinaryVector:
        placeHolderType = commonpb.PlaceholderType_BinaryVector
    case entity.BFloat16Vector:
        placeHolderType = commonpb.PlaceholderType_BFloat16Vector
    case entity.Float16Vector:
        placeHolderType = commonpb.PlaceholderType_Float16Vector
    case entity.SparseEmbedding:
        placeHolderType = commonpb.PlaceholderType_SparseFloatVector
    }
    ph.Type = placeHolderType
    for _, vector := range vectors {
        ph.Values = append(ph.Values, vector.Serialize())
    }
    return ph
}
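// Usage sketch (illustrative): the placeholder group bytes built above are what
// search requests carry as query vectors; e.g. two 4-dim float vectors:
//
//    vecs := []entity.Vector{
//        entity.FloatVector([]float32{0.1, 0.2, 0.3, 0.4}),
//        entity.FloatVector([]float32{0.5, 0.6, 0.7, 0.8}),
//    }
//    bs := vector2PlaceholderGroupBytes(vecs) // proto-encoded commonpb.PlaceholderGroup with Tag "$0"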