github.com/weaviate/weaviate@v1.24.6/usecases/sharding/remote_index.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package sharding 13 14 import ( 15 "context" 16 "fmt" 17 "io" 18 "math/rand" 19 20 "github.com/go-openapi/strfmt" 21 "github.com/pkg/errors" 22 "github.com/weaviate/weaviate/entities/additional" 23 "github.com/weaviate/weaviate/entities/aggregation" 24 "github.com/weaviate/weaviate/entities/filters" 25 "github.com/weaviate/weaviate/entities/search" 26 "github.com/weaviate/weaviate/entities/searchparams" 27 "github.com/weaviate/weaviate/entities/storobj" 28 "github.com/weaviate/weaviate/usecases/objects" 29 ) 30 31 type RemoteIndex struct { 32 class string 33 stateGetter shardingStateGetter 34 client RemoteIndexClient 35 nodeResolver nodeResolver 36 } 37 38 type shardingStateGetter interface { 39 // ShardOwner returns id of owner node 40 ShardOwner(class, shard string) (string, error) 41 ShardReplicas(class, shard string) ([]string, error) 42 } 43 44 func NewRemoteIndex(className string, 45 stateGetter shardingStateGetter, nodeResolver nodeResolver, 46 client RemoteIndexClient, 47 ) *RemoteIndex { 48 return &RemoteIndex{ 49 class: className, 50 stateGetter: stateGetter, 51 client: client, 52 nodeResolver: nodeResolver, 53 } 54 } 55 56 type nodeResolver interface { 57 NodeHostname(nodeName string) (string, bool) 58 } 59 60 type RemoteIndexClient interface { 61 PutObject(ctx context.Context, hostName, indexName, shardName string, 62 obj *storobj.Object) error 63 BatchPutObjects(ctx context.Context, hostName, indexName, shardName string, 64 objs []*storobj.Object, repl *additional.ReplicationProperties) []error 65 BatchAddReferences(ctx context.Context, hostName, indexName, shardName string, 66 refs objects.BatchReferences) []error 67 GetObject(ctx context.Context, hostname, indexName, shardName string, 68 id strfmt.UUID, props search.SelectProperties, 69 additional additional.Properties) (*storobj.Object, error) 70 Exists(ctx context.Context, hostname, indexName, shardName string, 71 id strfmt.UUID) (bool, error) 72 DeleteObject(ctx context.Context, hostname, indexName, shardName string, 73 id strfmt.UUID) error 74 MergeObject(ctx context.Context, hostname, indexName, shardName string, 75 mergeDoc objects.MergeDocument) error 76 MultiGetObjects(ctx context.Context, hostname, indexName, shardName string, 77 ids []strfmt.UUID) ([]*storobj.Object, error) 78 SearchShard(ctx context.Context, hostname, indexName, shardName string, 79 searchVector []float32, targetVector string, limit int, filters *filters.LocalFilter, 80 keywordRanking *searchparams.KeywordRanking, sort []filters.Sort, 81 cursor *filters.Cursor, groupBy *searchparams.GroupBy, 82 additional additional.Properties, 83 ) ([]*storobj.Object, []float32, error) 84 Aggregate(ctx context.Context, hostname, indexName, shardName string, 85 params aggregation.Params) (*aggregation.Result, error) 86 FindUUIDs(ctx context.Context, hostName, indexName, shardName string, 87 filters *filters.LocalFilter) ([]strfmt.UUID, error) 88 DeleteObjectBatch(ctx context.Context, hostName, indexName, shardName string, 89 uuids []strfmt.UUID, dryRun bool) objects.BatchSimpleObjects 90 GetShardQueueSize(ctx context.Context, hostName, indexName, shardName string) (int64, error) 91 GetShardStatus(ctx context.Context, hostName, indexName, shardName string) (string, error) 92 UpdateShardStatus(ctx context.Context, hostName, indexName, shardName, 93 targetStatus string) error 94 95 PutFile(ctx context.Context, hostName, indexName, shardName, fileName string, 96 payload io.ReadSeekCloser) error 97 } 98 99 func (ri *RemoteIndex) PutObject(ctx context.Context, shardName string, 100 obj *storobj.Object, 101 ) error { 102 owner, err := ri.stateGetter.ShardOwner(ri.class, shardName) 103 if err != nil { 104 return fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err) 105 } 106 107 host, ok := ri.nodeResolver.NodeHostname(owner) 108 if !ok { 109 return errors.Errorf("resolve node name %q to host", owner) 110 } 111 112 return ri.client.PutObject(ctx, host, ri.class, shardName, obj) 113 } 114 115 // helper for single errors that affect the entire batch, assign the error to 116 // every single item in the batch 117 func duplicateErr(in error, count int) []error { 118 out := make([]error, count) 119 for i := range out { 120 out[i] = in 121 } 122 return out 123 } 124 125 func (ri *RemoteIndex) BatchPutObjects(ctx context.Context, shardName string, 126 objs []*storobj.Object, 127 ) []error { 128 owner, err := ri.stateGetter.ShardOwner(ri.class, shardName) 129 if err != nil { 130 return duplicateErr(fmt.Errorf("class %s has no physical shard %q: %w", 131 ri.class, shardName, err), len(objs)) 132 } 133 134 host, ok := ri.nodeResolver.NodeHostname(owner) 135 if !ok { 136 return duplicateErr(fmt.Errorf("resolve node name %q to host", 137 owner), len(objs)) 138 } 139 140 return ri.client.BatchPutObjects(ctx, host, ri.class, shardName, objs, nil) 141 } 142 143 func (ri *RemoteIndex) BatchAddReferences(ctx context.Context, shardName string, 144 refs objects.BatchReferences, 145 ) []error { 146 owner, err := ri.stateGetter.ShardOwner(ri.class, shardName) 147 if err != nil { 148 return duplicateErr(fmt.Errorf("class %s has no physical shard %q: %w", 149 ri.class, shardName, err), len(refs)) 150 } 151 152 host, ok := ri.nodeResolver.NodeHostname(owner) 153 if !ok { 154 return duplicateErr(fmt.Errorf("resolve node name %q to host", 155 owner), len(refs)) 156 } 157 158 return ri.client.BatchAddReferences(ctx, host, ri.class, shardName, refs) 159 } 160 161 func (ri *RemoteIndex) Exists(ctx context.Context, shardName string, 162 id strfmt.UUID, 163 ) (bool, error) { 164 owner, err := ri.stateGetter.ShardOwner(ri.class, shardName) 165 if err != nil { 166 return false, fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err) 167 } 168 169 host, ok := ri.nodeResolver.NodeHostname(owner) 170 if !ok { 171 return false, errors.Errorf("resolve node name %q to host", owner) 172 } 173 174 return ri.client.Exists(ctx, host, ri.class, shardName, id) 175 } 176 177 func (ri *RemoteIndex) DeleteObject(ctx context.Context, shardName string, 178 id strfmt.UUID, 179 ) error { 180 owner, err := ri.stateGetter.ShardOwner(ri.class, shardName) 181 if err != nil { 182 return fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err) 183 } 184 185 host, ok := ri.nodeResolver.NodeHostname(owner) 186 if !ok { 187 return errors.Errorf("resolve node name %q to host", owner) 188 } 189 190 return ri.client.DeleteObject(ctx, host, ri.class, shardName, id) 191 } 192 193 func (ri *RemoteIndex) MergeObject(ctx context.Context, shardName string, 194 mergeDoc objects.MergeDocument, 195 ) error { 196 owner, err := ri.stateGetter.ShardOwner(ri.class, shardName) 197 if err != nil { 198 return fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err) 199 } 200 201 host, ok := ri.nodeResolver.NodeHostname(owner) 202 if !ok { 203 return errors.Errorf("resolve node name %q to host", owner) 204 } 205 206 return ri.client.MergeObject(ctx, host, ri.class, shardName, mergeDoc) 207 } 208 209 func (ri *RemoteIndex) GetObject(ctx context.Context, shardName string, 210 id strfmt.UUID, props search.SelectProperties, 211 additional additional.Properties, 212 ) (*storobj.Object, error) { 213 owner, err := ri.stateGetter.ShardOwner(ri.class, shardName) 214 if err != nil { 215 return nil, fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err) 216 } 217 218 host, ok := ri.nodeResolver.NodeHostname(owner) 219 if !ok { 220 return nil, errors.Errorf("resolve node name %q to host", owner) 221 } 222 223 return ri.client.GetObject(ctx, host, ri.class, shardName, id, props, additional) 224 } 225 226 func (ri *RemoteIndex) MultiGetObjects(ctx context.Context, shardName string, 227 ids []strfmt.UUID, 228 ) ([]*storobj.Object, error) { 229 owner, err := ri.stateGetter.ShardOwner(ri.class, shardName) 230 if err != nil { 231 return nil, fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err) 232 } 233 234 host, ok := ri.nodeResolver.NodeHostname(owner) 235 if !ok { 236 return nil, errors.Errorf("resolve node name %q to host", owner) 237 } 238 239 return ri.client.MultiGetObjects(ctx, host, ri.class, shardName, ids) 240 } 241 242 func (ri *RemoteIndex) SearchShard(ctx context.Context, shard string, 243 queryVec []float32, 244 targetVector string, 245 limit int, 246 filters *filters.LocalFilter, 247 keywordRanking *searchparams.KeywordRanking, 248 sort []filters.Sort, 249 cursor *filters.Cursor, 250 groupBy *searchparams.GroupBy, 251 adds additional.Properties, 252 replEnabled bool, 253 ) ([]*storobj.Object, []float32, string, error) { 254 type pair struct { 255 first []*storobj.Object 256 second []float32 257 } 258 f := func(node, host string) (interface{}, error) { 259 objs, scores, err := ri.client.SearchShard(ctx, host, ri.class, shard, 260 queryVec, targetVector, limit, filters, keywordRanking, sort, cursor, groupBy, adds) 261 if err != nil { 262 return nil, err 263 } 264 return pair{objs, scores}, err 265 } 266 rr, node, err := ri.queryReplicas(ctx, shard, f) 267 if err != nil { 268 return nil, nil, node, err 269 } 270 r := rr.(pair) 271 return r.first, r.second, node, err 272 } 273 274 func (ri *RemoteIndex) Aggregate( 275 ctx context.Context, 276 shard string, 277 params aggregation.Params, 278 ) (*aggregation.Result, error) { 279 f := func(_, host string) (interface{}, error) { 280 r, err := ri.client.Aggregate(ctx, host, ri.class, shard, params) 281 if err != nil { 282 return nil, err 283 } 284 return r, nil 285 } 286 rr, _, err := ri.queryReplicas(ctx, shard, f) 287 if err != nil { 288 return nil, err 289 } 290 return rr.(*aggregation.Result), err 291 } 292 293 func (ri *RemoteIndex) FindUUIDs(ctx context.Context, shardName string, 294 filters *filters.LocalFilter, 295 ) ([]strfmt.UUID, error) { 296 owner, err := ri.stateGetter.ShardOwner(ri.class, shardName) 297 if err != nil { 298 return nil, fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err) 299 } 300 301 host, ok := ri.nodeResolver.NodeHostname(owner) 302 if !ok { 303 return nil, errors.Errorf("resolve node name %q to host", owner) 304 } 305 306 return ri.client.FindUUIDs(ctx, host, ri.class, shardName, filters) 307 } 308 309 func (ri *RemoteIndex) DeleteObjectBatch(ctx context.Context, shardName string, 310 uuids []strfmt.UUID, dryRun bool, 311 ) objects.BatchSimpleObjects { 312 owner, err := ri.stateGetter.ShardOwner(ri.class, shardName) 313 if err != nil { 314 err := fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err) 315 return objects.BatchSimpleObjects{objects.BatchSimpleObject{Err: err}} 316 } 317 318 host, ok := ri.nodeResolver.NodeHostname(owner) 319 if !ok { 320 err := fmt.Errorf("resolve node name %q to host", owner) 321 return objects.BatchSimpleObjects{objects.BatchSimpleObject{Err: err}} 322 } 323 324 return ri.client.DeleteObjectBatch(ctx, host, ri.class, shardName, uuids, dryRun) 325 } 326 327 func (ri *RemoteIndex) GetShardQueueSize(ctx context.Context, shardName string) (int64, error) { 328 owner, err := ri.stateGetter.ShardOwner(ri.class, shardName) 329 if err != nil { 330 return 0, fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err) 331 } 332 333 host, ok := ri.nodeResolver.NodeHostname(owner) 334 if !ok { 335 return 0, errors.Errorf("resolve node name %q to host", owner) 336 } 337 338 return ri.client.GetShardQueueSize(ctx, host, ri.class, shardName) 339 } 340 341 func (ri *RemoteIndex) GetShardStatus(ctx context.Context, shardName string) (string, error) { 342 owner, err := ri.stateGetter.ShardOwner(ri.class, shardName) 343 if err != nil { 344 return "", fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err) 345 } 346 347 host, ok := ri.nodeResolver.NodeHostname(owner) 348 if !ok { 349 return "", errors.Errorf("resolve node name %q to host", owner) 350 } 351 352 return ri.client.GetShardStatus(ctx, host, ri.class, shardName) 353 } 354 355 func (ri *RemoteIndex) UpdateShardStatus(ctx context.Context, shardName, targetStatus string) error { 356 owner, err := ri.stateGetter.ShardOwner(ri.class, shardName) 357 if err != nil { 358 return fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err) 359 } 360 361 host, ok := ri.nodeResolver.NodeHostname(owner) 362 if !ok { 363 return errors.Errorf("resolve node name %q to host", owner) 364 } 365 366 return ri.client.UpdateShardStatus(ctx, host, ri.class, shardName, targetStatus) 367 } 368 369 func (ri *RemoteIndex) queryReplicas( 370 ctx context.Context, 371 shard string, 372 do func(nodeName, host string) (interface{}, error), 373 ) (resp interface{}, node string, err error) { 374 replicas, err := ri.stateGetter.ShardReplicas(ri.class, shard) 375 if err != nil || len(replicas) == 0 { 376 return nil, 377 "", 378 fmt.Errorf("class %q has no physical shard %q: %w", ri.class, shard, err) 379 } 380 381 queryOne := func(replica string) (interface{}, error) { 382 host, ok := ri.nodeResolver.NodeHostname(replica) 383 if !ok || host == "" { 384 return nil, errors.Errorf("resolve node name %q to host", replica) 385 } 386 return do(replica, host) 387 } 388 389 queryUntil := func(replicas []string) (resp interface{}, node string, err error) { 390 for _, node = range replicas { 391 if errC := ctx.Err(); errC != nil { 392 return nil, node, errC 393 } 394 if resp, err = queryOne(node); err == nil { 395 return resp, node, nil 396 } 397 } 398 return 399 } 400 first := rand.Intn(len(replicas)) 401 if resp, node, err = queryUntil(replicas[first:]); err != nil && first != 0 { 402 return queryUntil(replicas[:first]) 403 } 404 return 405 }