github.com/weaviate/weaviate@v1.24.6/usecases/sharding/remote_index.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package sharding
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"io"
    18  	"math/rand"
    19  
    20  	"github.com/go-openapi/strfmt"
    21  	"github.com/pkg/errors"
    22  	"github.com/weaviate/weaviate/entities/additional"
    23  	"github.com/weaviate/weaviate/entities/aggregation"
    24  	"github.com/weaviate/weaviate/entities/filters"
    25  	"github.com/weaviate/weaviate/entities/search"
    26  	"github.com/weaviate/weaviate/entities/searchparams"
    27  	"github.com/weaviate/weaviate/entities/storobj"
    28  	"github.com/weaviate/weaviate/usecases/objects"
    29  )
    30  
// RemoteIndex routes shard-level operations for a single class to other
// nodes. For every call it looks up the responsible node name(s) through
// stateGetter, translates the node name into a host through nodeResolver and
// then delegates the actual work to client.
type RemoteIndex struct {
	// class is the class name; it is handed to the client as the index name.
	class        string
	// stateGetter reports the owner node and the replica nodes of a shard.
	stateGetter  shardingStateGetter
	// client performs the calls against the resolved host.
	client       RemoteIndexClient
	// nodeResolver maps a node name to the host used to reach it.
	nodeResolver nodeResolver
}
    37  
// shardingStateGetter is the view of the sharding state that RemoteIndex
// needs in order to decide which node(s) to contact for a shard.
type shardingStateGetter interface {
	// ShardOwner returns id of owner node
	ShardOwner(class, shard string) (string, error)
	// ShardReplicas returns the names of the nodes that are queried as
	// replicas of the shard (see queryReplicas). An empty result is treated
	// by RemoteIndex as "no such physical shard".
	ShardReplicas(class, shard string) ([]string, error)
}
    43  
    44  func NewRemoteIndex(className string,
    45  	stateGetter shardingStateGetter, nodeResolver nodeResolver,
    46  	client RemoteIndexClient,
    47  ) *RemoteIndex {
    48  	return &RemoteIndex{
    49  		class:        className,
    50  		stateGetter:  stateGetter,
    51  		client:       client,
    52  		nodeResolver: nodeResolver,
    53  	}
    54  }
    55  
// nodeResolver translates a node name into the host used to reach that node.
type nodeResolver interface {
	// NodeHostname returns the host for nodeName. Callers in this file treat
	// a false second result as "node name could not be resolved".
	NodeHostname(nodeName string) (string, bool)
}
    59  
// RemoteIndexClient is the transport RemoteIndex delegates to once it has
// resolved a target host. Every method takes the host name, the index name
// and the shard name followed by its operation-specific arguments;
// RemoteIndex passes its class name as the index name.
type RemoteIndexClient interface {
	// Object writes. The batch variants return a slice of errors.
	// NOTE(review): presumably one entry per input item — RemoteIndex mirrors
	// that shape via duplicateErr when it fails before calling the client;
	// confirm against the implementations.
	PutObject(ctx context.Context, hostName, indexName, shardName string,
		obj *storobj.Object) error
	BatchPutObjects(ctx context.Context, hostName, indexName, shardName string,
		objs []*storobj.Object, repl *additional.ReplicationProperties) []error
	BatchAddReferences(ctx context.Context, hostName, indexName, shardName string,
		refs objects.BatchReferences) []error
	// Single-object reads and mutations addressed by UUID.
	GetObject(ctx context.Context, hostname, indexName, shardName string,
		id strfmt.UUID, props search.SelectProperties,
		additional additional.Properties) (*storobj.Object, error)
	Exists(ctx context.Context, hostname, indexName, shardName string,
		id strfmt.UUID) (bool, error)
	DeleteObject(ctx context.Context, hostname, indexName, shardName string,
		id strfmt.UUID) error
	MergeObject(ctx context.Context, hostname, indexName, shardName string,
		mergeDoc objects.MergeDocument) error
	MultiGetObjects(ctx context.Context, hostname, indexName, shardName string,
		ids []strfmt.UUID) ([]*storobj.Object, error)
	// SearchShard returns the matching objects together with a []float32.
	// NOTE(review): RemoteIndex names that slice "scores"; confirm whether
	// these are scores or distances against the implementation.
	SearchShard(ctx context.Context, hostname, indexName, shardName string,
		searchVector []float32, targetVector string, limit int, filters *filters.LocalFilter,
		keywordRanking *searchparams.KeywordRanking, sort []filters.Sort,
		cursor *filters.Cursor, groupBy *searchparams.GroupBy,
		additional additional.Properties,
	) ([]*storobj.Object, []float32, error)
	Aggregate(ctx context.Context, hostname, indexName, shardName string,
		params aggregation.Params) (*aggregation.Result, error)
	FindUUIDs(ctx context.Context, hostName, indexName, shardName string,
		filters *filters.LocalFilter) ([]strfmt.UUID, error)
	DeleteObjectBatch(ctx context.Context, hostName, indexName, shardName string,
		uuids []strfmt.UUID, dryRun bool) objects.BatchSimpleObjects
	// Shard introspection and status management.
	GetShardQueueSize(ctx context.Context, hostName, indexName, shardName string) (int64, error)
	GetShardStatus(ctx context.Context, hostName, indexName, shardName string) (string, error)
	UpdateShardStatus(ctx context.Context, hostName, indexName, shardName,
		targetStatus string) error

	// PutFile is not called by RemoteIndex in this file.
	// NOTE(review): presumably transfers payload to the shard under
	// fileName — confirm against the implementation.
	PutFile(ctx context.Context, hostName, indexName, shardName, fileName string,
		payload io.ReadSeekCloser) error
}
    98  
    99  func (ri *RemoteIndex) PutObject(ctx context.Context, shardName string,
   100  	obj *storobj.Object,
   101  ) error {
   102  	owner, err := ri.stateGetter.ShardOwner(ri.class, shardName)
   103  	if err != nil {
   104  		return fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err)
   105  	}
   106  
   107  	host, ok := ri.nodeResolver.NodeHostname(owner)
   108  	if !ok {
   109  		return errors.Errorf("resolve node name %q to host", owner)
   110  	}
   111  
   112  	return ri.client.PutObject(ctx, host, ri.class, shardName, obj)
   113  }
   114  
   115  // helper for single errors that affect the entire batch, assign the error to
   116  // every single item in the batch
   117  func duplicateErr(in error, count int) []error {
   118  	out := make([]error, count)
   119  	for i := range out {
   120  		out[i] = in
   121  	}
   122  	return out
   123  }
   124  
   125  func (ri *RemoteIndex) BatchPutObjects(ctx context.Context, shardName string,
   126  	objs []*storobj.Object,
   127  ) []error {
   128  	owner, err := ri.stateGetter.ShardOwner(ri.class, shardName)
   129  	if err != nil {
   130  		return duplicateErr(fmt.Errorf("class %s has no physical shard %q: %w",
   131  			ri.class, shardName, err), len(objs))
   132  	}
   133  
   134  	host, ok := ri.nodeResolver.NodeHostname(owner)
   135  	if !ok {
   136  		return duplicateErr(fmt.Errorf("resolve node name %q to host",
   137  			owner), len(objs))
   138  	}
   139  
   140  	return ri.client.BatchPutObjects(ctx, host, ri.class, shardName, objs, nil)
   141  }
   142  
   143  func (ri *RemoteIndex) BatchAddReferences(ctx context.Context, shardName string,
   144  	refs objects.BatchReferences,
   145  ) []error {
   146  	owner, err := ri.stateGetter.ShardOwner(ri.class, shardName)
   147  	if err != nil {
   148  		return duplicateErr(fmt.Errorf("class %s has no physical shard %q: %w",
   149  			ri.class, shardName, err), len(refs))
   150  	}
   151  
   152  	host, ok := ri.nodeResolver.NodeHostname(owner)
   153  	if !ok {
   154  		return duplicateErr(fmt.Errorf("resolve node name %q to host",
   155  			owner), len(refs))
   156  	}
   157  
   158  	return ri.client.BatchAddReferences(ctx, host, ri.class, shardName, refs)
   159  }
   160  
   161  func (ri *RemoteIndex) Exists(ctx context.Context, shardName string,
   162  	id strfmt.UUID,
   163  ) (bool, error) {
   164  	owner, err := ri.stateGetter.ShardOwner(ri.class, shardName)
   165  	if err != nil {
   166  		return false, fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err)
   167  	}
   168  
   169  	host, ok := ri.nodeResolver.NodeHostname(owner)
   170  	if !ok {
   171  		return false, errors.Errorf("resolve node name %q to host", owner)
   172  	}
   173  
   174  	return ri.client.Exists(ctx, host, ri.class, shardName, id)
   175  }
   176  
   177  func (ri *RemoteIndex) DeleteObject(ctx context.Context, shardName string,
   178  	id strfmt.UUID,
   179  ) error {
   180  	owner, err := ri.stateGetter.ShardOwner(ri.class, shardName)
   181  	if err != nil {
   182  		return fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err)
   183  	}
   184  
   185  	host, ok := ri.nodeResolver.NodeHostname(owner)
   186  	if !ok {
   187  		return errors.Errorf("resolve node name %q to host", owner)
   188  	}
   189  
   190  	return ri.client.DeleteObject(ctx, host, ri.class, shardName, id)
   191  }
   192  
   193  func (ri *RemoteIndex) MergeObject(ctx context.Context, shardName string,
   194  	mergeDoc objects.MergeDocument,
   195  ) error {
   196  	owner, err := ri.stateGetter.ShardOwner(ri.class, shardName)
   197  	if err != nil {
   198  		return fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err)
   199  	}
   200  
   201  	host, ok := ri.nodeResolver.NodeHostname(owner)
   202  	if !ok {
   203  		return errors.Errorf("resolve node name %q to host", owner)
   204  	}
   205  
   206  	return ri.client.MergeObject(ctx, host, ri.class, shardName, mergeDoc)
   207  }
   208  
   209  func (ri *RemoteIndex) GetObject(ctx context.Context, shardName string,
   210  	id strfmt.UUID, props search.SelectProperties,
   211  	additional additional.Properties,
   212  ) (*storobj.Object, error) {
   213  	owner, err := ri.stateGetter.ShardOwner(ri.class, shardName)
   214  	if err != nil {
   215  		return nil, fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err)
   216  	}
   217  
   218  	host, ok := ri.nodeResolver.NodeHostname(owner)
   219  	if !ok {
   220  		return nil, errors.Errorf("resolve node name %q to host", owner)
   221  	}
   222  
   223  	return ri.client.GetObject(ctx, host, ri.class, shardName, id, props, additional)
   224  }
   225  
   226  func (ri *RemoteIndex) MultiGetObjects(ctx context.Context, shardName string,
   227  	ids []strfmt.UUID,
   228  ) ([]*storobj.Object, error) {
   229  	owner, err := ri.stateGetter.ShardOwner(ri.class, shardName)
   230  	if err != nil {
   231  		return nil, fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err)
   232  	}
   233  
   234  	host, ok := ri.nodeResolver.NodeHostname(owner)
   235  	if !ok {
   236  		return nil, errors.Errorf("resolve node name %q to host", owner)
   237  	}
   238  
   239  	return ri.client.MultiGetObjects(ctx, host, ri.class, shardName, ids)
   240  }
   241  
   242  func (ri *RemoteIndex) SearchShard(ctx context.Context, shard string,
   243  	queryVec []float32,
   244  	targetVector string,
   245  	limit int,
   246  	filters *filters.LocalFilter,
   247  	keywordRanking *searchparams.KeywordRanking,
   248  	sort []filters.Sort,
   249  	cursor *filters.Cursor,
   250  	groupBy *searchparams.GroupBy,
   251  	adds additional.Properties,
   252  	replEnabled bool,
   253  ) ([]*storobj.Object, []float32, string, error) {
   254  	type pair struct {
   255  		first  []*storobj.Object
   256  		second []float32
   257  	}
   258  	f := func(node, host string) (interface{}, error) {
   259  		objs, scores, err := ri.client.SearchShard(ctx, host, ri.class, shard,
   260  			queryVec, targetVector, limit, filters, keywordRanking, sort, cursor, groupBy, adds)
   261  		if err != nil {
   262  			return nil, err
   263  		}
   264  		return pair{objs, scores}, err
   265  	}
   266  	rr, node, err := ri.queryReplicas(ctx, shard, f)
   267  	if err != nil {
   268  		return nil, nil, node, err
   269  	}
   270  	r := rr.(pair)
   271  	return r.first, r.second, node, err
   272  }
   273  
   274  func (ri *RemoteIndex) Aggregate(
   275  	ctx context.Context,
   276  	shard string,
   277  	params aggregation.Params,
   278  ) (*aggregation.Result, error) {
   279  	f := func(_, host string) (interface{}, error) {
   280  		r, err := ri.client.Aggregate(ctx, host, ri.class, shard, params)
   281  		if err != nil {
   282  			return nil, err
   283  		}
   284  		return r, nil
   285  	}
   286  	rr, _, err := ri.queryReplicas(ctx, shard, f)
   287  	if err != nil {
   288  		return nil, err
   289  	}
   290  	return rr.(*aggregation.Result), err
   291  }
   292  
   293  func (ri *RemoteIndex) FindUUIDs(ctx context.Context, shardName string,
   294  	filters *filters.LocalFilter,
   295  ) ([]strfmt.UUID, error) {
   296  	owner, err := ri.stateGetter.ShardOwner(ri.class, shardName)
   297  	if err != nil {
   298  		return nil, fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err)
   299  	}
   300  
   301  	host, ok := ri.nodeResolver.NodeHostname(owner)
   302  	if !ok {
   303  		return nil, errors.Errorf("resolve node name %q to host", owner)
   304  	}
   305  
   306  	return ri.client.FindUUIDs(ctx, host, ri.class, shardName, filters)
   307  }
   308  
   309  func (ri *RemoteIndex) DeleteObjectBatch(ctx context.Context, shardName string,
   310  	uuids []strfmt.UUID, dryRun bool,
   311  ) objects.BatchSimpleObjects {
   312  	owner, err := ri.stateGetter.ShardOwner(ri.class, shardName)
   313  	if err != nil {
   314  		err := fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err)
   315  		return objects.BatchSimpleObjects{objects.BatchSimpleObject{Err: err}}
   316  	}
   317  
   318  	host, ok := ri.nodeResolver.NodeHostname(owner)
   319  	if !ok {
   320  		err := fmt.Errorf("resolve node name %q to host", owner)
   321  		return objects.BatchSimpleObjects{objects.BatchSimpleObject{Err: err}}
   322  	}
   323  
   324  	return ri.client.DeleteObjectBatch(ctx, host, ri.class, shardName, uuids, dryRun)
   325  }
   326  
   327  func (ri *RemoteIndex) GetShardQueueSize(ctx context.Context, shardName string) (int64, error) {
   328  	owner, err := ri.stateGetter.ShardOwner(ri.class, shardName)
   329  	if err != nil {
   330  		return 0, fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err)
   331  	}
   332  
   333  	host, ok := ri.nodeResolver.NodeHostname(owner)
   334  	if !ok {
   335  		return 0, errors.Errorf("resolve node name %q to host", owner)
   336  	}
   337  
   338  	return ri.client.GetShardQueueSize(ctx, host, ri.class, shardName)
   339  }
   340  
   341  func (ri *RemoteIndex) GetShardStatus(ctx context.Context, shardName string) (string, error) {
   342  	owner, err := ri.stateGetter.ShardOwner(ri.class, shardName)
   343  	if err != nil {
   344  		return "", fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err)
   345  	}
   346  
   347  	host, ok := ri.nodeResolver.NodeHostname(owner)
   348  	if !ok {
   349  		return "", errors.Errorf("resolve node name %q to host", owner)
   350  	}
   351  
   352  	return ri.client.GetShardStatus(ctx, host, ri.class, shardName)
   353  }
   354  
   355  func (ri *RemoteIndex) UpdateShardStatus(ctx context.Context, shardName, targetStatus string) error {
   356  	owner, err := ri.stateGetter.ShardOwner(ri.class, shardName)
   357  	if err != nil {
   358  		return fmt.Errorf("class %s has no physical shard %q: %w", ri.class, shardName, err)
   359  	}
   360  
   361  	host, ok := ri.nodeResolver.NodeHostname(owner)
   362  	if !ok {
   363  		return errors.Errorf("resolve node name %q to host", owner)
   364  	}
   365  
   366  	return ri.client.UpdateShardStatus(ctx, host, ri.class, shardName, targetStatus)
   367  }
   368  
   369  func (ri *RemoteIndex) queryReplicas(
   370  	ctx context.Context,
   371  	shard string,
   372  	do func(nodeName, host string) (interface{}, error),
   373  ) (resp interface{}, node string, err error) {
   374  	replicas, err := ri.stateGetter.ShardReplicas(ri.class, shard)
   375  	if err != nil || len(replicas) == 0 {
   376  		return nil,
   377  			"",
   378  			fmt.Errorf("class %q has no physical shard %q: %w", ri.class, shard, err)
   379  	}
   380  
   381  	queryOne := func(replica string) (interface{}, error) {
   382  		host, ok := ri.nodeResolver.NodeHostname(replica)
   383  		if !ok || host == "" {
   384  			return nil, errors.Errorf("resolve node name %q to host", replica)
   385  		}
   386  		return do(replica, host)
   387  	}
   388  
   389  	queryUntil := func(replicas []string) (resp interface{}, node string, err error) {
   390  		for _, node = range replicas {
   391  			if errC := ctx.Err(); errC != nil {
   392  				return nil, node, errC
   393  			}
   394  			if resp, err = queryOne(node); err == nil {
   395  				return resp, node, nil
   396  			}
   397  		}
   398  		return
   399  	}
   400  	first := rand.Intn(len(replicas))
   401  	if resp, node, err = queryUntil(replicas[first:]); err != nil && first != 0 {
   402  		return queryUntil(replicas[:first])
   403  	}
   404  	return
   405  }