github.com/weaviate/weaviate@v1.24.6/usecases/scaler/scaler.go

//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: hello@weaviate.io
//

package scaler

import (
	"context"
	"fmt"
	"runtime"

	enterrors "github.com/weaviate/weaviate/entities/errors"

	"github.com/google/uuid"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"github.com/weaviate/weaviate/entities/backup"
	"github.com/weaviate/weaviate/usecases/sharding"
)

// TODOs: Performance
//
// 1. Improve the performance of syncing a shard to multiple nodes (see rsync.Push).
// We could concurrently sync the same files to different nodes while avoiding overlapping transfers.
//
// 2. To fail fast, we might consider creating all shards at once and re-initializing them in the final step.
//
// 3. Implement scaler.scaleIn.

var (
	// ErrUnresolvedName is returned when the host address of a node cannot be resolved
	ErrUnresolvedName = errors.New("cannot resolve node name")
	// _NUMCPU caches the number of logical CPUs available to this process
	_NUMCPU = runtime.NumCPU()
)

// Scaler scales class replicas out/in.
//
// It scales out a class by replicating its shards on new replicas.
type Scaler struct {
	schema          SchemaManager
	cluster         cluster
	source          BackUpper // data source
	client          client    // client for remote nodes
	logger          logrus.FieldLogger
	persistenceRoot string
}
    54  
    55  // New returns a new instance of Scaler
    56  func New(cl cluster, source BackUpper,
    57  	c client, logger logrus.FieldLogger, persistenceRoot string,
    58  ) *Scaler {
    59  	return &Scaler{
    60  		cluster:         cl,
    61  		source:          source,
    62  		client:          c,
    63  		logger:          logger,
    64  		persistenceRoot: persistenceRoot,
    65  	}
    66  }

// BackUpper is used to back up shards of a specific class
type BackUpper interface {
	// ShardsBackup returns a class backup descriptor for a list of shards
	ShardsBackup(_ context.Context, id, class string, shards []string) (backup.ClassDescriptor, error)
	// ReleaseBackup releases the backup specified by its id
	ReleaseBackup(ctx context.Context, id, className string) error
}

// cluster is used by the scaler to query the cluster
type cluster interface {
	// Candidates returns the list of existing nodes in the cluster
	Candidates() []string
	// LocalName returns the name of this node
	LocalName() string
	// NodeHostname returns the host address for a specific node name
	NodeHostname(name string) (string, bool)
}
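
// A minimal in-memory implementation sketch of the cluster interface, e.g. for
// wiring the Scaler up in tests. Illustrative only and not part of this
// package; the type and field names are assumptions.
//
//	type staticCluster struct {
//		local string
//		hosts map[string]string // node name -> host:port
//	}
//
//	func (c staticCluster) Candidates() []string {
//		names := make([]string, 0, len(c.hosts))
//		for name := range c.hosts {
//			names = append(names, name)
//		}
//		return names
//	}
//
//	func (c staticCluster) LocalName() string { return c.local }
//
//	func (c staticCluster) NodeHostname(name string) (string, bool) {
//		host, ok := c.hosts[name]
//		return host, ok
//	}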

// SchemaManager is used by the scaler to get and update sharding states
type SchemaManager interface {
	CopyShardingState(class string) *sharding.State
}

// SetSchemaManager sets the schema manager used to read sharding states
func (s *Scaler) SetSchemaManager(sm SchemaManager) {
	s.schema = sm
}
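
// Illustrative wiring sketch (the concrete values are assumptions standing in
// for whatever cluster, BackUpper, client, and SchemaManager implementations
// the caller already has):
//
//	sc := scaler.New(clusterState, backupManager, remoteClient, logger, dataPath)
//	sc.SetSchemaManager(schemaManager)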

// Scale increases or decreases the number of class replicas.
//
// It returns the updated sharding state if successful. The caller must then
// make sure to broadcast that state to all nodes as part of the "update"
// transaction.
func (s *Scaler) Scale(ctx context.Context, className string,
	updated sharding.Config, prevReplFactor, newReplFactor int64,
) (*sharding.State, error) {
	// First identify what the sharding state was before this change. This is
	// mainly to be able to compare the diff later, so we know where we need to
	// make changes.
	ssBefore := s.schema.CopyShardingState(className)
	if ssBefore == nil {
		return nil, fmt.Errorf("no sharding state for class %q", className)
	}
	if newReplFactor > prevReplFactor {
		return s.scaleOut(ctx, className, ssBefore, updated, newReplFactor)
	}

	if newReplFactor < prevReplFactor {
		return s.scaleIn(ctx, className, updated)
	}

	return nil, nil
}
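
// Caller-side sketch: Scale only computes the new sharding state; the caller
// must distribute it to the cluster. broadcastShardingState below is a
// hypothetical helper, not part of this package.
//
//	ss, err := sc.Scale(ctx, "Article", updatedCfg, 1, 3)
//	if err != nil {
//		return err
//	}
//	// ss is nil when the replication factor did not change
//	if ss != nil {
//		if err := broadcastShardingState(ctx, "Article", ss); err != nil {
//			return err
//		}
//	}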

// scaleOut replicates class shards on new replicas (nodes):
//
// * It calculates the new sharding state
// * It pushes locally existing shards to the new replicas
// * It delegates replication of remote shards to their owner nodes
func (s *Scaler) scaleOut(ctx context.Context, className string, ssBefore *sharding.State,
	updated sharding.Config, replFactor int64,
) (*sharding.State, error) {
	// Create a deep copy of the old sharding state, so we can start building the
	// updated state. Because this is a deep copy we don't risk leaking our
	// changes to anyone else. We can return the changes in the end where the
	// caller can then make sure to broadcast the new state to the cluster.
	ssAfter := ssBefore.DeepCopy()
	ssAfter.Config = updated

	// Identify all shards of the class and adjust the replicas. After this is
	// done, the affected shards now belong to more nodes than they did before.
	for name, shard := range ssAfter.Physical {
		if err := shard.AdjustReplicas(int(replFactor), s.cluster); err != nil {
			return nil, err
		}
		ssAfter.Physical[name] = shard
	}
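	// Split the work between this node and the other shard owners: lDist holds
	// the shards whose source copy lives on this node, while nodeDist maps each
	// remote owner node to the shards it has to replicate itself.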
	lDist, nodeDist := distributions(ssBefore, &ssAfter)
	g, ctx := enterrors.NewErrorGroupWithContextWrapper(s.logger, ctx)
	// resolve host addresses beforehand
	nodes := nodeDist.nodes()
	hosts, err := hosts(nodes, s.cluster)
	if err != nil {
		return nil, err
	}
	for i, node := range nodes {
		dist := nodeDist[node]
		i := i // capture the loop variable for the goroutine below
		g.Go(func() error {
			err := s.client.IncreaseReplicationFactor(ctx, hosts[i], className, dist)
			if err != nil {
				return fmt.Errorf("increase replication factor for class %q on node %q: %w", className, nodes[i], err)
			}
			return nil
		})
	}

	g.Go(func() error {
		if err := s.LocalScaleOut(ctx, className, lDist); err != nil {
			return fmt.Errorf("increase local replication factor: %w", err)
		}
		return nil
	})
	if err := g.Wait(); err != nil {
		return nil, err
	}

	// Finally, return the sharding state to the schema manager. The schema
	// manager will then broadcast this updated state to the cluster. This is
	// essentially what takes the new replica shards live: on the new nodes,
	// IsShardLocal() would have returned false for incoming traffic before. Now
	// that a copy of the shard is present locally, it returns true and the node
	// serves the traffic.
	return &ssAfter, nil
}

// LocalScaleOut syncs local shards with new replicas.
//
// This is the meat & bones of this implementation.
// For each shard, we're roughly doing the following:
//   - Create a shards backup, so the shards are safe to copy
//   - Figure out the copy targets (i.e. each node that is part of the after
//     state, but wasn't part of the before state yet)
//   - Create an empty shard on the target node
//   - Copy over all files from the backup
//   - ReInit the shard to recognize the copied files
//   - Release the single-shard backup
func (s *Scaler) LocalScaleOut(ctx context.Context,
	className string, dist ShardDist,
) error {
	if len(dist) < 1 {
		return nil
	}
	// Create a backup of the source shards so they are safe to copy
	bakID := fmt.Sprintf("_internal_scaler_%s", uuid.New().String()) // TODO: better name
	bak, err := s.source.ShardsBackup(ctx, bakID, className, dist.shards())
	if err != nil {
		return fmt.Errorf("create snapshot: %w", err)
	}

	// Make sure the backup is released once the sync is done (or has failed)
	defer func() {
		err := s.source.ReleaseBackup(context.Background(), bakID, className)
		if err != nil {
			s.logger.WithField("scaler", "releaseBackup").WithField("class", className).Error(err)
		}
	}()
	// Push the backed-up shard files to each new replica
	rsync := newRSync(s.client, s.cluster, s.persistenceRoot)
	return rsync.Push(ctx, bak.Shards, dist, className, s.logger)
}

// scaleIn is not implemented yet; see TODO (3) at the top of this file.
func (s *Scaler) scaleIn(ctx context.Context, className string,
	updated sharding.Config,
) (*sharding.State, error) {
	return nil, errors.Errorf("scaling in not supported yet")
}