github.com/weaviate/weaviate@v1.24.6/usecases/scaler/rsync.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package scaler
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"io"
    18  	"os"
    19  	"path/filepath"
    20  
    21  	"github.com/sirupsen/logrus"
    22  	enterrors "github.com/weaviate/weaviate/entities/errors"
    23  
    24  	"github.com/weaviate/weaviate/entities/backup"
    25  )
    26  
    27  // client the client interface is used to communicate with remote nodes
    28  type client interface {
    29  	PutFile(ctx context.Context, hostName, indexName,
    30  		shardName, fileName string, payload io.ReadSeekCloser) error
    31  
    32  	// CreateShard creates an empty shard on the remote node.
    33  	// This is required in order to sync files to a specific shard on the remote node.
    34  	CreateShard(ctx context.Context,
    35  		hostName, indexName, shardName string) error
    36  
    37  	// ReInitShard re-initialized new shard after all files has been synced to the remote node
    38  	// Otherwise, it would not recognize the files when
    39  	// serving traffic later.
    40  	ReInitShard(ctx context.Context,
    41  		hostName, indexName, shardName string) error
    42  	IncreaseReplicationFactor(ctx context.Context, host, class string, dist ShardDist) error
    43  }
    44  
    45  // rsync synchronizes shards with remote nodes
    46  type rsync struct {
    47  	client          client
    48  	cluster         cluster
    49  	persistenceRoot string
    50  }
    51  
    52  func newRSync(c client, cl cluster, rootPath string) *rsync {
    53  	return &rsync{client: c, cluster: cl, persistenceRoot: rootPath}
    54  }
    55  
    56  // Push pushes local shards of a class to remote nodes
    57  func (r *rsync) Push(ctx context.Context, shardsBackups []*backup.ShardDescriptor, dist ShardDist, className string, logger logrus.FieldLogger) error {
    58  	g := enterrors.NewErrorGroupWrapper(logger)
    59  	g.SetLimit(_NUMCPU * 2)
    60  	for _, desc := range shardsBackups {
    61  		shardName := desc.Name
    62  		additions := dist[shardName]
    63  		desc := desc
    64  		g.Go(func() error {
    65  			return r.PushShard(ctx, className, desc, additions)
    66  		}, shardName)
    67  
    68  	}
    69  	return g.Wait()
    70  }
    71  
    72  // PushShard replicates a shard on a set of nodes
    73  func (r *rsync) PushShard(ctx context.Context, className string, desc *backup.ShardDescriptor, nodes []string) error {
    74  	// Iterate over the new target nodes and copy files
    75  	for _, node := range nodes {
    76  		host, ok := r.cluster.NodeHostname(node)
    77  		if !ok {
    78  			return fmt.Errorf("%w: %q", ErrUnresolvedName, node)
    79  		}
    80  		if err := r.client.CreateShard(ctx, host, className, desc.Name); err != nil {
    81  			return fmt.Errorf("create new shard on remote node %q: %w", node, err)
    82  		}
    83  
    84  		// Transfer each file that's part of the backup.
    85  		for _, file := range desc.Files {
    86  			err := r.PutFile(ctx, file, host, className, desc.Name)
    87  			if err != nil {
    88  				return fmt.Errorf("copy files to remote node %q: %w", node, err)
    89  			}
    90  		}
    91  
    92  		// Transfer shard metadata files
    93  		err := r.PutFile(ctx, desc.ShardVersionPath, host, className, desc.Name)
    94  		if err != nil {
    95  			return fmt.Errorf("copy shard version to remote node %q: %w", node, err)
    96  		}
    97  
    98  		err = r.PutFile(ctx, desc.DocIDCounterPath, host, className, desc.Name)
    99  		if err != nil {
   100  			return fmt.Errorf("copy index counter to remote node %q: %w", node, err)
   101  		}
   102  
   103  		err = r.PutFile(ctx, desc.PropLengthTrackerPath, host, className, desc.Name)
   104  		if err != nil {
   105  			return fmt.Errorf("copy prop length tracker to remote node %q: %w", node, err)
   106  		}
   107  
   108  		// Now that all files are on the remote node's new shard, the shard needs
   109  		// to be reinitialized. Otherwise, it would not recognize the files when
   110  		// serving traffic later.
   111  		if err := r.client.ReInitShard(ctx, host, className, desc.Name); err != nil {
   112  			return fmt.Errorf("create new shard on remote node %q: %w", node, err)
   113  		}
   114  	}
   115  	return nil
   116  }
   117  
   118  func (r *rsync) PutFile(ctx context.Context, sourceFileName string,
   119  	hostname, className, shardName string,
   120  ) error {
   121  	absPath := filepath.Join(r.persistenceRoot, sourceFileName)
   122  	f, err := os.Open(absPath)
   123  	if err != nil {
   124  		return fmt.Errorf("open file %q for reading: %w", absPath, err)
   125  	}
   126  
   127  	return r.client.PutFile(ctx, hostname, className, shardName, sourceFileName, f)
   128  }