github.com/weaviate/weaviate@v1.24.6/usecases/replica/coordinator.go

//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: hello@weaviate.io
//

package replica

import (
	"context"
	"fmt"
	"sync"

	enterrors "github.com/weaviate/weaviate/entities/errors"

	"github.com/sirupsen/logrus"
)

type (
	// readyOp asks a replica if it is ready to commit
	readyOp func(_ context.Context, host, requestID string) error

	// commitOp asks a replica to execute the actual operation
	commitOp[T any] func(_ context.Context, host, requestID string) (T, error)

	// readOp defines a generic read operation
	readOp[T any] func(_ context.Context, host string, fullRead bool) (T, error)

	// coordinator coordinates replication of write and read requests
	coordinator[T any] struct {
		Client
		Resolver *resolver // node_name -> host_address
		log      logrus.FieldLogger
		Class    string
		Shard    string
		TxID     string // transaction ID
	}
)
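
// The ops above map onto the coordinator's two entry points: Push drives a
// two-phase commit (readyOp for the prepare phase, commitOp for the commit
// phase), while Pull fans a readOp out to just enough replicas to satisfy
// the requested consistency level.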

// newCoordinator creates a write coordinator; it is used by the Replicator
func newCoordinator[T any](r *Replicator, shard, requestID string, l logrus.FieldLogger,
) *coordinator[T] {
	return &coordinator[T]{
		Client:   r.client,
		Resolver: r.resolver,
		log:      l,
		Class:    r.class,
		Shard:    shard,
		TxID:     requestID,
	}
}

// newReadCoordinator creates a read coordinator; it is used by the Finder to read objects from replicas
func newReadCoordinator[T any](f *Finder, shard string) *coordinator[T] {
	return &coordinator[T]{
		Resolver: f.resolver,
		Class:    f.class,
		Shard:    shard,
	}
}

// broadcast sends the write request to all replicas (first phase of a two-phase commit).
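// The returned channel yields the names of the replicas that acknowledged the
// prepare request. If fewer than level replicas acknowledge, the operation is
// aborted on all replicas and the channel is closed without emitting any name.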
func (c *coordinator[T]) broadcast(ctx context.Context,
	replicas []string,
	op readyOp, level int,
) <-chan string {
	// prepare tells replicas to be ready
	prepare := func() <-chan _Result[string] {
		resChan := make(chan _Result[string], len(replicas))
		f := func() { // broadcast
			defer close(resChan)
			var wg sync.WaitGroup
			wg.Add(len(replicas))
			for _, replica := range replicas {
				replica := replica
				g := func() {
					defer wg.Done()
					err := op(ctx, replica, c.TxID)
					resChan <- _Result[string]{replica, err}
				}
				enterrors.GoWrapper(g, c.log)
			}
			wg.Wait()
		}
		enterrors.GoWrapper(f, c.log)
		return resChan
	}

	// handle responses to prepare requests
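	// Successful acknowledgements are buffered in actives until level of them
	// have arrived; only then are replica names forwarded to the caller, so a
	// write that cannot reach the consistency level never enters the commit phase.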
	replicaCh := make(chan string, len(replicas))
	f := func() {
		defer close(replicaCh)
		actives := make([]string, 0, level) // cache for active replicas
		for r := range prepare() {
			if r.Err != nil { // connection error
				c.log.WithField("op", "broadcast").Error(r.Err)
				continue
			}

			level--
			if level > 0 { // cache since level has not been reached yet
				actives = append(actives, r.Value)
				continue
			}
			if level == 0 { // consistency level has been reached
				for _, x := range actives {
					replicaCh <- x
				}
			}
			replicaCh <- r.Value
		}
		if level > 0 { // abort: nothing has been sent to the caller
			fs := logrus.Fields{"op": "broadcast", "active": len(actives), "total": len(replicas)}
			c.log.WithFields(fs).Error("abort")
			for _, node := range replicas {
				c.Abort(ctx, node, c.Class, c.Shard, c.TxID)
			}
		}
	}
	enterrors.GoWrapper(f, c.log)
	return replicaCh
}

// commitAll tells replicas to commit pending updates related to a specific request
// (second phase of a two-phase commit)
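// Results are streamed on the returned channel, one per replica received from replicaCh.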
func (c *coordinator[T]) commitAll(ctx context.Context,
	replicaCh <-chan string,
	op commitOp[T],
) <-chan _Result[T] {
	replyCh := make(chan _Result[T], cap(replicaCh))
	f := func() { // tells active replicas to commit
		wg := sync.WaitGroup{}
		for replica := range replicaCh {
			wg.Add(1)
			replica := replica
			g := func() {
				defer wg.Done()
				resp, err := op(ctx, replica, c.TxID)
				replyCh <- _Result[T]{resp, err}
			}
			enterrors.GoWrapper(g, c.log)
		}
		wg.Wait()
		close(replyCh)
	}
	enterrors.GoWrapper(f, c.log)

	return replyCh
}

// Push pushes updates to all replicas of a specific shard
func (c *coordinator[T]) Push(ctx context.Context,
	cl ConsistencyLevel,
	ask readyOp,
	com commitOp[T],
) (<-chan _Result[T], int, error) {
	state, err := c.Resolver.State(c.Shard, cl, "")
	if err != nil {
		return nil, 0, fmt.Errorf("%w : class %q shard %q", err, c.Class, c.Shard)
	}
	level := state.Level
	nodeCh := c.broadcast(ctx, state.Hosts, ask, level)
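	// Note: the commit phase runs with context.Background() rather than ctx, so
	// commits that have already been handed to commitAll are not cut short if the
	// caller's context is cancelled between the two phases.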
	return c.commitAll(context.Background(), nodeCh, com), level, nil
}
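
// A rough usage sketch (not part of the original file): a write caller such as
// the Replicator wires Push up with two closures around its client; prepareFn,
// commitFn, and the response type respT below are hypothetical placeholders:
//
//	coord := newCoordinator[respT](r, shard, requestID, logger)
//	replyCh, level, err := coord.Push(ctx, cl,
//		func(ctx context.Context, host, requestID string) error {
//			return prepareFn(ctx, host, requestID) // phase 1: ask the replica to get ready
//		},
//		func(ctx context.Context, host, requestID string) (respT, error) {
//			return commitFn(ctx, host, requestID) // phase 2: commit on the replica
//		})
//	// the caller then collects up to level results from replyCh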

// Pull pulls data from replicas, involving only as many replicas as are needed
// to satisfy the consistency level.
//
// If directCandidate is specified, the direct (full read) request is sent to that
// node; otherwise it defaults to this node.
func (c *coordinator[T]) Pull(ctx context.Context,
	cl ConsistencyLevel,
	op readOp[T], directCandidate string,
) (<-chan _Result[T], rState, error) {
	state, err := c.Resolver.State(c.Shard, cl, directCandidate)
	if err != nil {
		return nil, state, fmt.Errorf("%w : class %q shard %q", err, c.Class, c.Shard)
	}
	level := state.Level
	replyCh := make(chan _Result[T], level)

	candidates := state.Hosts[:level]                          // direct ones
	candidatePool := make(chan string, len(state.Hosts)-level) // remaining ones
	for _, replica := range state.Hosts[level:] {
		candidatePool <- replica
	}
	close(candidatePool) // pool is ready
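	// Each of the level workers below queries its direct candidate first (only the
	// first worker, idx == 0, performs a full read). If a candidate fails, the worker
	// keeps drawing fallback replicas from candidatePool until a call succeeds or the
	// pool is exhausted; closing the pool above makes the receive report ok == false
	// once it is drained.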
	f := func() {
		wg := sync.WaitGroup{}
		wg.Add(len(candidates))
		for i := range candidates { // Ask direct candidate first
			idx := i
			f := func() {
				defer wg.Done()
				resp, err := op(ctx, candidates[idx], idx == 0)

				// If the node is not responding, delegate the request to another node
				for err != nil {
					if delegate, ok := <-candidatePool; ok {
						resp, err = op(ctx, delegate, idx == 0)
					} else {
						break
					}
				}
				replyCh <- _Result[T]{resp, err}
			}
			enterrors.GoWrapper(f, c.log)
		}
		wg.Wait()
		close(replyCh)
	}
	enterrors.GoWrapper(f, c.log)

	return replyCh, state, nil
}