github.com/weaviate/weaviate@v1.24.6/usecases/replica/finder_stream.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package replica
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"strings"
    18  
    19  	enterrors "github.com/weaviate/weaviate/entities/errors"
    20  
    21  	"github.com/go-openapi/strfmt"
    22  	"github.com/sirupsen/logrus"
    23  	"github.com/weaviate/weaviate/entities/storobj"
    24  	"github.com/weaviate/weaviate/usecases/objects"
    25  )
    26  
    27  // pullSteam is used by the finder to pull objects from replicas
    28  type finderStream struct {
    29  	repairer
    30  	log logrus.FieldLogger
    31  }
    32  
    33  type (
    34  	// tuple is a container for the data received from a replica
    35  	tuple[T any] struct {
    36  		sender string
    37  		UTime  int64
    38  		o      T
    39  		ack    int
    40  		err    error
    41  	}
    42  
    43  	objTuple  tuple[objects.Replica]
    44  	objResult = _Result[*storobj.Object]
    45  )
    46  
    47  // readOne reads one replicated object
    48  func (f *finderStream) readOne(ctx context.Context,
    49  	shard string,
    50  	id strfmt.UUID,
    51  	ch <-chan _Result[findOneReply],
    52  	st rState,
    53  ) <-chan objResult {
    54  	// counters tracks the number of votes for each participant
    55  	resultCh := make(chan objResult, 1)
    56  	g := func() {
    57  		defer close(resultCh)
    58  		var (
    59  			votes      = make([]objTuple, 0, st.Level)
    60  			maxCount   = 0
    61  			contentIdx = -1
    62  		)
    63  
    64  		for r := range ch { // len(ch) == st.Level
    65  			resp := r.Value
    66  			if r.Err != nil { // a least one node is not responding
    67  				f.log.WithField("op", "get").WithField("replica", resp.sender).
    68  					WithField("class", f.class).WithField("shard", shard).
    69  					WithField("uuid", id).Error(r.Err)
    70  				resultCh <- objResult{nil, errRead}
    71  				return
    72  			}
    73  			if !resp.DigestRead {
    74  				contentIdx = len(votes)
    75  			}
    76  			votes = append(votes, objTuple{resp.sender, resp.UpdateTime, resp.Data, 0, nil})
    77  			for i := range votes { // count number of votes
    78  				if votes[i].UTime == resp.UpdateTime {
    79  					votes[i].ack++
    80  				}
    81  				if maxCount < votes[i].ack {
    82  					maxCount = votes[i].ack
    83  				}
    84  				if maxCount >= st.Level && contentIdx >= 0 {
    85  					resultCh <- objResult{votes[contentIdx].o.Object, nil}
    86  					return
    87  				}
    88  			}
    89  		}
    90  
    91  		obj, err := f.repairOne(ctx, shard, id, votes, st, contentIdx)
    92  		if err == nil {
    93  			resultCh <- objResult{obj, nil}
    94  			return
    95  		}
    96  
    97  		resultCh <- objResult{nil, errRepair}
    98  		var sb strings.Builder
    99  		for i, c := range votes {
   100  			if i != 0 {
   101  				sb.WriteByte(' ')
   102  			}
   103  			fmt.Fprintf(&sb, "%s:%d", c.sender, c.UTime)
   104  		}
   105  		f.log.WithField("op", "repair_one").WithField("class", f.class).
   106  			WithField("shard", shard).WithField("uuid", id).
   107  			WithField("msg", sb.String()).Error(err)
   108  	}
   109  	enterrors.GoWrapper(g, f.logger)
   110  	return resultCh
   111  }
   112  
   113  type (
   114  	batchResult _Result[[]*storobj.Object]
   115  
   116  	// vote represents objects received from a specific replica and the number of votes per object.
   117  	vote struct {
   118  		batchReply       // reply from a replica
   119  		Count      []int // number of votes per object
   120  		Err        error
   121  	}
   122  )
   123  
   124  type boolTuple tuple[RepairResponse]
   125  
   126  // readExistence checks if replicated object exists
   127  func (f *finderStream) readExistence(ctx context.Context,
   128  	shard string,
   129  	id strfmt.UUID,
   130  	ch <-chan _Result[existReply],
   131  	st rState,
   132  ) <-chan _Result[bool] {
   133  	resultCh := make(chan _Result[bool], 1)
   134  	g := func() {
   135  		defer close(resultCh)
   136  		var (
   137  			votes    = make([]boolTuple, 0, st.Level) // number of votes per replica
   138  			maxCount = 0
   139  		)
   140  
   141  		for r := range ch { // len(ch) == st.Level
   142  			resp := r.Value
   143  			if r.Err != nil { // at least one node is not responding
   144  				f.log.WithField("op", "exists").WithField("replica", resp.Sender).
   145  					WithField("class", f.class).WithField("shard", shard).
   146  					WithField("uuid", id).Error(r.Err)
   147  				resultCh <- _Result[bool]{false, errRead}
   148  				return
   149  			}
   150  
   151  			votes = append(votes, boolTuple{resp.Sender, resp.UpdateTime, resp.RepairResponse, 0, nil})
   152  			for i := range votes { // count number of votes
   153  				if votes[i].UTime == resp.UpdateTime {
   154  					votes[i].ack++
   155  				}
   156  				if maxCount < votes[i].ack {
   157  					maxCount = votes[i].ack
   158  				}
   159  				if maxCount >= st.Level {
   160  					exists := !votes[i].o.Deleted && votes[i].o.UpdateTime != 0
   161  					resultCh <- _Result[bool]{exists, nil}
   162  					return
   163  				}
   164  			}
   165  		}
   166  
   167  		obj, err := f.repairExist(ctx, shard, id, votes, st)
   168  		if err == nil {
   169  			resultCh <- _Result[bool]{obj, nil}
   170  			return
   171  		}
   172  		resultCh <- _Result[bool]{false, errRepair}
   173  
   174  		var sb strings.Builder
   175  		for i, c := range votes {
   176  			if i != 0 {
   177  				sb.WriteByte(' ')
   178  			}
   179  			fmt.Fprintf(&sb, "%s:%d", c.sender, c.UTime)
   180  		}
   181  		f.log.WithField("op", "repair_exist").WithField("class", f.class).
   182  			WithField("shard", shard).WithField("uuid", id).
   183  			WithField("msg", sb.String()).Error(err)
   184  	}
   185  	enterrors.GoWrapper(g, f.logger)
   186  	return resultCh
   187  }
   188  
   189  // readBatchPart reads in replicated objects specified by their ids
   190  // It checks each object x for consistency and sets x.IsConsistent
   191  func (f *finderStream) readBatchPart(ctx context.Context,
   192  	batch shardPart,
   193  	ids []strfmt.UUID,
   194  	ch <-chan _Result[batchReply], st rState,
   195  ) <-chan batchResult {
   196  	resultCh := make(chan batchResult, 1)
   197  
   198  	g := func() {
   199  		defer close(resultCh)
   200  		var (
   201  			N = len(ids) // number of requested objects
   202  			// votes counts number of votes per object for each node
   203  			votes      = make([]vote, 0, st.Level)
   204  			contentIdx = -1 // index of full read reply
   205  		)
   206  
   207  		for r := range ch { // len(ch) == st.Level
   208  			resp := r.Value
   209  			if r.Err != nil { // at least one node is not responding
   210  				f.log.WithField("op", "read_batch.get").WithField("replica", r.Value.Sender).
   211  					WithField("class", f.class).WithField("shard", batch.Shard).Error(r.Err)
   212  				resultCh <- batchResult{nil, errRead}
   213  				return
   214  			}
   215  			if !resp.IsDigest {
   216  				contentIdx = len(votes)
   217  			}
   218  
   219  			votes = append(votes, vote{resp, make([]int, N), nil})
   220  			M := 0
   221  			for i := 0; i < N; i++ {
   222  				max := 0
   223  				lastTime := resp.UpdateTimeAt(i)
   224  
   225  				for j := range votes { // count votes
   226  					if votes[j].UpdateTimeAt(i) == lastTime {
   227  						votes[j].Count[i]++
   228  					}
   229  					if max < votes[j].Count[i] {
   230  						max = votes[j].Count[i]
   231  					}
   232  				}
   233  				if max >= st.Level {
   234  					M++
   235  				}
   236  			}
   237  
   238  			if M == N { // all objects are consistent
   239  				for _, idx := range batch.Index {
   240  					batch.Data[idx].IsConsistent = true
   241  				}
   242  				resultCh <- batchResult{fromReplicas(votes[contentIdx].FullData), nil}
   243  				return
   244  			}
   245  		}
   246  		res, err := f.repairBatchPart(ctx, batch.Shard, ids, votes, st, contentIdx)
   247  		if err != nil {
   248  			resultCh <- batchResult{nil, errRepair}
   249  			f.log.WithField("op", "repair_batch").WithField("class", f.class).
   250  				WithField("shard", batch.Shard).WithField("uuids", ids).Error(err)
   251  			return
   252  		}
   253  		// count total number of votes
   254  		maxCount := len(votes) * len(votes)
   255  		sum := votes[0].Count
   256  		for _, vote := range votes[1:] {
   257  			for i, n := range vote.Count {
   258  				sum[i] += n
   259  			}
   260  		}
   261  		// set consistency flag
   262  		for i, n := range sum {
   263  			if x := res[i]; x != nil && n == maxCount { // if consistent
   264  				prev := batch.Data[batch.Index[i]]
   265  				x.BelongsToShard = prev.BelongsToShard
   266  				x.BelongsToNode = prev.BelongsToNode
   267  				batch.Data[batch.Index[i]] = x
   268  				x.IsConsistent = true
   269  			}
   270  		}
   271  
   272  		resultCh <- batchResult{res, nil}
   273  	}
   274  	enterrors.GoWrapper(g, f.logger)
   275  
   276  	return resultCh
   277  }
   278  
   279  // batchReply is a container of the batch received from a replica
   280  // The returned data may result from a full or digest read request
   281  type batchReply struct {
   282  	// Sender hostname of the sender
   283  	Sender string
   284  	// IsDigest is this reply from a digest read?
   285  	IsDigest bool
   286  	// FullData returned from a full read request
   287  	FullData []objects.Replica
   288  	// DigestData returned from a digest read request
   289  	DigestData []RepairResponse
   290  }
   291  
   292  // UpdateTimeAt gets update time from reply
   293  func (r batchReply) UpdateTimeAt(idx int) int64 {
   294  	if len(r.DigestData) != 0 {
   295  		return r.DigestData[idx].UpdateTime
   296  	}
   297  	return r.FullData[idx].UpdateTime()
   298  }