github.com/weaviate/weaviate@v1.24.6/usecases/replica/finder_stream.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package replica 13 14 import ( 15 "context" 16 "fmt" 17 "strings" 18 19 enterrors "github.com/weaviate/weaviate/entities/errors" 20 21 "github.com/go-openapi/strfmt" 22 "github.com/sirupsen/logrus" 23 "github.com/weaviate/weaviate/entities/storobj" 24 "github.com/weaviate/weaviate/usecases/objects" 25 ) 26 27 // pullSteam is used by the finder to pull objects from replicas 28 type finderStream struct { 29 repairer 30 log logrus.FieldLogger 31 } 32 33 type ( 34 // tuple is a container for the data received from a replica 35 tuple[T any] struct { 36 sender string 37 UTime int64 38 o T 39 ack int 40 err error 41 } 42 43 objTuple tuple[objects.Replica] 44 objResult = _Result[*storobj.Object] 45 ) 46 47 // readOne reads one replicated object 48 func (f *finderStream) readOne(ctx context.Context, 49 shard string, 50 id strfmt.UUID, 51 ch <-chan _Result[findOneReply], 52 st rState, 53 ) <-chan objResult { 54 // counters tracks the number of votes for each participant 55 resultCh := make(chan objResult, 1) 56 g := func() { 57 defer close(resultCh) 58 var ( 59 votes = make([]objTuple, 0, st.Level) 60 maxCount = 0 61 contentIdx = -1 62 ) 63 64 for r := range ch { // len(ch) == st.Level 65 resp := r.Value 66 if r.Err != nil { // a least one node is not responding 67 f.log.WithField("op", "get").WithField("replica", resp.sender). 68 WithField("class", f.class).WithField("shard", shard). 69 WithField("uuid", id).Error(r.Err) 70 resultCh <- objResult{nil, errRead} 71 return 72 } 73 if !resp.DigestRead { 74 contentIdx = len(votes) 75 } 76 votes = append(votes, objTuple{resp.sender, resp.UpdateTime, resp.Data, 0, nil}) 77 for i := range votes { // count number of votes 78 if votes[i].UTime == resp.UpdateTime { 79 votes[i].ack++ 80 } 81 if maxCount < votes[i].ack { 82 maxCount = votes[i].ack 83 } 84 if maxCount >= st.Level && contentIdx >= 0 { 85 resultCh <- objResult{votes[contentIdx].o.Object, nil} 86 return 87 } 88 } 89 } 90 91 obj, err := f.repairOne(ctx, shard, id, votes, st, contentIdx) 92 if err == nil { 93 resultCh <- objResult{obj, nil} 94 return 95 } 96 97 resultCh <- objResult{nil, errRepair} 98 var sb strings.Builder 99 for i, c := range votes { 100 if i != 0 { 101 sb.WriteByte(' ') 102 } 103 fmt.Fprintf(&sb, "%s:%d", c.sender, c.UTime) 104 } 105 f.log.WithField("op", "repair_one").WithField("class", f.class). 106 WithField("shard", shard).WithField("uuid", id). 107 WithField("msg", sb.String()).Error(err) 108 } 109 enterrors.GoWrapper(g, f.logger) 110 return resultCh 111 } 112 113 type ( 114 batchResult _Result[[]*storobj.Object] 115 116 // vote represents objects received from a specific replica and the number of votes per object. 117 vote struct { 118 batchReply // reply from a replica 119 Count []int // number of votes per object 120 Err error 121 } 122 ) 123 124 type boolTuple tuple[RepairResponse] 125 126 // readExistence checks if replicated object exists 127 func (f *finderStream) readExistence(ctx context.Context, 128 shard string, 129 id strfmt.UUID, 130 ch <-chan _Result[existReply], 131 st rState, 132 ) <-chan _Result[bool] { 133 resultCh := make(chan _Result[bool], 1) 134 g := func() { 135 defer close(resultCh) 136 var ( 137 votes = make([]boolTuple, 0, st.Level) // number of votes per replica 138 maxCount = 0 139 ) 140 141 for r := range ch { // len(ch) == st.Level 142 resp := r.Value 143 if r.Err != nil { // at least one node is not responding 144 f.log.WithField("op", "exists").WithField("replica", resp.Sender). 145 WithField("class", f.class).WithField("shard", shard). 146 WithField("uuid", id).Error(r.Err) 147 resultCh <- _Result[bool]{false, errRead} 148 return 149 } 150 151 votes = append(votes, boolTuple{resp.Sender, resp.UpdateTime, resp.RepairResponse, 0, nil}) 152 for i := range votes { // count number of votes 153 if votes[i].UTime == resp.UpdateTime { 154 votes[i].ack++ 155 } 156 if maxCount < votes[i].ack { 157 maxCount = votes[i].ack 158 } 159 if maxCount >= st.Level { 160 exists := !votes[i].o.Deleted && votes[i].o.UpdateTime != 0 161 resultCh <- _Result[bool]{exists, nil} 162 return 163 } 164 } 165 } 166 167 obj, err := f.repairExist(ctx, shard, id, votes, st) 168 if err == nil { 169 resultCh <- _Result[bool]{obj, nil} 170 return 171 } 172 resultCh <- _Result[bool]{false, errRepair} 173 174 var sb strings.Builder 175 for i, c := range votes { 176 if i != 0 { 177 sb.WriteByte(' ') 178 } 179 fmt.Fprintf(&sb, "%s:%d", c.sender, c.UTime) 180 } 181 f.log.WithField("op", "repair_exist").WithField("class", f.class). 182 WithField("shard", shard).WithField("uuid", id). 183 WithField("msg", sb.String()).Error(err) 184 } 185 enterrors.GoWrapper(g, f.logger) 186 return resultCh 187 } 188 189 // readBatchPart reads in replicated objects specified by their ids 190 // It checks each object x for consistency and sets x.IsConsistent 191 func (f *finderStream) readBatchPart(ctx context.Context, 192 batch shardPart, 193 ids []strfmt.UUID, 194 ch <-chan _Result[batchReply], st rState, 195 ) <-chan batchResult { 196 resultCh := make(chan batchResult, 1) 197 198 g := func() { 199 defer close(resultCh) 200 var ( 201 N = len(ids) // number of requested objects 202 // votes counts number of votes per object for each node 203 votes = make([]vote, 0, st.Level) 204 contentIdx = -1 // index of full read reply 205 ) 206 207 for r := range ch { // len(ch) == st.Level 208 resp := r.Value 209 if r.Err != nil { // at least one node is not responding 210 f.log.WithField("op", "read_batch.get").WithField("replica", r.Value.Sender). 211 WithField("class", f.class).WithField("shard", batch.Shard).Error(r.Err) 212 resultCh <- batchResult{nil, errRead} 213 return 214 } 215 if !resp.IsDigest { 216 contentIdx = len(votes) 217 } 218 219 votes = append(votes, vote{resp, make([]int, N), nil}) 220 M := 0 221 for i := 0; i < N; i++ { 222 max := 0 223 lastTime := resp.UpdateTimeAt(i) 224 225 for j := range votes { // count votes 226 if votes[j].UpdateTimeAt(i) == lastTime { 227 votes[j].Count[i]++ 228 } 229 if max < votes[j].Count[i] { 230 max = votes[j].Count[i] 231 } 232 } 233 if max >= st.Level { 234 M++ 235 } 236 } 237 238 if M == N { // all objects are consistent 239 for _, idx := range batch.Index { 240 batch.Data[idx].IsConsistent = true 241 } 242 resultCh <- batchResult{fromReplicas(votes[contentIdx].FullData), nil} 243 return 244 } 245 } 246 res, err := f.repairBatchPart(ctx, batch.Shard, ids, votes, st, contentIdx) 247 if err != nil { 248 resultCh <- batchResult{nil, errRepair} 249 f.log.WithField("op", "repair_batch").WithField("class", f.class). 250 WithField("shard", batch.Shard).WithField("uuids", ids).Error(err) 251 return 252 } 253 // count total number of votes 254 maxCount := len(votes) * len(votes) 255 sum := votes[0].Count 256 for _, vote := range votes[1:] { 257 for i, n := range vote.Count { 258 sum[i] += n 259 } 260 } 261 // set consistency flag 262 for i, n := range sum { 263 if x := res[i]; x != nil && n == maxCount { // if consistent 264 prev := batch.Data[batch.Index[i]] 265 x.BelongsToShard = prev.BelongsToShard 266 x.BelongsToNode = prev.BelongsToNode 267 batch.Data[batch.Index[i]] = x 268 x.IsConsistent = true 269 } 270 } 271 272 resultCh <- batchResult{res, nil} 273 } 274 enterrors.GoWrapper(g, f.logger) 275 276 return resultCh 277 } 278 279 // batchReply is a container of the batch received from a replica 280 // The returned data may result from a full or digest read request 281 type batchReply struct { 282 // Sender hostname of the sender 283 Sender string 284 // IsDigest is this reply from a digest read? 285 IsDigest bool 286 // FullData returned from a full read request 287 FullData []objects.Replica 288 // DigestData returned from a digest read request 289 DigestData []RepairResponse 290 } 291 292 // UpdateTimeAt gets update time from reply 293 func (r batchReply) UpdateTimeAt(idx int) int64 { 294 if len(r.DigestData) != 0 { 295 return r.DigestData[idx].UpdateTime 296 } 297 return r.FullData[idx].UpdateTime() 298 }