storj.io/uplink@v1.13.0/private/ecclient/client.go (about)

// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.

package ecclient

import (
	"context"
	"errors"
	"io"
	"sort"
	"strconv"
	"sync"
	"time"

	"github.com/spacemonkeygo/monkit/v3"
	"github.com/zeebo/errs"

	"storj.io/common/encryption"
	"storj.io/common/errs2"
	"storj.io/common/pb"
	"storj.io/common/ranger"
	"storj.io/common/rpc"
	"storj.io/common/storj"
	"storj.io/eventkit"
	"storj.io/uplink/private/eestream"
	"storj.io/uplink/private/piecestore"
)

var mon = monkit.Package()
var evs = eventkit.Package()

// GetOptions is a struct of options for GetWithOptions.
type GetOptions struct {
	ErrorDetection bool
}

// Client defines an interface for storing erasure coded data to piece store nodes.
type Client interface {
	PutSingleResult(ctx context.Context, limits []*pb.AddressedOrderLimit, privateKey storj.PiecePrivateKey, rs eestream.RedundancyStrategy, data io.Reader) (results []*pb.SegmentPieceUploadResult, err error)
	Get(ctx context.Context, limits []*pb.AddressedOrderLimit, privateKey storj.PiecePrivateKey, es eestream.ErasureScheme, size int64) (ranger.Ranger, error)
	GetWithOptions(ctx context.Context, limits []*pb.AddressedOrderLimit, privateKey storj.PiecePrivateKey, es eestream.ErasureScheme, size int64, opts GetOptions) (ranger.Ranger, error)
	WithForceErrorDetection(force bool) Client
	// PutPiece is not intended to be used by normal uplinks directly, but is exported to support storagenode graceful exit transfers.
	PutPiece(ctx, parent context.Context, limit *pb.AddressedOrderLimit, privateKey storj.PiecePrivateKey, data io.ReadCloser) (hash *pb.PieceHash, id *struct{}, err error)
}

type dialPiecestoreFunc func(context.Context, storj.NodeURL) (*piecestore.Client, error)

type ecClient struct {
	dialer              rpc.Dialer
	memoryLimit         int
	forceErrorDetection bool
}

// New creates a client from the given dialer and max buffer memory.
func New(dialer rpc.Dialer, memoryLimit int) Client {
	return &ecClient{
		dialer:      dialer,
		memoryLimit: memoryLimit,
	}
}
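
// The sketch below is illustrative and not part of the upstream file. It shows the
// intended construction path: the caller supplies an already configured rpc.Dialer,
// and memoryLimit bounds the buffer memory eestream.Decode may use when
// reconstructing segments in GetWithOptions. The 64 MiB value is a hypothetical
// choice for demonstration only.
func exampleNewClient(dialer rpc.Dialer) Client {
	const memoryLimit = 64 << 20 // hypothetical cap; real uplinks derive this from their configuration
	return New(dialer, memoryLimit)
}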

func (ec *ecClient) WithForceErrorDetection(force bool) Client {
	ec.forceErrorDetection = force
	return ec
}

func (ec *ecClient) dialPiecestore(ctx context.Context, n storj.NodeURL) (*piecestore.Client, error) {
	hashAlgo := piecestore.GetPieceHashAlgo(ctx)
	client, err := piecestore.DialReplaySafe(ctx, ec.dialer, n, piecestore.DefaultConfig)
	if err != nil {
		return client, err
	}
	client.UploadHashAlgo = hashAlgo
	return client, nil
}

func (ec *ecClient) PutSingleResult(ctx context.Context, limits []*pb.AddressedOrderLimit, privateKey storj.PiecePrivateKey, rs eestream.RedundancyStrategy, data io.Reader) (results []*pb.SegmentPieceUploadResult, err error) {
	successfulNodes, successfulHashes, err := ec.put(ctx, limits, privateKey, rs, data, time.Time{})
	if err != nil {
		return nil, err
	}

	uploadResults := make([]*pb.SegmentPieceUploadResult, 0, len(successfulNodes))
	for i := range successfulNodes {
		if successfulNodes[i] == nil {
			continue
		}

		uploadResults = append(uploadResults, &pb.SegmentPieceUploadResult{
			PieceNum: int32(i),
			NodeId:   successfulNodes[i].Id,
			Hash:     successfulHashes[i],
		})
	}

	if l := len(uploadResults); l < rs.OptimalThreshold() {
		return nil, Error.New("uploaded results (%d) are below the optimal threshold (%d)", l, rs.OptimalThreshold())
	}

	return uploadResults, nil
}
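
// The sketch below is illustrative and not part of the upstream file. It shows how
// a caller holding satellite-issued order limits, a piece private key, and a
// redundancy strategy might upload one segment and forward the per-piece results
// (piece number, node ID, signed hash) back to the satellite. All inputs are
// assumed to come from an earlier exchange with the satellite elsewhere.
func exampleUploadSegment(ctx context.Context, ec Client, limits []*pb.AddressedOrderLimit, privateKey storj.PiecePrivateKey, rs eestream.RedundancyStrategy, segment io.Reader) ([]*pb.SegmentPieceUploadResult, error) {
	// PutSingleResult fails unless at least rs.OptimalThreshold() pieces were stored.
	results, err := ec.PutSingleResult(ctx, limits, privateKey, rs, segment)
	if err != nil {
		return nil, err
	}
	return results, nil
}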

func (ec *ecClient) put(ctx context.Context, limits []*pb.AddressedOrderLimit, privateKey storj.PiecePrivateKey, rs eestream.RedundancyStrategy, data io.Reader, expiration time.Time) (successfulNodes []*pb.Node, successfulHashes []*pb.PieceHash, err error) {
	defer mon.Task()(&ctx,
		"erasure:"+strconv.Itoa(rs.ErasureShareSize()),
		"stripe:"+strconv.Itoa(rs.StripeSize()),
		"repair:"+strconv.Itoa(rs.RepairThreshold()),
		"optimal:"+strconv.Itoa(rs.OptimalThreshold()),
	)(&err)

	pieceCount := len(limits)
	if pieceCount != rs.TotalCount() {
		return nil, nil, Error.New("size of limits slice (%d) does not match total count (%d) of erasure scheme", pieceCount, rs.TotalCount())
	}

	nonNilLimits := nonNilCount(limits)
	if nonNilLimits <= rs.RepairThreshold() && nonNilLimits < rs.OptimalThreshold() {
		return nil, nil, Error.New("number of non-nil limits (%d) is less than or equal to the repair threshold (%d) of erasure scheme", nonNilLimits, rs.RepairThreshold())
	}

	if !unique(limits) {
		return nil, nil, Error.New("duplicated nodes are not allowed")
	}

	padded := encryption.PadReader(io.NopCloser(data), rs.StripeSize())
	readers, err := eestream.EncodeReader2(ctx, padded, rs)
	if err != nil {
		return nil, nil, err
	}

	type info struct {
		i    int
		err  error
		hash *pb.PieceHash
	}
	infos := make(chan info, pieceCount)

	piecesCtx, piecesCancel := context.WithCancel(ctx)
	defer piecesCancel()

	for i, addressedLimit := range limits {
		go func(i int, addressedLimit *pb.AddressedOrderLimit) {
			hash, _, err := ec.PutPiece(piecesCtx, ctx, addressedLimit, privateKey, readers[i])
			infos <- info{i: i, err: err, hash: hash}
		}(i, addressedLimit)
	}

	successfulNodes = make([]*pb.Node, pieceCount)
	successfulHashes = make([]*pb.PieceHash, pieceCount)
	var successfulCount, failureCount, cancellationCount int32

	// all the piece upload errors, combined
	var pieceErrors errs.Group
	for range limits {
		info := <-infos

		if limits[info.i] == nil {
			continue
		}

		if info.err != nil {
			pieceErrors.Add(info.err)
			if !errs2.IsCanceled(info.err) {
				failureCount++
			} else {
				cancellationCount++
			}
			continue
		}

		successfulNodes[info.i] = &pb.Node{
			Id:      limits[info.i].GetLimit().StorageNodeId,
			Address: limits[info.i].GetStorageNodeAddress(),
		}
		successfulHashes[info.i] = info.hash

		successfulCount++
		if int(successfulCount) >= rs.OptimalThreshold() {
			// cancelling remaining uploads
			piecesCancel()
		}
	}

	defer func() {
		select {
		case <-ctx.Done():
			// make sure context.Canceled is the primary error in the error chain
			// for later errors.Is/errs2.IsCanceled checking
			err = errs.Combine(context.Canceled, Error.New("upload cancelled by user"))
		default:
		}
	}()

	mon.IntVal("put_segment_pieces_total").Observe(int64(pieceCount))
	mon.IntVal("put_segment_pieces_optimal").Observe(int64(rs.OptimalThreshold()))
	mon.IntVal("put_segment_pieces_successful").Observe(int64(successfulCount))
	mon.IntVal("put_segment_pieces_failed").Observe(int64(failureCount))
	mon.IntVal("put_segment_pieces_canceled").Observe(int64(cancellationCount))

	if int(successfulCount) <= rs.RepairThreshold() && int(successfulCount) < rs.OptimalThreshold() {
		return nil, nil, Error.New("successful puts (%d) less than or equal to repair threshold (%d), %w", successfulCount, rs.RepairThreshold(), pieceErrors.Err())
	}

	if int(successfulCount) < rs.OptimalThreshold() {
		return nil, nil, Error.New("successful puts (%d) less than success threshold (%d), %w", successfulCount, rs.OptimalThreshold(), pieceErrors.Err())
	}

	return successfulNodes, successfulHashes, nil
}

func (ec *ecClient) PutPiece(ctx, parent context.Context, limit *pb.AddressedOrderLimit, privateKey storj.PiecePrivateKey, data io.ReadCloser) (hash *pb.PieceHash, deprecated *struct{}, err error) {
	if limit == nil {
		defer mon.Task()(&ctx, "node: nil")(&err)
		defer func() { err = errs.Combine(err, data.Close()) }()
		_, _ = io.Copy(io.Discard, data)
		return nil, nil, nil
	}

	storageNodeID := limit.GetLimit().StorageNodeId
	defer mon.Task()(&ctx, "node: "+storageNodeID.String()[0:8])(&err)
	start := time.Now()
	measuredReader := countingReader{R: data}
	defer func() {
		var errstr string
		if err != nil {
			errstr = err.Error()
		}
		var pieceSize int64
		var pieceTimestamp time.Time
		var hashAlgo int64
		if hash != nil {
			pieceSize = hash.PieceSize
			pieceTimestamp = hash.Timestamp
			hashAlgo = int64(hash.HashAlgorithm)
		}
		evs.Event("piece-upload",
			eventkit.Bytes("node_id", storageNodeID.Bytes()),
			eventkit.Bytes("piece_id", limit.GetLimit().PieceId.Bytes()),
			eventkit.Duration("upload_time", time.Since(start)),
			eventkit.Bool("success", err == nil),
			eventkit.String("error", errstr),
			eventkit.Int64("bytes", measuredReader.N),
			eventkit.Int64("piece_size", pieceSize),
			eventkit.Timestamp("piece_timestamp", pieceTimestamp),
			eventkit.Int64("hash_algo", hashAlgo),
		)
	}()
	defer func() { err = errs.Combine(err, data.Close()) }()

	ps, err := ec.dialPiecestore(ctx, limitToNodeURL(limit))
	if err != nil {
		return nil, nil, Error.New("failed to dial (node:%v): %w", storageNodeID, err)
	}
	defer func() { err = errs.Combine(err, ps.Close()) }()

	hash, err = ps.UploadReader(ctx, limit.GetLimit(), privateKey, &measuredReader)
	if err != nil {
		if errors.Is(ctx.Err(), context.Canceled) {
			// Canceled context means the piece upload was interrupted by user or due
			// to slow connection. No error logging for this case.
			if errors.Is(parent.Err(), context.Canceled) {
				err = Error.New("upload canceled by user: %w", err)
			} else {
				err = Error.New("upload cut due to slow connection (node:%v): %w", storageNodeID, err)
			}

			// make sure context.Canceled is the primary error in the error chain
			// for later errors.Is/errs2.IsCanceled checking
			err = errs.Combine(context.Canceled, err)
		} else {
			nodeAddress := ""
			if limit.GetStorageNodeAddress() != nil {
				nodeAddress = limit.GetStorageNodeAddress().GetAddress()
			}
			err = Error.New("upload failed (node:%v, address:%v): %w", storageNodeID, nodeAddress, err)
		}

		return nil, nil, err
	}

	return hash, nil, nil
}

type countingReader struct {
	N int64
	R io.Reader
}

func (c *countingReader) Read(p []byte) (n int, err error) {
	n, err = c.R.Read(p)
	c.N += int64(n)
	return n, err
}
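
// The sketch below is illustrative and not part of the upstream file. It shows the
// intended use of countingReader, mirroring PutPiece: wrap the source reader, let
// the consumer (here an io.Copy stand-in for the upload) drain it, then read N to
// learn how many bytes actually flowed through, as reported in the "bytes" field
// of the piece-upload event.
func exampleCountBytes(dst io.Writer, src io.Reader) (int64, error) {
	counted := countingReader{R: src}
	_, err := io.Copy(dst, &counted)
	return counted.N, err
}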

func (ec *ecClient) Get(ctx context.Context, limits []*pb.AddressedOrderLimit, privateKey storj.PiecePrivateKey, es eestream.ErasureScheme, size int64) (rr ranger.Ranger, err error) {
	return ec.GetWithOptions(ctx, limits, privateKey, es, size, GetOptions{})
}

func (ec *ecClient) GetWithOptions(ctx context.Context, limits []*pb.AddressedOrderLimit, privateKey storj.PiecePrivateKey, es eestream.ErasureScheme, size int64, opts GetOptions) (rr ranger.Ranger, err error) {
	defer mon.Task()(&ctx)(&err)

	if len(limits) != es.TotalCount() {
		return nil, Error.New("size of limits slice (%d) does not match total count (%d) of erasure scheme", len(limits), es.TotalCount())
	}

	if nonNilCount(limits) < es.RequiredCount() {
		return nil, Error.New("number of non-nil limits (%d) is less than required count (%d) of erasure scheme", nonNilCount(limits), es.RequiredCount())
	}

	paddedSize := calcPadded(size, es.StripeSize())
	pieceSize := paddedSize / int64(es.RequiredCount())

	rrs := map[int]ranger.Ranger{}
	for i, addressedLimit := range limits {
		if addressedLimit == nil {
			continue
		}

		rrs[i] = &lazyPieceRanger{
			dialPiecestore: ec.dialPiecestore,
			limit:          addressedLimit,
			privateKey:     privateKey,
			size:           pieceSize,
		}
	}

	rr, err = eestream.Decode(rrs, es, ec.memoryLimit, opts.ErrorDetection || ec.forceErrorDetection)
	if err != nil {
		return nil, Error.Wrap(err)
	}

	ranger, err := encryption.Unpad(rr, int(paddedSize-size))
	return ranger, Error.Wrap(err)
}

func unique(limits []*pb.AddressedOrderLimit) bool {
	if len(limits) < 2 {
		return true
	}
	ids := make(storj.NodeIDList, len(limits))
	for i, addressedLimit := range limits {
		if addressedLimit != nil {
			ids[i] = addressedLimit.GetLimit().StorageNodeId
		}
	}

	// sort the ids and check for identical neighbors
	sort.Sort(ids)
	// sort.Slice(ids, func(i, k int) bool { return ids[i].Less(ids[k]) })
	for i := 1; i < len(ids); i++ {
		if ids[i] != (storj.NodeID{}) && ids[i] == ids[i-1] {
			return false
		}
	}

	return true
}

func calcPadded(size int64, blockSize int) int64 {
	mod := size % int64(blockSize)
	if mod == 0 {
		return size
	}
	return size + int64(blockSize) - mod
}
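
// The sketch below is illustrative and not part of the upstream file. It spells out
// the size arithmetic GetWithOptions performs before building piece rangers: the
// segment size is padded up to a whole number of stripes with calcPadded, then
// divided evenly across the required pieces. For example, size=1000, stripeSize=256
// and requiredCount=4 pad to 1024 bytes and yield a 256 byte piece size.
func examplePaddedPieceSize(size int64, stripeSize, requiredCount int) int64 {
	paddedSize := calcPadded(size, stripeSize) // 1000 -> 1024 when stripeSize is 256
	return paddedSize / int64(requiredCount)   // 1024 / 4 == 256
}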

type lazyPieceRanger struct {
	dialPiecestore dialPiecestoreFunc
	limit          *pb.AddressedOrderLimit
	privateKey     storj.PiecePrivateKey
	size           int64
}

// Size implements Ranger.Size.
func (lr *lazyPieceRanger) Size() int64 {
	return lr.size
}

// Range implements Ranger.Range to be lazily connected.
func (lr *lazyPieceRanger) Range(ctx context.Context, offset, length int64) (_ io.ReadCloser, err error) {
	defer mon.Task()(&ctx)(&err)

	ctx, cancel := context.WithCancel(ctx)

	return &lazyPieceReader{
		ranger: lr,
		ctx:    ctx,
		cancel: cancel,
		offset: offset,
		length: length,
	}, nil
}

type lazyPieceReader struct {
	ranger *lazyPieceRanger
	ctx    context.Context
	cancel func()
	offset int64
	length int64

	mu       sync.Mutex
	isClosed bool
	download *piecestore.Download
	client   *piecestore.Client
}

func (lr *lazyPieceReader) Read(data []byte) (_ int, err error) {
	if err := lr.dial(); err != nil {
		return 0, err
	}
	return lr.download.Read(data)
}

func (lr *lazyPieceReader) dial() error {
	lr.mu.Lock()
	if lr.isClosed {
		lr.mu.Unlock()
		return io.EOF
	}
	if lr.download != nil {
		lr.mu.Unlock()
		return nil
	}
	lr.mu.Unlock()

	client, downloader, err := lr.ranger.dial(lr.ctx, lr.offset, lr.length)
	if err != nil {
		return Error.Wrap(err)
	}

	lr.mu.Lock()
	defer lr.mu.Unlock()

	if lr.isClosed {
		// Close tried to cancel the dialing, however failed to do so.
		lr.cancel()
		_ = downloader.Close()
		_ = client.Close()
		return io.ErrClosedPipe
	}

	lr.download = downloader
	lr.client = client

	return nil
}

func limitToNodeURL(limit *pb.AddressedOrderLimit) storj.NodeURL {
	return (&pb.Node{
		Id:      limit.GetLimit().StorageNodeId,
		Address: limit.GetStorageNodeAddress(),
	}).NodeURL()
}

var monLazyPieceRangerDialTask = mon.Task()

func (lr *lazyPieceRanger) dial(ctx context.Context, offset, length int64) (_ *piecestore.Client, _ *piecestore.Download, err error) {
	defer monLazyPieceRangerDialTask(&ctx)(&err)

	ps, err := lr.dialPiecestore(ctx, limitToNodeURL(lr.limit))
	if err != nil {
		return nil, nil, err
	}

	download, err := ps.Download(ctx, lr.limit.GetLimit(), lr.privateKey, offset, length)
	if err != nil {
		return nil, nil, errs.Combine(err, ps.Close())
	}
	return ps, download, nil
}

// GetHashAndLimit gets the download's hash and original order limit.
func (lr *lazyPieceReader) GetHashAndLimit() (*pb.PieceHash, *pb.OrderLimit) {
	if lr.download == nil {
		return nil, nil
	}
	return lr.download.GetHashAndLimit()
}

func (lr *lazyPieceReader) Close() (err error) {
	lr.mu.Lock()
	defer lr.mu.Unlock()
	if lr.isClosed {
		return nil
	}
	lr.isClosed = true

	if lr.download != nil {
		err = errs.Combine(err, lr.download.Close())
	}
	if lr.client != nil {
		err = errs.Combine(err, lr.client.Close())
	}

	lr.cancel()
	return err
}

func nonNilCount(limits []*pb.AddressedOrderLimit) int {
	total := 0
	for _, limit := range limits {
		if limit != nil {
			total++
		}
	}
	return total
}
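
// The sketch below is illustrative and not part of the upstream file. It shows the
// read path end to end: GetWithOptions returns a ranger.Ranger backed by
// lazyPieceRanger values, so no storage node is dialed until the returned reader is
// first read. The limits, key, erasure scheme, and size are assumed to come from
// satellite metadata resolved elsewhere; error detection is requested via GetOptions.
func exampleDownloadSegment(ctx context.Context, ec Client, limits []*pb.AddressedOrderLimit, privateKey storj.PiecePrivateKey, es eestream.ErasureScheme, size int64, dst io.Writer) (err error) {
	rr, err := ec.GetWithOptions(ctx, limits, privateKey, es, size, GetOptions{ErrorDetection: true})
	if err != nil {
		return err
	}
	r, err := rr.Range(ctx, 0, rr.Size()) // dialing happens lazily on the first Read
	if err != nil {
		return err
	}
	defer func() { err = errs.Combine(err, r.Close()) }()
	_, err = io.Copy(dst, r)
	return err
}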