github.com/uber/kraken@v0.1.4/lib/torrent/scheduler/dispatch/dispatcher.go

// Copyright (c) 2016-2019 Uber Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package dispatch

import (
	"errors"
	"fmt"
	"sync"
	"time"

	"github.com/uber/kraken/core"
	"github.com/uber/kraken/gen/go/proto/p2p"
	"github.com/uber/kraken/lib/torrent/networkevent"
	"github.com/uber/kraken/lib/torrent/scheduler/conn"
	"github.com/uber/kraken/lib/torrent/scheduler/dispatch/piecerequest"
	"github.com/uber/kraken/lib/torrent/scheduler/torrentlog"
	"github.com/uber/kraken/lib/torrent/storage"
	"github.com/uber/kraken/utils/syncutil"

	"github.com/andres-erbsen/clock"
	"github.com/uber-go/tally"
	"github.com/willf/bitset"
	"go.uber.org/zap"
	"golang.org/x/sync/syncmap"
)

var (
	errPeerAlreadyDispatched   = errors.New("peer is already dispatched for the torrent")
	errPieceOutOfBounds        = errors.New("piece index out of bounds")
	errChunkNotSupported       = errors.New("reading / writing chunk of piece not supported")
	errRepeatedBitfieldMessage = errors.New("received repeated bitfield message")
)

// Events defines Dispatcher events.
type Events interface {
	DispatcherComplete(*Dispatcher)
	PeerRemoved(core.PeerID, core.InfoHash)
}

// Messages defines a subset of conn.Conn methods which Dispatcher requires to
// communicate with remote peers.
type Messages interface {
	Send(msg *conn.Message) error
	Receiver() <-chan *conn.Message
	Close()
}

// Dispatcher coordinates torrent state with sending / receiving messages between multiple
// peers. As such, Dispatcher and Torrent have a one-to-one relationship, while Dispatcher
// and Conn have a one-to-many relationship.
type Dispatcher struct {
	config                Config
	stats                 tally.Scope
	clk                   clock.Clock
	createdAt             time.Time
	localPeerID           core.PeerID
	torrent               *torrentAccessWatcher
	peers                 syncmap.Map // core.PeerID -> *peer
	peerStats             syncmap.Map // core.PeerID -> *peerStats, persists on peer removal.
	numPeersByPiece       syncutil.Counters
	netevents             networkevent.Producer
	pieceRequestTimeout   time.Duration
	pieceRequestManager   *piecerequest.Manager
	pendingPiecesDoneOnce sync.Once
	pendingPiecesDone     chan struct{}
	completeOnce          sync.Once
	events                Events
	logger                *zap.SugaredLogger
	torrentlog            *torrentlog.Logger
}

// New creates a new Dispatcher.
func New(
	config Config,
	stats tally.Scope,
	clk clock.Clock,
	netevents networkevent.Producer,
	events Events,
	peerID core.PeerID,
	t storage.Torrent,
	logger *zap.SugaredLogger,
	tlog *torrentlog.Logger) (*Dispatcher, error) {

	d, err := newDispatcher(config, stats, clk, netevents, events, peerID, t, logger, tlog)
	if err != nil {
		return nil, err
	}

	// Exits when d.pendingPiecesDone is closed.
	go d.watchPendingPieceRequests()

	if t.Complete() {
		d.complete()
	}

	return d, nil
}

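// A minimal construction sketch for reference (hedged: the real call site
// lives in the scheduler, and netevents, events, peerID, t, and tlog below
// stand in for caller-provided values):
//
//	d, err := dispatch.New(
//		dispatch.Config{},    // zero value; defaults are applied internally
//		tally.NoopScope,      // metrics sink
//		clock.New(),          // wall clock; tests may inject a fake
//		netevents, events,    // network event producer and event callbacks
//		peerID, t,            // local peer identity and storage-backed torrent
//		zap.NewNop().Sugar(), tlog)
//	if err != nil {
//		return err
//	}
//	defer d.TearDown()
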
// newDispatcher creates a new Dispatcher with no side-effects for testing purposes.
func newDispatcher(
	config Config,
	stats tally.Scope,
	clk clock.Clock,
	netevents networkevent.Producer,
	events Events,
	peerID core.PeerID,
	t storage.Torrent,
	logger *zap.SugaredLogger,
	tlog *torrentlog.Logger) (*Dispatcher, error) {

	config = config.applyDefaults()

	stats = stats.Tagged(map[string]string{
		"module": "dispatch",
	})

	pieceRequestTimeout := config.calcPieceRequestTimeout(t.MaxPieceLength())
	pieceRequestManager, err := piecerequest.NewManager(
		clk, pieceRequestTimeout, config.PieceRequestPolicy, config.PipelineLimit)
	if err != nil {
		return nil, fmt.Errorf("piece request manager: %s", err)
	}

	return &Dispatcher{
		config:              config,
		stats:               stats,
		clk:                 clk,
		createdAt:           clk.Now(),
		localPeerID:         peerID,
		torrent:             newTorrentAccessWatcher(t, clk),
		numPeersByPiece:     syncutil.NewCounters(t.NumPieces()),
		netevents:           netevents,
		pieceRequestTimeout: pieceRequestTimeout,
		pieceRequestManager: pieceRequestManager,
		pendingPiecesDone:   make(chan struct{}),
		events:              events,
		logger:              logger,
		torrentlog:          tlog,
	}, nil
}

// Digest returns the blob digest for d's torrent.
func (d *Dispatcher) Digest() core.Digest {
	return d.torrent.Digest()
}

// InfoHash returns d's torrent hash.
func (d *Dispatcher) InfoHash() core.InfoHash {
	return d.torrent.InfoHash()
}

// Length returns d's torrent length.
func (d *Dispatcher) Length() int64 {
	return d.torrent.Length()
}

// Stat returns d's TorrentInfo.
func (d *Dispatcher) Stat() *storage.TorrentInfo {
	return d.torrent.Stat()
}

// Complete returns true if d's torrent is complete.
func (d *Dispatcher) Complete() bool {
	return d.torrent.Complete()
}

// CreatedAt returns when d was created.
func (d *Dispatcher) CreatedAt() time.Time {
	return d.createdAt
}

// LastGoodPieceReceived returns when d last received a valid and needed piece
// from peerID.
func (d *Dispatcher) LastGoodPieceReceived(peerID core.PeerID) time.Time {
	v, ok := d.peers.Load(peerID)
	if !ok {
		return time.Time{}
	}
	return v.(*peer).getLastGoodPieceReceived()
}

// LastPieceSent returns when d last sent a piece to peerID.
func (d *Dispatcher) LastPieceSent(peerID core.PeerID) time.Time {
	v, ok := d.peers.Load(peerID)
	if !ok {
		return time.Time{}
	}
	return v.(*peer).getLastPieceSent()
}

// LastReadTime returns when d's torrent was last read from.
func (d *Dispatcher) LastReadTime() time.Time {
	return d.torrent.getLastReadTime()
}

// LastWriteTime returns when d's torrent was last written to.
func (d *Dispatcher) LastWriteTime() time.Time {
	return d.torrent.getLastWriteTime()
}

// Empty returns true if the Dispatcher has no peers.
func (d *Dispatcher) Empty() bool {
	empty := true
	d.peers.Range(func(k, v interface{}) bool {
		empty = false
		return false
	})
	return empty
}

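// The snapshot returned below is the map analogue of d.numPeersByPiece, which
// tracks per-piece availability incrementally. A hedged sketch of deriving the
// same counts from the snapshot (counts is an illustrative name, not part of
// this package):
//
//	counts := make([]int, d.torrent.NumPieces())
//	for _, bf := range d.RemoteBitfields() {
//		for i, ok := bf.NextSet(0); ok; i, ok = bf.NextSet(i + 1) {
//			counts[i]++
//		}
//	}
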
// RemoteBitfields returns the bitfields of peers connected to the dispatcher.
func (d *Dispatcher) RemoteBitfields() conn.RemoteBitfields {
	remoteBitfields := make(conn.RemoteBitfields)

	d.peers.Range(func(k, v interface{}) bool {
		remoteBitfields[k.(core.PeerID)] = v.(*peer).bitfield.Copy()
		return true
	})
	return remoteBitfields
}

// AddPeer registers a new peer with the Dispatcher.
func (d *Dispatcher) AddPeer(
	peerID core.PeerID, b *bitset.BitSet, messages Messages) error {

	p, err := d.addPeer(peerID, b, messages)
	if err != nil {
		return err
	}
	go d.maybeRequestMorePieces(p)
	go d.feed(p)
	return nil
}

// addPeer creates and inserts a new peer into the Dispatcher. Split from AddPeer
// with no goroutine side-effects for testing purposes.
func (d *Dispatcher) addPeer(
	peerID core.PeerID, b *bitset.BitSet, messages Messages) (*peer, error) {

	pstats := &peerStats{}
	if s, ok := d.peerStats.LoadOrStore(peerID, pstats); ok {
		pstats = s.(*peerStats)
	}

	p := newPeer(peerID, b, messages, d.clk, pstats)
	if _, ok := d.peers.LoadOrStore(peerID, p); ok {
		return nil, errPeerAlreadyDispatched
	}

	for _, i := range p.bitfield.GetAllSet() {
		d.numPeersByPiece.Increment(int(i))
	}
	return p, nil
}

// removePeer deletes p and releases its piece reservations so other peers may
// claim them.
func (d *Dispatcher) removePeer(p *peer) error {
	d.peers.Delete(p.id)
	d.pieceRequestManager.ClearPeer(p.id)

	for _, i := range p.bitfield.GetAllSet() {
		d.numPeersByPiece.Decrement(int(i))
	}
	return nil
}

// TearDown closes all Dispatcher connections.
func (d *Dispatcher) TearDown() {
	d.pendingPiecesDoneOnce.Do(func() {
		close(d.pendingPiecesDone)
	})

	d.peers.Range(func(k, v interface{}) bool {
		p := v.(*peer)
		d.log("peer", p).Info("Dispatcher teardown closing connection")
		p.messages.Close()
		return true
	})

	summaries := make(torrentlog.LeecherSummaries, 0)
	d.peerStats.Range(func(k, v interface{}) bool {
		peerID := k.(core.PeerID)
		pstats := v.(*peerStats)
		summaries = append(summaries, torrentlog.LeecherSummary{
			PeerID:           peerID,
			RequestsReceived: pstats.getPieceRequestsReceived(),
			PiecesSent:       pstats.getPiecesSent(),
		})
		return true
	})

	if err := d.torrentlog.LeecherSummaries(
		d.torrent.Digest(), d.torrent.InfoHash(), summaries); err != nil {
		d.log().Errorf("Error logging incoming piece request summary: %s", err)
	}
}

func (d *Dispatcher) String() string {
	return fmt.Sprintf("Dispatcher(%s)", d.torrent)
}

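// complete marks the torrent complete, notifies the scheduler, stops the
// pending-piece watcher, and closes connections to peers which are themselves
// already complete, since those connections can serve no further purpose.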
func (d *Dispatcher) complete() {
	d.completeOnce.Do(func() { go d.events.DispatcherComplete(d) })
	d.pendingPiecesDoneOnce.Do(func() { close(d.pendingPiecesDone) })

	d.peers.Range(func(k, v interface{}) bool {
		p := v.(*peer)
		if p.bitfield.Complete() {
			// Close connections to other completed peers since those connections
			// are now useless.
			d.log("peer", p).Info("Closing connection to completed peer")
			p.messages.Close()
		} else {
			// Notify in-progress peers that we have completed the torrent and
			// all pieces are available.
			p.messages.Send(conn.NewCompleteMessage())
		}
		return true
	})

	var piecesRequestedTotal int
	summaries := make(torrentlog.SeederSummaries, 0)
	d.peerStats.Range(func(k, v interface{}) bool {
		peerID := k.(core.PeerID)
		pstats := v.(*peerStats)
		requested := pstats.getPieceRequestsSent()
		piecesRequestedTotal += requested
		summary := torrentlog.SeederSummary{
			PeerID:                  peerID,
			RequestsSent:            requested,
			GoodPiecesReceived:      pstats.getGoodPiecesReceived(),
			DuplicatePiecesReceived: pstats.getDuplicatePiecesReceived(),
		}
		summaries = append(summaries, summary)
		return true
	})

	// Only log if we actually requested pieces from others.
	if piecesRequestedTotal > 0 {
		if err := d.torrentlog.SeederSummaries(
			d.torrent.Digest(), d.torrent.InfoHash(), summaries); err != nil {
			d.log().Errorf("Error logging outgoing piece request summary: %s", err)
		}
	}
}

// endgame returns true once the number of missing pieces drops to the
// configured threshold, at which point duplicate requests for the same piece
// across peers are allowed.
func (d *Dispatcher) endgame() bool {
	if d.config.DisableEndgame {
		return false
	}
	remaining := d.torrent.NumPieces() - int(d.torrent.Bitfield().Count())
	return remaining <= d.config.EndgameThreshold
}

// maybeRequestMorePieces requests pieces which p has and we still need.
func (d *Dispatcher) maybeRequestMorePieces(p *peer) (bool, error) {
	candidates := p.bitfield.Intersection(d.torrent.Bitfield().Complement())

	return d.maybeSendPieceRequests(p, candidates)
}

// maybeSendPieceRequests reserves candidate pieces for p (subject to the
// request policy and pipeline limit) and sends a request for each reservation.
// Returns true if at least one request was sent.
func (d *Dispatcher) maybeSendPieceRequests(p *peer, candidates *bitset.BitSet) (bool, error) {
	pieces, err := d.pieceRequestManager.ReservePieces(p.id, candidates, d.numPeersByPiece, d.endgame())
	if err != nil {
		return false, err
	}
	if len(pieces) == 0 {
		return false, nil
	}
	for _, i := range pieces {
		if err := p.messages.Send(conn.NewPieceRequestMessage(i, d.torrent.PieceLength(i))); err != nil {
			// Connection closed.
			d.pieceRequestManager.MarkUnsent(p.id, i)
			return false, err
		}
		d.netevents.Produce(
			networkevent.RequestPieceEvent(d.torrent.InfoHash(), d.localPeerID, p.id, i))
		p.pstats.incrementPieceRequestsSent()
	}
	return true, nil
}

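// resendFailedPieceRequests gathers failed (expired, invalid, or unsent) piece
// requests from the piece request manager and attempts to re-reserve each one
// against another connected peer which has the piece.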
func (d *Dispatcher) resendFailedPieceRequests() {
	failedRequests := d.pieceRequestManager.GetFailedRequests()
	if len(failedRequests) > 0 {
		d.log().Infof("Resending %d failed piece requests", len(failedRequests))
		d.stats.Counter("piece_request_failures").Inc(int64(len(failedRequests)))
	}

	var sent int
	for _, r := range failedRequests {
		d.peers.Range(func(k, v interface{}) bool {
			p := v.(*peer)
			if (r.Status == piecerequest.StatusExpired || r.Status == piecerequest.StatusInvalid) &&
				r.PeerID == p.id {
				// Do not resend to the same peer for expired or invalid requests.
				return true
			}

			b := d.torrent.Bitfield()
			candidates := p.bitfield.Intersection(b.Complement())
			if candidates.Test(uint(r.Piece)) {
				nb := bitset.New(b.Len()).Set(uint(r.Piece))
				// Use a name distinct from the outer sent counter; shadowing it
				// here would leave the counter permanently at zero.
				if ok, err := d.maybeSendPieceRequests(p, nb); ok && err == nil {
					sent++
					return false
				}
			}
			return true
		})
	}

	unsent := len(failedRequests) - sent
	if unsent > 0 {
		d.log().Infof("Nowhere to resend %d / %d failed piece requests", unsent, len(failedRequests))
	}
}

// watchPendingPieceRequests periodically resends failed piece requests until
// d.pendingPiecesDone is closed.
func (d *Dispatcher) watchPendingPieceRequests() {
	for {
		select {
		case <-d.clk.After(d.pieceRequestTimeout / 2):
			d.resendFailedPieceRequests()
		case <-d.pendingPiecesDone:
			return
		}
	}
}

// feed reads off of peer and handles incoming messages. When peer's messages close,
// the feed goroutine removes peer from the Dispatcher and exits.
func (d *Dispatcher) feed(p *peer) {
	for msg := range p.messages.Receiver() {
		if err := d.dispatch(p, msg); err != nil {
			d.log().Errorf("Error dispatching message: %s", err)
		}
	}
	d.removePeer(p)
	d.events.PeerRemoved(p.id, d.torrent.InfoHash())
}

// dispatch routes an incoming message to its handler based on message type.
func (d *Dispatcher) dispatch(p *peer, msg *conn.Message) error {
	switch msg.Message.Type {
	case p2p.Message_ERROR:
		d.handleError(p, msg.Message.Error)
	case p2p.Message_ANNOUCE_PIECE:
		d.handleAnnouncePiece(p, msg.Message.AnnouncePiece)
	case p2p.Message_PIECE_REQUEST:
		d.handlePieceRequest(p, msg.Message.PieceRequest)
	case p2p.Message_PIECE_PAYLOAD:
		d.handlePiecePayload(p, msg.Message.PiecePayload, msg.Payload)
	case p2p.Message_CANCEL_PIECE:
		d.handleCancelPiece(p, msg.Message.CancelPiece)
	case p2p.Message_BITFIELD:
		d.handleBitfield(p, msg.Message.Bitfield)
	case p2p.Message_COMPLETE:
		d.handleComplete(p)
	default:
		return fmt.Errorf("unknown message type: %d", msg.Message.Type)
	}
	return nil
}

// handleError marks a failed piece request invalid so it can be retried
// against another peer.
func (d *Dispatcher) handleError(p *peer, msg *p2p.ErrorMessage) {
	switch msg.Code {
	case p2p.ErrorMessage_PIECE_REQUEST_FAILED:
		d.log().Errorf("Piece request failed: %s", msg.Error)
		d.pieceRequestManager.MarkInvalid(p.id, int(msg.Index))
	}
}

// handleAnnouncePiece records that p now has the announced piece and possibly
// requests more pieces from it.
func (d *Dispatcher) handleAnnouncePiece(p *peer, msg *p2p.AnnouncePieceMessage) {
	if int(msg.Index) >= d.torrent.NumPieces() {
		d.log().Errorf("Announce piece out of bounds: %d >= %d", msg.Index, d.torrent.NumPieces())
		return
	}
	i := int(msg.Index)
	p.bitfield.Set(uint(i), true)
	d.numPeersByPiece.Increment(i)

	d.maybeRequestMorePieces(p)
}

// isFullPiece returns true if the given chunk spans exactly one whole piece.
// Partial (chunked) pieces are not supported.
func (d *Dispatcher) isFullPiece(i, offset, length int) bool {
	return offset == 0 && length == int(d.torrent.PieceLength(i))
}

// handlePieceRequest replies to a piece request with the piece payload, or
// with an error message if the piece cannot be served.
func (d *Dispatcher) handlePieceRequest(p *peer, msg *p2p.PieceRequestMessage) {
	p.pstats.incrementPieceRequestsReceived()

	i := int(msg.Index)
	if !d.isFullPiece(i, int(msg.Offset), int(msg.Length)) {
		d.log("peer", p, "piece", i).Error("Rejecting piece request: chunk not supported")
		p.messages.Send(conn.NewErrorMessage(i, p2p.ErrorMessage_PIECE_REQUEST_FAILED, errChunkNotSupported))
		return
	}

	payload, err := d.torrent.GetPieceReader(i)
	if err != nil {
		d.log("peer", p, "piece", i).Errorf("Error getting reader for requested piece: %s", err)
		p.messages.Send(conn.NewErrorMessage(i, p2p.ErrorMessage_PIECE_REQUEST_FAILED, err))
		return
	}

	if err := p.messages.Send(conn.NewPiecePayloadMessage(i, payload)); err != nil {
		return
	}

	p.touchLastPieceSent()
	p.pstats.incrementPiecesSent()

	// Assume that the peer successfully received the piece.
	p.bitfield.Set(uint(i), true)
}

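// handlePiecePayload writes a received piece to storage, clears its
// reservation, requests more pieces, and announces the new piece to all other
// connected peers.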
func (d *Dispatcher) handlePiecePayload(
	p *peer, msg *p2p.PiecePayloadMessage, payload storage.PieceReader) {

	defer payload.Close()

	i := int(msg.Index)
	if !d.isFullPiece(i, int(msg.Offset), int(msg.Length)) {
		d.log("peer", p, "piece", i).Error("Rejecting piece payload: chunk not supported")
		d.pieceRequestManager.MarkInvalid(p.id, i)
		return
	}

	if err := d.torrent.WritePiece(payload, i); err != nil {
		if err != storage.ErrPieceComplete {
			d.log("peer", p, "piece", i).Errorf("Error writing piece payload: %s", err)
			d.pieceRequestManager.MarkInvalid(p.id, i)
		} else {
			p.pstats.incrementDuplicatePiecesReceived()
		}
		return
	}

	d.netevents.Produce(
		networkevent.ReceivePieceEvent(d.torrent.InfoHash(), d.localPeerID, p.id, i))

	p.pstats.incrementGoodPiecesReceived()
	p.touchLastGoodPieceReceived()
	if d.torrent.Complete() {
		d.complete()
	}

	d.pieceRequestManager.Clear(i)

	d.maybeRequestMorePieces(p)

	// Announce the newly received piece to all other connected peers.
	d.peers.Range(func(k, v interface{}) bool {
		if k.(core.PeerID) == p.id {
			return true
		}
		pp := v.(*peer)

		pp.messages.Send(conn.NewAnnouncePieceMessage(i))

		return true
	})
}

// handleCancelPiece is a no-op: cancelling is not supported because all
// received messages are synchronized, therefore if we receive a cancel it is
// already too late -- we've already read the piece.
func (d *Dispatcher) handleCancelPiece(p *peer, msg *p2p.CancelPieceMessage) {}

// handleBitfield logs an unexpected bitfield message; a peer's bitfield is
// exchanged when the conn is established, so a repeat is unexpected.
func (d *Dispatcher) handleBitfield(p *peer, msg *p2p.BitfieldMessage) {
	d.log("peer", p).Error("Unexpected bitfield message from established conn")
}

// handleComplete records that p has every piece, then either closes the
// connection (if we are also complete) or requests more pieces.
func (d *Dispatcher) handleComplete(p *peer) {
	if d.Complete() {
		d.log("peer", p).Info("Closing connection to completed peer")
		p.messages.Close()
	} else {
		p.bitfield.SetAll(true)
		d.maybeRequestMorePieces(p)
	}
}

// log returns a logger annotated with d's torrent plus the given key-value pairs.
func (d *Dispatcher) log(args ...interface{}) *zap.SugaredLogger {
	args = append(args, "torrent", d.torrent)
	return d.logger.With(args...)
}
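
// A minimal in-memory Messages fake for exercising the Dispatcher in tests
// (hedged sketch; not the fixture the real tests use):
//
//	type fakeMessages struct {
//		in chan *conn.Message
//	}
//
//	func (m *fakeMessages) Send(msg *conn.Message) error   { return nil }
//	func (m *fakeMessages) Receiver() <-chan *conn.Message { return m.in }
//	func (m *fakeMessages) Close()                         { close(m.in) }
//
// Closing the channel ends the feed loop for that peer, which removes the peer
// and fires Events.PeerRemoved.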