github.com/uber/kraken@v0.1.4/lib/torrent/scheduler/scheduler.go (about)

     1  // Copyright (c) 2016-2019 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  package scheduler
    15  
    16  import (
    17  	"errors"
    18  	"fmt"
    19  	"net"
    20  	"sync"
    21  	"time"
    22  
    23  	"github.com/andres-erbsen/clock"
    24  	"github.com/uber-go/tally"
    25  	"go.uber.org/zap"
    26  
    27  	"github.com/uber/kraken/core"
    28  	"github.com/uber/kraken/lib/torrent/networkevent"
    29  	"github.com/uber/kraken/lib/torrent/scheduler/announcequeue"
    30  	"github.com/uber/kraken/lib/torrent/scheduler/announcer"
    31  	"github.com/uber/kraken/lib/torrent/scheduler/conn"
    32  	"github.com/uber/kraken/lib/torrent/scheduler/connstate"
    33  	"github.com/uber/kraken/lib/torrent/scheduler/torrentlog"
    34  	"github.com/uber/kraken/lib/torrent/storage"
    35  	"github.com/uber/kraken/tracker/announceclient"
    36  	"github.com/uber/kraken/utils/log"
    37  )
    38  
    39  // Scheduler errors.
    40  var (
    41  	ErrTorrentNotFound   = errors.New("torrent not found")
    42  	ErrSchedulerStopped  = errors.New("scheduler has been stopped")
    43  	ErrTorrentTimeout    = errors.New("torrent timed out")
    44  	ErrTorrentRemoved    = errors.New("torrent manually removed")
    45  	ErrSendEventTimedOut = errors.New("event loop send timed out")
    46  )
    47  
    48  // Scheduler defines operations for scheduler.
    49  type Scheduler interface {
    50  	Stop()
    51  	Download(namespace string, d core.Digest) error
    52  	BlacklistSnapshot() ([]connstate.BlacklistedConn, error)
    53  	RemoveTorrent(d core.Digest) error
    54  	Probe() error
    55  }
    56  
    57  // scheduler manages global state for the peer. This includes:
    58  // - Opening torrents.
    59  // - Announcing to the tracker.
    60  // - Handshaking incoming connections.
    61  // - Initializing outgoing connections.
    62  // - Dispatching connections to torrents.
    63  // - Pre-empting existing connections when better options are available (TODO).
    64  type scheduler struct {
    65  	pctx           core.PeerContext
    66  	config         Config
    67  	clock          clock.Clock
    68  	torrentArchive storage.TorrentArchive
    69  	stats          tally.Scope
    70  
    71  	handshaker *conn.Handshaker
    72  
    73  	eventLoop *liftedEventLoop
    74  
    75  	listener net.Listener
    76  
    77  	preemptionTick <-chan time.Time
    78  	emitStatsTick  <-chan time.Time
    79  
    80  	// TODO(codyg): We only need this hold on this reference for reloading the scheduler...
    81  	announceClient announceclient.Client
    82  
    83  	announcer *announcer.Announcer
    84  
    85  	netevents networkevent.Producer
    86  
    87  	torrentlog *torrentlog.Logger
    88  
    89  	logger *zap.SugaredLogger
    90  
    91  	// The following fields orchestrate the stopping of the scheduler.
    92  	stopOnce sync.Once      // Ensures the stop sequence is executed only once.
    93  	done     chan struct{}  // Signals all goroutines to exit.
    94  	wg       sync.WaitGroup // Waits for eventLoop and listenLoop to exit.
    95  }
    96  
// schedOverrides defines scheduler fields which may be overridden for testing
// purposes. newScheduler seeds it with the production defaults and then lets
// each supplied option mutate it.
type schedOverrides struct {
	clock     clock.Clock
	eventLoop eventLoop
}
   103  
// option mutates the schedOverrides consulted by newScheduler.
type option func(*schedOverrides)
   105  
   106  func withClock(c clock.Clock) option {
   107  	return func(o *schedOverrides) { o.clock = c }
   108  }
   109  
   110  func withEventLoop(l eventLoop) option {
   111  	return func(o *schedOverrides) { o.eventLoop = l }
   112  }
   113  
   114  // newScheduler creates and starts a scheduler.
   115  func newScheduler(
   116  	config Config,
   117  	ta storage.TorrentArchive,
   118  	stats tally.Scope,
   119  	pctx core.PeerContext,
   120  	announceClient announceclient.Client,
   121  	netevents networkevent.Producer,
   122  	options ...option) (*scheduler, error) {
   123  
   124  	config = config.applyDefaults()
   125  
   126  	logger, err := log.New(config.Log, nil)
   127  	if err != nil {
   128  		return nil, fmt.Errorf("log: %s", err)
   129  	}
   130  	slogger := logger.Sugar()
   131  
   132  	done := make(chan struct{})
   133  
   134  	stats = stats.Tagged(map[string]string{
   135  		"module": "scheduler",
   136  	})
   137  
   138  	overrides := schedOverrides{
   139  		clock:     clock.New(),
   140  		eventLoop: newEventLoop(),
   141  	}
   142  	for _, opt := range options {
   143  		opt(&overrides)
   144  	}
   145  
   146  	eventLoop := liftEventLoop(overrides.eventLoop)
   147  
   148  	var preemptionTick <-chan time.Time
   149  	if !config.DisablePreemption {
   150  		preemptionTick = overrides.clock.Tick(config.PreemptionInterval)
   151  	}
   152  
   153  	handshaker, err := conn.NewHandshaker(
   154  		config.Conn, stats, overrides.clock, netevents, pctx.PeerID, eventLoop, slogger)
   155  	if err != nil {
   156  		return nil, fmt.Errorf("conn: %s", err)
   157  	}
   158  
   159  	tlog, err := torrentlog.New(config.TorrentLog, pctx)
   160  	if err != nil {
   161  		return nil, fmt.Errorf("torrentlog: %s", err)
   162  	}
   163  
   164  	s := &scheduler{
   165  		pctx:           pctx,
   166  		config:         config,
   167  		clock:          overrides.clock,
   168  		torrentArchive: ta,
   169  		stats:          stats,
   170  		handshaker:     handshaker,
   171  		eventLoop:      eventLoop,
   172  		preemptionTick: preemptionTick,
   173  		emitStatsTick:  overrides.clock.Tick(config.EmitStatsInterval),
   174  		announceClient: announceClient,
   175  		announcer:      announcer.Default(announceClient, eventLoop, overrides.clock, slogger),
   176  		netevents:      netevents,
   177  		torrentlog:     tlog,
   178  		logger:         slogger,
   179  		done:           done,
   180  	}
   181  
   182  	if config.DisablePreemption {
   183  		s.log().Warn("Preemption disabled")
   184  	}
   185  	if config.ConnState.DisableBlacklist {
   186  		s.log().Warn("Blacklisting disabled")
   187  	}
   188  
   189  	return s, nil
   190  }
   191  
   192  // start asynchronously starts all scheduler loops.
   193  //
   194  // Note: this has been split from the constructor so we can test against an
   195  // "unstarted" scheduler in certain cases.
   196  func (s *scheduler) start(aq announcequeue.Queue) error {
   197  	s.log().Infof(
   198  		"Scheduler starting as peer %s on addr %s:%d",
   199  		s.pctx.PeerID, s.pctx.IP, s.pctx.Port)
   200  
   201  	l, err := net.Listen("tcp", fmt.Sprintf(":%d", s.pctx.Port))
   202  	if err != nil {
   203  		return err
   204  	}
   205  	s.listener = l
   206  
   207  	s.wg.Add(4)
   208  	go s.runEventLoop(aq) // Careful, this should be the only reference to aq.
   209  	go s.listenLoop()
   210  	go s.tickerLoop()
   211  	go s.announceLoop()
   212  
   213  	return nil
   214  }
   215  
   216  // Stop shuts down the scheduler.
   217  func (s *scheduler) Stop() {
   218  	s.stopOnce.Do(func() {
   219  		s.log().Info("Stopping scheduler...")
   220  
   221  		close(s.done)
   222  		s.listener.Close()
   223  		s.eventLoop.send(shutdownEvent{})
   224  
   225  		// Waits for all loops to stop.
   226  		s.wg.Wait()
   227  
   228  		s.torrentlog.Sync()
   229  
   230  		s.log().Info("Scheduler stopped")
   231  	})
   232  }
   233  
   234  func (s *scheduler) doDownload(namespace string, d core.Digest) (size int64, err error) {
   235  	t, err := s.torrentArchive.CreateTorrent(namespace, d)
   236  	if err != nil {
   237  		if err == storage.ErrNotFound {
   238  			return 0, ErrTorrentNotFound
   239  		}
   240  		return 0, fmt.Errorf("create torrent: %s", err)
   241  	}
   242  
   243  	// Buffer size of 1 so sends do not block.
   244  	errc := make(chan error, 1)
   245  	if !s.eventLoop.send(newTorrentEvent{namespace, t, errc}) {
   246  		return 0, ErrSchedulerStopped
   247  	}
   248  	return t.Length(), <-errc
   249  }
   250  
   251  // Download downloads the torrent given metainfo. Once the torrent is downloaded,
   252  // it will begin seeding asynchronously.
   253  func (s *scheduler) Download(namespace string, d core.Digest) error {
   254  	start := time.Now()
   255  	size, err := s.doDownload(namespace, d)
   256  	if err != nil {
   257  		var errTag string
   258  		switch err {
   259  		case ErrTorrentNotFound:
   260  			errTag = "not_found"
   261  		case ErrTorrentTimeout:
   262  			errTag = "timeout"
   263  		case ErrSchedulerStopped:
   264  			errTag = "scheduler_stopped"
   265  		case ErrTorrentRemoved:
   266  			errTag = "removed"
   267  		default:
   268  			errTag = "unknown"
   269  		}
   270  		s.stats.Tagged(map[string]string{
   271  			"error": errTag,
   272  		}).Counter("download_errors").Inc(1)
   273  		s.torrentlog.DownloadFailure(namespace, d, size, err)
   274  	} else {
   275  		downloadTime := time.Since(start)
   276  		recordDownloadTime(s.stats, size, downloadTime)
   277  		s.torrentlog.DownloadSuccess(namespace, d, size, downloadTime)
   278  	}
   279  	return err
   280  }
   281  
   282  // BlacklistSnapshot returns a snapshot of the current connection blacklist.
   283  func (s *scheduler) BlacklistSnapshot() ([]connstate.BlacklistedConn, error) {
   284  	result := make(chan []connstate.BlacklistedConn)
   285  	if !s.eventLoop.send(blacklistSnapshotEvent{result}) {
   286  		return nil, ErrSchedulerStopped
   287  	}
   288  	return <-result, nil
   289  }
   290  
   291  // RemoveTorrent forcibly stops leeching / seeding torrent for d and removes
   292  // the torrent from disk.
   293  func (s *scheduler) RemoveTorrent(d core.Digest) error {
   294  	// Buffer size of 1 so sends do not block.
   295  	errc := make(chan error, 1)
   296  	if !s.eventLoop.send(removeTorrentEvent{d, errc}) {
   297  		return ErrSchedulerStopped
   298  	}
   299  	return <-errc
   300  }
   301  
   302  // Probe verifies that the scheduler event loop is running and unblocked.
   303  func (s *scheduler) Probe() error {
   304  	return s.eventLoop.sendTimeout(probeEvent{}, s.config.ProbeTimeout)
   305  }
   306  
   307  func (s *scheduler) runEventLoop(aq announcequeue.Queue) {
   308  	defer s.wg.Done()
   309  
   310  	s.eventLoop.run(newState(s, aq))
   311  }
   312  
   313  // listenLoop accepts incoming connections.
   314  func (s *scheduler) listenLoop() {
   315  	defer s.wg.Done()
   316  
   317  	s.log().Infof("Listening on %s", s.listener.Addr().String())
   318  	for {
   319  		nc, err := s.listener.Accept()
   320  		if err != nil {
   321  			// TODO Need some way to make this gracefully exit.
   322  			s.log().Infof("Error accepting new conn, exiting listen loop: %s", err)
   323  			return
   324  		}
   325  		go func() {
   326  			pc, err := s.handshaker.Accept(nc)
   327  			if err != nil {
   328  				s.log().Infof("Error accepting handshake, closing net conn: %s", err)
   329  				nc.Close()
   330  				return
   331  			}
   332  			s.eventLoop.send(incomingHandshakeEvent{pc})
   333  		}()
   334  	}
   335  }
   336  
   337  // tickerLoop periodically emits various tick events.
   338  func (s *scheduler) tickerLoop() {
   339  	defer s.wg.Done()
   340  
   341  	for {
   342  		select {
   343  		case <-s.preemptionTick:
   344  			s.eventLoop.send(preemptionTickEvent{})
   345  		case <-s.emitStatsTick:
   346  			s.eventLoop.send(emitStatsEvent{})
   347  		case <-s.done:
   348  			return
   349  		}
   350  	}
   351  }
   352  
   353  // announceLoop runs the announcer ticker.
   354  func (s *scheduler) announceLoop() {
   355  	defer s.wg.Done()
   356  
   357  	s.announcer.Ticker(s.done)
   358  }
   359  
   360  func (s *scheduler) announce(d core.Digest, h core.InfoHash, complete bool) {
   361  	peers, err := s.announcer.Announce(d, h, complete)
   362  	if err != nil {
   363  		if err != announceclient.ErrDisabled {
   364  			s.eventLoop.send(announceErrEvent{h, err})
   365  		}
   366  		return
   367  	}
   368  	s.eventLoop.send(announceResultEvent{h, peers})
   369  }
   370  
   371  func (s *scheduler) failIncomingHandshake(pc *conn.PendingConn, err error) {
   372  	s.log(
   373  		"peer", pc.PeerID(),
   374  		"hash", pc.InfoHash()).Infof("Error accepting incoming handshake: %s", err)
   375  	pc.Close()
   376  	s.eventLoop.send(failedIncomingHandshakeEvent{pc.PeerID(), pc.InfoHash()})
   377  }
   378  
   379  // establishIncomingHandshake attempts to establish a pending conn initialized
   380  // by a remote peer. Success / failure is communicated via events.
   381  func (s *scheduler) establishIncomingHandshake(pc *conn.PendingConn, rb conn.RemoteBitfields) {
   382  	info, err := s.torrentArchive.Stat(pc.Namespace(), pc.Digest())
   383  	if err != nil {
   384  		s.failIncomingHandshake(pc, fmt.Errorf("torrent stat: %s", err))
   385  		return
   386  	}
   387  	c, err := s.handshaker.Establish(pc, info, rb)
   388  	if err != nil {
   389  		s.failIncomingHandshake(pc, fmt.Errorf("establish handshake: %s", err))
   390  		return
   391  	}
   392  	s.torrentlog.IncomingConnectionAccept(pc.Digest(), pc.InfoHash(), pc.PeerID())
   393  	s.eventLoop.send(incomingConnEvent{pc.Namespace(), c, pc.Bitfield(), info})
   394  }
   395  
   396  // initializeOutgoingHandshake attempts to initialize a conn to a remote peer.
   397  // Success / failure is communicated via events.
   398  func (s *scheduler) initializeOutgoingHandshake(
   399  	p *core.PeerInfo, info *storage.TorrentInfo, rb conn.RemoteBitfields, namespace string) {
   400  
   401  	addr := fmt.Sprintf("%s:%d", p.IP, p.Port)
   402  	result, err := s.handshaker.Initialize(p.PeerID, addr, info, rb, namespace)
   403  	if err != nil {
   404  		s.log(
   405  			"peer", p.PeerID,
   406  			"hash", info.InfoHash(),
   407  			"addr", addr).Infof("Error initializing outgoing handshake: %s", err)
   408  		s.eventLoop.send(failedOutgoingHandshakeEvent{p.PeerID, info.InfoHash()})
   409  		s.torrentlog.OutgoingConnectionReject(info.Digest(), info.InfoHash(), p.PeerID, err)
   410  		return
   411  	}
   412  	s.torrentlog.OutgoingConnectionAccept(info.Digest(), info.InfoHash(), p.PeerID)
   413  	s.eventLoop.send(outgoingConnEvent{result.Conn, result.Bitfield, info})
   414  }
   415  
   416  func (s *scheduler) log(args ...interface{}) *zap.SugaredLogger {
   417  	return s.logger.With(args...)
   418  }