// Source: github.com/uber/kraken@v0.1.4/lib/torrent/scheduler/scheduler.go

// Copyright (c) 2016-2019 Uber Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scheduler

import (
	"errors"
	"fmt"
	"net"
	"sync"
	"time"

	"github.com/andres-erbsen/clock"
	"github.com/uber-go/tally"
	"go.uber.org/zap"

	"github.com/uber/kraken/core"
	"github.com/uber/kraken/lib/torrent/networkevent"
	"github.com/uber/kraken/lib/torrent/scheduler/announcequeue"
	"github.com/uber/kraken/lib/torrent/scheduler/announcer"
	"github.com/uber/kraken/lib/torrent/scheduler/conn"
	"github.com/uber/kraken/lib/torrent/scheduler/connstate"
	"github.com/uber/kraken/lib/torrent/scheduler/torrentlog"
	"github.com/uber/kraken/lib/torrent/storage"
	"github.com/uber/kraken/tracker/announceclient"
	"github.com/uber/kraken/utils/log"
)

// Scheduler errors.
var (
	// ErrTorrentNotFound is returned by Download when the torrent archive
	// reports storage.ErrNotFound for the requested digest.
	ErrTorrentNotFound = errors.New("torrent not found")

	// ErrSchedulerStopped is returned when an event could not be handed to
	// the event loop.
	ErrSchedulerStopped = errors.New("scheduler has been stopped")

	// ErrTorrentTimeout is tagged as "timeout" in the download error metrics.
	// NOTE(review): the code which produces it is not in this file --
	// presumably the event loop state; confirm.
	ErrTorrentTimeout = errors.New("torrent timed out")

	// ErrTorrentRemoved is tagged as "removed" in the download error metrics.
	// NOTE(review): presumably delivered to in-flight downloads when
	// RemoveTorrent is called; confirm against the event handlers.
	ErrTorrentRemoved = errors.New("torrent manually removed")

	// ErrSendEventTimedOut is not referenced in this file.
	// NOTE(review): presumably returned by the event loop's sendTimeout (see
	// Probe); confirm.
	ErrSendEventTimedOut = errors.New("event loop send timed out")
)

// Scheduler defines operations for scheduler.
49 type Scheduler interface { 50 Stop() 51 Download(namespace string, d core.Digest) error 52 BlacklistSnapshot() ([]connstate.BlacklistedConn, error) 53 RemoveTorrent(d core.Digest) error 54 Probe() error 55 } 56 57 // scheduler manages global state for the peer. This includes: 58 // - Opening torrents. 59 // - Announcing to the tracker. 60 // - Handshaking incoming connections. 61 // - Initializing outgoing connections. 62 // - Dispatching connections to torrents. 63 // - Pre-empting existing connections when better options are available (TODO). 64 type scheduler struct { 65 pctx core.PeerContext 66 config Config 67 clock clock.Clock 68 torrentArchive storage.TorrentArchive 69 stats tally.Scope 70 71 handshaker *conn.Handshaker 72 73 eventLoop *liftedEventLoop 74 75 listener net.Listener 76 77 preemptionTick <-chan time.Time 78 emitStatsTick <-chan time.Time 79 80 // TODO(codyg): We only need this hold on this reference for reloading the scheduler... 81 announceClient announceclient.Client 82 83 announcer *announcer.Announcer 84 85 netevents networkevent.Producer 86 87 torrentlog *torrentlog.Logger 88 89 logger *zap.SugaredLogger 90 91 // The following fields orchestrate the stopping of the scheduler. 92 stopOnce sync.Once // Ensures the stop sequence is executed only once. 93 done chan struct{} // Signals all goroutines to exit. 94 wg sync.WaitGroup // Waits for eventLoop and listenLoop to exit. 95 } 96 97 // schedOverrides defines scheduler fields which may be overrided for testing 98 // purposes. 99 type schedOverrides struct { 100 clock clock.Clock 101 eventLoop eventLoop 102 } 103 104 type option func(*schedOverrides) 105 106 func withClock(c clock.Clock) option { 107 return func(o *schedOverrides) { o.clock = c } 108 } 109 110 func withEventLoop(l eventLoop) option { 111 return func(o *schedOverrides) { o.eventLoop = l } 112 } 113 114 // newScheduler creates and starts a scheduler. 
115 func newScheduler( 116 config Config, 117 ta storage.TorrentArchive, 118 stats tally.Scope, 119 pctx core.PeerContext, 120 announceClient announceclient.Client, 121 netevents networkevent.Producer, 122 options ...option) (*scheduler, error) { 123 124 config = config.applyDefaults() 125 126 logger, err := log.New(config.Log, nil) 127 if err != nil { 128 return nil, fmt.Errorf("log: %s", err) 129 } 130 slogger := logger.Sugar() 131 132 done := make(chan struct{}) 133 134 stats = stats.Tagged(map[string]string{ 135 "module": "scheduler", 136 }) 137 138 overrides := schedOverrides{ 139 clock: clock.New(), 140 eventLoop: newEventLoop(), 141 } 142 for _, opt := range options { 143 opt(&overrides) 144 } 145 146 eventLoop := liftEventLoop(overrides.eventLoop) 147 148 var preemptionTick <-chan time.Time 149 if !config.DisablePreemption { 150 preemptionTick = overrides.clock.Tick(config.PreemptionInterval) 151 } 152 153 handshaker, err := conn.NewHandshaker( 154 config.Conn, stats, overrides.clock, netevents, pctx.PeerID, eventLoop, slogger) 155 if err != nil { 156 return nil, fmt.Errorf("conn: %s", err) 157 } 158 159 tlog, err := torrentlog.New(config.TorrentLog, pctx) 160 if err != nil { 161 return nil, fmt.Errorf("torrentlog: %s", err) 162 } 163 164 s := &scheduler{ 165 pctx: pctx, 166 config: config, 167 clock: overrides.clock, 168 torrentArchive: ta, 169 stats: stats, 170 handshaker: handshaker, 171 eventLoop: eventLoop, 172 preemptionTick: preemptionTick, 173 emitStatsTick: overrides.clock.Tick(config.EmitStatsInterval), 174 announceClient: announceClient, 175 announcer: announcer.Default(announceClient, eventLoop, overrides.clock, slogger), 176 netevents: netevents, 177 torrentlog: tlog, 178 logger: slogger, 179 done: done, 180 } 181 182 if config.DisablePreemption { 183 s.log().Warn("Preemption disabled") 184 } 185 if config.ConnState.DisableBlacklist { 186 s.log().Warn("Blacklisting disabled") 187 } 188 189 return s, nil 190 } 191 192 // start asynchronously 
starts all scheduler loops. 193 // 194 // Note: this has been split from the constructor so we can test against an 195 // "unstarted" scheduler in certain cases. 196 func (s *scheduler) start(aq announcequeue.Queue) error { 197 s.log().Infof( 198 "Scheduler starting as peer %s on addr %s:%d", 199 s.pctx.PeerID, s.pctx.IP, s.pctx.Port) 200 201 l, err := net.Listen("tcp", fmt.Sprintf(":%d", s.pctx.Port)) 202 if err != nil { 203 return err 204 } 205 s.listener = l 206 207 s.wg.Add(4) 208 go s.runEventLoop(aq) // Careful, this should be the only reference to aq. 209 go s.listenLoop() 210 go s.tickerLoop() 211 go s.announceLoop() 212 213 return nil 214 } 215 216 // Stop shuts down the scheduler. 217 func (s *scheduler) Stop() { 218 s.stopOnce.Do(func() { 219 s.log().Info("Stopping scheduler...") 220 221 close(s.done) 222 s.listener.Close() 223 s.eventLoop.send(shutdownEvent{}) 224 225 // Waits for all loops to stop. 226 s.wg.Wait() 227 228 s.torrentlog.Sync() 229 230 s.log().Info("Scheduler stopped") 231 }) 232 } 233 234 func (s *scheduler) doDownload(namespace string, d core.Digest) (size int64, err error) { 235 t, err := s.torrentArchive.CreateTorrent(namespace, d) 236 if err != nil { 237 if err == storage.ErrNotFound { 238 return 0, ErrTorrentNotFound 239 } 240 return 0, fmt.Errorf("create torrent: %s", err) 241 } 242 243 // Buffer size of 1 so sends do not block. 244 errc := make(chan error, 1) 245 if !s.eventLoop.send(newTorrentEvent{namespace, t, errc}) { 246 return 0, ErrSchedulerStopped 247 } 248 return t.Length(), <-errc 249 } 250 251 // Download downloads the torrent given metainfo. Once the torrent is downloaded, 252 // it will begin seeding asynchronously. 
253 func (s *scheduler) Download(namespace string, d core.Digest) error { 254 start := time.Now() 255 size, err := s.doDownload(namespace, d) 256 if err != nil { 257 var errTag string 258 switch err { 259 case ErrTorrentNotFound: 260 errTag = "not_found" 261 case ErrTorrentTimeout: 262 errTag = "timeout" 263 case ErrSchedulerStopped: 264 errTag = "scheduler_stopped" 265 case ErrTorrentRemoved: 266 errTag = "removed" 267 default: 268 errTag = "unknown" 269 } 270 s.stats.Tagged(map[string]string{ 271 "error": errTag, 272 }).Counter("download_errors").Inc(1) 273 s.torrentlog.DownloadFailure(namespace, d, size, err) 274 } else { 275 downloadTime := time.Since(start) 276 recordDownloadTime(s.stats, size, downloadTime) 277 s.torrentlog.DownloadSuccess(namespace, d, size, downloadTime) 278 } 279 return err 280 } 281 282 // BlacklistSnapshot returns a snapshot of the current connection blacklist. 283 func (s *scheduler) BlacklistSnapshot() ([]connstate.BlacklistedConn, error) { 284 result := make(chan []connstate.BlacklistedConn) 285 if !s.eventLoop.send(blacklistSnapshotEvent{result}) { 286 return nil, ErrSchedulerStopped 287 } 288 return <-result, nil 289 } 290 291 // RemoveTorrent forcibly stops leeching / seeding torrent for d and removes 292 // the torrent from disk. 293 func (s *scheduler) RemoveTorrent(d core.Digest) error { 294 // Buffer size of 1 so sends do not block. 295 errc := make(chan error, 1) 296 if !s.eventLoop.send(removeTorrentEvent{d, errc}) { 297 return ErrSchedulerStopped 298 } 299 return <-errc 300 } 301 302 // Probe verifies that the scheduler event loop is running and unblocked. 303 func (s *scheduler) Probe() error { 304 return s.eventLoop.sendTimeout(probeEvent{}, s.config.ProbeTimeout) 305 } 306 307 func (s *scheduler) runEventLoop(aq announcequeue.Queue) { 308 defer s.wg.Done() 309 310 s.eventLoop.run(newState(s, aq)) 311 } 312 313 // listenLoop accepts incoming connections. 
314 func (s *scheduler) listenLoop() { 315 defer s.wg.Done() 316 317 s.log().Infof("Listening on %s", s.listener.Addr().String()) 318 for { 319 nc, err := s.listener.Accept() 320 if err != nil { 321 // TODO Need some way to make this gracefully exit. 322 s.log().Infof("Error accepting new conn, exiting listen loop: %s", err) 323 return 324 } 325 go func() { 326 pc, err := s.handshaker.Accept(nc) 327 if err != nil { 328 s.log().Infof("Error accepting handshake, closing net conn: %s", err) 329 nc.Close() 330 return 331 } 332 s.eventLoop.send(incomingHandshakeEvent{pc}) 333 }() 334 } 335 } 336 337 // tickerLoop periodically emits various tick events. 338 func (s *scheduler) tickerLoop() { 339 defer s.wg.Done() 340 341 for { 342 select { 343 case <-s.preemptionTick: 344 s.eventLoop.send(preemptionTickEvent{}) 345 case <-s.emitStatsTick: 346 s.eventLoop.send(emitStatsEvent{}) 347 case <-s.done: 348 return 349 } 350 } 351 } 352 353 // announceLoop runs the announcer ticker. 354 func (s *scheduler) announceLoop() { 355 defer s.wg.Done() 356 357 s.announcer.Ticker(s.done) 358 } 359 360 func (s *scheduler) announce(d core.Digest, h core.InfoHash, complete bool) { 361 peers, err := s.announcer.Announce(d, h, complete) 362 if err != nil { 363 if err != announceclient.ErrDisabled { 364 s.eventLoop.send(announceErrEvent{h, err}) 365 } 366 return 367 } 368 s.eventLoop.send(announceResultEvent{h, peers}) 369 } 370 371 func (s *scheduler) failIncomingHandshake(pc *conn.PendingConn, err error) { 372 s.log( 373 "peer", pc.PeerID(), 374 "hash", pc.InfoHash()).Infof("Error accepting incoming handshake: %s", err) 375 pc.Close() 376 s.eventLoop.send(failedIncomingHandshakeEvent{pc.PeerID(), pc.InfoHash()}) 377 } 378 379 // establishIncomingHandshake attempts to establish a pending conn initialized 380 // by a remote peer. Success / failure is communicated via events. 
381 func (s *scheduler) establishIncomingHandshake(pc *conn.PendingConn, rb conn.RemoteBitfields) { 382 info, err := s.torrentArchive.Stat(pc.Namespace(), pc.Digest()) 383 if err != nil { 384 s.failIncomingHandshake(pc, fmt.Errorf("torrent stat: %s", err)) 385 return 386 } 387 c, err := s.handshaker.Establish(pc, info, rb) 388 if err != nil { 389 s.failIncomingHandshake(pc, fmt.Errorf("establish handshake: %s", err)) 390 return 391 } 392 s.torrentlog.IncomingConnectionAccept(pc.Digest(), pc.InfoHash(), pc.PeerID()) 393 s.eventLoop.send(incomingConnEvent{pc.Namespace(), c, pc.Bitfield(), info}) 394 } 395 396 // initializeOutgoingHandshake attempts to initialize a conn to a remote peer. 397 // Success / failure is communicated via events. 398 func (s *scheduler) initializeOutgoingHandshake( 399 p *core.PeerInfo, info *storage.TorrentInfo, rb conn.RemoteBitfields, namespace string) { 400 401 addr := fmt.Sprintf("%s:%d", p.IP, p.Port) 402 result, err := s.handshaker.Initialize(p.PeerID, addr, info, rb, namespace) 403 if err != nil { 404 s.log( 405 "peer", p.PeerID, 406 "hash", info.InfoHash(), 407 "addr", addr).Infof("Error initializing outgoing handshake: %s", err) 408 s.eventLoop.send(failedOutgoingHandshakeEvent{p.PeerID, info.InfoHash()}) 409 s.torrentlog.OutgoingConnectionReject(info.Digest(), info.InfoHash(), p.PeerID, err) 410 return 411 } 412 s.torrentlog.OutgoingConnectionAccept(info.Digest(), info.InfoHash(), p.PeerID) 413 s.eventLoop.send(outgoingConnEvent{result.Conn, result.Bitfield, info}) 414 } 415 416 func (s *scheduler) log(args ...interface{}) *zap.SugaredLogger { 417 return s.logger.With(args...) 418 }