github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/httpserver/worker.go (about) 1 // Copyright 2018 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package httpserver 5 6 import ( 7 "context" 8 "crypto/tls" 9 "fmt" 10 "io" 11 "log" 12 "net" 13 "net/http" 14 "os" 15 "path/filepath" 16 "runtime/pprof" 17 "strconv" 18 "sync" 19 "time" 20 21 "github.com/juju/clock" 22 "github.com/juju/errors" 23 "github.com/juju/loggo" 24 "github.com/juju/pubsub" 25 "github.com/prometheus/client_golang/prometheus" 26 "gopkg.in/juju/worker.v1/catacomb" 27 28 "github.com/juju/juju/apiserver/apiserverhttp" 29 "github.com/juju/juju/pubsub/apiserver" 30 ) 31 32 var logger = loggo.GetLogger("juju.worker.httpserver") 33 34 // Config is the configuration required for running an API server worker. 35 type Config struct { 36 AgentName string 37 Clock clock.Clock 38 TLSConfig *tls.Config 39 Mux *apiserverhttp.Mux 40 MuxShutdownWait time.Duration 41 LogDir string 42 PrometheusRegisterer prometheus.Registerer 43 Hub *pubsub.StructuredHub 44 APIPort int 45 APIPortOpenDelay time.Duration 46 ControllerAPIPort int 47 } 48 49 // Validate validates the API server configuration. 50 func (config Config) Validate() error { 51 if config.AgentName == "" { 52 return errors.NotValidf("empty AgentName") 53 } 54 if config.TLSConfig == nil { 55 return errors.NotValidf("nil TLSConfig") 56 } 57 if config.Mux == nil { 58 return errors.NotValidf("nil Mux") 59 } 60 if config.PrometheusRegisterer == nil { 61 return errors.NotValidf("nil PrometheusRegisterer") 62 } 63 return nil 64 } 65 66 // NewWorker returns a new API server worker, with the given configuration. 67 func NewWorker(config Config) (*Worker, error) { 68 if err := config.Validate(); err != nil { 69 return nil, errors.Trace(err) 70 } 71 72 w := &Worker{ 73 config: config, 74 url: make(chan string), 75 status: "starting", 76 } 77 var err error 78 var listener listener 79 if w.config.ControllerAPIPort == 0 { 80 listener, err = w.newSimpleListener() 81 } else { 82 listener, err = w.newDualPortListener() 83 } 84 if err != nil { 85 return nil, errors.Trace(err) 86 } 87 w.holdable = newHeldListener(listener, config.Clock) 88 89 if err := catacomb.Invoke(catacomb.Plan{ 90 Site: &w.catacomb, 91 Work: w.loop, 92 }); err != nil { 93 listener.Close() 94 return nil, errors.Trace(err) 95 } 96 return w, nil 97 } 98 99 type reporter interface { 100 report() map[string]interface{} 101 } 102 103 type Worker struct { 104 catacomb catacomb.Catacomb 105 config Config 106 url chan string 107 holdable *heldListener 108 109 // mu controls access to both status and reporter. 110 mu sync.Mutex 111 status string 112 } 113 114 // Kill implements worker.Kill. 115 func (w *Worker) Kill() { 116 w.catacomb.Kill(nil) 117 } 118 119 // Wait implements worker.Wait. 120 func (w *Worker) Wait() error { 121 return w.catacomb.Wait() 122 } 123 124 // Report provides information for the engine report. 125 func (w *Worker) Report() map[string]interface{} { 126 w.mu.Lock() 127 result := map[string]interface{}{ 128 "api-port": w.config.APIPort, 129 "status": w.status, 130 "ports": w.holdable.report(), 131 } 132 if w.config.ControllerAPIPort != 0 { 133 result["api-port-open-delay"] = w.config.APIPortOpenDelay 134 result["controller-api-port"] = w.config.ControllerAPIPort 135 } 136 w.mu.Unlock() 137 return result 138 } 139 140 // URL returns the base URL of the HTTP server of the form 141 // https://ipaddr:port with no trailing slash. 142 func (w *Worker) URL() string { 143 select { 144 case <-w.catacomb.Dying(): 145 return "" 146 case url := <-w.url: 147 return url 148 } 149 } 150 151 func (w *Worker) loop() error { 152 serverLog := log.New(&loggoWrapper{ 153 level: loggo.WARNING, 154 logger: logger, 155 }, "", 0) // no prefix and no flags so log.Logger doesn't add extra prefixes 156 server := &http.Server{ 157 Handler: w.config.Mux, 158 TLSConfig: w.config.TLSConfig, 159 ErrorLog: serverLog, 160 } 161 go func() { 162 err := server.Serve(tls.NewListener(w.holdable, w.config.TLSConfig)) 163 if err != nil && err != http.ErrServerClosed { 164 logger.Errorf("server finished with error %v", err) 165 } 166 }() 167 defer func() { 168 logger.Infof("shutting down HTTP server") 169 // Shutting down the server will also close listener. 170 err := server.Shutdown(context.Background()) 171 // Release the holdable listener to unblock any pending accepts. 172 w.holdable.release() 173 w.catacomb.Kill(err) 174 }() 175 176 w.mu.Lock() 177 w.status = "running" 178 w.mu.Unlock() 179 180 for { 181 select { 182 case <-w.catacomb.Dying(): 183 w.mu.Lock() 184 w.status = "dying" 185 w.mu.Unlock() 186 // Stop accepting new connections. This allows the mux 187 // to process all pending requests without having to deal with 188 // new ones. 189 w.holdable.hold() 190 return w.shutdown() 191 case w.url <- w.holdable.URL(): 192 } 193 } 194 } 195 196 func (w *Worker) shutdown() error { 197 muxDone := make(chan struct{}) 198 go func() { 199 // Asked to shutdown - make sure we wait until all clients 200 // have finished up. 201 w.config.Mux.Wait() 202 close(muxDone) 203 }() 204 select { 205 case <-muxDone: 206 case <-w.config.Clock.After(w.config.MuxShutdownWait): 207 msg := "timeout waiting for apiserver shutdown" 208 dumpFile, err := w.dumpDebug() 209 if err == nil { 210 logger.Warningf("%v\ndebug info written to %v", msg, dumpFile) 211 } else { 212 logger.Warningf("%v\nerror writing debug info: %v", msg, err) 213 } 214 } 215 return w.catacomb.ErrDying() 216 } 217 218 func (w *Worker) dumpDebug() (string, error) { 219 dumpFile, err := os.OpenFile(filepath.Join(w.config.LogDir, "apiserver-debug.log"), os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) 220 if err != nil { 221 return "", errors.Trace(err) 222 } 223 defer dumpFile.Close() 224 if _, err = io.WriteString(dumpFile, fmt.Sprintf("goroutime dump %v\n", time.Now().Format(time.RFC3339))); err != nil { 225 return "", errors.Annotate(err, "writing header to apiserver log file") 226 } 227 return dumpFile.Name(), pprof.Lookup("goroutine").WriteTo(dumpFile, 1) 228 } 229 230 type heldListener struct { 231 listener 232 clock clock.Clock 233 cond *sync.Cond 234 held bool 235 } 236 237 func newHeldListener(l listener, c clock.Clock) *heldListener { 238 var mu sync.Mutex 239 return &heldListener{ 240 listener: l, 241 clock: c, 242 cond: sync.NewCond(&mu), 243 } 244 } 245 246 func (h *heldListener) report() map[string]interface{} { 247 result := h.listener.report() 248 h.cond.L.Lock() 249 if h.held { 250 result["held"] = true 251 } 252 h.cond.L.Unlock() 253 return result 254 } 255 256 func (h *heldListener) hold() { 257 h.cond.L.Lock() 258 defer h.cond.L.Unlock() 259 h.held = true 260 // No need to signal the cond here, since nothing that's waiting 261 // for the listener to be unheld can run. 262 } 263 264 func (h *heldListener) release() { 265 h.cond.L.Lock() 266 defer h.cond.L.Unlock() 267 h.held = false 268 // Wake up any goroutines waiting for held to be false. 269 h.cond.Broadcast() 270 } 271 272 func (h *heldListener) Accept() (net.Conn, error) { 273 h.cond.L.Lock() 274 for h.held { 275 h.cond.Wait() 276 } 277 h.cond.L.Unlock() 278 return h.listener.Accept() 279 } 280 281 type listener interface { 282 net.Listener 283 reporter 284 URL() string 285 } 286 287 func (w *Worker) newSimpleListener() (listener, error) { 288 listenAddr := net.JoinHostPort("", strconv.Itoa(w.config.APIPort)) 289 listener, err := net.Listen("tcp", listenAddr) 290 if err != nil { 291 return nil, errors.Trace(err) 292 } 293 logger.Infof("listening on %q", listener.Addr()) 294 return &simpleListener{listener}, nil 295 } 296 297 type simpleListener struct { 298 net.Listener 299 } 300 301 func (s *simpleListener) URL() string { 302 return fmt.Sprintf("https://%s", s.Addr()) 303 } 304 305 func (s *simpleListener) report() map[string]interface{} { 306 return map[string]interface{}{ 307 "listening": s.Addr().String(), 308 } 309 } 310 311 func (w *Worker) newDualPortListener() (listener, error) { 312 // Only open the controller port until we have been told that 313 // the controller is ready. This is currently done by the event 314 // from the peergrouper. 315 // TODO (thumper): make the raft worker publish an event when 316 // it knows who the raft master is. This means that this controller 317 // is part of the consensus set, and when it is, is is OK to accept 318 // agent connections. Until that time, accepting an agent connection 319 // would be a bit of a waste of time. 320 listenAddr := net.JoinHostPort("", strconv.Itoa(w.config.ControllerAPIPort)) 321 listener, err := net.Listen("tcp", listenAddr) 322 logger.Infof("listening for controller connections on %q", listener.Addr()) 323 dual := &dualListener{ 324 agentName: w.config.AgentName, 325 clock: w.config.Clock, 326 delay: w.config.APIPortOpenDelay, 327 apiPort: w.config.APIPort, 328 controllerListener: listener, 329 status: "waiting for signal to open agent port", 330 done: make(chan struct{}), 331 errors: make(chan error), 332 connections: make(chan net.Conn), 333 } 334 go dual.accept(listener) 335 336 dual.unsub, err = w.config.Hub.Subscribe(apiserver.ConnectTopic, dual.openAPIPort) 337 if err != nil { 338 dual.Close() 339 return nil, errors.Annotate(err, "unable to subscribe to details topic") 340 } 341 342 return dual, err 343 } 344 345 type dualListener struct { 346 agentName string 347 clock clock.Clock 348 delay time.Duration 349 apiPort int 350 351 controllerListener net.Listener 352 apiListener net.Listener 353 status string 354 355 mu sync.Mutex 356 closer sync.Once 357 358 done chan struct{} 359 errors chan error 360 connections chan net.Conn 361 362 unsub func() 363 } 364 365 func (d *dualListener) report() map[string]interface{} { 366 result := map[string]interface{}{ 367 "controller": d.controllerListener.Addr().String(), 368 } 369 d.mu.Lock() 370 defer d.mu.Unlock() 371 if d.status != "" { 372 result["status"] = d.status 373 } 374 if d.apiListener != nil { 375 result["agent"] = d.apiListener.Addr().String() 376 } 377 return result 378 } 379 380 func (d *dualListener) accept(listener net.Listener) { 381 for { 382 conn, err := listener.Accept() 383 if err != nil { 384 select { 385 case d.errors <- err: 386 case <-d.done: 387 logger.Infof("no longer accepting connections on %s", listener.Addr()) 388 return 389 } 390 } else { 391 select { 392 case d.connections <- conn: 393 case <-d.done: 394 conn.Close() 395 logger.Infof("no longer accepting connections on %s", listener.Addr()) 396 return 397 } 398 } 399 } 400 } 401 402 // Accept implements net.Listener. 403 func (d *dualListener) Accept() (net.Conn, error) { 404 select { 405 case <-d.done: 406 return nil, errors.New("listener has been closed") 407 case err := <-d.errors: 408 // Don't wrap this error with errors.Trace - the stdlib http 409 // server code has handling for various net error types (like 410 // temporary failures) that we don't want to interfere with. 411 return nil, err 412 case conn := <-d.connections: 413 // Due to the non-deterministic nature of select, it is possible 414 // that if there was a pending accept call we may get a connection 415 // even though we are done. So check that before we return 416 // the conn. 417 select { 418 case <-d.done: 419 conn.Close() 420 return nil, errors.New("listener has been closed") 421 default: 422 return conn, nil 423 } 424 } 425 } 426 427 // Close implements net.Listener. Closes all the open listeners. 428 func (d *dualListener) Close() error { 429 // Only close the channel once. 430 d.closer.Do(func() { close(d.done) }) 431 err := d.controllerListener.Close() 432 d.mu.Lock() 433 defer d.mu.Unlock() 434 if d.apiListener != nil { 435 err2 := d.apiListener.Close() 436 if err == nil { 437 err = err2 438 } 439 // If we already have a close error, we don't really care 440 // about this one. 441 } 442 d.status = "closed ports" 443 return errors.Trace(err) 444 } 445 446 // Addr implements net.Listener. If the api port has been opened, we 447 // return that, otherwise we return the controller port address. 448 func (d *dualListener) Addr() net.Addr { 449 d.mu.Lock() 450 defer d.mu.Unlock() 451 if d.apiListener != nil { 452 return d.apiListener.Addr() 453 } 454 return d.controllerListener.Addr() 455 } 456 457 // URL implements the listener method. 458 func (d *dualListener) URL() string { 459 return fmt.Sprintf("https://%s", d.Addr()) 460 } 461 462 // openAPIPort opens the api port and starts accepting connections. 463 func (d *dualListener) openAPIPort(topic string, conn apiserver.APIConnection, err error) { 464 if err != nil { 465 logger.Errorf("programming error: %v", err) 466 return 467 } 468 // We are wanting to make sure that the api-caller has connected before we 469 // open the api port. Each api connection is published with the origin tag. 470 // Any origin that matches our agent name means that someone has connected 471 // to us. We need to also check which agent connected as it is possible that 472 // one of the other HA controller could connect before we connect to 473 // ourselves. 474 if conn.Origin != d.agentName || conn.AgentTag != d.agentName { 475 return 476 } 477 478 d.unsub() 479 if d.delay > 0 { 480 d.mu.Lock() 481 d.status = "waiting prior to opening agent port" 482 d.mu.Unlock() 483 logger.Infof("waiting for %s before allowing api connections", d.delay) 484 <-d.clock.After(d.delay) 485 } 486 487 d.mu.Lock() 488 defer d.mu.Unlock() 489 // Make sure we haven't been closed already. 490 select { 491 case <-d.done: 492 return 493 default: 494 // We are all good. 495 } 496 497 listenAddr := net.JoinHostPort("", strconv.Itoa(d.apiPort)) 498 listener, err := net.Listen("tcp", listenAddr) 499 if err != nil { 500 select { 501 case d.errors <- err: 502 case <-d.done: 503 logger.Errorf("can't open api port: %v, but worker exiting already", err) 504 } 505 return 506 } 507 508 logger.Infof("listening for api connections on %q", listener.Addr()) 509 d.apiListener = listener 510 go d.accept(listener) 511 d.status = "" 512 }