github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/httpserver/worker.go (about)

     1  // Copyright 2018 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package httpserver
     5  
     6  import (
     7  	"context"
     8  	"crypto/tls"
     9  	"fmt"
    10  	"io"
    11  	"log"
    12  	"net"
    13  	"net/http"
    14  	"os"
    15  	"path/filepath"
    16  	"runtime/pprof"
    17  	"strconv"
    18  	"sync"
    19  	"time"
    20  
    21  	"github.com/juju/clock"
    22  	"github.com/juju/errors"
    23  	"github.com/juju/loggo"
    24  	"github.com/juju/pubsub"
    25  	"github.com/prometheus/client_golang/prometheus"
    26  	"gopkg.in/juju/worker.v1/catacomb"
    27  
    28  	"github.com/juju/juju/apiserver/apiserverhttp"
    29  	"github.com/juju/juju/pubsub/apiserver"
    30  )
    31  
// logger is the package-level logger, registered under the
// "juju.worker.httpserver" module name.
var logger = loggo.GetLogger("juju.worker.httpserver")
    33  
// Config is the configuration required for running an API server worker.
//
// How the fields are used by the code visible in this file:
//   - AgentName must be non-empty; the dual-port listener compares it with
//     the Origin and AgentTag of published API connections.
//   - Clock supplies the timers for the shutdown timeout and for the API
//     port open delay.
//   - TLSConfig is given to both the http.Server and the TLS listener that
//     wraps the worker's listener; it must be non-nil.
//   - Mux is the handler that is served, and shutdown waits on it; it must
//     be non-nil.
//   - MuxShutdownWait bounds how long shutdown waits for Mux before a
//     debug dump is written.
//   - LogDir is the directory in which "apiserver-debug.log" is written.
//   - PrometheusRegisterer must be non-nil (Validate checks it; it is not
//     otherwise referenced in the code visible here).
//   - Hub is subscribed to (apiserver.ConnectTopic) when ControllerAPIPort
//     is non-zero.
//   - APIPort is the API port to listen on. With a non-zero
//     ControllerAPIPort it is only opened after the connect event has been
//     seen and APIPortOpenDelay has elapsed.
//   - ControllerAPIPort, when non-zero, selects the dual-port listener and
//     is the port opened at start-up.
type Config struct {
	AgentName            string
	Clock                clock.Clock
	TLSConfig            *tls.Config
	Mux                  *apiserverhttp.Mux
	MuxShutdownWait      time.Duration
	LogDir               string
	PrometheusRegisterer prometheus.Registerer
	Hub                  *pubsub.StructuredHub
	APIPort              int
	APIPortOpenDelay     time.Duration
	ControllerAPIPort    int
}
    48  
    49  // Validate validates the API server configuration.
    50  func (config Config) Validate() error {
    51  	if config.AgentName == "" {
    52  		return errors.NotValidf("empty AgentName")
    53  	}
    54  	if config.TLSConfig == nil {
    55  		return errors.NotValidf("nil TLSConfig")
    56  	}
    57  	if config.Mux == nil {
    58  		return errors.NotValidf("nil Mux")
    59  	}
    60  	if config.PrometheusRegisterer == nil {
    61  		return errors.NotValidf("nil PrometheusRegisterer")
    62  	}
    63  	return nil
    64  }
    65  
    66  // NewWorker returns a new API server worker, with the given configuration.
    67  func NewWorker(config Config) (*Worker, error) {
    68  	if err := config.Validate(); err != nil {
    69  		return nil, errors.Trace(err)
    70  	}
    71  
    72  	w := &Worker{
    73  		config: config,
    74  		url:    make(chan string),
    75  		status: "starting",
    76  	}
    77  	var err error
    78  	var listener listener
    79  	if w.config.ControllerAPIPort == 0 {
    80  		listener, err = w.newSimpleListener()
    81  	} else {
    82  		listener, err = w.newDualPortListener()
    83  	}
    84  	if err != nil {
    85  		return nil, errors.Trace(err)
    86  	}
    87  	w.holdable = newHeldListener(listener, config.Clock)
    88  
    89  	if err := catacomb.Invoke(catacomb.Plan{
    90  		Site: &w.catacomb,
    91  		Work: w.loop,
    92  	}); err != nil {
    93  		listener.Close()
    94  		return nil, errors.Trace(err)
    95  	}
    96  	return w, nil
    97  }
    98  
// reporter is implemented by the listeners in this file; report returns a
// map describing the listener's state, which Worker.Report includes under
// its "ports" key.
type reporter interface {
	report() map[string]interface{}
}
   102  
// Worker serves the configured Mux over TLS on the listener(s) opened by
// NewWorker, and runs until its catacomb is killed.
type Worker struct {
	catacomb catacomb.Catacomb
	config   Config
	// url is an unbuffered channel on which the main loop offers the
	// listener's current URL; URL() receives from it.
	url chan string
	// holdable wraps the real listener so that accepting new connections
	// can be paused while the worker is shutting down.
	holdable *heldListener

	// mu guards status. Report also holds it while building its result.
	mu     sync.Mutex
	status string
}
   113  
// Kill implements worker.Kill. It kills the worker's catacomb with a nil
// error; the main loop reacts to the catacomb dying by shutting down.
func (w *Worker) Kill() {
	w.catacomb.Kill(nil)
}
   118  
// Wait implements worker.Wait. It returns whatever the worker's catacomb
// Wait returns.
func (w *Worker) Wait() error {
	return w.catacomb.Wait()
}
   123  
   124  // Report provides information for the engine report.
   125  func (w *Worker) Report() map[string]interface{} {
   126  	w.mu.Lock()
   127  	result := map[string]interface{}{
   128  		"api-port": w.config.APIPort,
   129  		"status":   w.status,
   130  		"ports":    w.holdable.report(),
   131  	}
   132  	if w.config.ControllerAPIPort != 0 {
   133  		result["api-port-open-delay"] = w.config.APIPortOpenDelay
   134  		result["controller-api-port"] = w.config.ControllerAPIPort
   135  	}
   136  	w.mu.Unlock()
   137  	return result
   138  }
   139  
   140  // URL returns the base URL of the HTTP server of the form
   141  // https://ipaddr:port with no trailing slash.
   142  func (w *Worker) URL() string {
   143  	select {
   144  	case <-w.catacomb.Dying():
   145  		return ""
   146  	case url := <-w.url:
   147  		return url
   148  	}
   149  }
   150  
   151  func (w *Worker) loop() error {
   152  	serverLog := log.New(&loggoWrapper{
   153  		level:  loggo.WARNING,
   154  		logger: logger,
   155  	}, "", 0) // no prefix and no flags so log.Logger doesn't add extra prefixes
   156  	server := &http.Server{
   157  		Handler:   w.config.Mux,
   158  		TLSConfig: w.config.TLSConfig,
   159  		ErrorLog:  serverLog,
   160  	}
   161  	go func() {
   162  		err := server.Serve(tls.NewListener(w.holdable, w.config.TLSConfig))
   163  		if err != nil && err != http.ErrServerClosed {
   164  			logger.Errorf("server finished with error %v", err)
   165  		}
   166  	}()
   167  	defer func() {
   168  		logger.Infof("shutting down HTTP server")
   169  		// Shutting down the server will also close listener.
   170  		err := server.Shutdown(context.Background())
   171  		// Release the holdable listener to unblock any pending accepts.
   172  		w.holdable.release()
   173  		w.catacomb.Kill(err)
   174  	}()
   175  
   176  	w.mu.Lock()
   177  	w.status = "running"
   178  	w.mu.Unlock()
   179  
   180  	for {
   181  		select {
   182  		case <-w.catacomb.Dying():
   183  			w.mu.Lock()
   184  			w.status = "dying"
   185  			w.mu.Unlock()
   186  			// Stop accepting new connections. This allows the mux
   187  			// to process all pending requests without having to deal with
   188  			// new ones.
   189  			w.holdable.hold()
   190  			return w.shutdown()
   191  		case w.url <- w.holdable.URL():
   192  		}
   193  	}
   194  }
   195  
   196  func (w *Worker) shutdown() error {
   197  	muxDone := make(chan struct{})
   198  	go func() {
   199  		// Asked to shutdown - make sure we wait until all clients
   200  		// have finished up.
   201  		w.config.Mux.Wait()
   202  		close(muxDone)
   203  	}()
   204  	select {
   205  	case <-muxDone:
   206  	case <-w.config.Clock.After(w.config.MuxShutdownWait):
   207  		msg := "timeout waiting for apiserver shutdown"
   208  		dumpFile, err := w.dumpDebug()
   209  		if err == nil {
   210  			logger.Warningf("%v\ndebug info written to %v", msg, dumpFile)
   211  		} else {
   212  			logger.Warningf("%v\nerror writing debug info: %v", msg, err)
   213  		}
   214  	}
   215  	return w.catacomb.ErrDying()
   216  }
   217  
   218  func (w *Worker) dumpDebug() (string, error) {
   219  	dumpFile, err := os.OpenFile(filepath.Join(w.config.LogDir, "apiserver-debug.log"), os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
   220  	if err != nil {
   221  		return "", errors.Trace(err)
   222  	}
   223  	defer dumpFile.Close()
   224  	if _, err = io.WriteString(dumpFile, fmt.Sprintf("goroutime dump %v\n", time.Now().Format(time.RFC3339))); err != nil {
   225  		return "", errors.Annotate(err, "writing header to apiserver log file")
   226  	}
   227  	return dumpFile.Name(), pprof.Lookup("goroutine").WriteTo(dumpFile, 1)
   228  }
   229  
// heldListener wraps a listener so that Accept can be paused: while held
// is true, Accept blocks before reaching the wrapped listener.
type heldListener struct {
	listener
	clock clock.Clock
	// cond (and its lock, cond.L) guards held and is used to wake blocked
	// Accept calls when the listener is released.
	cond *sync.Cond
	held bool
}
   236  
   237  func newHeldListener(l listener, c clock.Clock) *heldListener {
   238  	var mu sync.Mutex
   239  	return &heldListener{
   240  		listener: l,
   241  		clock:    c,
   242  		cond:     sync.NewCond(&mu),
   243  	}
   244  }
   245  
   246  func (h *heldListener) report() map[string]interface{} {
   247  	result := h.listener.report()
   248  	h.cond.L.Lock()
   249  	if h.held {
   250  		result["held"] = true
   251  	}
   252  	h.cond.L.Unlock()
   253  	return result
   254  }
   255  
   256  func (h *heldListener) hold() {
   257  	h.cond.L.Lock()
   258  	defer h.cond.L.Unlock()
   259  	h.held = true
   260  	// No need to signal the cond here, since nothing that's waiting
   261  	// for the listener to be unheld can run.
   262  }
   263  
   264  func (h *heldListener) release() {
   265  	h.cond.L.Lock()
   266  	defer h.cond.L.Unlock()
   267  	h.held = false
   268  	// Wake up any goroutines waiting for held to be false.
   269  	h.cond.Broadcast()
   270  }
   271  
   272  func (h *heldListener) Accept() (net.Conn, error) {
   273  	h.cond.L.Lock()
   274  	for h.held {
   275  		h.cond.Wait()
   276  	}
   277  	h.cond.L.Unlock()
   278  	return h.listener.Accept()
   279  }
   280  
// listener is what the worker needs from the thing it serves on: a
// net.Listener that can also describe itself for the engine report and
// give the URL it is reachable on.
type listener interface {
	net.Listener
	reporter
	URL() string
}
   286  
   287  func (w *Worker) newSimpleListener() (listener, error) {
   288  	listenAddr := net.JoinHostPort("", strconv.Itoa(w.config.APIPort))
   289  	listener, err := net.Listen("tcp", listenAddr)
   290  	if err != nil {
   291  		return nil, errors.Trace(err)
   292  	}
   293  	logger.Infof("listening on %q", listener.Addr())
   294  	return &simpleListener{listener}, nil
   295  }
   296  
   297  type simpleListener struct {
   298  	net.Listener
   299  }
   300  
   301  func (s *simpleListener) URL() string {
   302  	return fmt.Sprintf("https://%s", s.Addr())
   303  }
   304  
   305  func (s *simpleListener) report() map[string]interface{} {
   306  	return map[string]interface{}{
   307  		"listening": s.Addr().String(),
   308  	}
   309  }
   310  
   311  func (w *Worker) newDualPortListener() (listener, error) {
   312  	// Only open the controller port until we have been told that
   313  	// the controller is ready. This is currently done by the event
   314  	// from the peergrouper.
   315  	// TODO (thumper): make the raft worker publish an event when
   316  	// it knows who the raft master is. This means that this controller
   317  	// is part of the consensus set, and when it is, is is OK to accept
   318  	// agent connections. Until that time, accepting an agent connection
   319  	// would be a bit of a waste of time.
   320  	listenAddr := net.JoinHostPort("", strconv.Itoa(w.config.ControllerAPIPort))
   321  	listener, err := net.Listen("tcp", listenAddr)
   322  	logger.Infof("listening for controller connections on %q", listener.Addr())
   323  	dual := &dualListener{
   324  		agentName:          w.config.AgentName,
   325  		clock:              w.config.Clock,
   326  		delay:              w.config.APIPortOpenDelay,
   327  		apiPort:            w.config.APIPort,
   328  		controllerListener: listener,
   329  		status:             "waiting for signal to open agent port",
   330  		done:               make(chan struct{}),
   331  		errors:             make(chan error),
   332  		connections:        make(chan net.Conn),
   333  	}
   334  	go dual.accept(listener)
   335  
   336  	dual.unsub, err = w.config.Hub.Subscribe(apiserver.ConnectTopic, dual.openAPIPort)
   337  	if err != nil {
   338  		dual.Close()
   339  		return nil, errors.Annotate(err, "unable to subscribe to details topic")
   340  	}
   341  
   342  	return dual, err
   343  }
   344  
// dualListener presents two TCP listeners as a single net.Listener. The
// controller listener is open from the start; the API listener is opened
// later by openAPIPort. Each underlying listener has its own accept
// goroutine feeding the shared connections and errors channels, which
// Accept reads from.
type dualListener struct {
	// agentName is compared with the origin and agent tag of published
	// API connections to decide when to open the API port.
	agentName string
	clock     clock.Clock
	// delay is how long to wait after the connect event before opening
	// the API port.
	delay   time.Duration
	apiPort int

	controllerListener net.Listener
	// apiListener is nil until the API port has been opened. It is
	// accessed with mu held.
	apiListener net.Listener
	// status is a human readable description used in report. It is
	// accessed with mu held.
	status string

	mu sync.Mutex
	// closer ensures that done is closed only once.
	closer sync.Once

	// done is closed by Close to tell the accept goroutines and any
	// pending Accept to stop.
	done        chan struct{}
	errors      chan error
	connections chan net.Conn

	// unsub removes the hub subscription made in newDualPortListener.
	unsub func()
}
   364  
   365  func (d *dualListener) report() map[string]interface{} {
   366  	result := map[string]interface{}{
   367  		"controller": d.controllerListener.Addr().String(),
   368  	}
   369  	d.mu.Lock()
   370  	defer d.mu.Unlock()
   371  	if d.status != "" {
   372  		result["status"] = d.status
   373  	}
   374  	if d.apiListener != nil {
   375  		result["agent"] = d.apiListener.Addr().String()
   376  	}
   377  	return result
   378  }
   379  
   380  func (d *dualListener) accept(listener net.Listener) {
   381  	for {
   382  		conn, err := listener.Accept()
   383  		if err != nil {
   384  			select {
   385  			case d.errors <- err:
   386  			case <-d.done:
   387  				logger.Infof("no longer accepting connections on %s", listener.Addr())
   388  				return
   389  			}
   390  		} else {
   391  			select {
   392  			case d.connections <- conn:
   393  			case <-d.done:
   394  				conn.Close()
   395  				logger.Infof("no longer accepting connections on %s", listener.Addr())
   396  				return
   397  			}
   398  		}
   399  	}
   400  }
   401  
   402  // Accept implements net.Listener.
   403  func (d *dualListener) Accept() (net.Conn, error) {
   404  	select {
   405  	case <-d.done:
   406  		return nil, errors.New("listener has been closed")
   407  	case err := <-d.errors:
   408  		// Don't wrap this error with errors.Trace - the stdlib http
   409  		// server code has handling for various net error types (like
   410  		// temporary failures) that we don't want to interfere with.
   411  		return nil, err
   412  	case conn := <-d.connections:
   413  		// Due to the non-deterministic nature of select, it is possible
   414  		// that if there was a pending accept call we may get a connection
   415  		// even though we are done. So check that before we return
   416  		// the conn.
   417  		select {
   418  		case <-d.done:
   419  			conn.Close()
   420  			return nil, errors.New("listener has been closed")
   421  		default:
   422  			return conn, nil
   423  		}
   424  	}
   425  }
   426  
   427  // Close implements net.Listener. Closes all the open listeners.
   428  func (d *dualListener) Close() error {
   429  	// Only close the channel once.
   430  	d.closer.Do(func() { close(d.done) })
   431  	err := d.controllerListener.Close()
   432  	d.mu.Lock()
   433  	defer d.mu.Unlock()
   434  	if d.apiListener != nil {
   435  		err2 := d.apiListener.Close()
   436  		if err == nil {
   437  			err = err2
   438  		}
   439  		// If we already have a close error, we don't really care
   440  		// about this one.
   441  	}
   442  	d.status = "closed ports"
   443  	return errors.Trace(err)
   444  }
   445  
   446  // Addr implements net.Listener. If the api port has been opened, we
   447  // return that, otherwise we return the controller port address.
   448  func (d *dualListener) Addr() net.Addr {
   449  	d.mu.Lock()
   450  	defer d.mu.Unlock()
   451  	if d.apiListener != nil {
   452  		return d.apiListener.Addr()
   453  	}
   454  	return d.controllerListener.Addr()
   455  }
   456  
// URL implements the listener method. It returns "https://" followed by
// the address reported by Addr, so it refers to the API port once that has
// been opened and to the controller port before then.
func (d *dualListener) URL() string {
	return fmt.Sprintf("https://%s", d.Addr())
}
   461  
   462  // openAPIPort opens the api port and starts accepting connections.
   463  func (d *dualListener) openAPIPort(topic string, conn apiserver.APIConnection, err error) {
   464  	if err != nil {
   465  		logger.Errorf("programming error: %v", err)
   466  		return
   467  	}
   468  	// We are wanting to make sure that the api-caller has connected before we
   469  	// open the api port. Each api connection is published with the origin tag.
   470  	// Any origin that matches our agent name means that someone has connected
   471  	// to us. We need to also check which agent connected as it is possible that
   472  	// one of the other HA controller could connect before we connect to
   473  	// ourselves.
   474  	if conn.Origin != d.agentName || conn.AgentTag != d.agentName {
   475  		return
   476  	}
   477  
   478  	d.unsub()
   479  	if d.delay > 0 {
   480  		d.mu.Lock()
   481  		d.status = "waiting prior to opening agent port"
   482  		d.mu.Unlock()
   483  		logger.Infof("waiting for %s before allowing api connections", d.delay)
   484  		<-d.clock.After(d.delay)
   485  	}
   486  
   487  	d.mu.Lock()
   488  	defer d.mu.Unlock()
   489  	// Make sure we haven't been closed already.
   490  	select {
   491  	case <-d.done:
   492  		return
   493  	default:
   494  		// We are all good.
   495  	}
   496  
   497  	listenAddr := net.JoinHostPort("", strconv.Itoa(d.apiPort))
   498  	listener, err := net.Listen("tcp", listenAddr)
   499  	if err != nil {
   500  		select {
   501  		case d.errors <- err:
   502  		case <-d.done:
   503  			logger.Errorf("can't open api port: %v, but worker exiting already", err)
   504  		}
   505  		return
   506  	}
   507  
   508  	logger.Infof("listening for api connections on %q", listener.Addr())
   509  	d.apiListener = listener
   510  	go d.accept(listener)
   511  	d.status = ""
   512  }