github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/pubsub/remoteserver.go (about)

     1  // Copyright 2016 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package pubsub
     5  
     6  import (
     7  	"fmt"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/juju/clock"
    12  	"github.com/juju/errors"
    13  	"github.com/juju/pubsub"
    14  	"github.com/juju/retry"
    15  	"github.com/juju/utils/deque"
    16  	"gopkg.in/juju/worker.v1"
    17  	"gopkg.in/tomb.v2"
    18  
    19  	"github.com/juju/juju/api"
    20  	"github.com/juju/juju/apiserver/params"
    21  	"github.com/juju/juju/pubsub/forwarder"
    22  )
    23  
    24  // RemoteServer represents the public interface of the worker
    25  // responsible for forwarding messages to a single other API server.
    26  type RemoteServer interface {
    27  	worker.Worker
    28  	Reporter
    29  	UpdateAddresses(addresses []string)
    30  	Publish(message *params.PubSubMessage)
    31  }
    32  
    33  // remoteServer is responsible for taking messages and sending them to the
    34  // pubsub endpoint on the remote server. If the connection is dropped, the
    35  // remoteServer will try to reconnect. Messages are not sent until the
    36  // connection either succeeds the first time, or fails to connect. Once there
    37  // is a failure, incoming messages are dropped until reconnection is complete,
    38  // then messages will flow again.
    39  type remoteServer struct {
    40  	origin string
    41  	target string
    42  	info   *api.Info
    43  	logger Logger
    44  
    45  	newWriter  func(*api.Info) (MessageWriter, error)
    46  	connection MessageWriter
    47  
    48  	hub   *pubsub.StructuredHub
    49  	tomb  tomb.Tomb
    50  	clock clock.Clock
    51  	mutex sync.Mutex
    52  
    53  	pending        *deque.Deque
    54  	data           chan struct{}
    55  	stopConnecting chan struct{}
    56  	sent           uint64
    57  
    58  	unsubscribe func()
    59  }
    60  
    61  // RemoteServerConfig defines all the attributes that are needed for a RemoteServer.
    62  type RemoteServerConfig struct {
    63  	// Hub is used to publish connection messages
    64  	Hub    *pubsub.StructuredHub
    65  	Origin string
    66  	Target string
    67  	Clock  clock.Clock
    68  	Logger Logger
    69  
    70  	// APIInfo is initially populated with the addresses of the target machine.
    71  	APIInfo   *api.Info
    72  	NewWriter func(*api.Info) (MessageWriter, error)
    73  }
    74  
    75  // NewRemoteServer creates a new RemoteServer that will connect to the remote
    76  // apiserver and pass on messages to the pubsub endpoint of that apiserver.
    77  func NewRemoteServer(config RemoteServerConfig) (RemoteServer, error) {
    78  	remote := &remoteServer{
    79  		origin:    config.Origin,
    80  		target:    config.Target,
    81  		info:      config.APIInfo,
    82  		logger:    config.Logger,
    83  		newWriter: config.NewWriter,
    84  		hub:       config.Hub,
    85  		clock:     config.Clock,
    86  		pending:   deque.New(),
    87  		data:      make(chan struct{}),
    88  	}
    89  	unsub, err := remote.hub.Subscribe(forwarder.ConnectedTopic, remote.onForwarderConnection)
    90  	if err != nil {
    91  		return nil, errors.Trace(err)
    92  	}
    93  	remote.unsubscribe = unsub
    94  	remote.tomb.Go(remote.loop)
    95  	return remote, nil
    96  }
    97  
    98  // Report provides information to the engine report.
    99  // It should be fast and minimally blocking.
   100  func (r *remoteServer) Report() map[string]interface{} {
   101  	r.mutex.Lock()
   102  	defer r.mutex.Unlock()
   103  
   104  	var status string
   105  	if r.connection == nil {
   106  		status = "disconnected"
   107  	} else {
   108  		status = "connected"
   109  	}
   110  	return map[string]interface{}{
   111  		"status":    status,
   112  		"addresses": r.info.Addrs,
   113  		"queue-len": r.pending.Len(),
   114  		"sent":      r.sent,
   115  	}
   116  }
   117  
   118  // IntrospectionReport is the method called by the subscriber to get
   119  // information about this server.
   120  func (r *remoteServer) IntrospectionReport() string {
   121  	r.mutex.Lock()
   122  	defer r.mutex.Unlock()
   123  
   124  	var status string
   125  	if r.connection == nil {
   126  		status = "disconnected"
   127  	} else {
   128  		status = "connected"
   129  	}
   130  	return fmt.Sprintf(""+
   131  		"  Status: %s\n"+
   132  		"  Addresses: %v\n"+
   133  		"  Queue length: %d\n"+
   134  		"  Sent count: %d\n",
   135  		status, r.info.Addrs, r.pending.Len(), r.sent)
   136  }
   137  
   138  func (r *remoteServer) onForwarderConnection(topic string, details forwarder.OriginTarget, err error) {
   139  	if err != nil {
   140  		// This should never happen.
   141  		r.logger.Errorf("subscriber callback error: %v", err)
   142  		return
   143  	}
   144  	if details.Target == r.origin && details.Origin == r.target {
   145  		// If we have just been connected to by the apiserver that we are
   146  		// trying to connect to, interrupt any waiting we may be doing and try
   147  		// again as we may be in the middle of a long wait.
   148  		r.interruptConnecting()
   149  	}
   150  }
   151  
   152  // UpdateAddresses will update the addresses held for the target API server.
   153  // If we are currently trying to connect to the target, interrupt it so we
   154  // can try again with the new addresses.
   155  func (r *remoteServer) UpdateAddresses(addresses []string) {
   156  	r.mutex.Lock()
   157  	defer r.mutex.Unlock()
   158  
   159  	if r.connection == nil && r.stopConnecting != nil {
   160  		// We are probably trying to reconnect, so interrupt that so we don't
   161  		// get a race between setting addresses and trying to read them to
   162  		// connect. Note that we don't call the interruptConnecting method
   163  		// here because that method also tries to lock the mutex.
   164  		r.logger.Debugf("interrupting connecting due to new addresses: %v", addresses)
   165  		close(r.stopConnecting)
   166  		r.stopConnecting = nil
   167  	}
   168  	r.info.Addrs = addresses
   169  }
   170  
   171  // Publish queues up the message if and only if we have an active connection to
   172  // the target apiserver.
   173  func (r *remoteServer) Publish(message *params.PubSubMessage) {
   174  	select {
   175  	case <-r.tomb.Dying():
   176  		r.logger.Tracef("dying, don't send %q", message.Topic)
   177  	default:
   178  		r.mutex.Lock()
   179  		// Only queue the message up if we are currently connected.
   180  		notifyData := false
   181  		if r.connection != nil {
   182  			r.logger.Tracef("queue up topic %q", message.Topic)
   183  			r.pending.PushBack(message)
   184  			notifyData = r.pending.Len() == 1
   185  
   186  		} else {
   187  			r.logger.Tracef("skipping %q for %s as not connected", message.Topic, r.target)
   188  		}
   189  		r.mutex.Unlock()
   190  		if notifyData {
   191  			r.data <- struct{}{}
   192  		}
   193  	}
   194  }
   195  
   196  // nextMessage returns the next queued message, and a flag to indicate empty.
   197  func (r *remoteServer) nextMessage() *params.PubSubMessage {
   198  	r.mutex.Lock()
   199  	defer r.mutex.Unlock()
   200  	val, ok := r.pending.PopFront()
   201  	if !ok {
   202  		// nothing to do
   203  		return nil
   204  	}
   205  	// Even though it isn't exactly sent right now, it effectively will
   206  	// be very soon, and we want to keep this counter in the mutex lock.
   207  	r.sent++
   208  	return val.(*params.PubSubMessage)
   209  }
   210  
   211  func (r *remoteServer) connect() bool {
   212  	stop := make(chan struct{})
   213  	r.mutex.Lock()
   214  	r.stopConnecting = stop
   215  	r.mutex.Unlock()
   216  
   217  	var connection MessageWriter
   218  	r.logger.Debugf("connecting to %s", r.target)
   219  	retry.Call(retry.CallArgs{
   220  		Func: func() error {
   221  			r.logger.Debugf("open api to %s: %v", r.target, r.info.Addrs)
   222  			conn, err := r.newWriter(r.info)
   223  			if err != nil {
   224  				r.logger.Tracef("unable to get message writer for %s, reconnecting... : %v\n%s", r.target, err, errors.ErrorStack(err))
   225  				return errors.Trace(err)
   226  			}
   227  			connection = conn
   228  			return nil
   229  		},
   230  		Attempts:    retry.UnlimitedAttempts,
   231  		Delay:       time.Second,
   232  		MaxDelay:    5 * time.Minute,
   233  		BackoffFunc: retry.DoubleDelay,
   234  		Stop:        stop,
   235  		Clock:       r.clock,
   236  	})
   237  
   238  	r.mutex.Lock()
   239  	r.stopConnecting = nil
   240  	defer r.mutex.Unlock()
   241  
   242  	if connection != nil {
   243  		r.connection = connection
   244  		r.logger.Infof("forwarding connected %s -> %s", r.origin, r.target)
   245  		_, err := r.hub.Publish(
   246  			forwarder.ConnectedTopic,
   247  			// NOTE: origin is filled in by the the central hub annotations.
   248  			forwarder.OriginTarget{Target: r.target})
   249  		if err != nil {
   250  			r.logger.Errorf("%v", err)
   251  		}
   252  		return true
   253  	}
   254  	return false
   255  }
   256  
   257  func (r *remoteServer) loop() error {
   258  	defer r.unsubscribe()
   259  
   260  	var delay <-chan time.Time
   261  	messageToSend := make(chan *params.PubSubMessage)
   262  	messageSent := make(chan *params.PubSubMessage)
   263  	go r.forwardMessages(messageToSend, messageSent)
   264  
   265  	for {
   266  		if r.connection == nil {
   267  			// If we don't have a current connection, try to get one.
   268  			if r.connect() {
   269  				delay = nil
   270  			} else {
   271  				// Skip through the select to try to reconnect.
   272  				delay = r.clock.After(time.Second)
   273  			}
   274  		}
   275  
   276  		select {
   277  		case <-r.tomb.Dying():
   278  			r.logger.Debugf("worker shutting down")
   279  			r.resetConnection()
   280  			return tomb.ErrDying
   281  		case <-r.data:
   282  			// Has new data been pushed on?
   283  			r.logger.Tracef("new messages")
   284  		case <-delay:
   285  			// If we failed to connect for whatever reason, this means we don't cycle
   286  			// immediately.
   287  			r.logger.Tracef("connect delay")
   288  		}
   289  		r.logger.Tracef("send pending messages")
   290  		r.sendPendingMessages(messageToSend, messageSent)
   291  	}
   292  }
   293  
   294  func (r *remoteServer) sendPendingMessages(messageToSend chan<- *params.PubSubMessage, messageSent <-chan *params.PubSubMessage) {
   295  	for message := r.nextMessage(); message != nil; message = r.nextMessage() {
   296  		select {
   297  		case <-r.tomb.Dying():
   298  			return
   299  		case messageToSend <- message:
   300  			// Just in case the worker dies while we are trying to send.
   301  		}
   302  		select {
   303  		case <-r.tomb.Dying():
   304  			// This will cause the main loop to iterate around, and close
   305  			// the connection before returning.
   306  			return
   307  		case <-messageSent:
   308  			// continue on to next
   309  		}
   310  	}
   311  }
   312  
   313  func (r *remoteServer) resetConnection() {
   314  	r.mutex.Lock()
   315  	defer r.mutex.Unlock()
   316  	// If we have already been reset, just return
   317  	if r.connection == nil {
   318  		return
   319  	}
   320  	r.logger.Debugf("closing connection and clearing pending")
   321  	r.connection.Close()
   322  	r.connection = nil
   323  	// Discard all pending messages.
   324  	r.pending = deque.New()
   325  	// Tell everyone what we have been disconnected.
   326  	_, err := r.hub.Publish(
   327  		forwarder.DisconnectedTopic,
   328  		// NOTE: origin is filled in by the the central hub annotations.
   329  		forwarder.OriginTarget{Target: r.target})
   330  	if err != nil {
   331  		r.logger.Errorf("%v", err)
   332  	}
   333  }
   334  
   335  // forwardMessages is a goroutine whose sole purpose is to get messages off
   336  // the messageToSend channel, try to send them over the API, and say when they
   337  // are done with this message. This allows for the potential blocking call of
   338  // `ForwardMessage`. If this does block for whatever reason and the worker is
   339  // asked to shutdown, the main loop method is able to do so. That would cause
   340  // the API connection to be closed, which would cause the `ForwardMessage` to
   341  // be unblocked due to the error of the socket closing.
   342  func (r *remoteServer) forwardMessages(messageToSend <-chan *params.PubSubMessage, messageSent chan<- *params.PubSubMessage) {
   343  	var message *params.PubSubMessage
   344  	for {
   345  		select {
   346  		case <-r.tomb.Dying():
   347  			return
   348  		case message = <-messageToSend:
   349  		}
   350  		r.mutex.Lock()
   351  		conn := r.connection
   352  		r.mutex.Unlock()
   353  
   354  		r.logger.Tracef("forwarding %q to %s, data %v", message.Topic, r.target, message.Data)
   355  		if conn != nil {
   356  			err := conn.ForwardMessage(message)
   357  			if err != nil {
   358  				// Some problem sending, so log, close the connection, and try to reconnect.
   359  				r.logger.Infof("unable to forward message, reconnecting... : %v", err)
   360  				r.resetConnection()
   361  			}
   362  		}
   363  
   364  		select {
   365  		case <-r.tomb.Dying():
   366  			return
   367  		case messageSent <- message:
   368  		}
   369  	}
   370  }
   371  
   372  func (r *remoteServer) interruptConnecting() {
   373  	r.mutex.Lock()
   374  	defer r.mutex.Unlock()
   375  	if r.stopConnecting != nil {
   376  		r.logger.Debugf("interrupting the pending connect loop")
   377  		close(r.stopConnecting)
   378  		r.stopConnecting = nil
   379  	}
   380  }
   381  
   382  // Kill is part of the worker.Worker interface.
   383  func (r *remoteServer) Kill() {
   384  	r.tomb.Kill(nil)
   385  	r.interruptConnecting()
   386  }
   387  
   388  // Wait is part of the worker.Worker interface.
   389  func (r *remoteServer) Wait() error {
   390  	return r.tomb.Wait()
   391  }