github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/pubsub/remoteserver.go (about) 1 // Copyright 2016 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package pubsub 5 6 import ( 7 "fmt" 8 "sync" 9 "time" 10 11 "github.com/juju/clock" 12 "github.com/juju/errors" 13 "github.com/juju/pubsub" 14 "github.com/juju/retry" 15 "github.com/juju/utils/deque" 16 "gopkg.in/juju/worker.v1" 17 "gopkg.in/tomb.v2" 18 19 "github.com/juju/juju/api" 20 "github.com/juju/juju/apiserver/params" 21 "github.com/juju/juju/pubsub/forwarder" 22 ) 23 24 // RemoteServer represents the public interface of the worker 25 // responsible for forwarding messages to a single other API server. 26 type RemoteServer interface { 27 worker.Worker 28 Reporter 29 UpdateAddresses(addresses []string) 30 Publish(message *params.PubSubMessage) 31 } 32 33 // remoteServer is responsible for taking messages and sending them to the 34 // pubsub endpoint on the remote server. If the connection is dropped, the 35 // remoteServer will try to reconnect. Messages are not sent until the 36 // connection either succeeds the first time, or fails to connect. Once there 37 // is a failure, incoming messages are dropped until reconnection is complete, 38 // then messages will flow again. 39 type remoteServer struct { 40 origin string 41 target string 42 info *api.Info 43 logger Logger 44 45 newWriter func(*api.Info) (MessageWriter, error) 46 connection MessageWriter 47 48 hub *pubsub.StructuredHub 49 tomb tomb.Tomb 50 clock clock.Clock 51 mutex sync.Mutex 52 53 pending *deque.Deque 54 data chan struct{} 55 stopConnecting chan struct{} 56 sent uint64 57 58 unsubscribe func() 59 } 60 61 // RemoteServerConfig defines all the attributes that are needed for a RemoteServer. 62 type RemoteServerConfig struct { 63 // Hub is used to publish connection messages 64 Hub *pubsub.StructuredHub 65 Origin string 66 Target string 67 Clock clock.Clock 68 Logger Logger 69 70 // APIInfo is initially populated with the addresses of the target machine. 71 APIInfo *api.Info 72 NewWriter func(*api.Info) (MessageWriter, error) 73 } 74 75 // NewRemoteServer creates a new RemoteServer that will connect to the remote 76 // apiserver and pass on messages to the pubsub endpoint of that apiserver. 77 func NewRemoteServer(config RemoteServerConfig) (RemoteServer, error) { 78 remote := &remoteServer{ 79 origin: config.Origin, 80 target: config.Target, 81 info: config.APIInfo, 82 logger: config.Logger, 83 newWriter: config.NewWriter, 84 hub: config.Hub, 85 clock: config.Clock, 86 pending: deque.New(), 87 data: make(chan struct{}), 88 } 89 unsub, err := remote.hub.Subscribe(forwarder.ConnectedTopic, remote.onForwarderConnection) 90 if err != nil { 91 return nil, errors.Trace(err) 92 } 93 remote.unsubscribe = unsub 94 remote.tomb.Go(remote.loop) 95 return remote, nil 96 } 97 98 // Report provides information to the engine report. 99 // It should be fast and minimally blocking. 100 func (r *remoteServer) Report() map[string]interface{} { 101 r.mutex.Lock() 102 defer r.mutex.Unlock() 103 104 var status string 105 if r.connection == nil { 106 status = "disconnected" 107 } else { 108 status = "connected" 109 } 110 return map[string]interface{}{ 111 "status": status, 112 "addresses": r.info.Addrs, 113 "queue-len": r.pending.Len(), 114 "sent": r.sent, 115 } 116 } 117 118 // IntrospectionReport is the method called by the subscriber to get 119 // information about this server. 120 func (r *remoteServer) IntrospectionReport() string { 121 r.mutex.Lock() 122 defer r.mutex.Unlock() 123 124 var status string 125 if r.connection == nil { 126 status = "disconnected" 127 } else { 128 status = "connected" 129 } 130 return fmt.Sprintf(""+ 131 " Status: %s\n"+ 132 " Addresses: %v\n"+ 133 " Queue length: %d\n"+ 134 " Sent count: %d\n", 135 status, r.info.Addrs, r.pending.Len(), r.sent) 136 } 137 138 func (r *remoteServer) onForwarderConnection(topic string, details forwarder.OriginTarget, err error) { 139 if err != nil { 140 // This should never happen. 141 r.logger.Errorf("subscriber callback error: %v", err) 142 return 143 } 144 if details.Target == r.origin && details.Origin == r.target { 145 // If we have just been connected to by the apiserver that we are 146 // trying to connect to, interrupt any waiting we may be doing and try 147 // again as we may be in the middle of a long wait. 148 r.interruptConnecting() 149 } 150 } 151 152 // UpdateAddresses will update the addresses held for the target API server. 153 // If we are currently trying to connect to the target, interrupt it so we 154 // can try again with the new addresses. 155 func (r *remoteServer) UpdateAddresses(addresses []string) { 156 r.mutex.Lock() 157 defer r.mutex.Unlock() 158 159 if r.connection == nil && r.stopConnecting != nil { 160 // We are probably trying to reconnect, so interrupt that so we don't 161 // get a race between setting addresses and trying to read them to 162 // connect. Note that we don't call the interruptConnecting method 163 // here because that method also tries to lock the mutex. 164 r.logger.Debugf("interrupting connecting due to new addresses: %v", addresses) 165 close(r.stopConnecting) 166 r.stopConnecting = nil 167 } 168 r.info.Addrs = addresses 169 } 170 171 // Publish queues up the message if and only if we have an active connection to 172 // the target apiserver. 173 func (r *remoteServer) Publish(message *params.PubSubMessage) { 174 select { 175 case <-r.tomb.Dying(): 176 r.logger.Tracef("dying, don't send %q", message.Topic) 177 default: 178 r.mutex.Lock() 179 // Only queue the message up if we are currently connected. 180 notifyData := false 181 if r.connection != nil { 182 r.logger.Tracef("queue up topic %q", message.Topic) 183 r.pending.PushBack(message) 184 notifyData = r.pending.Len() == 1 185 186 } else { 187 r.logger.Tracef("skipping %q for %s as not connected", message.Topic, r.target) 188 } 189 r.mutex.Unlock() 190 if notifyData { 191 r.data <- struct{}{} 192 } 193 } 194 } 195 196 // nextMessage returns the next queued message, and a flag to indicate empty. 197 func (r *remoteServer) nextMessage() *params.PubSubMessage { 198 r.mutex.Lock() 199 defer r.mutex.Unlock() 200 val, ok := r.pending.PopFront() 201 if !ok { 202 // nothing to do 203 return nil 204 } 205 // Even though it isn't exactly sent right now, it effectively will 206 // be very soon, and we want to keep this counter in the mutex lock. 207 r.sent++ 208 return val.(*params.PubSubMessage) 209 } 210 211 func (r *remoteServer) connect() bool { 212 stop := make(chan struct{}) 213 r.mutex.Lock() 214 r.stopConnecting = stop 215 r.mutex.Unlock() 216 217 var connection MessageWriter 218 r.logger.Debugf("connecting to %s", r.target) 219 retry.Call(retry.CallArgs{ 220 Func: func() error { 221 r.logger.Debugf("open api to %s: %v", r.target, r.info.Addrs) 222 conn, err := r.newWriter(r.info) 223 if err != nil { 224 r.logger.Tracef("unable to get message writer for %s, reconnecting... : %v\n%s", r.target, err, errors.ErrorStack(err)) 225 return errors.Trace(err) 226 } 227 connection = conn 228 return nil 229 }, 230 Attempts: retry.UnlimitedAttempts, 231 Delay: time.Second, 232 MaxDelay: 5 * time.Minute, 233 BackoffFunc: retry.DoubleDelay, 234 Stop: stop, 235 Clock: r.clock, 236 }) 237 238 r.mutex.Lock() 239 r.stopConnecting = nil 240 defer r.mutex.Unlock() 241 242 if connection != nil { 243 r.connection = connection 244 r.logger.Infof("forwarding connected %s -> %s", r.origin, r.target) 245 _, err := r.hub.Publish( 246 forwarder.ConnectedTopic, 247 // NOTE: origin is filled in by the the central hub annotations. 248 forwarder.OriginTarget{Target: r.target}) 249 if err != nil { 250 r.logger.Errorf("%v", err) 251 } 252 return true 253 } 254 return false 255 } 256 257 func (r *remoteServer) loop() error { 258 defer r.unsubscribe() 259 260 var delay <-chan time.Time 261 messageToSend := make(chan *params.PubSubMessage) 262 messageSent := make(chan *params.PubSubMessage) 263 go r.forwardMessages(messageToSend, messageSent) 264 265 for { 266 if r.connection == nil { 267 // If we don't have a current connection, try to get one. 268 if r.connect() { 269 delay = nil 270 } else { 271 // Skip through the select to try to reconnect. 272 delay = r.clock.After(time.Second) 273 } 274 } 275 276 select { 277 case <-r.tomb.Dying(): 278 r.logger.Debugf("worker shutting down") 279 r.resetConnection() 280 return tomb.ErrDying 281 case <-r.data: 282 // Has new data been pushed on? 283 r.logger.Tracef("new messages") 284 case <-delay: 285 // If we failed to connect for whatever reason, this means we don't cycle 286 // immediately. 287 r.logger.Tracef("connect delay") 288 } 289 r.logger.Tracef("send pending messages") 290 r.sendPendingMessages(messageToSend, messageSent) 291 } 292 } 293 294 func (r *remoteServer) sendPendingMessages(messageToSend chan<- *params.PubSubMessage, messageSent <-chan *params.PubSubMessage) { 295 for message := r.nextMessage(); message != nil; message = r.nextMessage() { 296 select { 297 case <-r.tomb.Dying(): 298 return 299 case messageToSend <- message: 300 // Just in case the worker dies while we are trying to send. 301 } 302 select { 303 case <-r.tomb.Dying(): 304 // This will cause the main loop to iterate around, and close 305 // the connection before returning. 306 return 307 case <-messageSent: 308 // continue on to next 309 } 310 } 311 } 312 313 func (r *remoteServer) resetConnection() { 314 r.mutex.Lock() 315 defer r.mutex.Unlock() 316 // If we have already been reset, just return 317 if r.connection == nil { 318 return 319 } 320 r.logger.Debugf("closing connection and clearing pending") 321 r.connection.Close() 322 r.connection = nil 323 // Discard all pending messages. 324 r.pending = deque.New() 325 // Tell everyone what we have been disconnected. 326 _, err := r.hub.Publish( 327 forwarder.DisconnectedTopic, 328 // NOTE: origin is filled in by the the central hub annotations. 329 forwarder.OriginTarget{Target: r.target}) 330 if err != nil { 331 r.logger.Errorf("%v", err) 332 } 333 } 334 335 // forwardMessages is a goroutine whose sole purpose is to get messages off 336 // the messageToSend channel, try to send them over the API, and say when they 337 // are done with this message. This allows for the potential blocking call of 338 // `ForwardMessage`. If this does block for whatever reason and the worker is 339 // asked to shutdown, the main loop method is able to do so. That would cause 340 // the API connection to be closed, which would cause the `ForwardMessage` to 341 // be unblocked due to the error of the socket closing. 342 func (r *remoteServer) forwardMessages(messageToSend <-chan *params.PubSubMessage, messageSent chan<- *params.PubSubMessage) { 343 var message *params.PubSubMessage 344 for { 345 select { 346 case <-r.tomb.Dying(): 347 return 348 case message = <-messageToSend: 349 } 350 r.mutex.Lock() 351 conn := r.connection 352 r.mutex.Unlock() 353 354 r.logger.Tracef("forwarding %q to %s, data %v", message.Topic, r.target, message.Data) 355 if conn != nil { 356 err := conn.ForwardMessage(message) 357 if err != nil { 358 // Some problem sending, so log, close the connection, and try to reconnect. 359 r.logger.Infof("unable to forward message, reconnecting... : %v", err) 360 r.resetConnection() 361 } 362 } 363 364 select { 365 case <-r.tomb.Dying(): 366 return 367 case messageSent <- message: 368 } 369 } 370 } 371 372 func (r *remoteServer) interruptConnecting() { 373 r.mutex.Lock() 374 defer r.mutex.Unlock() 375 if r.stopConnecting != nil { 376 r.logger.Debugf("interrupting the pending connect loop") 377 close(r.stopConnecting) 378 r.stopConnecting = nil 379 } 380 } 381 382 // Kill is part of the worker.Worker interface. 383 func (r *remoteServer) Kill() { 384 r.tomb.Kill(nil) 385 r.interruptConnecting() 386 } 387 388 // Wait is part of the worker.Worker interface. 389 func (r *remoteServer) Wait() error { 390 return r.tomb.Wait() 391 }