// Source: github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/logbroker/broker.go

package logbroker

import (
	"context"
	"errors"
	"fmt"
	"io"
	"sync"

	"github.com/docker/go-events"
	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/ca"
	"github.com/docker/swarmkit/identity"
	"github.com/docker/swarmkit/log"
	"github.com/docker/swarmkit/manager/state/store"
	"github.com/docker/swarmkit/watch"
	"github.com/sirupsen/logrus"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

var (
	// errAlreadyRunning is returned by Start when the broker has already been
	// started and not stopped since.
	errAlreadyRunning = errors.New("broker is already running")
	// errNotRunning is returned by Stop when the broker is not running, and by
	// the streaming handlers when the broker has never been started.
	errNotRunning = errors.New("broker is not running")
)

// logMessage is the event type carried on the broker's log queue. It wraps a
// PublishLogsMessage and adds completion bookkeeping.
type logMessage struct {
	*api.PublishLogsMessage
	// completed marks the terminal message that SubscribeLogs publishes once
	// the subscription has finished; it carries no log payload.
	completed bool
	// err is the subscription's final error, returned to the subscriber when
	// completed is true.
	err error
}

// LogBroker coordinates log subscriptions to services and tasks. Clients can
// publish and subscribe to logs channels.
//
// Log subscriptions are pushed to the worker nodes by creating log subscription
// tasks. As such, the LogBroker also acts as an orchestrator of these tasks.
38 type LogBroker struct { 39 mu sync.RWMutex 40 logQueue *watch.Queue 41 subscriptionQueue *watch.Queue 42 43 registeredSubscriptions map[string]*subscription 44 subscriptionsByNode map[string]map[*subscription]struct{} 45 46 pctx context.Context 47 cancelAll context.CancelFunc 48 49 store *store.MemoryStore 50 } 51 52 // New initializes and returns a new LogBroker 53 func New(store *store.MemoryStore) *LogBroker { 54 return &LogBroker{ 55 store: store, 56 } 57 } 58 59 // Start starts the log broker 60 func (lb *LogBroker) Start(ctx context.Context) error { 61 lb.mu.Lock() 62 defer lb.mu.Unlock() 63 64 if lb.cancelAll != nil { 65 return errAlreadyRunning 66 } 67 68 lb.pctx, lb.cancelAll = context.WithCancel(ctx) 69 lb.logQueue = watch.NewQueue() 70 lb.subscriptionQueue = watch.NewQueue() 71 lb.registeredSubscriptions = make(map[string]*subscription) 72 lb.subscriptionsByNode = make(map[string]map[*subscription]struct{}) 73 return nil 74 } 75 76 // Stop stops the log broker 77 func (lb *LogBroker) Stop() error { 78 lb.mu.Lock() 79 defer lb.mu.Unlock() 80 81 if lb.cancelAll == nil { 82 return errNotRunning 83 } 84 lb.cancelAll() 85 lb.cancelAll = nil 86 87 lb.logQueue.Close() 88 lb.subscriptionQueue.Close() 89 90 return nil 91 } 92 93 func validateSelector(selector *api.LogSelector) error { 94 if selector == nil { 95 return status.Errorf(codes.InvalidArgument, "log selector must be provided") 96 } 97 98 if len(selector.ServiceIDs) == 0 && len(selector.TaskIDs) == 0 && len(selector.NodeIDs) == 0 { 99 return status.Errorf(codes.InvalidArgument, "log selector must not be empty") 100 } 101 102 return nil 103 } 104 105 func (lb *LogBroker) newSubscription(selector *api.LogSelector, options *api.LogSubscriptionOptions) *subscription { 106 lb.mu.RLock() 107 defer lb.mu.RUnlock() 108 109 subscription := newSubscription(lb.store, &api.SubscriptionMessage{ 110 ID: identity.NewID(), 111 Selector: selector, 112 Options: options, 113 }, lb.subscriptionQueue) 114 115 return 
subscription 116 } 117 118 func (lb *LogBroker) getSubscription(id string) *subscription { 119 lb.mu.RLock() 120 defer lb.mu.RUnlock() 121 122 subscription, ok := lb.registeredSubscriptions[id] 123 if !ok { 124 return nil 125 } 126 return subscription 127 } 128 129 func (lb *LogBroker) registerSubscription(subscription *subscription) { 130 lb.mu.Lock() 131 defer lb.mu.Unlock() 132 133 lb.registeredSubscriptions[subscription.message.ID] = subscription 134 lb.subscriptionQueue.Publish(subscription) 135 136 for _, node := range subscription.Nodes() { 137 if _, ok := lb.subscriptionsByNode[node]; !ok { 138 // Mark nodes that won't receive the message as done. 139 subscription.Done(node, fmt.Errorf("node %s is not available", node)) 140 } else { 141 // otherwise, add the subscription to the node's subscriptions list 142 lb.subscriptionsByNode[node][subscription] = struct{}{} 143 } 144 } 145 } 146 147 func (lb *LogBroker) unregisterSubscription(subscription *subscription) { 148 lb.mu.Lock() 149 defer lb.mu.Unlock() 150 151 delete(lb.registeredSubscriptions, subscription.message.ID) 152 153 // remove the subscription from all of the nodes 154 for _, node := range subscription.Nodes() { 155 // but only if a node exists 156 if _, ok := lb.subscriptionsByNode[node]; ok { 157 delete(lb.subscriptionsByNode[node], subscription) 158 } 159 } 160 161 subscription.Close() 162 lb.subscriptionQueue.Publish(subscription) 163 } 164 165 // watchSubscriptions grabs all current subscriptions and notifies of any 166 // subscription change for this node. 167 // 168 // Subscriptions may fire multiple times and the caller has to protect against 169 // dupes. 170 func (lb *LogBroker) watchSubscriptions(nodeID string) ([]*subscription, chan events.Event, func()) { 171 lb.mu.RLock() 172 defer lb.mu.RUnlock() 173 174 // Watch for subscription changes for this node. 
175 ch, cancel := lb.subscriptionQueue.CallbackWatch(events.MatcherFunc(func(event events.Event) bool { 176 s := event.(*subscription) 177 return s.Contains(nodeID) 178 })) 179 180 // Grab current subscriptions. 181 var subscriptions []*subscription 182 for _, s := range lb.registeredSubscriptions { 183 if s.Contains(nodeID) { 184 subscriptions = append(subscriptions, s) 185 } 186 } 187 188 return subscriptions, ch, cancel 189 } 190 191 func (lb *LogBroker) subscribe(id string) (chan events.Event, func()) { 192 lb.mu.RLock() 193 defer lb.mu.RUnlock() 194 195 return lb.logQueue.CallbackWatch(events.MatcherFunc(func(event events.Event) bool { 196 publish := event.(*logMessage) 197 return publish.SubscriptionID == id 198 })) 199 } 200 201 func (lb *LogBroker) publish(log *api.PublishLogsMessage) { 202 lb.mu.RLock() 203 defer lb.mu.RUnlock() 204 205 lb.logQueue.Publish(&logMessage{PublishLogsMessage: log}) 206 } 207 208 // markDone wraps (*Subscription).Done() so that the removal of the sub from 209 // the node's subscription list is possible 210 func (lb *LogBroker) markDone(sub *subscription, nodeID string, err error) { 211 lb.mu.Lock() 212 defer lb.mu.Unlock() 213 214 // remove the subscription from the node's subscription list, if it exists 215 if _, ok := lb.subscriptionsByNode[nodeID]; ok { 216 delete(lb.subscriptionsByNode[nodeID], sub) 217 } 218 219 // mark the sub as done 220 sub.Done(nodeID, err) 221 } 222 223 // SubscribeLogs creates a log subscription and streams back logs 224 func (lb *LogBroker) SubscribeLogs(request *api.SubscribeLogsRequest, stream api.Logs_SubscribeLogsServer) error { 225 ctx := stream.Context() 226 227 if err := validateSelector(request.Selector); err != nil { 228 return err 229 } 230 231 lb.mu.Lock() 232 pctx := lb.pctx 233 lb.mu.Unlock() 234 if pctx == nil { 235 return errNotRunning 236 } 237 238 subscription := lb.newSubscription(request.Selector, request.Options) 239 subscription.Run(pctx) 240 defer subscription.Stop() 241 242 log 
:= log.G(ctx).WithFields( 243 logrus.Fields{ 244 "method": "(*LogBroker).SubscribeLogs", 245 "subscription.id": subscription.message.ID, 246 }, 247 ) 248 log.Debug("subscribed") 249 250 publishCh, publishCancel := lb.subscribe(subscription.message.ID) 251 defer publishCancel() 252 253 lb.registerSubscription(subscription) 254 defer lb.unregisterSubscription(subscription) 255 256 completed := subscription.Wait(ctx) 257 for { 258 select { 259 case <-ctx.Done(): 260 return ctx.Err() 261 case <-pctx.Done(): 262 return pctx.Err() 263 case event := <-publishCh: 264 publish := event.(*logMessage) 265 if publish.completed { 266 return publish.err 267 } 268 if err := stream.Send(&api.SubscribeLogsMessage{ 269 Messages: publish.Messages, 270 }); err != nil { 271 return err 272 } 273 case <-completed: 274 completed = nil 275 lb.logQueue.Publish(&logMessage{ 276 PublishLogsMessage: &api.PublishLogsMessage{ 277 SubscriptionID: subscription.message.ID, 278 }, 279 completed: true, 280 err: subscription.Err(), 281 }) 282 } 283 } 284 } 285 286 func (lb *LogBroker) nodeConnected(nodeID string) { 287 lb.mu.Lock() 288 defer lb.mu.Unlock() 289 290 if _, ok := lb.subscriptionsByNode[nodeID]; !ok { 291 lb.subscriptionsByNode[nodeID] = make(map[*subscription]struct{}) 292 } 293 } 294 295 func (lb *LogBroker) nodeDisconnected(nodeID string) { 296 lb.mu.Lock() 297 defer lb.mu.Unlock() 298 299 for sub := range lb.subscriptionsByNode[nodeID] { 300 sub.Done(nodeID, fmt.Errorf("node %s disconnected unexpectedly", nodeID)) 301 } 302 delete(lb.subscriptionsByNode, nodeID) 303 } 304 305 // ListenSubscriptions returns a stream of matching subscriptions for the current node 306 func (lb *LogBroker) ListenSubscriptions(request *api.ListenSubscriptionsRequest, stream api.LogBroker_ListenSubscriptionsServer) error { 307 remote, err := ca.RemoteNode(stream.Context()) 308 if err != nil { 309 return err 310 } 311 312 lb.mu.Lock() 313 pctx := lb.pctx 314 lb.mu.Unlock() 315 if pctx == nil { 316 return 
errNotRunning 317 } 318 319 lb.nodeConnected(remote.NodeID) 320 defer lb.nodeDisconnected(remote.NodeID) 321 322 log := log.G(stream.Context()).WithFields( 323 logrus.Fields{ 324 "method": "(*LogBroker).ListenSubscriptions", 325 "node": remote.NodeID, 326 }, 327 ) 328 subscriptions, subscriptionCh, subscriptionCancel := lb.watchSubscriptions(remote.NodeID) 329 defer subscriptionCancel() 330 331 log.Debug("node registered") 332 333 activeSubscriptions := make(map[string]*subscription) 334 335 // Start by sending down all active subscriptions. 336 for _, subscription := range subscriptions { 337 select { 338 case <-stream.Context().Done(): 339 return stream.Context().Err() 340 case <-pctx.Done(): 341 return nil 342 default: 343 } 344 345 if err := stream.Send(subscription.message); err != nil { 346 log.Error(err) 347 return err 348 } 349 activeSubscriptions[subscription.message.ID] = subscription 350 } 351 352 // Send down new subscriptions. 353 for { 354 select { 355 case v := <-subscriptionCh: 356 subscription := v.(*subscription) 357 358 if subscription.Closed() { 359 delete(activeSubscriptions, subscription.message.ID) 360 } else { 361 // Avoid sending down the same subscription multiple times 362 if _, ok := activeSubscriptions[subscription.message.ID]; ok { 363 continue 364 } 365 activeSubscriptions[subscription.message.ID] = subscription 366 } 367 if err := stream.Send(subscription.message); err != nil { 368 log.Error(err) 369 return err 370 } 371 case <-stream.Context().Done(): 372 return stream.Context().Err() 373 case <-pctx.Done(): 374 return nil 375 } 376 } 377 } 378 379 // PublishLogs publishes log messages for a given subscription 380 func (lb *LogBroker) PublishLogs(stream api.LogBroker_PublishLogsServer) (err error) { 381 remote, err := ca.RemoteNode(stream.Context()) 382 if err != nil { 383 return err 384 } 385 386 var currentSubscription *subscription 387 defer func() { 388 if currentSubscription != nil { 389 lb.markDone(currentSubscription, 
remote.NodeID, err) 390 } 391 }() 392 393 for { 394 logMsg, err := stream.Recv() 395 if err == io.EOF { 396 return stream.SendAndClose(&api.PublishLogsResponse{}) 397 } 398 if err != nil { 399 return err 400 } 401 402 if logMsg.SubscriptionID == "" { 403 return status.Errorf(codes.InvalidArgument, "missing subscription ID") 404 } 405 406 if currentSubscription == nil { 407 currentSubscription = lb.getSubscription(logMsg.SubscriptionID) 408 if currentSubscription == nil { 409 return status.Errorf(codes.NotFound, "unknown subscription ID") 410 } 411 } else { 412 if logMsg.SubscriptionID != currentSubscription.message.ID { 413 return status.Errorf(codes.InvalidArgument, "different subscription IDs in the same session") 414 } 415 } 416 417 // if we have a close message, close out the subscription 418 if logMsg.Close { 419 // Mark done and then set to nil so if we error after this point, 420 // we don't try to close again in the defer 421 lb.markDone(currentSubscription, remote.NodeID, err) 422 currentSubscription = nil 423 return nil 424 } 425 426 // Make sure logs are emitted using the right Node ID to avoid impersonation. 427 for _, msg := range logMsg.Messages { 428 if msg.Context.NodeID != remote.NodeID { 429 return status.Errorf(codes.PermissionDenied, "invalid NodeID: expected=%s;received=%s", remote.NodeID, msg.Context.NodeID) 430 } 431 } 432 433 lb.publish(logMsg) 434 } 435 }