github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/logbroker/broker.go (about)

     1  package logbroker
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"sync"
     9  
    10  	"github.com/docker/go-events"
    11  	"github.com/docker/swarmkit/api"
    12  	"github.com/docker/swarmkit/ca"
    13  	"github.com/docker/swarmkit/identity"
    14  	"github.com/docker/swarmkit/log"
    15  	"github.com/docker/swarmkit/manager/state/store"
    16  	"github.com/docker/swarmkit/watch"
    17  	"github.com/sirupsen/logrus"
    18  	"google.golang.org/grpc/codes"
    19  	"google.golang.org/grpc/status"
    20  )
    21  
    22  var (
    23  	errAlreadyRunning = errors.New("broker is already running")
    24  	errNotRunning     = errors.New("broker is not running")
    25  )
    26  
    27  type logMessage struct {
    28  	*api.PublishLogsMessage
    29  	completed bool
    30  	err       error
    31  }
    32  
    33  // LogBroker coordinates log subscriptions to services and tasks. Clients can
    34  // publish and subscribe to logs channels.
    35  //
    36  // Log subscriptions are pushed to the work nodes by creating log subscription
    37  // tasks. As such, the LogBroker also acts as an orchestrator of these tasks.
    38  type LogBroker struct {
    39  	mu                sync.RWMutex
    40  	logQueue          *watch.Queue
    41  	subscriptionQueue *watch.Queue
    42  
    43  	registeredSubscriptions map[string]*subscription
    44  	subscriptionsByNode     map[string]map[*subscription]struct{}
    45  
    46  	pctx      context.Context
    47  	cancelAll context.CancelFunc
    48  
    49  	store *store.MemoryStore
    50  }
    51  
    52  // New initializes and returns a new LogBroker
    53  func New(store *store.MemoryStore) *LogBroker {
    54  	return &LogBroker{
    55  		store: store,
    56  	}
    57  }
    58  
    59  // Start starts the log broker
    60  func (lb *LogBroker) Start(ctx context.Context) error {
    61  	lb.mu.Lock()
    62  	defer lb.mu.Unlock()
    63  
    64  	if lb.cancelAll != nil {
    65  		return errAlreadyRunning
    66  	}
    67  
    68  	lb.pctx, lb.cancelAll = context.WithCancel(ctx)
    69  	lb.logQueue = watch.NewQueue()
    70  	lb.subscriptionQueue = watch.NewQueue()
    71  	lb.registeredSubscriptions = make(map[string]*subscription)
    72  	lb.subscriptionsByNode = make(map[string]map[*subscription]struct{})
    73  	return nil
    74  }
    75  
    76  // Stop stops the log broker
    77  func (lb *LogBroker) Stop() error {
    78  	lb.mu.Lock()
    79  	defer lb.mu.Unlock()
    80  
    81  	if lb.cancelAll == nil {
    82  		return errNotRunning
    83  	}
    84  	lb.cancelAll()
    85  	lb.cancelAll = nil
    86  
    87  	lb.logQueue.Close()
    88  	lb.subscriptionQueue.Close()
    89  
    90  	return nil
    91  }
    92  
    93  func validateSelector(selector *api.LogSelector) error {
    94  	if selector == nil {
    95  		return status.Errorf(codes.InvalidArgument, "log selector must be provided")
    96  	}
    97  
    98  	if len(selector.ServiceIDs) == 0 && len(selector.TaskIDs) == 0 && len(selector.NodeIDs) == 0 {
    99  		return status.Errorf(codes.InvalidArgument, "log selector must not be empty")
   100  	}
   101  
   102  	return nil
   103  }
   104  
   105  func (lb *LogBroker) newSubscription(selector *api.LogSelector, options *api.LogSubscriptionOptions) *subscription {
   106  	lb.mu.RLock()
   107  	defer lb.mu.RUnlock()
   108  
   109  	subscription := newSubscription(lb.store, &api.SubscriptionMessage{
   110  		ID:       identity.NewID(),
   111  		Selector: selector,
   112  		Options:  options,
   113  	}, lb.subscriptionQueue)
   114  
   115  	return subscription
   116  }
   117  
   118  func (lb *LogBroker) getSubscription(id string) *subscription {
   119  	lb.mu.RLock()
   120  	defer lb.mu.RUnlock()
   121  
   122  	subscription, ok := lb.registeredSubscriptions[id]
   123  	if !ok {
   124  		return nil
   125  	}
   126  	return subscription
   127  }
   128  
   129  func (lb *LogBroker) registerSubscription(subscription *subscription) {
   130  	lb.mu.Lock()
   131  	defer lb.mu.Unlock()
   132  
   133  	lb.registeredSubscriptions[subscription.message.ID] = subscription
   134  	lb.subscriptionQueue.Publish(subscription)
   135  
   136  	for _, node := range subscription.Nodes() {
   137  		if _, ok := lb.subscriptionsByNode[node]; !ok {
   138  			// Mark nodes that won't receive the message as done.
   139  			subscription.Done(node, fmt.Errorf("node %s is not available", node))
   140  		} else {
   141  			// otherwise, add the subscription to the node's subscriptions list
   142  			lb.subscriptionsByNode[node][subscription] = struct{}{}
   143  		}
   144  	}
   145  }
   146  
   147  func (lb *LogBroker) unregisterSubscription(subscription *subscription) {
   148  	lb.mu.Lock()
   149  	defer lb.mu.Unlock()
   150  
   151  	delete(lb.registeredSubscriptions, subscription.message.ID)
   152  
   153  	// remove the subscription from all of the nodes
   154  	for _, node := range subscription.Nodes() {
   155  		// but only if a node exists
   156  		if _, ok := lb.subscriptionsByNode[node]; ok {
   157  			delete(lb.subscriptionsByNode[node], subscription)
   158  		}
   159  	}
   160  
   161  	subscription.Close()
   162  	lb.subscriptionQueue.Publish(subscription)
   163  }
   164  
   165  // watchSubscriptions grabs all current subscriptions and notifies of any
   166  // subscription change for this node.
   167  //
   168  // Subscriptions may fire multiple times and the caller has to protect against
   169  // dupes.
   170  func (lb *LogBroker) watchSubscriptions(nodeID string) ([]*subscription, chan events.Event, func()) {
   171  	lb.mu.RLock()
   172  	defer lb.mu.RUnlock()
   173  
   174  	// Watch for subscription changes for this node.
   175  	ch, cancel := lb.subscriptionQueue.CallbackWatch(events.MatcherFunc(func(event events.Event) bool {
   176  		s := event.(*subscription)
   177  		return s.Contains(nodeID)
   178  	}))
   179  
   180  	// Grab current subscriptions.
   181  	var subscriptions []*subscription
   182  	for _, s := range lb.registeredSubscriptions {
   183  		if s.Contains(nodeID) {
   184  			subscriptions = append(subscriptions, s)
   185  		}
   186  	}
   187  
   188  	return subscriptions, ch, cancel
   189  }
   190  
   191  func (lb *LogBroker) subscribe(id string) (chan events.Event, func()) {
   192  	lb.mu.RLock()
   193  	defer lb.mu.RUnlock()
   194  
   195  	return lb.logQueue.CallbackWatch(events.MatcherFunc(func(event events.Event) bool {
   196  		publish := event.(*logMessage)
   197  		return publish.SubscriptionID == id
   198  	}))
   199  }
   200  
   201  func (lb *LogBroker) publish(log *api.PublishLogsMessage) {
   202  	lb.mu.RLock()
   203  	defer lb.mu.RUnlock()
   204  
   205  	lb.logQueue.Publish(&logMessage{PublishLogsMessage: log})
   206  }
   207  
   208  // markDone wraps (*Subscription).Done() so that the removal of the sub from
   209  // the node's subscription list is possible
   210  func (lb *LogBroker) markDone(sub *subscription, nodeID string, err error) {
   211  	lb.mu.Lock()
   212  	defer lb.mu.Unlock()
   213  
   214  	// remove the subscription from the node's subscription list, if it exists
   215  	if _, ok := lb.subscriptionsByNode[nodeID]; ok {
   216  		delete(lb.subscriptionsByNode[nodeID], sub)
   217  	}
   218  
   219  	// mark the sub as done
   220  	sub.Done(nodeID, err)
   221  }
   222  
   223  // SubscribeLogs creates a log subscription and streams back logs
   224  func (lb *LogBroker) SubscribeLogs(request *api.SubscribeLogsRequest, stream api.Logs_SubscribeLogsServer) error {
   225  	ctx := stream.Context()
   226  
   227  	if err := validateSelector(request.Selector); err != nil {
   228  		return err
   229  	}
   230  
   231  	lb.mu.Lock()
   232  	pctx := lb.pctx
   233  	lb.mu.Unlock()
   234  	if pctx == nil {
   235  		return errNotRunning
   236  	}
   237  
   238  	subscription := lb.newSubscription(request.Selector, request.Options)
   239  	subscription.Run(pctx)
   240  	defer subscription.Stop()
   241  
   242  	log := log.G(ctx).WithFields(
   243  		logrus.Fields{
   244  			"method":          "(*LogBroker).SubscribeLogs",
   245  			"subscription.id": subscription.message.ID,
   246  		},
   247  	)
   248  	log.Debug("subscribed")
   249  
   250  	publishCh, publishCancel := lb.subscribe(subscription.message.ID)
   251  	defer publishCancel()
   252  
   253  	lb.registerSubscription(subscription)
   254  	defer lb.unregisterSubscription(subscription)
   255  
   256  	completed := subscription.Wait(ctx)
   257  	for {
   258  		select {
   259  		case <-ctx.Done():
   260  			return ctx.Err()
   261  		case <-pctx.Done():
   262  			return pctx.Err()
   263  		case event := <-publishCh:
   264  			publish := event.(*logMessage)
   265  			if publish.completed {
   266  				return publish.err
   267  			}
   268  			if err := stream.Send(&api.SubscribeLogsMessage{
   269  				Messages: publish.Messages,
   270  			}); err != nil {
   271  				return err
   272  			}
   273  		case <-completed:
   274  			completed = nil
   275  			lb.logQueue.Publish(&logMessage{
   276  				PublishLogsMessage: &api.PublishLogsMessage{
   277  					SubscriptionID: subscription.message.ID,
   278  				},
   279  				completed: true,
   280  				err:       subscription.Err(),
   281  			})
   282  		}
   283  	}
   284  }
   285  
   286  func (lb *LogBroker) nodeConnected(nodeID string) {
   287  	lb.mu.Lock()
   288  	defer lb.mu.Unlock()
   289  
   290  	if _, ok := lb.subscriptionsByNode[nodeID]; !ok {
   291  		lb.subscriptionsByNode[nodeID] = make(map[*subscription]struct{})
   292  	}
   293  }
   294  
   295  func (lb *LogBroker) nodeDisconnected(nodeID string) {
   296  	lb.mu.Lock()
   297  	defer lb.mu.Unlock()
   298  
   299  	for sub := range lb.subscriptionsByNode[nodeID] {
   300  		sub.Done(nodeID, fmt.Errorf("node %s disconnected unexpectedly", nodeID))
   301  	}
   302  	delete(lb.subscriptionsByNode, nodeID)
   303  }
   304  
   305  // ListenSubscriptions returns a stream of matching subscriptions for the current node
   306  func (lb *LogBroker) ListenSubscriptions(request *api.ListenSubscriptionsRequest, stream api.LogBroker_ListenSubscriptionsServer) error {
   307  	remote, err := ca.RemoteNode(stream.Context())
   308  	if err != nil {
   309  		return err
   310  	}
   311  
   312  	lb.mu.Lock()
   313  	pctx := lb.pctx
   314  	lb.mu.Unlock()
   315  	if pctx == nil {
   316  		return errNotRunning
   317  	}
   318  
   319  	lb.nodeConnected(remote.NodeID)
   320  	defer lb.nodeDisconnected(remote.NodeID)
   321  
   322  	log := log.G(stream.Context()).WithFields(
   323  		logrus.Fields{
   324  			"method": "(*LogBroker).ListenSubscriptions",
   325  			"node":   remote.NodeID,
   326  		},
   327  	)
   328  	subscriptions, subscriptionCh, subscriptionCancel := lb.watchSubscriptions(remote.NodeID)
   329  	defer subscriptionCancel()
   330  
   331  	log.Debug("node registered")
   332  
   333  	activeSubscriptions := make(map[string]*subscription)
   334  
   335  	// Start by sending down all active subscriptions.
   336  	for _, subscription := range subscriptions {
   337  		select {
   338  		case <-stream.Context().Done():
   339  			return stream.Context().Err()
   340  		case <-pctx.Done():
   341  			return nil
   342  		default:
   343  		}
   344  
   345  		if err := stream.Send(subscription.message); err != nil {
   346  			log.Error(err)
   347  			return err
   348  		}
   349  		activeSubscriptions[subscription.message.ID] = subscription
   350  	}
   351  
   352  	// Send down new subscriptions.
   353  	for {
   354  		select {
   355  		case v := <-subscriptionCh:
   356  			subscription := v.(*subscription)
   357  
   358  			if subscription.Closed() {
   359  				delete(activeSubscriptions, subscription.message.ID)
   360  			} else {
   361  				// Avoid sending down the same subscription multiple times
   362  				if _, ok := activeSubscriptions[subscription.message.ID]; ok {
   363  					continue
   364  				}
   365  				activeSubscriptions[subscription.message.ID] = subscription
   366  			}
   367  			if err := stream.Send(subscription.message); err != nil {
   368  				log.Error(err)
   369  				return err
   370  			}
   371  		case <-stream.Context().Done():
   372  			return stream.Context().Err()
   373  		case <-pctx.Done():
   374  			return nil
   375  		}
   376  	}
   377  }
   378  
   379  // PublishLogs publishes log messages for a given subscription
   380  func (lb *LogBroker) PublishLogs(stream api.LogBroker_PublishLogsServer) (err error) {
   381  	remote, err := ca.RemoteNode(stream.Context())
   382  	if err != nil {
   383  		return err
   384  	}
   385  
   386  	var currentSubscription *subscription
   387  	defer func() {
   388  		if currentSubscription != nil {
   389  			lb.markDone(currentSubscription, remote.NodeID, err)
   390  		}
   391  	}()
   392  
   393  	for {
   394  		logMsg, err := stream.Recv()
   395  		if err == io.EOF {
   396  			return stream.SendAndClose(&api.PublishLogsResponse{})
   397  		}
   398  		if err != nil {
   399  			return err
   400  		}
   401  
   402  		if logMsg.SubscriptionID == "" {
   403  			return status.Errorf(codes.InvalidArgument, "missing subscription ID")
   404  		}
   405  
   406  		if currentSubscription == nil {
   407  			currentSubscription = lb.getSubscription(logMsg.SubscriptionID)
   408  			if currentSubscription == nil {
   409  				return status.Errorf(codes.NotFound, "unknown subscription ID")
   410  			}
   411  		} else {
   412  			if logMsg.SubscriptionID != currentSubscription.message.ID {
   413  				return status.Errorf(codes.InvalidArgument, "different subscription IDs in the same session")
   414  			}
   415  		}
   416  
   417  		// if we have a close message, close out the subscription
   418  		if logMsg.Close {
   419  			// Mark done and then set to nil so if we error after this point,
   420  			// we don't try to close again in the defer
   421  			lb.markDone(currentSubscription, remote.NodeID, err)
   422  			currentSubscription = nil
   423  			return nil
   424  		}
   425  
   426  		// Make sure logs are emitted using the right Node ID to avoid impersonation.
   427  		for _, msg := range logMsg.Messages {
   428  			if msg.Context.NodeID != remote.NodeID {
   429  				return status.Errorf(codes.PermissionDenied, "invalid NodeID: expected=%s;received=%s", remote.NodeID, msg.Context.NodeID)
   430  			}
   431  		}
   432  
   433  		lb.publish(logMsg)
   434  	}
   435  }