github.com/ydb-platform/ydb-go-sdk/v3@v3.89.2/internal/coordination/conversation/conversation.go (about)

     1  // Package conversation contains coordination session internal code that helps implement a typical conversation-like
     2  // session protocol based on a bidirectional gRPC stream.
     3  package conversation
     4  
     5  import (
     6  	"context"
     7  	"sync"
     8  
     9  	"github.com/ydb-platform/ydb-go-genproto/protos/Ydb_Coordination"
    10  
    11  	"github.com/ydb-platform/ydb-go-sdk/v3/coordination"
    12  )
    13  
    14  // Controller provides a simple mechanism to work with a session protocol using a gRPC bidirectional stream. Creating a
    15  // bidirectional stream client may be quite tricky because messages are usually being processed independently and in
    16  // parallel. Moreover, the gRPC client library puts strict limitations on an implementation of the client, e.g. multiple
    17  // calls of the Send or Recv methods of the stub client must not be performed from different goroutines. Also, there are
    18  // no guarantees that a message successfully dispatched by the Send method will actually reach the server, nor does
    19  // the server enjoy such guarantees when delivering messages to the client. This usually ends up having two goroutines
    20  // (one for sending outgoing messages and another one for receiving incoming ones) and a queue where messages are
    21  // published to be eventually delivered to the server. The Controller simplifies working with this model providing
    22  // generic implementation of the message queue and related routines, handling retries of sent and pending operations
    23  // when the underlying gRPC stream needs to be reconnected.
    24  //
    25  // A typical coordination session looks like this (we are going to skip for now how the gRPC stream is created, handled
    26  // and kept alive, you can find the details on that in the Session, and focus on the protocol):
    27  //
    28  //  1. The client opens a new gRPC bidirectional stream.
    29  //  2. The client sends the SessionStart request and waits for the Failure or the SessionStarted reply.
    30  //  3. The server sends the SessionStarted response with the SessionID. At this point the session is started. If the
    31  //     client needs to reconnect the gRPC stream in the future, it will use that SessionID to attach to the previously
    32  //     created session in the SessionStart request.
    33  //  4. The client sends the AcquireSemaphore request to acquire a permit to the semaphore in this session with count 5.
    34  //  5. After a moment, the client decides to acquire another semaphore, it sends one more AcquireSemaphore request with
    35  //     count 4.
    36  //  6. The server replies with the AcquireSemaphoreResult response to the second AcquireSemaphore request to inform the
    37  //     client that the semaphore was successfully acquired.
    38  //  7. The server replies with the AcquireSemaphorePending response in order to inform the client that the semaphore
    39  //     from the first request has been acquired by another session.
    40  //  8. After a while, the server sends the AcquireSemaphoreResult response which implies that the semaphore from the
    41  //     first request is acquired in the current session.
    42  //  9. Then the client sends the ReleaseSemaphore request in order to release the acquired semaphore.
    43  //  10. The server replies with the ReleaseSemaphoreResult.
    44  //  11. The client terminates the session with the SessionStop request.
    45  //  12. The server lets the client know that the session is over by sending the SessionStopped response and closing
    46  //     the gRPC stream.
    47  //
    48  // We can notice five independent conversations here:
    49  //
    50  // 1. SessionStart, SessionStarted — points 2–3;
    51  // 2. AcquireSemaphore, AcquireSemaphorePending, AcquireSemaphoreResult — points 4, 7 and 8;
    52  // 3. AcquireSemaphore, AcquireSemaphoreResult — points 5, 6;
    53  // 4. ReleaseSemaphore, ReleaseSemaphoreResult — points 9–10;
    54  // 5. SessionStop, SessionStopped — points 11–12.
    55  //
    56  // If at any time the client encounters an unrecoverable error (for example, the underlying gRPC stream becomes
    57  // disconnected), the client will have to replay every conversation from their very beginning. Let us see why it is
    58  // actually the case. But before we go into that, let us look at the grpc.ClientStream SendMsg method:
    59  //
    60  // "…SendMsg does not wait until the message is received by the server. An untimely stream closure may result in lost
    61  // messages. To ensure delivery, users should ensure the RPC completed successfully using RecvMsg…"
    62  //
    63  // This is true for both, the client and the server. So when the server replies to the client it does not really know if
    64  // the response is received by the client. And vice versa, when the client sends a request to the server it has no way
    65  // to know if the request was delivered to the server unless the server sends another message to the client in reply.
    66  //
    67  // That is why conversation-like protocols typically use idempotent requests. Idempotent requests can be safely retried
    68  // as long as you keep the original order of the conversations. For example, if the gRPC stream is terminated before
    69  // the point 6, we cannot know if the server gets the requests. There may be one, two or none AcquireSemaphore requests
    70  // successfully delivered to and handled by the server. Moreover, the server may have already sent to the client the
    71  // corresponding responses. Nevertheless, if the requests are idempotent, we can safely retry them all in the newly
    72  // created gRPC stream and get the same results as we would have got if we had sent them without stream termination.
    73  // Note that if the stream is terminated before the point 8, we still need to replay the first AcquireSemaphore
    74  // conversation because we have no knowledge if the server replied with the AcquireSemaphoreResult in the terminated
    75  // stream.
    76  //
    77  // However, sometimes even idempotent requests cannot be safely retried. Consider the case wherein the point 5 from the
    78  // original list is:
    79  //
    80  //  5. After a moment, the client decides to modify the AcquireSemaphore request and sends another one with the same
    81  //     semaphore but with count 4.
    82  //
    83  // If then the gRPC stream terminates, there are two likely outcomes:
    84  //
    85  //  1. The server received the first request but the second one was not delivered. The current semaphore count is 5.
    86  //  2. The server received and processed both requests. The current semaphore permit count is 4.
    87  //
    88  // If we retry both requests, the observed result will be different depending on which outcome occurs:
    89  //
    90  //  1. The first retry will be a noop, the second one will decrease the semaphore count to 4. This is expected behavior.
    91  //  2. The first retry will try to increase the semaphore count to 5, it causes an error. This is unexpected.
    92  //
    93  // In order to avoid that we could postpone a conversation if there is another one for the same semaphore which has been
    94  // sent but has not been yet delivered to the server. For more details, see the WithConflictKey option.
type Controller struct {
	mutex sync.Mutex // guards access to the fields below

	queue     []*Conversation     // the message queue, the front is in the end of the slice
	conflicts map[string]struct{} // conflict keys held by sent conversations; a held key postpones sending others with it

	notifyChan chan struct{} // wake-up signal consumed by OnSend; created with capacity 1 in NewController
	closed     bool          // set by Close; PushBack and PushFront reject new conversations once it is true
}
   104  
// ResponseFilter defines the filter function called by the controller to know if a received message relates to the
// conversation. If a ResponseFilter returns true, the message is considered to be part of the conversation.
//
// The controller passes the request that was actually sent for the conversation (or the sent cancellation request
// when the filter is used as a cancel filter) together with the response received from the stream.
type ResponseFilter func(request *Ydb_Coordination.SessionRequest, response *Ydb_Coordination.SessionResponse) bool
   108  
// Conversation is a core concept of the conversation package. It is an ordered sequence of connected request/reply
// messages. For example, the acquiring semaphore conversation may look like this:
//
// 1. The client sends the AcquireSemaphore request.
// 2. The server replies with the AcquireSemaphorePending response.
// 3. After a while, the server replies with the AcquireSemaphoreResult response. The conversation is ended.
//
// There may be many different conversations carried out simultaneously in one session, so the exact order of all the
// messages in the session is unspecified. In the example above, there may be other messages (from different
// conversations) between points 1 and 2, or 2 and 3.
type Conversation struct {
	// Field roles, as used by the Controller:
	//
	//   - message builds the request; it is invoked every time the conversation is (re-)sent.
	//   - responseFilter detects the final response, acknowledgeFilter detects an intermediate one; both are optional.
	//   - cancelMessage builds the cancellation request from the request that was sent, cancelFilter detects the
	//     reply to that cancellation.
	//   - conflictKey, when non-empty, postpones other conversations with the same key until this one is answered.
	//   - requestSent and cancelRequestSent hold the requests handed to the sender; nil means "not sent yet".
	//   - response and responseErr hold the outcome returned by Controller.Await.
	//   - done is closed when the conversation succeeds, fails or is canceled.
	//   - idempotent conversations are replayed on re-attach, the other ones are failed on detach.
	//   - canceled is set once the caller gave up on a conversation that had already been sent.
	message           func() *Ydb_Coordination.SessionRequest
	responseFilter    ResponseFilter
	acknowledgeFilter ResponseFilter
	cancelMessage     func(req *Ydb_Coordination.SessionRequest) *Ydb_Coordination.SessionRequest
	cancelFilter      ResponseFilter
	conflictKey       string
	requestSent       *Ydb_Coordination.SessionRequest
	cancelRequestSent *Ydb_Coordination.SessionRequest
	response          *Ydb_Coordination.SessionResponse
	responseErr       error
	done              chan struct{}
	idempotent        bool
	canceled          bool
}
   134  
   135  // NewController creates a new conversation controller. You usually have one controller per one session.
   136  func NewController() *Controller {
   137  	return &Controller{
   138  		notifyChan: make(chan struct{}, 1),
   139  		conflicts:  make(map[string]struct{}),
   140  	}
   141  }
   142  
   143  // WithResponseFilter returns an Option that specifies the filter function that is used to detect the last response
   144  // message in the conversation. If such a message was found, the conversation is immediately ended and the response
   145  // becomes available by the Conversation.Await method.
   146  func WithResponseFilter(filter ResponseFilter) Option {
   147  	return func(c *Conversation) {
   148  		c.responseFilter = filter
   149  	}
   150  }
   151  
   152  // WithAcknowledgeFilter returns an Option that specifies the filter function that is used to detect an intermediate
   153  // response message in the conversation. If such a message was found, the conversation continues, but it lets the client
   154  // know that the server successfully consumed the first request of the conversation.
   155  func WithAcknowledgeFilter(filter ResponseFilter) Option {
   156  	return func(c *Conversation) {
   157  		c.acknowledgeFilter = filter
   158  	}
   159  }
   160  
   161  // WithCancelMessage returns an Option that specifies the message and filter functions that are used to cancel the
   162  // conversation which has been already sent. This message is sent in the background when the caller cancels the context
   163  // of the Controller.Await function. The response is never received by the caller and is only used to end the
   164  // conversation and remove it from the queue.
   165  func WithCancelMessage(
   166  	message func(req *Ydb_Coordination.SessionRequest) *Ydb_Coordination.SessionRequest,
   167  	filter ResponseFilter,
   168  ) Option {
   169  	return func(c *Conversation) {
   170  		c.cancelMessage = message
   171  		c.cancelFilter = filter
   172  	}
   173  }
   174  
   175  // WithConflictKey returns an Option that specifies the key that is used to find out messages that cannot be delivered
   176  // to the server until the server acknowledged the request. If there is a conversation with the same conflict key in the
   177  // queue that has not been yet delivered to the server, the controller will temporarily suspend other conversations with
   178  // the same conflict key until the first one is acknowledged.
   179  func WithConflictKey(key string) Option {
   180  	return func(c *Conversation) {
   181  		c.conflictKey = key
   182  	}
   183  }
   184  
   185  // WithIdempotence returns an Option that enabled retries for this conversation when the underlying gRPC stream
   186  // reconnects. The controller will replay the whole conversation from scratch unless it is not ended.
   187  func WithIdempotence(idempotent bool) Option {
   188  	return func(c *Conversation) {
   189  		c.idempotent = idempotent
   190  	}
   191  }
   192  
// Option configures how we create a new conversation. Options are applied by NewConversation in the order they are
// passed; nil options are skipped.
type Option func(c *Conversation)
   195  
   196  // NewConversation creates a new conversation that starts with a specified message.
   197  func NewConversation(request func() *Ydb_Coordination.SessionRequest, opts ...Option) *Conversation {
   198  	conversation := Conversation{message: request}
   199  	for _, o := range opts {
   200  		if o != nil {
   201  			o(&conversation)
   202  		}
   203  	}
   204  
   205  	return &conversation
   206  }
   207  
// notify signals OnSend that the queue may contain something to send. The send is non-blocking: if a signal is
// already pending in notifyChan, the new one is dropped because the pending one will trigger the same queue scan.
func (c *Controller) notify() {
	select {
	case c.notifyChan <- struct{}{}:
	default:
	}
}
   214  
   215  // PushBack puts a new conversation at the end of the queue.
   216  func (c *Controller) PushBack(conversation *Conversation) error {
   217  	c.mutex.Lock()
   218  	defer c.mutex.Unlock()
   219  
   220  	if c.closed {
   221  		return coordination.ErrSessionClosed
   222  	}
   223  
   224  	conversation.enqueue()
   225  	c.queue = append([]*Conversation{conversation}, c.queue...)
   226  	c.notify()
   227  
   228  	return nil
   229  }
   230  
   231  // PushFront puts a new conversation at the beginning of the queue.
   232  func (c *Controller) PushFront(conversation *Conversation) error {
   233  	c.mutex.Lock()
   234  	defer c.mutex.Unlock()
   235  
   236  	if c.closed {
   237  		return coordination.ErrSessionClosed
   238  	}
   239  
   240  	conversation.enqueue()
   241  	c.queue = append(c.queue, conversation)
   242  	c.notify()
   243  
   244  	return nil
   245  }
   246  
   247  func (c *Controller) sendFront() *Ydb_Coordination.SessionRequest {
   248  	c.mutex.Lock()
   249  	defer c.mutex.Unlock()
   250  
   251  	// We are notified but there are no conversations in the queue. Return nil to make the loop in OnSend wait.
   252  	if len(c.queue) == 0 {
   253  		return nil
   254  	}
   255  
   256  	for i := len(c.queue) - 1; i >= 0; i-- {
   257  		req := c.queue[i]
   258  
   259  		if req.canceled && req.cancelRequestSent == nil {
   260  			req.sendCancel()
   261  			c.notify()
   262  
   263  			return req.cancelRequestSent
   264  		}
   265  
   266  		if req.requestSent != nil {
   267  			continue
   268  		}
   269  
   270  		if _, ok := c.conflicts[req.conflictKey]; ok {
   271  			continue
   272  		}
   273  
   274  		req.send()
   275  
   276  		if req.conflictKey != "" {
   277  			c.conflicts[req.conflictKey] = struct{}{}
   278  		}
   279  		if req.responseFilter == nil && req.acknowledgeFilter == nil {
   280  			c.queue = append(c.queue[:i], c.queue[i+1:]...)
   281  		}
   282  		c.notify()
   283  
   284  		return req.requestSent
   285  	}
   286  
   287  	return nil
   288  }
   289  
   290  // OnSend blocks until a new conversation request becomes available at the end of the queue. You should call this method
   291  // in the goroutine that handles gRPC stream Send method. ctx can be used to cancel the call.
   292  func (c *Controller) OnSend(ctx context.Context) (*Ydb_Coordination.SessionRequest, error) {
   293  	var req *Ydb_Coordination.SessionRequest
   294  	for {
   295  		select {
   296  		case <-ctx.Done():
   297  		case <-c.notifyChan:
   298  			req = c.sendFront()
   299  		}
   300  
   301  		// Process ctx.Done() first to make sure we cancel the call if conversations are too chatty.
   302  		if ctx.Err() != nil {
   303  			return nil, ctx.Err()
   304  		}
   305  
   306  		// We were notified but there were no messages in the queue. Just wait for more messages become available.
   307  		if req != nil {
   308  			break
   309  		}
   310  	}
   311  
   312  	return req, nil
   313  }
   314  
   315  // OnRecv consumes a new conversation response and process with the corresponding conversation if any exists for it. The
   316  // returned value indicates if any conversation considers the incoming message part of it or the controller is closed.
   317  // You should call this method in the goroutine that handles gRPC stream Recv method.
   318  func (c *Controller) OnRecv(resp *Ydb_Coordination.SessionResponse) bool {
   319  	c.mutex.Lock()
   320  	defer c.mutex.Unlock()
   321  
   322  	notify := false //nolint:ifshort
   323  	handled := false
   324  	for i := len(c.queue) - 1; i >= 0; i-- {
   325  		req := c.queue[i]
   326  		if req.requestSent == nil {
   327  			continue
   328  		}
   329  
   330  		switch {
   331  		case req.responseFilter != nil && req.responseFilter(req.requestSent, resp):
   332  			if !req.canceled {
   333  				req.succeed(resp)
   334  
   335  				if req.conflictKey != "" {
   336  					delete(c.conflicts, req.conflictKey)
   337  					notify = true
   338  				}
   339  
   340  				c.queue = append(c.queue[:i], c.queue[i+1:]...)
   341  			}
   342  
   343  			handled = true
   344  		case req.acknowledgeFilter != nil && req.acknowledgeFilter(req.requestSent, resp):
   345  			if !req.canceled {
   346  				if req.conflictKey != "" {
   347  					delete(c.conflicts, req.conflictKey)
   348  					notify = true
   349  				}
   350  			}
   351  
   352  			handled = true
   353  		case req.cancelRequestSent != nil && req.cancelFilter(req.cancelRequestSent, resp):
   354  			if req.conflictKey != "" {
   355  				delete(c.conflicts, req.conflictKey)
   356  				notify = true
   357  			}
   358  			c.queue = append(c.queue[:i], c.queue[i+1:]...)
   359  			handled = true
   360  		}
   361  	}
   362  
   363  	if notify {
   364  		c.notify()
   365  	}
   366  
   367  	return c.closed || handled
   368  }
   369  
   370  // OnDetach fails all non-idempotent conversations if there are any in the queue. You should call this method when the
   371  // underlying gRPC stream of the session is closed.
   372  func (c *Controller) OnDetach() {
   373  	c.mutex.Lock()
   374  	defer c.mutex.Unlock()
   375  
   376  	for i := len(c.queue) - 1; i >= 0; i-- {
   377  		req := c.queue[i]
   378  		if !req.idempotent {
   379  			req.fail(coordination.ErrOperationStatusUnknown)
   380  
   381  			if req.requestSent != nil && req.conflictKey != "" {
   382  				delete(c.conflicts, req.conflictKey)
   383  			}
   384  
   385  			c.queue = append(c.queue[:i], c.queue[i+1:]...)
   386  		}
   387  	}
   388  }
   389  
   390  // Close fails all conversations if there are any in the queue. It also does not allow pushing more conversations to the
   391  // queue anymore. You may optionally specify the final conversation if needed.
   392  func (c *Controller) Close(byeConversation *Conversation) {
   393  	c.mutex.Lock()
   394  	defer c.mutex.Unlock()
   395  
   396  	c.closed = true
   397  
   398  	for i := len(c.queue) - 1; i >= 0; i-- {
   399  		req := c.queue[i]
   400  		if !req.canceled {
   401  			req.fail(coordination.ErrSessionClosed)
   402  		}
   403  	}
   404  
   405  	if byeConversation != nil {
   406  		byeConversation.enqueue()
   407  		c.queue = []*Conversation{byeConversation}
   408  	}
   409  
   410  	c.notify()
   411  }
   412  
   413  // OnAttach retries all idempotent conversations if there are any in the queue. You should call this method when the
   414  // underlying gRPC stream of the session is connected.
   415  func (c *Controller) OnAttach() {
   416  	c.mutex.Lock()
   417  	defer c.mutex.Unlock()
   418  
   419  	notify := false
   420  	for i := len(c.queue) - 1; i >= 0; i-- {
   421  		req := c.queue[i]
   422  		if req.idempotent && req.requestSent != nil {
   423  			if req.conflictKey != "" {
   424  				delete(c.conflicts, req.conflictKey)
   425  			}
   426  
   427  			// If the request has been canceled, re-send the cancellation message, otherwise re-send the original one.
   428  			if req.canceled {
   429  				req.cancelRequestSent = nil
   430  			} else {
   431  				req.requestSent = nil
   432  			}
   433  			notify = true
   434  		}
   435  	}
   436  
   437  	if notify {
   438  		c.notify()
   439  	}
   440  }
   441  
   442  // Cancel the conversation if it has been sent and there is no response ready. This returns false if the response is
   443  // ready and the caller may safely return it instead of canceling the conversation.
   444  func (c *Controller) cancel(conversation *Conversation) bool {
   445  	if conversation.cancelMessage == nil {
   446  		return true
   447  	}
   448  
   449  	c.mutex.Lock()
   450  	defer c.mutex.Unlock()
   451  
   452  	// The context is canceled but the response is ready, return it anyway.
   453  	if conversation.response != nil || conversation.responseErr != nil {
   454  		return false
   455  	}
   456  
   457  	if conversation.requestSent != nil {
   458  		conversation.cancel()
   459  		c.notify()
   460  	} else {
   461  		// If the response has not been sent, just remove it from the queue.
   462  		for i := len(c.queue) - 1; i >= 0; i-- {
   463  			req := c.queue[i]
   464  			if req == conversation {
   465  				c.queue = append(c.queue[:i], c.queue[i+1:]...)
   466  
   467  				break
   468  			}
   469  		}
   470  	}
   471  
   472  	return true
   473  }
   474  
   475  // Await waits until the conversation ends. ctx can be used to cancel the call.
   476  func (c *Controller) Await(
   477  	ctx context.Context,
   478  	conversation *Conversation,
   479  ) (*Ydb_Coordination.SessionResponse, error) {
   480  	select {
   481  	case <-conversation.done:
   482  	case <-ctx.Done():
   483  	}
   484  
   485  	if ctx.Err() != nil && c.cancel(conversation) {
   486  		return nil, ctx.Err()
   487  	}
   488  
   489  	if conversation.responseErr != nil {
   490  		return nil, conversation.responseErr
   491  	}
   492  
   493  	return conversation.response, nil
   494  }
   495  
// enqueue prepares the conversation for being put into a controller queue: it is marked as not sent and gets a fresh
// done channel for the waiter.
func (c *Conversation) enqueue() {
	c.requestSent = nil
	c.done = make(chan struct{})
}
   500  
// send builds the request with the message function and remembers it as the request that has been sent.
func (c *Conversation) send() {
	c.requestSent = c.message()
}
   504  
// sendCancel builds the cancellation request from the request that has been sent and remembers it as the sent
// cancellation request. cancelMessage must be set, otherwise the call panics.
func (c *Conversation) sendCancel() {
	c.cancelRequestSent = c.cancelMessage(c.requestSent)
}
   508  
// succeed ends the conversation with the given response and wakes up the waiter by closing the done channel. It must
// be called at most once per enqueue because closing a closed channel panics.
func (c *Conversation) succeed(response *Ydb_Coordination.SessionResponse) {
	c.response = response
	close(c.done)
}
   513  
// fail ends the conversation with the given error and wakes up the waiter by closing the done channel. It must not be
// called for a conversation whose done channel is already closed because closing a closed channel panics.
func (c *Conversation) fail(err error) {
	c.responseErr = err
	close(c.done)
}
   518  
// cancel marks the conversation as canceled and closes the done channel. It must be called at most once per enqueue
// because closing a closed channel panics.
func (c *Conversation) cancel() {
	c.canceled = true
	close(c.done)
}