github.com/google/fleetspeak@v0.1.15-0.20240426164851-4f31f62c1aea/fleetspeak/src/client/channel/relentless.go (about)

     1  // Copyright 2017 Google Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     https://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package channel
    16  
    17  import (
    18  	"math/rand"
    19  	"sync"
    20  	"time"
    21  
    22  	log "github.com/golang/glog"
    23  
    24  	anypb "google.golang.org/protobuf/types/known/anypb"
    25  
    26  	"github.com/google/fleetspeak/fleetspeak/src/client/service"
    27  
    28  	fspb "github.com/google/fleetspeak/fleetspeak/src/common/proto/fleetspeak"
    29  )
    30  
// Builder creates Channels on behalf of a RelentlessChannel. Each call must
// return a new Channel connected to the target process, along with a cancel
// function that should shut down the Channel and release any associated
// resources.
//
// A Builder may return (nil, nil) if the system is shutting down and the
// RelentlessChannel using the builder should stop. Otherwise, it should only
// return once it has a Channel.
type Builder func() (c *Channel, cancel func())
    39  
// A RelentlessChannel is like a Channel, but relentless. Essentially it wraps a
// Channel, which it recreates on error. Furthermore, it maintains a collection
// of messages which have not been acknowledged, and resends them after channel
// recreation. It also provides a mechanism for messages sent through it to be
// acknowledged by the other side of the channel.
type RelentlessChannel struct {
	In  <-chan *fspb.Message      // Messages received from the other process.
	Out chan<- service.AckMessage // Messages to send to the other process. Close to shutdown.

	i chan<- *fspb.Message      // other end of In
	o <-chan service.AckMessage // other end of Out

	ch  *Channel // current Channel
	fin func()   // current cleanup function for ch

	// Messages accepted from Out which have not been acknowledged yet, keyed by
	// string(SourceMessageId). They are re-sent whenever ch is recreated.
	pending map[string]service.AckMessage
	// Creates a replacement for ch; a nil Channel from it stops the
	// RelentlessChannel.
	builder Builder
}
    58  
    59  // NewRelentlessChannel returns a RelentlessChannel which wraps Builder, and
    60  // uses it to create channels.
    61  func NewRelentlessChannel(b Builder) *RelentlessChannel {
    62  	i := make(chan *fspb.Message, 5)
    63  	o := make(chan service.AckMessage)
    64  
    65  	ret := RelentlessChannel{
    66  		In:  i,
    67  		Out: o,
    68  
    69  		i: i,
    70  		o: o,
    71  
    72  		pending: make(map[string]service.AckMessage),
    73  		builder: b,
    74  	}
    75  	go ret.processingLoop()
    76  	return &ret
    77  }
    78  
    79  func (c *RelentlessChannel) processingLoop() {
    80  	defer close(c.i)
    81  	defer c.cleanupChan()
    82  
    83  NewChan:
    84  	for {
    85  		c.cleanupChan()
    86  		c.ch, c.fin = c.builder()
    87  		if c.ch == nil {
    88  			return
    89  		}
    90  
    91  		// We now have a new channel, start by re-sending everything we still have
    92  		// pending.
    93  		for _, m := range c.pending {
    94  			if c.sendOne(m.M) {
    95  				continue NewChan
    96  			}
    97  		}
    98  
    99  		// Now get a message and pass it along.
   100  		for {
   101  			m, newChan, shutdown := c.receiveOne()
   102  			if shutdown {
   103  				return
   104  			}
   105  			if newChan {
   106  				continue NewChan
   107  			}
   108  			if len(m.M.SourceMessageId) == 0 {
   109  				m.M.SourceMessageId = make([]byte, 16)
   110  				rand.Read(m.M.SourceMessageId)
   111  			}
   112  			c.pending[string(m.M.SourceMessageId)] = m
   113  			if c.sendOne(m.M) {
   114  				continue NewChan
   115  			}
   116  		}
   117  	}
   118  }
   119  
   120  func (c *RelentlessChannel) cleanupChan() {
   121  	if c.fin != nil {
   122  		close(c.ch.Out)
   123  		c.fin()
   124  		c.fin = nil
   125  		c.ch = nil
   126  	}
   127  }
   128  
   129  func (c *RelentlessChannel) receiveOne() (am service.AckMessage, newChan, shutdown bool) {
   130  	for {
   131  		select {
   132  		case e := <-c.ch.Err:
   133  			log.Errorf("Channel failed with error: %v", e)
   134  			newChan = true
   135  			return
   136  		case m2, ok := <-c.ch.In:
   137  			if !ok {
   138  				newChan = true
   139  				return
   140  			}
   141  			if !c.processAck(m2) {
   142  				c.i <- m2
   143  			}
   144  		case m, ok := <-c.o:
   145  			if !ok {
   146  				shutdown = true
   147  				return
   148  			}
   149  			am = m
   150  			return
   151  		}
   152  	}
   153  }
   154  
   155  func (c *RelentlessChannel) sendOne(m *fspb.Message) (newChan bool) {
   156  	for {
   157  		select {
   158  		case e := <-c.ch.Err:
   159  			log.Errorf("Channel failed with error: %v", e)
   160  			return true
   161  		case m2, ok := <-c.ch.In:
   162  			if !ok {
   163  				return true
   164  			}
   165  			if !c.processAck(m2) {
   166  				c.i <- m2
   167  			}
   168  		case c.ch.Out <- m:
   169  			return false
   170  		}
   171  	}
   172  }
   173  
   174  func (c *RelentlessChannel) processAck(m *fspb.Message) bool {
   175  	if m.MessageType != "LocalAck" || m.Source.ServiceName != "client" {
   176  		return false
   177  	}
   178  	d := &fspb.MessageAckData{}
   179  	if err := m.Data.UnmarshalTo(d); err != nil {
   180  		log.Errorf("Error parsing m.Data: %v", err)
   181  		return true
   182  	}
   183  	for _, id := range d.MessageIds {
   184  		s := string(id)
   185  		m, ok := c.pending[s]
   186  		if !ok {
   187  			// This should be uncommon, but could happen if we restart while the FS
   188  			// server has acknowledgments for our predecessor.
   189  			log.Warningf("Received unexpected id: %x", id)
   190  			continue
   191  		}
   192  		if m.Ack != nil {
   193  			m.Ack()
   194  		}
   195  		delete(c.pending, s)
   196  	}
   197  	return true
   198  }
   199  
// RelentlessAcknowledger partially wraps a Channel. It assumes that the other end
// of the Channel is attached to a RelentlessChannel and implements the
// acknowledgement protocol which RelentlessChannel expects.
//
// Once a Channel is so wrapped, the caller should read from
// RelentlessAcknowledger.In instead of Channel.In. The resulting AckMessages should
// be acknowledged in order to inform the attached RelentlessChannel that the
// message was successfully handled.
type RelentlessAcknowledger struct {
	In <-chan service.AckMessage // Wraps Channel.In.

	c     *Channel
	acks  chan []byte // Communicates that a message has been ack'd back to the struct's main loop.
	toAck [][]byte    // An accumulation of acks to send through c.

	// Closed by Stop to tell the main loop to exit.
	stop chan struct{}
	// Tracks the main loop goroutine, so that Stop can wait for it to exit.
	done sync.WaitGroup
}
   218  
   219  // NewRelentlessAcknowledger creates a RelentlessAcknowledger wrapping c, buffered to
   220  // smoothly handle 'size' simultaneously unacknowledged messages.
   221  func NewRelentlessAcknowledger(c *Channel, size int) *RelentlessAcknowledger {
   222  	in := make(chan service.AckMessage)
   223  	r := &RelentlessAcknowledger{
   224  		In:   in,
   225  		c:    c,
   226  		acks: make(chan []byte, size),
   227  		stop: make(chan struct{}),
   228  	}
   229  	r.done.Add(1)
   230  	go r.acknowledgeLoop(in)
   231  
   232  	return r
   233  }
   234  
   235  // flush clears a.toAck by sending an acknowledgement message through the
   236  // Channel.
   237  func (a *RelentlessAcknowledger) flush() {
   238  	if len(a.toAck) > 0 {
   239  		d := &fspb.MessageAckData{}
   240  		d.MessageIds = a.toAck
   241  		data, err := anypb.New(d)
   242  		if err != nil {
   243  			// Should never happen.
   244  			log.Fatalf("Unable to marshal MessageAckData: %v", err)
   245  		}
   246  
   247  		m := &fspb.Message{
   248  			Source:      &fspb.Address{ServiceName: "client"},
   249  			MessageType: "LocalAck",
   250  			Data:        data,
   251  		}
   252  		select {
   253  		case a.c.Out <- m:
   254  		case <-a.stop:
   255  			// shutting down before we were able to flush
   256  			return
   257  		}
   258  		a.toAck = nil
   259  	}
   260  }
   261  
   262  // acknowledgeLoop is an event loop. It passes messages from a.c.In to a.In, and
   263  // sends acknowledgements when necessary. Runs until a.Stop() is called or
   264  // a.c.In is closed.
   265  func (a *RelentlessAcknowledger) acknowledgeLoop(out chan<- service.AckMessage) {
   266  	defer a.done.Done()
   267  	defer close(out)
   268  	defer a.flush()
   269  
   270  	// Ticker used to flush() regularly.
   271  	t := time.NewTicker(100 * time.Millisecond)
   272  	defer t.Stop()
   273  
   274  	for {
   275  		select {
   276  		case m, ok := <-a.c.In:
   277  			// We read in a message from the Channel. Pass it along, attaching an Ack
   278  			// which will write the message id to a.acks.
   279  			if !ok {
   280  				return
   281  			}
   282  			ac := service.AckMessage{
   283  				M: m,
   284  				Ack: func() {
   285  					a.acks <- m.SourceMessageId
   286  				},
   287  			}
   288  			select {
   289  			case out <- ac:
   290  			case <-a.stop:
   291  				return
   292  			}
   293  		case id := <-a.acks:
   294  			// A previously created messages has been acked. We'll tell the other end
   295  			// the next time we flush().
   296  			a.toAck = append(a.toAck, id)
   297  		case <-t.C:
   298  			a.flush()
   299  		case <-a.stop:
   300  			return
   301  		}
   302  	}
   303  }
   304  
// Stop stops the RelentlessAcknowledger and closes a.In. It blocks until the
// event loop goroutine has exited.
//
// Stop must be called at most once: it closes a.stop, and closing an already
// closed channel panics.
func (a *RelentlessAcknowledger) Stop() {
	close(a.stop)
	a.done.Wait()
}