github.com/lirm/aeron-go@v0.0.0-20230415210743-920325491dc4/cluster/clustered_service_agent.go (about)

     1  // Licensed under the Apache License, Version 2.0 (the "License");
     2  // you may not use this file except in compliance with the License.
     3  // You may obtain a copy of the License at
     4  //
     5  // http://www.apache.org/licenses/LICENSE-2.0
     6  //
     7  // Unless required by applicable law or agreed to in writing, software
     8  // distributed under the License is distributed on an "AS IS" BASIS,
     9  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    10  // See the License for the specific language governing permissions and
    11  // limitations under the License.
    12  
    13  package cluster
    14  
    15  import (
    16  	"fmt"
    17  	"strings"
    18  	"time"
    19  
    20  	"github.com/lirm/aeron-go/aeron"
    21  	"github.com/lirm/aeron-go/aeron/atomic"
    22  	"github.com/lirm/aeron-go/aeron/counters"
    23  	"github.com/lirm/aeron-go/aeron/idlestrategy"
    24  	"github.com/lirm/aeron-go/aeron/logbuffer"
    25  	"github.com/lirm/aeron-go/aeron/logbuffer/term"
    26  	"github.com/lirm/aeron-go/aeron/logging"
    27  	"github.com/lirm/aeron-go/aeron/util"
    28  	"github.com/lirm/aeron-go/archive"
    29  	"github.com/lirm/aeron-go/cluster/codecs"
    30  )
    31  
    32  const NullValue = -1
    33  const NullPosition = -1
    34  const markFileUpdateIntervalMs = 1000
    35  
    36  const (
    37  	recordingPosCounterTypeId  = 100
    38  	commitPosCounterTypeId     = 203
    39  	recoveryStateCounterTypeId = 204
    40  )
    41  
    42  var logger = logging.MustGetLogger("cluster")
    43  
    44  type ClusteredServiceAgent struct {
    45  	aeronClient              *aeron.Aeron
    46  	aeronCtx                 *aeron.Context
    47  	opts                     *Options
    48  	proxy                    *consensusModuleProxy
    49  	counters                 *counters.Reader
    50  	serviceAdapter           *serviceAdapter
    51  	logAdapter               *boundedLogAdapter
    52  	markFile                 *ClusterMarkFile
    53  	activeLogEvent           *activeLogEvent
    54  	cachedTimeMs             int64
    55  	markFileUpdateDeadlineMs int64
    56  	logPosition              int64
    57  	clusterTime              int64
    58  	timeUnit                 codecs.ClusterTimeUnitEnum
    59  	memberId                 int32
    60  	nextAckId                int64
    61  	terminationPosition      int64
    62  	isServiceActive          bool
    63  	role                     Role
    64  	service                  ClusteredService
    65  	sessions                 map[int64]ClientSession
    66  	commitPosition           *counters.ReadableCounter
    67  	sessionMsgHdrBuffer      *atomic.Buffer
    68  }
    69  
    70  func NewClusteredServiceAgent(
    71  	aeronCtx *aeron.Context,
    72  	options *Options,
    73  	service ClusteredService,
    74  ) (*ClusteredServiceAgent, error) {
    75  	if !strings.HasPrefix(options.ArchiveOptions.RequestChannel, "aeron:ipc") {
    76  		return nil, fmt.Errorf("archive request channel must be IPC: %s", options.ArchiveOptions.RequestChannel)
    77  	}
    78  	if !strings.HasPrefix(options.ArchiveOptions.ResponseChannel, "aeron:ipc") {
    79  		return nil, fmt.Errorf("archive response channel must be IPC: %s", options.ArchiveOptions.ResponseChannel)
    80  	}
    81  	if options.ServiceId < 0 || options.ServiceId > 127 {
    82  		return nil, fmt.Errorf("serviceId is outside allowed range (0-127): %d", options.ServiceId)
    83  	}
    84  
    85  	logging.SetLevel(options.Loglevel, "cluster")
    86  
    87  	aeronClient, err := aeron.Connect(aeronCtx)
    88  	if err != nil {
    89  		return nil, err
    90  	}
    91  
    92  	pub, err := aeronClient.AddPublication(options.ControlChannel, options.ConsensusModuleStreamId)
    93  	if err != nil {
    94  		return nil, err
    95  	}
    96  	proxy := newConsensusModuleProxy(options, pub)
    97  
    98  	sub, err := aeronClient.AddSubscription(options.ControlChannel, options.ServiceStreamId)
    99  	if err != nil {
   100  		return nil, err
   101  	}
   102  	serviceAdapter := &serviceAdapter{
   103  		marshaller:   codecs.NewSbeGoMarshaller(),
   104  		subscription: sub,
   105  	}
   106  	logAdapter := &boundedLogAdapter{
   107  		marshaller: codecs.NewSbeGoMarshaller(),
   108  		options:    options,
   109  	}
   110  
   111  	counterFile, _, _ := counters.MapFile(aeronCtx.CncFileName())
   112  	countersReader := counters.NewReader(
   113  		counterFile.ValuesBuf.Get(),
   114  		counterFile.MetaDataBuf.Get(),
   115  	)
   116  
   117  	cmf, err := NewClusterMarkFile(options.ClusterDir + "/cluster-mark-service-0.dat")
   118  	if err != nil {
   119  		return nil, err
   120  	}
   121  
   122  	agent := &ClusteredServiceAgent{
   123  		aeronClient:         aeronClient,
   124  		opts:                options,
   125  		serviceAdapter:      serviceAdapter,
   126  		logAdapter:          logAdapter,
   127  		aeronCtx:            aeronCtx,
   128  		proxy:               proxy,
   129  		counters:            countersReader,
   130  		markFile:            cmf,
   131  		role:                Follower,
   132  		service:             service,
   133  		logPosition:         NullPosition,
   134  		terminationPosition: NullPosition,
   135  		sessions:            map[int64]ClientSession{},
   136  		sessionMsgHdrBuffer: codecs.MakeClusterMessageBuffer(SessionMessageHeaderTemplateId, SessionMessageHdrBlockLength),
   137  	}
   138  	serviceAdapter.agent = agent
   139  	logAdapter.agent = agent
   140  	proxy.idleStrategy = agent
   141  
   142  	cmf.flyweight.ArchiveStreamId.Set(options.ArchiveOptions.RequestStream)
   143  	cmf.flyweight.ServiceStreamId.Set(options.ServiceStreamId)
   144  	cmf.flyweight.ConsensusModuleStreamId.Set(options.ConsensusModuleStreamId)
   145  	cmf.flyweight.IngressStreamId.Set(-1)
   146  	cmf.flyweight.MemberId.Set(-1)
   147  	cmf.flyweight.ServiceId.Set(options.ServiceId)
   148  	cmf.flyweight.ClusterId.Set(options.ClusterId)
   149  
   150  	cmf.UpdateActivityTimestamp(time.Now().UnixMilli())
   151  	cmf.SignalReady()
   152  
   153  	return agent, nil
   154  }
   155  
   156  func (agent *ClusteredServiceAgent) StartAndRun() error {
   157  	if err := agent.OnStart(); err != nil {
   158  		return err
   159  	}
   160  	for agent.isServiceActive {
   161  		agent.opts.IdleStrategy.Idle(agent.DoWork())
   162  	}
   163  	return nil
   164  }
   165  
   166  func (agent *ClusteredServiceAgent) OnStart() error {
   167  	if err := agent.awaitCommitPositionCounter(); err != nil {
   168  		return err
   169  	}
   170  	return agent.recoverState()
   171  }
   172  
   173  func (agent *ClusteredServiceAgent) awaitCommitPositionCounter() error {
   174  	for {
   175  		id := agent.counters.FindCounter(commitPosCounterTypeId, func(keyBuffer *atomic.Buffer) bool {
   176  			return keyBuffer.GetInt32(0) == agent.opts.ClusterId
   177  		})
   178  		if id != counters.NullCounterId {
   179  			commitPos, err := counters.NewReadableCounter(agent.counters, id)
   180  			logger.Debugf("found commit position counter - id=%d value=%d", id, commitPos.Get())
   181  			agent.commitPosition = commitPos
   182  			return err
   183  		}
   184  		agent.Idle(0)
   185  	}
   186  }
   187  
   188  func (agent *ClusteredServiceAgent) recoverState() error {
   189  	counterId, leadershipTermId := agent.awaitRecoveryCounter()
   190  	logger.Debugf("found recovery counter - id=%d leadershipTermId=%d logPos=%d clusterTime=%d",
   191  		counterId, leadershipTermId, agent.logPosition, agent.clusterTime)
   192  	agent.sessionMsgHdrBuffer.PutInt64(SBEHeaderLength, leadershipTermId)
   193  	agent.isServiceActive = true
   194  
   195  	if leadershipTermId == -1 {
   196  		agent.service.OnStart(agent, nil)
   197  	} else {
   198  		serviceCount, err := agent.counters.GetKeyPartInt32(counterId, 28)
   199  		if err != nil {
   200  			return err
   201  		}
   202  		if serviceCount < 1 {
   203  			return fmt.Errorf("invalid service count: %d", serviceCount)
   204  		}
   205  		snapshotRecId, err := agent.counters.GetKeyPartInt64(counterId, 32+(agent.opts.ServiceId*util.SizeOfInt64))
   206  		if err != nil {
   207  			return err
   208  		}
   209  		if err := agent.loadSnapshot(snapshotRecId); err != nil {
   210  			return err
   211  		}
   212  	}
   213  
   214  	agent.proxy.serviceAckRequest(
   215  		agent.logPosition,
   216  		agent.clusterTime,
   217  		agent.getAndIncrementNextAckId(),
   218  		agent.aeronClient.ClientID(),
   219  		agent.opts.ServiceId,
   220  	)
   221  	return nil
   222  }
   223  
   224  func (agent *ClusteredServiceAgent) awaitRecoveryCounter() (int32, int64) {
   225  	for {
   226  		var leadershipTermId int64
   227  		id := agent.counters.FindCounter(recoveryStateCounterTypeId, func(keyBuffer *atomic.Buffer) bool {
   228  			if keyBuffer.GetInt32(24) == agent.opts.ClusterId {
   229  				leadershipTermId = keyBuffer.GetInt64(0)
   230  				agent.logPosition = keyBuffer.GetInt64(8)
   231  				agent.clusterTime = keyBuffer.GetInt64(16)
   232  				return true
   233  			}
   234  			return false
   235  		})
   236  		if id != counters.NullCounterId {
   237  			return id, leadershipTermId
   238  		}
   239  		agent.Idle(0)
   240  	}
   241  }
   242  
   243  func (agent *ClusteredServiceAgent) loadSnapshot(recordingId int64) error {
   244  	arch, err := archive.NewArchive(agent.opts.ArchiveOptions, agent.aeronCtx)
   245  	if err != nil {
   246  		return err
   247  	}
   248  	defer closeArchive(arch)
   249  
   250  	channel := agent.opts.ReplayChannel
   251  	streamId := agent.opts.ReplayStreamId
   252  	replaySessionId, err := arch.StartReplay(recordingId, 0, NullValue, channel, streamId)
   253  	if err != nil {
   254  		return err
   255  	}
   256  	subChannel, err := archive.AddSessionIdToChannel(channel, archive.ReplaySessionIdToStreamId(replaySessionId))
   257  	if err != nil {
   258  		return err
   259  	}
   260  
   261  	logger.Debugf("replaying snapshot - recId=%d sessionId=%d streamId=%d",
   262  		recordingId, replaySessionId, streamId)
   263  	subscription, err := arch.AddSubscription(subChannel, streamId)
   264  	if err != nil {
   265  		return err
   266  	}
   267  	defer closeSubscription(subscription)
   268  
   269  	img := agent.awaitImage(int32(replaySessionId), subscription)
   270  	loader := newSnapshotLoader(agent, img)
   271  	for !loader.isDone {
   272  		agent.opts.IdleStrategy.Idle(loader.poll())
   273  	}
   274  	if util.SemanticVersionMajor(uint32(agent.opts.AppVersion)) != util.SemanticVersionMajor(uint32(loader.appVersion)) {
   275  		panic(fmt.Errorf("incompatible app version: %v snapshot=%v",
   276  			util.SemanticVersionToString(uint32(agent.opts.AppVersion)),
   277  			util.SemanticVersionToString(uint32(loader.appVersion))))
   278  	}
   279  	agent.timeUnit = loader.timeUnit
   280  	agent.service.OnStart(agent, img)
   281  	return nil
   282  }
   283  
   284  func (agent *ClusteredServiceAgent) addSessionFromSnapshot(session *containerClientSession) {
   285  	agent.sessions[session.id] = session
   286  }
   287  
   288  func (agent *ClusteredServiceAgent) checkForClockTick() bool {
   289  	nowMs := time.Now().UnixMilli()
   290  	if agent.cachedTimeMs != nowMs {
   291  		agent.cachedTimeMs = nowMs
   292  		if nowMs > agent.markFileUpdateDeadlineMs {
   293  			agent.markFileUpdateDeadlineMs = nowMs + markFileUpdateIntervalMs
   294  			agent.markFile.UpdateActivityTimestamp(nowMs)
   295  		}
   296  		return true
   297  	}
   298  	return false
   299  }
   300  
   301  func (agent *ClusteredServiceAgent) pollServiceAdapter() {
   302  	agent.serviceAdapter.poll()
   303  
   304  	if agent.activeLogEvent != nil && agent.logAdapter.image == nil {
   305  		event := agent.activeLogEvent
   306  		agent.activeLogEvent = nil
   307  		agent.joinActiveLog(event)
   308  	}
   309  
   310  	if agent.terminationPosition != NullPosition && agent.logPosition >= agent.terminationPosition {
   311  		if agent.logPosition > agent.terminationPosition {
   312  			logger.Errorf("service terminate: logPos=%d > terminationPos=%d", agent.logPosition, agent.terminationPosition)
   313  		}
   314  		agent.terminate()
   315  	}
   316  }
   317  
   318  func (agent *ClusteredServiceAgent) terminate() {
   319  	agent.isServiceActive = false
   320  	agent.service.OnTerminate(agent)
   321  	agent.proxy.serviceAckRequest(
   322  		agent.logPosition,
   323  		agent.clusterTime,
   324  		agent.getAndIncrementNextAckId(),
   325  		NullValue,
   326  		agent.opts.ServiceId,
   327  	)
   328  	agent.terminationPosition = NullPosition
   329  }
   330  
   331  func (agent *ClusteredServiceAgent) DoWork() int {
   332  	work := 0
   333  
   334  	if agent.checkForClockTick() {
   335  		agent.pollServiceAdapter()
   336  	}
   337  
   338  	if agent.logAdapter.image != nil {
   339  		polled := agent.logAdapter.poll(agent.commitPosition.Get())
   340  		work += polled
   341  		if polled == 0 && agent.logAdapter.isDone() {
   342  			agent.closeLog()
   343  		}
   344  	}
   345  
   346  	return work
   347  }
   348  
   349  func (agent *ClusteredServiceAgent) onJoinLog(
   350  	logPosition int64,
   351  	maxLogPosition int64,
   352  	memberId int32,
   353  	logSessionId int32,
   354  	logStreamId int32,
   355  	isStartup bool,
   356  	role Role,
   357  	logChannel string,
   358  ) {
   359  	logger.Debugf("onJoinLog - logPos=%d isStartup=%v role=%v logChannel=%s", logPosition, isStartup, role, logChannel)
   360  	agent.logAdapter.maxLogPosition = logPosition
   361  	event := &activeLogEvent{
   362  		logPosition:    logPosition,
   363  		maxLogPosition: maxLogPosition,
   364  		memberId:       memberId,
   365  		logSessionId:   logSessionId,
   366  		logStreamId:    logStreamId,
   367  		isStartup:      isStartup,
   368  		role:           role,
   369  		logChannel:     logChannel,
   370  	}
   371  	agent.activeLogEvent = event
   372  }
   373  
   374  type activeLogEvent struct {
   375  	logPosition    int64
   376  	maxLogPosition int64
   377  	memberId       int32
   378  	logSessionId   int32
   379  	logStreamId    int32
   380  	isStartup      bool
   381  	role           Role
   382  	logChannel     string
   383  }
   384  
   385  func (agent *ClusteredServiceAgent) joinActiveLog(event *activeLogEvent) error {
   386  	logSub, err := agent.aeronClient.AddSubscription(event.logChannel, event.logStreamId)
   387  	if err != nil {
   388  		return err
   389  	}
   390  	img := agent.awaitImage(event.logSessionId, logSub)
   391  	if img.Position() != agent.logPosition {
   392  		return fmt.Errorf("joinActiveLog - image.position=%v expected=%v", img.Position(), agent.logPosition)
   393  	}
   394  	if event.logPosition != agent.logPosition {
   395  		return fmt.Errorf("joinActiveLog - event.logPos=%v expected=%v", event.logPosition, agent.logPosition)
   396  	}
   397  	agent.logAdapter.image = img
   398  	agent.logAdapter.maxLogPosition = event.maxLogPosition
   399  
   400  	agent.proxy.serviceAckRequest(
   401  		event.logPosition,
   402  		agent.clusterTime,
   403  		agent.getAndIncrementNextAckId(),
   404  		NullValue,
   405  		agent.opts.ServiceId,
   406  	)
   407  
   408  	agent.memberId = event.memberId
   409  	agent.markFile.flyweight.MemberId.Set(agent.memberId)
   410  
   411  	agent.setRole(event.role)
   412  	return nil
   413  }
   414  
   415  func (agent *ClusteredServiceAgent) closeLog() {
   416  	imageLogPos := agent.logAdapter.image.Position()
   417  	if imageLogPos > agent.logPosition {
   418  		agent.logPosition = imageLogPos
   419  	}
   420  	if err := agent.logAdapter.Close(); err != nil {
   421  		logger.Errorf("error closing log image: %v", err)
   422  	}
   423  	agent.setRole(Follower)
   424  }
   425  
   426  func (agent *ClusteredServiceAgent) setRole(newRole Role) {
   427  	if newRole != agent.role {
   428  		agent.role = newRole
   429  		agent.service.OnRoleChange(newRole)
   430  	}
   431  }
   432  
   433  func (agent *ClusteredServiceAgent) awaitImage(
   434  	sessionId int32,
   435  	subscription *aeron.Subscription,
   436  ) aeron.Image {
   437  	for {
   438  		if img := subscription.ImageBySessionID(sessionId); img != nil {
   439  			return img
   440  		}
   441  		agent.opts.IdleStrategy.Idle(0)
   442  	}
   443  }
   444  
   445  func (agent *ClusteredServiceAgent) onSessionOpen(
   446  	leadershipTermId int64,
   447  	logPosition int64,
   448  	clusterSessionId int64,
   449  	timestamp int64,
   450  	responseStreamId int32,
   451  	responseChannel string,
   452  	encodedPrincipal []byte,
   453  ) error {
   454  	agent.logPosition = logPosition
   455  	agent.clusterTime = timestamp
   456  	if _, ok := agent.sessions[clusterSessionId]; ok {
   457  		return fmt.Errorf("clashing open session - id=%d leaderTermId=%d logPos=%d",
   458  			clusterSessionId, leadershipTermId, logPosition)
   459  	} else {
   460  		session, err := newContainerClientSession(
   461  			clusterSessionId,
   462  			responseStreamId,
   463  			responseChannel,
   464  			encodedPrincipal,
   465  			agent,
   466  		)
   467  		if err != nil {
   468  			return err
   469  		}
   470  		// TODO: looks like we only want to connect if this is the leader
   471  		// currently always connecting
   472  
   473  		agent.sessions[session.id] = session
   474  		agent.service.OnSessionOpen(session, timestamp)
   475  	}
   476  	return nil
   477  }
   478  
   479  func (agent *ClusteredServiceAgent) onSessionClose(
   480  	leadershipTermId int64,
   481  	logPosition int64,
   482  	clusterSessionId int64,
   483  	timestamp int64,
   484  	closeReason codecs.CloseReasonEnum,
   485  ) {
   486  	agent.logPosition = logPosition
   487  	agent.clusterTime = timestamp
   488  
   489  	if session, ok := agent.sessions[clusterSessionId]; ok {
   490  		delete(agent.sessions, clusterSessionId)
   491  		agent.service.OnSessionClose(session, timestamp, closeReason)
   492  	} else {
   493  		logger.Errorf("onSessionClose: unknown session - id=%d leaderTermId=%d logPos=%d reason=%v",
   494  			clusterSessionId, leadershipTermId, logPosition, closeReason)
   495  	}
   496  }
   497  
   498  func (agent *ClusteredServiceAgent) onSessionMessage(
   499  	logPosition int64,
   500  	clusterSessionId int64,
   501  	timestamp int64,
   502  	buffer *atomic.Buffer,
   503  	offset int32,
   504  	length int32,
   505  	header *logbuffer.Header,
   506  ) {
   507  	agent.logPosition = logPosition
   508  	agent.clusterTime = timestamp
   509  	clientSession := agent.sessions[clusterSessionId]
   510  	agent.service.OnSessionMessage(clientSession, timestamp, buffer, offset, length, header)
   511  }
   512  
   513  func (agent *ClusteredServiceAgent) onNewLeadershipTermEvent(
   514  	leadershipTermId int64,
   515  	logPosition int64,
   516  	timestamp int64,
   517  	termBaseLogPosition int64,
   518  	leaderMemberId int32,
   519  	logSessionId int32,
   520  	timeUnit codecs.ClusterTimeUnitEnum,
   521  	appVersion int32,
   522  ) {
   523  	if util.SemanticVersionMajor(uint32(agent.opts.AppVersion)) != util.SemanticVersionMajor(uint32(appVersion)) {
   524  		panic(fmt.Errorf("incompatible app version: %v log=%v",
   525  			util.SemanticVersionToString(uint32(agent.opts.AppVersion)),
   526  			util.SemanticVersionToString(uint32(appVersion))))
   527  	}
   528  	agent.sessionMsgHdrBuffer.PutInt64(SBEHeaderLength, leadershipTermId)
   529  	agent.logPosition = logPosition
   530  	agent.clusterTime = timestamp
   531  	agent.timeUnit = timeUnit
   532  
   533  	agent.service.OnNewLeadershipTermEvent(
   534  		leadershipTermId,
   535  		logPosition,
   536  		timestamp,
   537  		termBaseLogPosition,
   538  		leaderMemberId,
   539  		logSessionId,
   540  		timeUnit,
   541  		appVersion)
   542  }
   543  
   544  func (agent *ClusteredServiceAgent) onServiceAction(
   545  	leadershipTermId int64,
   546  	logPos int64,
   547  	timestamp int64,
   548  	action codecs.ClusterActionEnum,
   549  ) {
   550  	agent.logPosition = logPos
   551  	agent.clusterTime = timestamp
   552  	if action == codecs.ClusterAction.SNAPSHOT {
   553  		recordingId, err := agent.takeSnapshot(logPos, leadershipTermId)
   554  		if err != nil {
   555  			logger.Errorf("take snapshot failed: ", err)
   556  		} else {
   557  			agent.proxy.serviceAckRequest(logPos, timestamp, agent.getAndIncrementNextAckId(), recordingId, agent.opts.ServiceId)
   558  		}
   559  	}
   560  }
   561  
   562  func (agent *ClusteredServiceAgent) onTimerEvent(
   563  	logPosition int64,
   564  	correlationId int64,
   565  	timestamp int64,
   566  ) {
   567  	agent.logPosition = logPosition
   568  	agent.clusterTime = timestamp
   569  	agent.service.OnTimerEvent(correlationId, timestamp)
   570  }
   571  
   572  func (agent *ClusteredServiceAgent) onMembershipChange(
   573  	logPos int64,
   574  	timestamp int64,
   575  	changeType codecs.ChangeTypeEnum,
   576  	memberId int32,
   577  ) {
   578  	agent.logPosition = logPos
   579  	agent.clusterTime = timestamp
   580  	if memberId == agent.memberId && changeType == codecs.ChangeType.QUIT {
   581  		agent.terminate()
   582  	}
   583  }
   584  
   585  func (agent *ClusteredServiceAgent) takeSnapshot(logPos int64, leadershipTermId int64) (int64, error) {
   586  	arch, err := archive.NewArchive(agent.opts.ArchiveOptions, agent.aeronCtx)
   587  	if err != nil {
   588  		return NullValue, err
   589  	}
   590  	defer closeArchive(arch)
   591  
   592  	pub, err := arch.AddRecordedPublication(agent.opts.SnapshotChannel, agent.opts.SnapshotStreamId)
   593  	if err != nil {
   594  		return NullValue, err
   595  	}
   596  	defer closePublication(pub)
   597  
   598  	recordingId, err := agent.awaitRecordingId(pub.SessionID())
   599  	if err != nil {
   600  		return 0, err
   601  	}
   602  
   603  	logger.Debugf("takeSnapshot - got recordingId: %d", recordingId)
   604  	snapshotTaker := newSnapshotTaker(agent.opts, pub)
   605  	if err := snapshotTaker.markBegin(logPos, leadershipTermId, agent.timeUnit, agent.opts.AppVersion); err != nil {
   606  		return 0, err
   607  	}
   608  	for _, session := range agent.sessions {
   609  		if err := snapshotTaker.snapshotSession(session); err != nil {
   610  			return 0, err
   611  		}
   612  	}
   613  	if err := snapshotTaker.markEnd(logPos, leadershipTermId, agent.timeUnit, agent.opts.AppVersion); err != nil {
   614  		return 0, err
   615  	}
   616  	agent.checkForClockTick()
   617  	agent.service.OnTakeSnapshot(pub)
   618  
   619  	return recordingId, nil
   620  }
   621  
   622  func (agent *ClusteredServiceAgent) awaitRecordingId(sessionId int32) (int64, error) {
   623  	start := time.Now()
   624  	for time.Since(start) < agent.opts.Timeout {
   625  		recId := int64(NullValue)
   626  		counterId := agent.counters.FindCounter(recordingPosCounterTypeId, func(keyBuffer *atomic.Buffer) bool {
   627  			if keyBuffer.GetInt32(8) == sessionId {
   628  				recId = keyBuffer.GetInt64(0)
   629  				return true
   630  			}
   631  			return false
   632  		})
   633  		if counterId != NullValue {
   634  			return recId, nil
   635  		}
   636  		agent.Idle(0)
   637  	}
   638  	return NullValue, fmt.Errorf("timed out waiting for recordingId for sessionId=%d", sessionId)
   639  }
   640  
   641  func (agent *ClusteredServiceAgent) onServiceTerminationPosition(position int64) {
   642  	agent.terminationPosition = position
   643  }
   644  
   645  func (agent *ClusteredServiceAgent) getAndIncrementNextAckId() int64 {
   646  	ackId := agent.nextAckId
   647  	agent.nextAckId++
   648  	return ackId
   649  }
   650  
   651  func (agent *ClusteredServiceAgent) offerToSession(
   652  	clusterSessionId int64,
   653  	publication *aeron.Publication,
   654  	buffer *atomic.Buffer,
   655  	offset int32,
   656  	length int32,
   657  	reservedValueSupplier term.ReservedValueSupplier,
   658  ) int64 {
   659  	if agent.role != Leader {
   660  		return ClientSessionMockedOffer
   661  	}
   662  
   663  	hdrBuf := agent.sessionMsgHdrBuffer
   664  	hdrBuf.PutInt64(SBEHeaderLength+8, clusterSessionId)
   665  	hdrBuf.PutInt64(SBEHeaderLength+16, agent.clusterTime)
   666  	return publication.Offer2(hdrBuf, 0, hdrBuf.Capacity(), buffer, offset, length, reservedValueSupplier)
   667  }
   668  
   669  func (agent *ClusteredServiceAgent) getClientSession(id int64) (ClientSession, bool) {
   670  	session, ok := agent.sessions[id]
   671  	return session, ok
   672  }
   673  
   674  func (agent *ClusteredServiceAgent) closeClientSession(id int64) {
   675  	if _, ok := agent.sessions[id]; ok {
   676  		// TODO: check if session already closed
   677  		agent.proxy.closeSessionRequest(id)
   678  	} else {
   679  		logger.Errorf("closeClientSession: unknown session id=%d", id)
   680  	}
   681  }
   682  
   683  func closeArchive(arch *archive.Archive) {
   684  	err := arch.Close()
   685  	if err != nil {
   686  		logger.Errorf("error closing archive connection: %v", err)
   687  	}
   688  }
   689  
   690  func closeSubscription(sub *aeron.Subscription) {
   691  	err := sub.Close()
   692  	if err != nil {
   693  		logger.Errorf("error closing subscription, streamId=%d channel=%s: %v", sub.StreamID(), sub.Channel(), err)
   694  	}
   695  }
   696  
   697  func closePublication(pub *aeron.Publication) {
   698  	err := pub.Close()
   699  	if err != nil {
   700  		logger.Errorf("error closing publication, streamId=%d channel=%s: %v", pub.StreamID(), pub.Channel(), err)
   701  	}
   702  }
   703  
   704  func (agent *ClusteredServiceAgent) Idle(workCount int) {
   705  	agent.opts.IdleStrategy.Idle(workCount)
   706  	if workCount <= 0 {
   707  		agent.checkForClockTick()
   708  	}
   709  }
   710  
   711  // BEGIN CLUSTER IMPLEMENTATION
   712  
   713  func (agent *ClusteredServiceAgent) LogPosition() int64 {
   714  	return agent.logPosition
   715  }
   716  
   717  func (agent *ClusteredServiceAgent) MemberId() int32 {
   718  	return agent.memberId
   719  }
   720  
   721  func (agent *ClusteredServiceAgent) Role() Role {
   722  	return agent.role
   723  }
   724  
   725  func (agent *ClusteredServiceAgent) Time() int64 {
   726  	return agent.clusterTime
   727  }
   728  
   729  func (agent *ClusteredServiceAgent) TimeUnit() codecs.ClusterTimeUnitEnum {
   730  	return agent.timeUnit
   731  }
   732  
   733  func (agent *ClusteredServiceAgent) IdleStrategy() idlestrategy.Idler {
   734  	return agent
   735  }
   736  
   737  func (agent *ClusteredServiceAgent) ScheduleTimer(correlationId int64, deadline int64) bool {
   738  	return agent.proxy.scheduleTimer(correlationId, deadline)
   739  }
   740  
   741  func (agent *ClusteredServiceAgent) CancelTimer(correlationId int64) bool {
   742  	return agent.proxy.cancelTimer(correlationId)
   743  }
   744  
   745  func (agent *ClusteredServiceAgent) Offer(buffer *atomic.Buffer, offset, length int32) int64 {
   746  	if agent.role != Leader {
   747  		return ClientSessionMockedOffer
   748  	}
   749  
   750  	hdrBuf := agent.sessionMsgHdrBuffer
   751  	hdrBuf.PutInt64(SBEHeaderLength+8, -int64(agent.opts.ServiceId))
   752  	hdrBuf.PutInt64(SBEHeaderLength+16, agent.clusterTime)
   753  	return agent.proxy.Offer2(hdrBuf, 0, hdrBuf.Capacity(), buffer, offset, length)
   754  }
   755  
   756  // END CLUSTER IMPLEMENTATION