github.com/asynkron/protoactor-go@v0.0.0-20240308120642-ef91a6abee75/cluster/clusterproviders/consul/consul_provider.go

package consul

import (
	"errors"
	"fmt"
	"log/slog"
	"sync"
	"time"

	"github.com/asynkron/protoactor-go/actor"
	"github.com/asynkron/protoactor-go/cluster"
	"github.com/hashicorp/consul/api"
)

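// ProviderShuttingDownError signals that the consul cluster provider is
// shutting down and can no longer serve requests.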
var ProviderShuttingDownError = errors.New("consul cluster provider is shutting down")

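// Provider is a cluster provider backed by HashiCorp Consul: each member
// registers itself as a Consul service instance guarded by a TTL health
// check, and the cluster topology is discovered through blocking health
// queries against the service catalog.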
type Provider struct {
	cluster            *cluster.Cluster
	deregistered       bool
	shutdown           bool
	id                 string
	clusterName        string
	address            string
	port               int
	knownKinds         []string
	index              uint64 // consul blocking index
	client             *api.Client
	ttl                time.Duration
	refreshTTL         time.Duration
	updateTTLWaitGroup sync.WaitGroup
	deregisterCritical time.Duration
	blockingWaitTime   time.Duration
	clusterError       error
	pid                *actor.PID
	consulConfig       *api.Config
}

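// New creates a Provider that talks to the local Consul agent using a default
// api.Config. Options can override the timing defaults set by NewWithConfig.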
func New(opts ...Option) (*Provider, error) {
	return NewWithConfig(&api.Config{}, opts...)
}

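// NewWithConfig creates a Provider from the given Consul client configuration.
// Defaults: a 3s check TTL, a 1s TTL refresh interval, 60s until a critical
// service is deregistered, and a 20s wait time for blocking queries; Option
// functions may override any of these.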
func NewWithConfig(consulConfig *api.Config, opts ...Option) (*Provider, error) {
	client, err := api.NewClient(consulConfig)
	if err != nil {
		return nil, err
	}
	p := &Provider{
		client:             client,
		ttl:                3 * time.Second,
		refreshTTL:         1 * time.Second,
		deregisterCritical: 60 * time.Second,
		blockingWaitTime:   20 * time.Second,
		consulConfig:       consulConfig,
	}
	for _, opt := range opts {
		opt(p)
	}
	return p, nil
}

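// init copies the cluster configuration (name, member id, host/port and known
// kinds) onto the provider before registration or discovery starts.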
func (p *Provider) init(c *cluster.Cluster) error {
	knownKinds := c.GetClusterKinds()
	clusterName := c.Config.Name
	memberId := c.ActorSystem.ID

	host, port, err := c.ActorSystem.GetHostPort()
	if err != nil {
		return err
	}

	p.cluster = c
	p.id = memberId
	p.clusterName = clusterName
	p.address = host
	p.port = port
	p.knownKinds = knownKinds
	return nil
}

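// StartMember joins this node to the cluster as a full member: it initializes
// the provider from the cluster configuration and spawns the consul-provider
// actor, which manages this member's Consul registration and TTL refreshes.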
func (p *Provider) StartMember(c *cluster.Cluster) error {
	err := p.init(c)
	if err != nil {
		return err
	}

	p.pid, err = c.ActorSystem.Root.SpawnNamed(actor.PropsFromProducer(func() actor.Actor {
		return newProviderActor(p)
	}), "consul-provider")
	if err != nil {
		p.cluster.Logger().Error("Failed to start consul-provider actor", slog.Any("error", err))
		return err
	}

	return nil
}

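// StartClient joins this node as a client-only member: it performs an initial
// blocking status fetch and then keeps watching Consul for topology changes,
// without registering a service of its own.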
func (p *Provider) StartClient(c *cluster.Cluster) error {
	if err := p.init(c); err != nil {
		return err
	}
	p.blockingStatusChange()
	p.monitorMemberStatusChanges()
	return nil
}

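// DeregisterMember removes this member's service registration from the local
// Consul agent.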
func (p *Provider) DeregisterMember() error {
	err := p.deregisterService()
	if err != nil {
		p.cluster.Logger().Error("Failed to deregister service", slog.Any("error", err))
		return err
	}
	p.deregistered = true
	return nil
}

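// Shutdown marks the provider as shutting down and stops the consul-provider
// actor if one was spawned. It is idempotent; the graceful flag is currently
// unused.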
func (p *Provider) Shutdown(graceful bool) error {
	if p.shutdown {
		return nil
	}
	p.shutdown = true
	if p.pid != nil {
		if err := p.cluster.ActorSystem.Root.StopFuture(p.pid).Wait(); err != nil {
			p.cluster.Logger().Error("Failed to stop consul-provider actor", slog.Any("error", err))
		}
		p.pid = nil
	}

	return nil
}

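// blockingUpdateTTL reports this instance's TTL health check as passing to
// the local Consul agent; the result is also recorded in clusterError.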
func blockingUpdateTTL(p *Provider) error {
	p.clusterError = p.client.Agent().UpdateTTL("service:"+p.id, "", api.HealthPassing)
	return p.clusterError
}

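// registerService registers this member as a Consul service instance named
// after the cluster, tagged with its known kinds and guarded by a TTL health
// check that deregisters the service once it has been critical for too long.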
func (p *Provider) registerService() error {
	s := &api.AgentServiceRegistration{
		ID:      p.id,
		Name:    p.clusterName,
		Tags:    p.knownKinds,
		Address: p.address,
		Port:    p.port,
		Meta: map[string]string{
			"id": p.id,
		},
		Check: &api.AgentServiceCheck{
			DeregisterCriticalServiceAfter: p.deregisterCritical.String(),
			TTL:                            p.ttl.String(),
		},
	}
	return p.client.Agent().ServiceRegister(s)
}

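// deregisterService removes the service instance for this member id from the
// local Consul agent.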
func (p *Provider) deregisterService() error {
	return p.client.Agent().ServiceDeregister(p.id)
}

// blockingStatusChange performs the initial blocking status fetch; call it
// directly after registering the service.
func (p *Provider) blockingStatusChange() {
	p.notifyStatuses()
}

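// notifyStatuses performs one blocking health query against Consul, converts
// the passing service instances into cluster members and publishes the
// resulting topology to the member list.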
func (p *Provider) notifyStatuses() {
	statuses, meta, err := p.client.Health().Service(p.clusterName, "", false, &api.QueryOptions{
		WaitIndex: p.index,
		WaitTime:  p.blockingWaitTime,
	})
	p.cluster.Logger().Info("Consul health check")

	if err != nil {
		p.cluster.Logger().Error("notifyStatuses", slog.Any("error", err))
		return
	}
	p.index = meta.LastIndex

	var members []*cluster.Member
	for _, v := range statuses {
		if len(v.Checks) > 0 && v.Checks.AggregatedStatus() == api.HealthPassing {
			memberId := v.Service.Meta["id"]
			if memberId == "" {
				memberId = fmt.Sprintf("%v@%v:%v", p.clusterName, v.Service.Address, v.Service.Port)
				p.cluster.Logger().Info("meta['id'] was empty, generated a fallback id", slog.String("id", memberId))
			}
			members = append(members, &cluster.Member{
				Id:    memberId,
				Host:  v.Service.Address,
				Port:  int32(v.Service.Port),
				Kinds: v.Service.Tags,
			})
		}
	}
	// We publish the members as one atomic batch rather than as individual
	// messages: with a full snapshot we can compute which nodes have left the
	// cluster, whereas with one-by-one events we cannot tell whether a member
	// left or simply has not changed status for a long time.

	// publish the current cluster topology onto the event stream
	p.cluster.MemberList.UpdateClusterTopology(members)
}

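// monitorMemberStatusChanges polls Consul in a background goroutine until the
// provider shuts down; the blocking query inside notifyStatuses
// (WaitIndex/WaitTime) paces each iteration.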
func (p *Provider) monitorMemberStatusChanges() {
	go func() {
		for !p.shutdown {
			p.notifyStatuses()
		}
	}()
}