github.com/asynkron/protoactor-go@v0.0.0-20240308120642-ef91a6abee75/cluster/clusterproviders/consul/consul_provider.go

package consul

import (
	"fmt"
	"log/slog"
	"sync"
	"time"

	"github.com/asynkron/protoactor-go/actor"

	"github.com/asynkron/protoactor-go/cluster"
	"github.com/hashicorp/consul/api"
)

// ProviderShuttingDownError signals that the consul cluster provider is shutting down.
var ProviderShuttingDownError = fmt.Errorf("consul cluster provider is shutting down")

// Provider implements cluster membership on top of Consul service registration,
// TTL health checks, and blocking health queries.
type Provider struct {
	cluster            *cluster.Cluster
	deregistered       bool
	shutdown           bool
	id                 string
	clusterName        string
	address            string
	port               int
	knownKinds         []string
	index              uint64 // consul blocking index
	client             *api.Client
	ttl                time.Duration
	refreshTTL         time.Duration
	updateTTLWaitGroup sync.WaitGroup
	deregisterCritical time.Duration
	blockingWaitTime   time.Duration
	clusterError       error
	pid                *actor.PID
	consulConfig       *api.Config
}

// New creates a Provider with an empty api.Config, applying the given options.
func New(opts ...Option) (*Provider, error) {
	return NewWithConfig(&api.Config{}, opts...)
}

// NewWithConfig creates a Provider from the given Consul API configuration, applying the given options.
func NewWithConfig(consulConfig *api.Config, opts ...Option) (*Provider, error) {
	client, err := api.NewClient(consulConfig)
	if err != nil {
		return nil, err
	}
	p := &Provider{
		client:             client,
		ttl:                3 * time.Second,
		refreshTTL:         1 * time.Second,
		deregisterCritical: 60 * time.Second,
		blockingWaitTime:   20 * time.Second,
		consulConfig:       consulConfig,
	}
	for _, opt := range opts {
		opt(p)
	}
	return p, nil
}

func (p *Provider) init(c *cluster.Cluster) error {
	knownKinds := c.GetClusterKinds()
	clusterName := c.Config.Name
	memberId := c.ActorSystem.ID

	host, port, err := c.ActorSystem.GetHostPort()
	if err != nil {
		return err
	}

	p.cluster = c
	p.id = memberId
	p.clusterName = clusterName
	p.address = host
	p.port = port
	p.knownKinds = knownKinds
	return nil
}

// StartMember initializes the provider for this cluster and spawns the
// consul-provider actor that manages registration and TTL updates.
func (p *Provider) StartMember(c *cluster.Cluster) error {
	err := p.init(c)
	if err != nil {
		return err
	}

	p.pid, err = c.ActorSystem.Root.SpawnNamed(actor.PropsFromProducer(func() actor.Actor {
		return newProviderActor(p)
	}), "consul-provider")
	if err != nil {
		p.cluster.Logger().Error("Failed to start consul-provider actor", slog.Any("error", err))
		return err
	}

	return nil
}

// StartClient initializes the provider for a client-only node: it runs one
// blocking status query and then keeps watching member status changes.
func (p *Provider) StartClient(c *cluster.Cluster) error {
	if err := p.init(c); err != nil {
		return err
	}
	p.blockingStatusChange()
	p.monitorMemberStatusChanges()
	return nil
}

// DeregisterMember removes this node's service registration from Consul.
func (p *Provider) DeregisterMember() error {
	err := p.deregisterService()
	if err != nil {
		fmt.Println(err)
		return err
	}
	p.deregistered = true
	return nil
}

// Shutdown stops the consul-provider actor and marks the provider as shut down.
func (p *Provider) Shutdown(graceful bool) error {
	if p.shutdown {
		return nil
	}
	p.shutdown = true
	if p.pid != nil {
		if err := p.cluster.ActorSystem.Root.StopFuture(p.pid).Wait(); err != nil {
			p.cluster.Logger().Error("Failed to stop consul-provider actor", slog.Any("error", err))
		}
		p.pid = nil
	}

	return nil
}

// blockingUpdateTTL reports a passing TTL check for this service to the local Consul agent.
func blockingUpdateTTL(p *Provider) error {
	p.clusterError = p.client.Agent().UpdateTTL("service:"+p.id, "", api.HealthPassing)
	return p.clusterError
}

// registerService registers this member as a Consul service with a TTL health check.
func (p *Provider) registerService() error {
	s := &api.AgentServiceRegistration{
		ID:      p.id,
		Name:    p.clusterName,
		Tags:    p.knownKinds,
		Address: p.address,
		Port:    p.port,
		Meta: map[string]string{
			"id": p.id,
		},
		Check: &api.AgentServiceCheck{
			DeregisterCriticalServiceAfter: p.deregisterCritical.String(),
			TTL:                            p.ttl.String(),
		},
	}
	return p.client.Agent().ServiceRegister(s)
}

func (p *Provider) deregisterService() error {
	return p.client.Agent().ServiceDeregister(p.id)
}

// call this directly after registering the service
func (p *Provider) blockingStatusChange() {
	p.notifyStatuses()
}

// notifyStatuses runs a blocking query against the Consul health API and
// publishes the resulting member list as a single cluster topology update.
func (p *Provider) notifyStatuses() {
	statuses, meta, err := p.client.Health().Service(p.clusterName, "", false, &api.QueryOptions{
		WaitIndex: p.index,
		WaitTime:  p.blockingWaitTime,
	})
	p.cluster.Logger().Info("Consul health check")

	if err != nil {
		p.cluster.Logger().Error("notifyStatuses", slog.Any("error", err))
		return
	}
	p.index = meta.LastIndex

	var members []*cluster.Member
	for _, v := range statuses {
		if len(v.Checks) > 0 && v.Checks.AggregatedStatus() == api.HealthPassing {
			memberId := v.Service.Meta["id"]
			if memberId == "" {
				memberId = fmt.Sprintf("%v@%v:%v", p.clusterName, v.Service.Address, v.Service.Port)
				p.cluster.Logger().Info("meta['id'] was empty, fixed", slog.String("id", memberId))
			}
			members = append(members, &cluster.Member{
				Id:    memberId,
				Host:  v.Service.Address,
				Port:  int32(v.Service.Port),
				Kinds: v.Service.Tags,
			})
		}
	}
	// the reason why we want this in a batch and not as individual messages is that
	// with an atomic batch we can calculate which nodes have left the cluster;
	// passing events one by one, we can't know if someone left or just hasn't changed status for a long time

	// publish the current cluster topology onto the event stream
	p.cluster.MemberList.UpdateClusterTopology(members)
}

// monitorMemberStatusChanges keeps polling Consul via blocking queries until the provider shuts down.
func (p *Provider) monitorMemberStatusChanges() {
	go func() {
		for !p.shutdown {
			p.notifyStatuses()
		}
	}()
}
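
// Usage sketch: a minimal, assumed example of wiring this provider into a
// protoactor-go cluster. The cluster name, bind address, disthash identity
// lookup, and graceful-shutdown call below are illustrative assumptions, not
// values defined by this file.
//
//	system := actor.NewActorSystem()
//	provider, err := consul.New() // typically connects to the local Consul agent with default client settings
//	if err != nil {
//		panic(err)
//	}
//	config := cluster.Configure("example-cluster", provider, disthash.New(),
//		remote.Configure("127.0.0.1", 0))
//	c := cluster.New(system, config)
//	c.StartMember()        // calls Provider.StartMember, which spawns the consul-provider actor
//	defer c.Shutdown(true) // deregisters the member and stops the provider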