github.com/erda-project/erda-infra@v1.0.9/providers/zk-master-election/provider.go (about) 1 // Copyright (c) 2021 Terminus, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package election 16 17 import ( 18 "context" 19 "fmt" 20 "path/filepath" 21 "reflect" 22 "strings" 23 "sync" 24 "sync/atomic" 25 "time" 26 27 "github.com/erda-project/erda-infra/base/logs" 28 "github.com/erda-project/erda-infra/base/servicehub" 29 "github.com/erda-project/erda-infra/providers/zookeeper" 30 "github.com/go-zookeeper/zk" 31 ) 32 33 // Event . 34 type Event interface { 35 IsConnected() bool 36 IsMaster() bool 37 } 38 39 // Listener . 40 type Listener func(Event) 41 42 // Interface . 43 type Interface interface { 44 IsConnected() bool 45 IsMaster() bool 46 Watch(Listener) 47 } 48 49 type config struct { 50 RootPath string `file:"root_path"` 51 MasterNode string `file:"master_node" default:"master-node-key"` 52 masterPath string 53 } 54 55 type provider struct { 56 Cfg *config 57 Log logs.Logger 58 zk zookeeper.Interface 59 closeCh chan struct{} 60 61 isConnected int32 62 isMaster int32 63 keys []string 64 watchers map[string][]Listener 65 } 66 67 // Init . 68 func (p *provider) Init(ctx servicehub.Context) error { 69 p.zk = ctx.Service("zookeeper").(zookeeper.Interface) 70 p.Cfg.RootPath = filepath.Clean("/" + p.Cfg.RootPath) 71 p.Cfg.MasterNode = filepath.Clean(p.Cfg.MasterNode) 72 p.Cfg.masterPath = filepath.Join(p.Cfg.RootPath, p.Cfg.MasterNode) 73 return nil 74 } 75 76 func (p *provider) run() error { 77 for { 78 conn, ch, err := p.zk.Connect() 79 if err != nil { 80 p.Log.Errorf("fail to connect zookeeper: %s", err) 81 select { 82 case <-p.closeCh: 83 if conn != nil { 84 conn.Close() 85 } 86 return err 87 default: 88 time.Sleep(3 * time.Second) 89 } 90 continue 91 } 92 var wg sync.WaitGroup 93 ctx, cancel := context.WithCancel(context.Background()) 94 timer := time.After(p.zk.SessionTimeout()) 95 for { 96 var exit bool 97 select { 98 case event := <-ch: 99 if event.Type != zk.EventSession { 100 continue 101 } 102 switch event.State { 103 case zk.StateConnected: 104 atomic.StoreInt32(&p.isConnected, 1) 105 p.Log.Info("connected to zookeeper successfully") 106 err := p.election(conn) 107 if err != nil { 108 break 109 } 110 wg.Add(1) 111 go p.watchMasterNode(ctx, &wg, conn) 112 continue 113 case zk.StateConnectedReadOnly, zk.StateConnecting, zk.StateHasSession, zk.StateSaslAuthenticated, zk.StateUnknown: 114 continue 115 case zk.StateExpired, zk.StateAuthFailed, zk.StateDisconnected: 116 break 117 default: 118 p.Log.Errorf("unknown event: %v", event) 119 continue 120 } 121 case <-timer: 122 if !p.IsConnected() { 123 p.Log.Errorf("connect to zookeeper timeout") 124 break 125 } 126 continue 127 case <-p.closeCh: 128 exit = true 129 } 130 cancel() 131 atomic.StoreInt32(&p.isMaster, 0) 132 atomic.StoreInt32(&p.isConnected, 0) 133 wg.Wait() 134 conn.Close() 135 p.Log.Info("disconnected zookeeper") 136 if exit { 137 return nil 138 } 139 break 140 } 141 time.Sleep(2 * time.Second) 142 } 143 } 144 145 func (p *provider) IsConnected() bool { 146 return atomic.LoadInt32(&p.isConnected) != 0 147 } 148 149 func (p *provider) IsMaster() bool { 150 return atomic.LoadInt32(&p.isMaster) != 0 151 } 152 153 func (p *provider) makePath(conn *zk.Conn, path string) error { 154 exist, _, err := conn.Exists(path) 155 if err != nil { 156 return err 157 } 158 if !exist { 159 createdPath, err := conn.Create(path, nil, 0, zk.WorldACL(zk.PermAll)) 160 if err != nil { 161 return fmt.Errorf("fail to create path %q: %s", path, err) 162 } 163 if path != createdPath { 164 return fmt.Errorf("create different path %q != %q", createdPath, path) 165 } 166 p.Log.Infof("created path %q", path) 167 } 168 return nil 169 } 170 171 type stateEvent struct { 172 isConnected bool 173 isMaster bool 174 } 175 176 func (c *stateEvent) IsConnected() bool { return c.isConnected } 177 func (c *stateEvent) IsMaster() bool { return c.isMaster } 178 179 func (p *provider) election(conn *zk.Conn) error { 180 err := p.makePath(conn, p.Cfg.RootPath) 181 if err != nil { 182 return err 183 } 184 createdPath, err := conn.Create(p.Cfg.masterPath, nil, zk.FlagEphemeral, zk.WorldACL(zk.PermAll)) 185 if err != nil { 186 if !strings.Contains(err.Error(), "exists") { 187 err = fmt.Errorf("fail to create path %q: %s", p.Cfg.masterPath, err) 188 p.Log.Error(err) 189 return err 190 } 191 } else if createdPath != p.Cfg.masterPath { 192 err = fmt.Errorf("create different path %q != %q", createdPath, p.Cfg.masterPath) 193 p.Log.Error(err) 194 return err 195 } 196 isMaster := err == nil 197 if isMaster { 198 atomic.StoreInt32(&p.isMaster, 0) 199 p.Log.Infof("election finish, i am slave") 200 } else { 201 atomic.StoreInt32(&p.isMaster, 1) 202 p.Log.Infof("election success, i am master") 203 } 204 ctx := &stateEvent{ 205 isMaster: isMaster, 206 isConnected: p.IsConnected(), 207 } 208 for _, key := range p.keys { 209 for _, w := range p.watchers[key] { 210 w(ctx) 211 } 212 } 213 return nil 214 } 215 216 func (p *provider) watchMasterNode(ctx context.Context, wg *sync.WaitGroup, conn *zk.Conn) { 217 defer wg.Done() 218 loop: 219 for { 220 _, _, ch, err := conn.ChildrenW(p.Cfg.masterPath) 221 if err != nil { 222 p.Log.Errorf("fail to watch path %q: %s", p.Cfg.masterPath, err) 223 select { 224 case <-ctx.Done(): 225 default: 226 time.Sleep(3 * time.Second) 227 } 228 continue 229 } 230 p.Log.Infof("start watch path %q", p.Cfg.masterPath) 231 defer p.Log.Infof("exit waith path %q", p.Cfg.masterPath) 232 for { 233 select { 234 case event, ok := <-ch: 235 if !ok { 236 continue loop 237 } 238 if event.Type == zk.EventNodeDeleted { 239 err := p.election(conn) 240 if err != nil { 241 continue loop 242 } 243 } 244 case <-ctx.Done(): 245 return 246 } 247 } 248 } 249 } 250 251 func (p *provider) Start() error { 252 return p.run() 253 } 254 255 func (p *provider) Close() error { 256 close(p.closeCh) 257 return nil 258 } 259 260 type service struct { 261 p *provider 262 name string 263 } 264 265 func (s *service) IsMaster() bool { 266 return s.p.IsMaster() 267 } 268 269 func (s *service) IsConnected() bool { 270 return s.p.IsConnected() 271 } 272 273 func (s *service) Watch(ln Listener) { 274 list, ok := s.p.watchers[s.name] 275 if !ok { 276 s.p.keys = append(s.p.keys, s.name) 277 } 278 s.p.watchers[s.name] = append(list, ln) 279 } 280 281 func (p *provider) Provide(ctx servicehub.DependencyContext, args ...interface{}) interface{} { 282 return &service{ 283 p: p, 284 name: ctx.Caller(), 285 } 286 } 287 288 func init() { 289 servicehub.Register("zk-master-election", &servicehub.Spec{ 290 Services: []string{"zk-master-election"}, 291 Dependencies: []string{"zookeeper"}, 292 Types: []reflect.Type{reflect.TypeOf((*Interface)(nil)).Elem()}, 293 Description: "master election implemented by zookeeper", 294 ConfigFunc: func() interface{} { return &config{} }, 295 Creator: func() servicehub.Provider { 296 return &provider{ 297 closeCh: make(chan struct{}), 298 watchers: make(map[string][]Listener), 299 } 300 }, 301 }) 302 }