github.com/erda-project/erda-infra@v1.0.9/providers/zk-master-election/provider.go (about)

     1  // Copyright (c) 2021 Terminus, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package election
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"path/filepath"
    21  	"reflect"
    22  	"strings"
    23  	"sync"
    24  	"sync/atomic"
    25  	"time"
    26  
    27  	"github.com/erda-project/erda-infra/base/logs"
    28  	"github.com/erda-project/erda-infra/base/servicehub"
    29  	"github.com/erda-project/erda-infra/providers/zookeeper"
    30  	"github.com/go-zookeeper/zk"
    31  )
    32  
// Event is the election state snapshot handed to a Listener.
type Event interface {
	// IsConnected reports whether the provider was connected to zookeeper
	// when the event was produced.
	IsConnected() bool
	// IsMaster reports whether this instance won the election that
	// produced the event.
	IsMaster() bool
}
    38  
// Listener is a callback registered through Interface.Watch. It is invoked
// with an Event each time an election round completes.
type Listener func(Event)
    41  
// Interface is the service exposed by the zk-master-election provider.
type Interface interface {
	// IsConnected reports whether the provider currently considers itself
	// connected to zookeeper.
	IsConnected() bool
	// IsMaster reports whether this instance currently holds the master role.
	IsMaster() bool
	// Watch registers a Listener that is called after each election round.
	Watch(Listener)
}
    48  
// config holds the provider settings.
type config struct {
	// RootPath is the parent znode of the master node; Init normalizes it
	// to a cleaned path with a leading "/".
	RootPath string `file:"root_path"`
	// MasterNode is the name of the ephemeral znode that marks the master.
	MasterNode string `file:"master_node" default:"master-node-key"`
	// masterPath is RootPath joined with MasterNode; computed in Init.
	masterPath string
}
    54  
// provider implements master election on top of the zookeeper provider.
type provider struct {
	Cfg *config
	Log logs.Logger
	// zk is the zookeeper service resolved in Init.
	zk zookeeper.Interface
	// closeCh is closed by Close to make run return.
	closeCh chan struct{}

	// isConnected and isMaster are 0/1 flags accessed through sync/atomic.
	isConnected int32
	isMaster    int32
	// keys records watcher names in registration order so listeners are
	// notified in a stable order.
	keys []string
	// watchers maps a caller name to the listeners it registered.
	watchers map[string][]Listener
}
    66  
    67  // Init .
    68  func (p *provider) Init(ctx servicehub.Context) error {
    69  	p.zk = ctx.Service("zookeeper").(zookeeper.Interface)
    70  	p.Cfg.RootPath = filepath.Clean("/" + p.Cfg.RootPath)
    71  	p.Cfg.MasterNode = filepath.Clean(p.Cfg.MasterNode)
    72  	p.Cfg.masterPath = filepath.Join(p.Cfg.RootPath, p.Cfg.MasterNode)
    73  	return nil
    74  }
    75  
// run drives the connection lifecycle until closeCh is closed.
//
// The outer loop (re)connects to zookeeper. The inner loop consumes session
// events for that connection: on StateConnected it runs an election and
// starts a master-node watcher; on a terminal state, a connect timeout, a
// failed election, or a close request it tears the connection down. After a
// teardown it either returns (close requested) or sleeps and reconnects.
func (p *provider) run() error {
	for {
		conn, ch, err := p.zk.Connect()
		if err != nil {
			p.Log.Errorf("fail to connect zookeeper: %s", err)
			select {
			case <-p.closeCh:
				// Close was requested while connecting: give up and
				// report the connect error.
				if conn != nil {
					conn.Close()
				}
				return err
			default:
				// Not closing: back off before the next attempt.
				time.Sleep(3 * time.Second)
			}
			continue
		}
		// wg tracks the watcher goroutine started for this connection;
		// ctx is cancelled to stop it during teardown.
		var wg sync.WaitGroup
		ctx, cancel := context.WithCancel(context.Background())
		// timer fires once, after the session timeout, to detect a
		// connection that never reached StateConnected.
		timer := time.After(p.zk.SessionTimeout())
		for {
			var exit bool
			// Inside this select, `continue` waits for the next event,
			// while `break` only leaves the switch/select and therefore
			// falls through to the teardown code below.
			select {
			case event := <-ch:
				if event.Type != zk.EventSession {
					continue
				}
				switch event.State {
				case zk.StateConnected:
					atomic.StoreInt32(&p.isConnected, 1)
					p.Log.Info("connected to zookeeper successfully")
					err := p.election(conn)
					if err != nil {
						// Election failed: tear down and reconnect.
						break
					}
					wg.Add(1)
					go p.watchMasterNode(ctx, &wg, conn)
					continue
				case zk.StateConnectedReadOnly, zk.StateConnecting, zk.StateHasSession, zk.StateSaslAuthenticated, zk.StateUnknown:
					// Intermediate states: nothing to do.
					continue
				case zk.StateExpired, zk.StateAuthFailed, zk.StateDisconnected:
					// Terminal states for this connection: tear down.
					break
				default:
					p.Log.Errorf("unknown event: %v", event)
					continue
				}
			case <-timer:
				if !p.IsConnected() {
					p.Log.Errorf("connect to zookeeper timeout")
					break
				}
				continue
			case <-p.closeCh:
				exit = true
			}
			// Teardown: stop the watcher, reset the state flags, wait for
			// the watcher to finish, then close the connection.
			cancel()
			atomic.StoreInt32(&p.isMaster, 0)
			atomic.StoreInt32(&p.isConnected, 0)
			wg.Wait()
			conn.Close()
			p.Log.Info("disconnected zookeeper")
			if exit {
				return nil
			}
			break
		}
		// Pause before reconnecting.
		time.Sleep(2 * time.Second)
	}
}
   144  
   145  func (p *provider) IsConnected() bool {
   146  	return atomic.LoadInt32(&p.isConnected) != 0
   147  }
   148  
   149  func (p *provider) IsMaster() bool {
   150  	return atomic.LoadInt32(&p.isMaster) != 0
   151  }
   152  
   153  func (p *provider) makePath(conn *zk.Conn, path string) error {
   154  	exist, _, err := conn.Exists(path)
   155  	if err != nil {
   156  		return err
   157  	}
   158  	if !exist {
   159  		createdPath, err := conn.Create(path, nil, 0, zk.WorldACL(zk.PermAll))
   160  		if err != nil {
   161  			return fmt.Errorf("fail to create path %q: %s", path, err)
   162  		}
   163  		if path != createdPath {
   164  			return fmt.Errorf("create different path %q != %q", createdPath, path)
   165  		}
   166  		p.Log.Infof("created path %q", path)
   167  	}
   168  	return nil
   169  }
   170  
   171  type stateEvent struct {
   172  	isConnected bool
   173  	isMaster    bool
   174  }
   175  
   176  func (c *stateEvent) IsConnected() bool { return c.isConnected }
   177  func (c *stateEvent) IsMaster() bool    { return c.isMaster }
   178  
   179  func (p *provider) election(conn *zk.Conn) error {
   180  	err := p.makePath(conn, p.Cfg.RootPath)
   181  	if err != nil {
   182  		return err
   183  	}
   184  	createdPath, err := conn.Create(p.Cfg.masterPath, nil, zk.FlagEphemeral, zk.WorldACL(zk.PermAll))
   185  	if err != nil {
   186  		if !strings.Contains(err.Error(), "exists") {
   187  			err = fmt.Errorf("fail to create path %q: %s", p.Cfg.masterPath, err)
   188  			p.Log.Error(err)
   189  			return err
   190  		}
   191  	} else if createdPath != p.Cfg.masterPath {
   192  		err = fmt.Errorf("create different path %q != %q", createdPath, p.Cfg.masterPath)
   193  		p.Log.Error(err)
   194  		return err
   195  	}
   196  	isMaster := err == nil
   197  	if isMaster {
   198  		atomic.StoreInt32(&p.isMaster, 0)
   199  		p.Log.Infof("election finish, i am slave")
   200  	} else {
   201  		atomic.StoreInt32(&p.isMaster, 1)
   202  		p.Log.Infof("election success, i am master")
   203  	}
   204  	ctx := &stateEvent{
   205  		isMaster:    isMaster,
   206  		isConnected: p.IsConnected(),
   207  	}
   208  	for _, key := range p.keys {
   209  		for _, w := range p.watchers[key] {
   210  			w(ctx)
   211  		}
   212  	}
   213  	return nil
   214  }
   215  
   216  func (p *provider) watchMasterNode(ctx context.Context, wg *sync.WaitGroup, conn *zk.Conn) {
   217  	defer wg.Done()
   218  loop:
   219  	for {
   220  		_, _, ch, err := conn.ChildrenW(p.Cfg.masterPath)
   221  		if err != nil {
   222  			p.Log.Errorf("fail to watch path %q: %s", p.Cfg.masterPath, err)
   223  			select {
   224  			case <-ctx.Done():
   225  			default:
   226  				time.Sleep(3 * time.Second)
   227  			}
   228  			continue
   229  		}
   230  		p.Log.Infof("start watch path %q", p.Cfg.masterPath)
   231  		defer p.Log.Infof("exit waith path %q", p.Cfg.masterPath)
   232  		for {
   233  			select {
   234  			case event, ok := <-ch:
   235  				if !ok {
   236  					continue loop
   237  				}
   238  				if event.Type == zk.EventNodeDeleted {
   239  					err := p.election(conn)
   240  					if err != nil {
   241  						continue loop
   242  					}
   243  				}
   244  			case <-ctx.Done():
   245  				return
   246  			}
   247  		}
   248  	}
   249  }
   250  
   251  func (p *provider) Start() error {
   252  	return p.run()
   253  }
   254  
   255  func (p *provider) Close() error {
   256  	close(p.closeCh)
   257  	return nil
   258  }
   259  
// service is the per-caller view of the provider returned by Provide.
type service struct {
	// p is the shared provider all calls are delegated to.
	p *provider
	// name is the caller name under which this service's listeners are
	// registered.
	name string
}
   264  
   265  func (s *service) IsMaster() bool {
   266  	return s.p.IsMaster()
   267  }
   268  
   269  func (s *service) IsConnected() bool {
   270  	return s.p.IsConnected()
   271  }
   272  
   273  func (s *service) Watch(ln Listener) {
   274  	list, ok := s.p.watchers[s.name]
   275  	if !ok {
   276  		s.p.keys = append(s.p.keys, s.name)
   277  	}
   278  	s.p.watchers[s.name] = append(list, ln)
   279  }
   280  
   281  func (p *provider) Provide(ctx servicehub.DependencyContext, args ...interface{}) interface{} {
   282  	return &service{
   283  		p:    p,
   284  		name: ctx.Caller(),
   285  	}
   286  }
   287  
   288  func init() {
   289  	servicehub.Register("zk-master-election", &servicehub.Spec{
   290  		Services:     []string{"zk-master-election"},
   291  		Dependencies: []string{"zookeeper"},
   292  		Types:        []reflect.Type{reflect.TypeOf((*Interface)(nil)).Elem()},
   293  		Description:  "master election implemented by zookeeper",
   294  		ConfigFunc:   func() interface{} { return &config{} },
   295  		Creator: func() servicehub.Provider {
   296  			return &provider{
   297  				closeCh:  make(chan struct{}),
   298  				watchers: make(map[string][]Listener),
   299  			}
   300  		},
   301  	})
   302  }