github.com/polarismesh/polaris@v1.17.8/store/mysql/admin.go (about)

     1  /**
     2   * Tencent is pleased to support the open source community by making Polaris available.
     3   *
     4   * Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
     5   *
     6   * Licensed under the BSD 3-Clause License (the "License");
     7   * you may not use this file except in compliance with the License.
     8   * You may obtain a copy of the License at
     9   *
    10   * https://opensource.org/licenses/BSD-3-Clause
    11   *
    12   * Unless required by applicable law or agreed to in writing, software distributed
    13   * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
    14   * CONDITIONS OF ANY KIND, either express or implied. See the License for the
    15   * specific language governing permissions and limitations under the License.
    16   */
    17  
    18  package sqldb
    19  
    20  import (
    21  	"context"
    22  	"database/sql"
    23  	"fmt"
    24  	"strings"
    25  	"sync"
    26  	"sync/atomic"
    27  	"time"
    28  
    29  	"github.com/polarismesh/polaris/common/eventhub"
    30  	"github.com/polarismesh/polaris/common/model"
    31  	"github.com/polarismesh/polaris/common/utils"
    32  	"github.com/polarismesh/polaris/store"
    33  )
    34  
    35  const (
    36  	TickTime  = 2
    37  	LeaseTime = 10
    38  )
    39  
    40  // adminStore implement adminStore interface
    41  type adminStore struct {
    42  	master  *BaseDB
    43  	leStore LeaderElectionStore
    44  	leMap   map[string]*leaderElectionStateMachine
    45  	mutex   sync.Mutex
    46  }
    47  
    48  func newAdminStore(master *BaseDB) *adminStore {
    49  	return &adminStore{
    50  		master:  master,
    51  		leStore: &leaderElectionStore{master: master},
    52  		leMap:   make(map[string]*leaderElectionStateMachine),
    53  	}
    54  }
    55  
    56  // LeaderElectionStore store inteface
    57  type LeaderElectionStore interface {
    58  	// CreateLeaderElection
    59  	CreateLeaderElection(key string) error
    60  	// GetVersion get current version
    61  	GetVersion(key string) (int64, error)
    62  	// CompareAndSwapVersion cas version
    63  	CompareAndSwapVersion(key string, curVersion int64, newVersion int64, leader string) (bool, error)
    64  	// CheckMtimeExpired check mtime expired
    65  	CheckMtimeExpired(key string, leaseTime int32) (string, bool, error)
    66  	// ListLeaderElections list all leaderelection
    67  	ListLeaderElections() ([]*model.LeaderElection, error)
    68  }
    69  
    70  // leaderElectionStore
    71  type leaderElectionStore struct {
    72  	master *BaseDB
    73  }
    74  
    75  // CreateLeaderElection insert election key into leader table
    76  func (l *leaderElectionStore) CreateLeaderElection(key string) error {
    77  	log.Debugf("[Store][database] create leader election (%s)", key)
    78  	return l.master.processWithTransaction("createLeaderElection", func(tx *BaseTx) error {
    79  		mainStr := "insert ignore into leader_election (elect_key, leader) values (?, ?)"
    80  		if _, err := tx.Exec(mainStr, key, ""); err != nil {
    81  			log.Errorf("[Store][database] create leader election (%s), err: %s", key, err.Error())
    82  		}
    83  
    84  		if err := tx.Commit(); err != nil {
    85  			log.Errorf("[Store][database] create leader election (%s) commit tx err: %s", key, err.Error())
    86  			return err
    87  		}
    88  		return nil
    89  	})
    90  }
    91  
    92  // GetVersion get the version from election
    93  func (l *leaderElectionStore) GetVersion(key string) (int64, error) {
    94  	log.Debugf("[Store][database] get version (%s)", key)
    95  	mainStr := "select version from leader_election where elect_key = ?"
    96  
    97  	var count int64
    98  	err := l.master.DB.QueryRow(mainStr, key).Scan(&count)
    99  	if err != nil {
   100  		log.Errorf("[Store][database] get version (%s), err: %s", key, err.Error())
   101  	}
   102  	return count, store.Error(err)
   103  }
   104  
   105  // CompareAndSwapVersion compare key version and update
   106  func (l *leaderElectionStore) CompareAndSwapVersion(key string, curVersion int64, newVersion int64,
   107  	leader string) (bool, error) {
   108  	var rows int64
   109  	err := l.master.processWithTransaction("compareAndSwapVersion", func(tx *BaseTx) error {
   110  		log.Debugf("[Store][database] compare and swap version (%s, %d, %d, %s)", key, curVersion, newVersion, leader)
   111  		mainStr := "update leader_election set leader = ?, version = ? where elect_key = ? and version = ?"
   112  		result, err := tx.Exec(mainStr, leader, newVersion, key, curVersion)
   113  		if err != nil {
   114  			log.Errorf("[Store][database] compare and swap version (%s), err: %s", key, err.Error())
   115  			return store.Error(err)
   116  		}
   117  		tRows, err := result.RowsAffected()
   118  		if err != nil {
   119  			log.Errorf("[Store][database] compare and swap version (%s), get RowsAffected err: %s", key, err.Error())
   120  			return store.Error(err)
   121  		}
   122  
   123  		if err := tx.Commit(); err != nil {
   124  			log.Errorf("[Store][database] create leader election (%s) commit tx err: %s", key, err.Error())
   125  			return err
   126  		}
   127  
   128  		rows = tRows
   129  		return nil
   130  	})
   131  	return rows > 0, err
   132  }
   133  
   134  // CheckMtimeExpired check last modify time expired
   135  func (l *leaderElectionStore) CheckMtimeExpired(key string, leaseTime int32) (string, bool, error) {
   136  	log.Debugf("[Store][database] check mtime expired (%s, %d)", key, leaseTime)
   137  	mainStr := "select leader, FROM_UNIXTIME(UNIX_TIMESTAMP(SYSDATE())) - mtime from leader_election where elect_key = ?"
   138  
   139  	var (
   140  		leader   string
   141  		diffTime int32
   142  	)
   143  	err := l.master.DB.QueryRow(mainStr, key).Scan(&leader, &diffTime)
   144  	if err != nil {
   145  		log.Errorf("[Store][database] check mtime expired (%s), err: %s", key, err.Error())
   146  	}
   147  	return leader, (diffTime > leaseTime), store.Error(err)
   148  }
   149  
   150  // ListLeaderElections list the election records
   151  func (l *leaderElectionStore) ListLeaderElections() ([]*model.LeaderElection, error) {
   152  	log.Info("[Store][database] list leader election")
   153  	mainStr := "select elect_key, leader, UNIX_TIMESTAMP(ctime), UNIX_TIMESTAMP(mtime) from leader_election"
   154  
   155  	rows, err := l.master.Query(mainStr)
   156  	if err != nil {
   157  		log.Errorf("[Store][database] list leader election query err: %s", err.Error())
   158  		return nil, store.Error(err)
   159  	}
   160  
   161  	return fetchLeaderElectionRows(rows)
   162  }
   163  
   164  func fetchLeaderElectionRows(rows *sql.Rows) ([]*model.LeaderElection, error) {
   165  	if rows == nil {
   166  		return nil, nil
   167  	}
   168  	defer rows.Close()
   169  
   170  	var out []*model.LeaderElection
   171  
   172  	for rows.Next() {
   173  		space := &model.LeaderElection{}
   174  		if err := rows.Scan(&space.ElectKey, &space.Host, &space.Ctime, &space.Mtime); err != nil {
   175  			log.Errorf("[Store][database] fetch leader election rows scan err: %s", err.Error())
   176  			return nil, err
   177  		}
   178  
   179  		space.CreateTime = time.Unix(space.Ctime, 0)
   180  		space.ModifyTime = time.Unix(space.Mtime, 0)
   181  		space.Valid = checkLeaderValid(space.Mtime)
   182  		out = append(out, space)
   183  	}
   184  	if err := rows.Err(); err != nil {
   185  		log.Errorf("[Store][database] fetch leader election rows next err: %s", err.Error())
   186  		return nil, err
   187  	}
   188  
   189  	return out, nil
   190  }
   191  
   192  func checkLeaderValid(mtime int64) bool {
   193  	delta := time.Now().Unix() - mtime
   194  	return delta <= LeaseTime
   195  }
   196  
   197  // leaderElectionStateMachine
   198  type leaderElectionStateMachine struct {
   199  	electKey         string
   200  	leStore          LeaderElectionStore
   201  	leaderFlag       int32
   202  	version          int64
   203  	ctx              context.Context
   204  	cancel           context.CancelFunc
   205  	releaseSignal    int32
   206  	releaseTickLimit int32
   207  	leader           string
   208  }
   209  
   210  // isLeader
   211  func isLeader(flag int32) bool {
   212  	return flag > 0
   213  }
   214  
   215  // mainLoop
   216  func (le *leaderElectionStateMachine) mainLoop() {
   217  	le.changeToFollower("")
   218  	log.Infof("[Store][database] leader election started (%s)", le.electKey)
   219  	ticker := time.NewTicker(TickTime * time.Second)
   220  	defer ticker.Stop()
   221  	for {
   222  		select {
   223  		case <-ticker.C:
   224  			le.tick()
   225  		case <-le.ctx.Done():
   226  			log.Infof("[Store][database] leader election stopped (%s)", le.electKey)
   227  			le.changeToFollower("")
   228  			return
   229  		}
   230  	}
   231  }
   232  
   233  // tick
   234  func (le *leaderElectionStateMachine) tick() {
   235  	if le.checkReleaseTickLimit() {
   236  		log.Infof("[Store][database] abandon leader election in this tick (%s)", le.electKey)
   237  		return
   238  	}
   239  	shouldRelease := le.checkAndClearReleaseSignal()
   240  	if le.isLeader() {
   241  		if shouldRelease {
   242  			log.Infof("[Store][database] release leader election (%s)", le.electKey)
   243  			le.changeToFollower("")
   244  			le.setReleaseTickLimit()
   245  			return
   246  		}
   247  		success, err := le.heartbeat()
   248  		if err == nil && success {
   249  			return
   250  		}
   251  		if err != nil {
   252  			log.Errorf("[Store][database] leader heartbeat err (%v), change to follower state (%s)", err, le.electKey)
   253  		}
   254  		if !success && err == nil {
   255  			log.Infof("[Store][database] leader heartbeat abort, change to follower state (%s)", le.electKey)
   256  		}
   257  	}
   258  	leader, dead, err := le.checkLeaderDead()
   259  	if err != nil {
   260  		log.Errorf("[Store][database] check leader dead err (%s), stay follower state (%s)",
   261  			err.Error(), le.electKey)
   262  		return
   263  	}
   264  	if !dead {
   265  		// 自己之前是 leader,并且租期还没过,调整自己为 leader
   266  		if leader == utils.LocalHost {
   267  			le.changeToLeader()
   268  		}
   269  		// leader 信息出现变化,发布leader信息变化通知
   270  		if le.leader != leader {
   271  			le.changeToFollower(leader)
   272  		}
   273  		return
   274  	}
   275  	success, err := le.elect()
   276  	if err != nil {
   277  		log.Errorf("[Store][database] elect leader err (%s), stay follower state (%s)", err.Error(), le.electKey)
   278  		return
   279  	}
   280  	if success {
   281  		le.changeToLeader()
   282  	}
   283  }
   284  
   285  func (le *leaderElectionStateMachine) publishLeaderChangeEvent() {
   286  	_ = eventhub.Publish(eventhub.LeaderChangeEventTopic, store.LeaderChangeEvent{
   287  		Key:        le.electKey,
   288  		Leader:     le.isLeader(),
   289  		LeaderHost: le.leader,
   290  	})
   291  }
   292  
   293  // changeToLeader
   294  func (le *leaderElectionStateMachine) changeToLeader() {
   295  	log.Infof("[Store][database] change from follower to leader (%s)", le.electKey)
   296  	atomic.StoreInt32(&le.leaderFlag, 1)
   297  	le.leader = utils.LocalHost
   298  	le.publishLeaderChangeEvent()
   299  }
   300  
   301  // changeToFollower
   302  func (le *leaderElectionStateMachine) changeToFollower(leader string) {
   303  	log.Infof("[Store][database] change from leader to follower (%s)", le.electKey)
   304  	atomic.StoreInt32(&le.leaderFlag, 0)
   305  	le.leader = leader
   306  	le.publishLeaderChangeEvent()
   307  }
   308  
   309  // checkLeaderDead
   310  func (le *leaderElectionStateMachine) checkLeaderDead() (string, bool, error) {
   311  	return le.leStore.CheckMtimeExpired(le.electKey, LeaseTime)
   312  }
   313  
   314  // elect
   315  func (le *leaderElectionStateMachine) elect() (bool, error) {
   316  	curVersion, err := le.leStore.GetVersion(le.electKey)
   317  	if err != nil {
   318  		return false, err
   319  	}
   320  	le.version = curVersion + 1
   321  	return le.leStore.CompareAndSwapVersion(le.electKey, curVersion, le.version, utils.LocalHost)
   322  }
   323  
   324  // heartbeat
   325  func (le *leaderElectionStateMachine) heartbeat() (bool, error) {
   326  	curVersion := le.version
   327  	le.version = curVersion + 1
   328  	return le.leStore.CompareAndSwapVersion(le.electKey, curVersion, le.version, utils.LocalHost)
   329  }
   330  
   331  // isLeader
   332  func (le *leaderElectionStateMachine) isLeader() bool {
   333  	return isLeader(le.leaderFlag)
   334  }
   335  
   336  // isLeaderAtomic
   337  func (le *leaderElectionStateMachine) isLeaderAtomic() bool {
   338  	return isLeader(atomic.LoadInt32(&le.leaderFlag))
   339  }
   340  
   341  func (le *leaderElectionStateMachine) setReleaseSignal() {
   342  	atomic.StoreInt32(&le.releaseSignal, 1)
   343  }
   344  
   345  func (le *leaderElectionStateMachine) checkAndClearReleaseSignal() bool {
   346  	return atomic.CompareAndSwapInt32(&le.releaseSignal, 1, 0)
   347  }
   348  
   349  func (le *leaderElectionStateMachine) checkReleaseTickLimit() bool {
   350  	if le.releaseTickLimit > 0 {
   351  		le.releaseTickLimit = le.releaseTickLimit - 1
   352  		return true
   353  	}
   354  	return false
   355  }
   356  
   357  func (le *leaderElectionStateMachine) setReleaseTickLimit() {
   358  	le.releaseTickLimit = LeaseTime / TickTime * 3
   359  }
   360  
   361  // StartLeaderElection start the election procedure
   362  func (m *adminStore) StartLeaderElection(key string) error {
   363  	m.mutex.Lock()
   364  	defer m.mutex.Unlock()
   365  	_, ok := m.leMap[key]
   366  	if ok {
   367  		return nil
   368  	}
   369  
   370  	ctx, cancel := context.WithCancel(context.TODO())
   371  	le := &leaderElectionStateMachine{
   372  		electKey:         key,
   373  		leStore:          m.leStore,
   374  		leaderFlag:       0,
   375  		version:          0,
   376  		ctx:              ctx,
   377  		cancel:           cancel,
   378  		releaseSignal:    0,
   379  		releaseTickLimit: 0,
   380  	}
   381  	err := le.leStore.CreateLeaderElection(key)
   382  	if err != nil {
   383  		return store.Error(err)
   384  	}
   385  
   386  	m.leMap[key] = le
   387  	go le.mainLoop()
   388  	return nil
   389  }
   390  
   391  // StopLeaderElections stop the election procedure
   392  func (m *adminStore) StopLeaderElections() {
   393  	m.mutex.Lock()
   394  	defer m.mutex.Unlock()
   395  	for k, le := range m.leMap {
   396  		le.cancel()
   397  		delete(m.leMap, k)
   398  	}
   399  }
   400  
   401  // IsLeader check leader
   402  func (m *adminStore) IsLeader(key string) bool {
   403  	m.mutex.Lock()
   404  	defer m.mutex.Unlock()
   405  	le, ok := m.leMap[key]
   406  	if !ok {
   407  		return false
   408  	}
   409  	return le.isLeaderAtomic()
   410  }
   411  
   412  // ListLeaderElections list election records
   413  func (m *adminStore) ListLeaderElections() ([]*model.LeaderElection, error) {
   414  	return m.leStore.ListLeaderElections()
   415  }
   416  
   417  // ReleaseLeaderElection release election lock
   418  func (m *adminStore) ReleaseLeaderElection(key string) error {
   419  	m.mutex.Lock()
   420  	defer m.mutex.Unlock()
   421  	le, ok := m.leMap[key]
   422  	if !ok {
   423  		return fmt.Errorf("LeaderElection(%s) not started", key)
   424  	}
   425  
   426  	le.setReleaseSignal()
   427  	return nil
   428  }
   429  
   430  // BatchCleanDeletedInstances batch clean soft deleted instances
   431  func (m *adminStore) BatchCleanDeletedInstances(timeout time.Duration, batchSize uint32) (uint32, error) {
   432  	log.Infof("[Store][database] batch clean soft deleted instances(%d)", batchSize)
   433  	var rowsAffected int64
   434  	err := m.master.processWithTransaction("batchCleanDeletedInstances", func(tx *BaseTx) error {
   435  		// 查询出需要清理的实例 ID 信息
   436  		loadWaitDel := "SELECT id FROM instance WHERE flag = 1 AND " +
   437  			"mtime <= FROM_UNIXTIME(UNIX_TIMESTAMP(SYSDATE()) - ?) LIMIT ?"
   438  		rows, err := tx.Query(loadWaitDel, int32(timeout.Seconds()), batchSize)
   439  		if err != nil {
   440  			log.Errorf("[Store][database] batch clean soft deleted instances(%d), err: %s", batchSize, err.Error())
   441  			return store.Error(err)
   442  		}
   443  		waitDelIds := make([]interface{}, 0, batchSize)
   444  		defer func() {
   445  			_ = rows.Close()
   446  		}()
   447  
   448  		placeholders := make([]string, 0, batchSize)
   449  		for rows.Next() {
   450  			var id string
   451  			if err := rows.Scan(&id); err != nil {
   452  				log.Errorf("[Store][database] scan deleted instances id, err: %s", err.Error())
   453  				return store.Error(err)
   454  			}
   455  			waitDelIds = append(waitDelIds, id)
   456  			placeholders = append(placeholders, "?")
   457  		}
   458  
   459  		if len(waitDelIds) == 0 {
   460  			return nil
   461  		}
   462  		inSql := strings.Join(placeholders, ",")
   463  
   464  		cleanMetaStr := fmt.Sprintf("delete from instance_metadata where id in (%s)", inSql)
   465  		if _, err := tx.Exec(cleanMetaStr, waitDelIds...); err != nil {
   466  			log.Errorf("[Store][database] batch clean soft deleted instances(%d), err: %s", batchSize, err.Error())
   467  			return store.Error(err)
   468  		}
   469  
   470  		cleanCheckStr := fmt.Sprintf("delete from health_check where id in (%s)", inSql)
   471  		if _, err := tx.Exec(cleanCheckStr, waitDelIds...); err != nil {
   472  			log.Errorf("[Store][database] batch clean soft deleted instances(%d), err: %s", batchSize, err.Error())
   473  			return store.Error(err)
   474  		}
   475  
   476  		cleanInsStr := fmt.Sprintf("delete from instance where flag = 1 and id in (%s)", inSql)
   477  		result, err := tx.Exec(cleanInsStr, waitDelIds...)
   478  		if err != nil {
   479  			log.Errorf("[Store][database] batch clean soft deleted instances(%d), err: %s", batchSize, err.Error())
   480  			return store.Error(err)
   481  		}
   482  
   483  		tRows, err := result.RowsAffected()
   484  		if err != nil {
   485  			log.Warnf("[Store][database] batch clean soft deleted instances(%d), get RowsAffected err: %s",
   486  				batchSize, err.Error())
   487  			return store.Error(err)
   488  		}
   489  
   490  		if err := tx.Commit(); err != nil {
   491  			log.Errorf("[Store][database] batch clean soft deleted instances(%d) commit tx err: %s",
   492  				batchSize, err.Error())
   493  			return err
   494  		}
   495  
   496  		rowsAffected = tRows
   497  		return nil
   498  	})
   499  	return uint32(rowsAffected), err
   500  }
   501  
   502  func (m *adminStore) GetUnHealthyInstances(timeout time.Duration, limit uint32) ([]string, error) {
   503  	log.Infof("[Store][database] get unhealthy instances which mtime timeout %s (%d)", timeout, limit)
   504  	queryStr := "select id from instance where flag=0 and enable_health_check=1 and health_status=0 " +
   505  		"and mtime < FROM_UNIXTIME(UNIX_TIMESTAMP(SYSDATE()) - ?) limit ?"
   506  	rows, err := m.master.Query(queryStr, int32(timeout.Seconds()), limit)
   507  	if err != nil {
   508  		log.Errorf("[Store][database] get unhealthy instances, err: %s", err.Error())
   509  		return nil, store.Error(err)
   510  	}
   511  
   512  	var instanceIds []string
   513  	defer rows.Close()
   514  	for rows.Next() {
   515  		var id string
   516  		err := rows.Scan(&id)
   517  		if err != nil {
   518  			log.Errorf("[Store][database] fetch unhealthy instance rows, err: %s", err.Error())
   519  			return nil, store.Error(err)
   520  		}
   521  		instanceIds = append(instanceIds, id)
   522  	}
   523  	if err := rows.Err(); err != nil {
   524  		log.Errorf("[Store][database] fetch unhealthy instance rows next, err: %s", err.Error())
   525  		return nil, store.Error(err)
   526  	}
   527  
   528  	return instanceIds, nil
   529  }
   530  
   531  // BatchCleanDeletedClients batch clean soft deleted clients
   532  func (m *adminStore) BatchCleanDeletedClients(timeout time.Duration, batchSize uint32) (uint32, error) {
   533  	log.Infof("[Store][database] batch clean soft deleted clients(%d)", batchSize)
   534  	var rows int64
   535  	err := m.master.processWithTransaction("batchCleanDeletedClients", func(tx *BaseTx) error {
   536  		mainStr := "delete from client where flag = 1 and " +
   537  			"mtime <= FROM_UNIXTIME(UNIX_TIMESTAMP(SYSDATE()) - ?) limit ?"
   538  		result, err := tx.Exec(mainStr, int32(timeout.Seconds()), batchSize)
   539  		if err != nil {
   540  			log.Errorf("[Store][database] batch clean soft deleted clients(%d), err: %s", batchSize, err.Error())
   541  			return store.Error(err)
   542  		}
   543  
   544  		tRows, err := result.RowsAffected()
   545  		if err != nil {
   546  			log.Warnf("[Store][database] batch clean soft deleted clients(%d), get RowsAffected err: %s",
   547  				batchSize, err.Error())
   548  			return store.Error(err)
   549  		}
   550  
   551  		if err := tx.Commit(); err != nil {
   552  			log.Errorf("[Store][database] batch clean soft deleted clients(%d) commit tx err: %s",
   553  				batchSize, err.Error())
   554  			return err
   555  		}
   556  
   557  		rows = tRows
   558  		return nil
   559  	})
   560  	return uint32(rows), err
   561  }