github.com/polarismesh/polaris@v1.17.8/store/mysql/admin.go (about) 1 /** 2 * Tencent is pleased to support the open source community by making Polaris available. 3 * 4 * Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. 5 * 6 * Licensed under the BSD 3-Clause License (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * https://opensource.org/licenses/BSD-3-Clause 11 * 12 * Unless required by applicable law or agreed to in writing, software distributed 13 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 14 * CONDITIONS OF ANY KIND, either express or implied. See the License for the 15 * specific language governing permissions and limitations under the License. 16 */ 17 18 package sqldb 19 20 import ( 21 "context" 22 "database/sql" 23 "fmt" 24 "strings" 25 "sync" 26 "sync/atomic" 27 "time" 28 29 "github.com/polarismesh/polaris/common/eventhub" 30 "github.com/polarismesh/polaris/common/model" 31 "github.com/polarismesh/polaris/common/utils" 32 "github.com/polarismesh/polaris/store" 33 ) 34 35 const ( 36 TickTime = 2 37 LeaseTime = 10 38 ) 39 40 // adminStore implement adminStore interface 41 type adminStore struct { 42 master *BaseDB 43 leStore LeaderElectionStore 44 leMap map[string]*leaderElectionStateMachine 45 mutex sync.Mutex 46 } 47 48 func newAdminStore(master *BaseDB) *adminStore { 49 return &adminStore{ 50 master: master, 51 leStore: &leaderElectionStore{master: master}, 52 leMap: make(map[string]*leaderElectionStateMachine), 53 } 54 } 55 56 // LeaderElectionStore store inteface 57 type LeaderElectionStore interface { 58 // CreateLeaderElection 59 CreateLeaderElection(key string) error 60 // GetVersion get current version 61 GetVersion(key string) (int64, error) 62 // CompareAndSwapVersion cas version 63 CompareAndSwapVersion(key string, curVersion int64, newVersion int64, leader string) (bool, error) 64 // CheckMtimeExpired check mtime expired 65 CheckMtimeExpired(key string, leaseTime int32) (string, bool, error) 66 // ListLeaderElections list all leaderelection 67 ListLeaderElections() ([]*model.LeaderElection, error) 68 } 69 70 // leaderElectionStore 71 type leaderElectionStore struct { 72 master *BaseDB 73 } 74 75 // CreateLeaderElection insert election key into leader table 76 func (l *leaderElectionStore) CreateLeaderElection(key string) error { 77 log.Debugf("[Store][database] create leader election (%s)", key) 78 return l.master.processWithTransaction("createLeaderElection", func(tx *BaseTx) error { 79 mainStr := "insert ignore into leader_election (elect_key, leader) values (?, ?)" 80 if _, err := tx.Exec(mainStr, key, ""); err != nil { 81 log.Errorf("[Store][database] create leader election (%s), err: %s", key, err.Error()) 82 } 83 84 if err := tx.Commit(); err != nil { 85 log.Errorf("[Store][database] create leader election (%s) commit tx err: %s", key, err.Error()) 86 return err 87 } 88 return nil 89 }) 90 } 91 92 // GetVersion get the version from election 93 func (l *leaderElectionStore) GetVersion(key string) (int64, error) { 94 log.Debugf("[Store][database] get version (%s)", key) 95 mainStr := "select version from leader_election where elect_key = ?" 96 97 var count int64 98 err := l.master.DB.QueryRow(mainStr, key).Scan(&count) 99 if err != nil { 100 log.Errorf("[Store][database] get version (%s), err: %s", key, err.Error()) 101 } 102 return count, store.Error(err) 103 } 104 105 // CompareAndSwapVersion compare key version and update 106 func (l *leaderElectionStore) CompareAndSwapVersion(key string, curVersion int64, newVersion int64, 107 leader string) (bool, error) { 108 var rows int64 109 err := l.master.processWithTransaction("compareAndSwapVersion", func(tx *BaseTx) error { 110 log.Debugf("[Store][database] compare and swap version (%s, %d, %d, %s)", key, curVersion, newVersion, leader) 111 mainStr := "update leader_election set leader = ?, version = ? where elect_key = ? and version = ?" 112 result, err := tx.Exec(mainStr, leader, newVersion, key, curVersion) 113 if err != nil { 114 log.Errorf("[Store][database] compare and swap version (%s), err: %s", key, err.Error()) 115 return store.Error(err) 116 } 117 tRows, err := result.RowsAffected() 118 if err != nil { 119 log.Errorf("[Store][database] compare and swap version (%s), get RowsAffected err: %s", key, err.Error()) 120 return store.Error(err) 121 } 122 123 if err := tx.Commit(); err != nil { 124 log.Errorf("[Store][database] create leader election (%s) commit tx err: %s", key, err.Error()) 125 return err 126 } 127 128 rows = tRows 129 return nil 130 }) 131 return rows > 0, err 132 } 133 134 // CheckMtimeExpired check last modify time expired 135 func (l *leaderElectionStore) CheckMtimeExpired(key string, leaseTime int32) (string, bool, error) { 136 log.Debugf("[Store][database] check mtime expired (%s, %d)", key, leaseTime) 137 mainStr := "select leader, FROM_UNIXTIME(UNIX_TIMESTAMP(SYSDATE())) - mtime from leader_election where elect_key = ?" 138 139 var ( 140 leader string 141 diffTime int32 142 ) 143 err := l.master.DB.QueryRow(mainStr, key).Scan(&leader, &diffTime) 144 if err != nil { 145 log.Errorf("[Store][database] check mtime expired (%s), err: %s", key, err.Error()) 146 } 147 return leader, (diffTime > leaseTime), store.Error(err) 148 } 149 150 // ListLeaderElections list the election records 151 func (l *leaderElectionStore) ListLeaderElections() ([]*model.LeaderElection, error) { 152 log.Info("[Store][database] list leader election") 153 mainStr := "select elect_key, leader, UNIX_TIMESTAMP(ctime), UNIX_TIMESTAMP(mtime) from leader_election" 154 155 rows, err := l.master.Query(mainStr) 156 if err != nil { 157 log.Errorf("[Store][database] list leader election query err: %s", err.Error()) 158 return nil, store.Error(err) 159 } 160 161 return fetchLeaderElectionRows(rows) 162 } 163 164 func fetchLeaderElectionRows(rows *sql.Rows) ([]*model.LeaderElection, error) { 165 if rows == nil { 166 return nil, nil 167 } 168 defer rows.Close() 169 170 var out []*model.LeaderElection 171 172 for rows.Next() { 173 space := &model.LeaderElection{} 174 if err := rows.Scan(&space.ElectKey, &space.Host, &space.Ctime, &space.Mtime); err != nil { 175 log.Errorf("[Store][database] fetch leader election rows scan err: %s", err.Error()) 176 return nil, err 177 } 178 179 space.CreateTime = time.Unix(space.Ctime, 0) 180 space.ModifyTime = time.Unix(space.Mtime, 0) 181 space.Valid = checkLeaderValid(space.Mtime) 182 out = append(out, space) 183 } 184 if err := rows.Err(); err != nil { 185 log.Errorf("[Store][database] fetch leader election rows next err: %s", err.Error()) 186 return nil, err 187 } 188 189 return out, nil 190 } 191 192 func checkLeaderValid(mtime int64) bool { 193 delta := time.Now().Unix() - mtime 194 return delta <= LeaseTime 195 } 196 197 // leaderElectionStateMachine 198 type leaderElectionStateMachine struct { 199 electKey string 200 leStore LeaderElectionStore 201 leaderFlag int32 202 version int64 203 ctx context.Context 204 cancel context.CancelFunc 205 releaseSignal int32 206 releaseTickLimit int32 207 leader string 208 } 209 210 // isLeader 211 func isLeader(flag int32) bool { 212 return flag > 0 213 } 214 215 // mainLoop 216 func (le *leaderElectionStateMachine) mainLoop() { 217 le.changeToFollower("") 218 log.Infof("[Store][database] leader election started (%s)", le.electKey) 219 ticker := time.NewTicker(TickTime * time.Second) 220 defer ticker.Stop() 221 for { 222 select { 223 case <-ticker.C: 224 le.tick() 225 case <-le.ctx.Done(): 226 log.Infof("[Store][database] leader election stopped (%s)", le.electKey) 227 le.changeToFollower("") 228 return 229 } 230 } 231 } 232 233 // tick 234 func (le *leaderElectionStateMachine) tick() { 235 if le.checkReleaseTickLimit() { 236 log.Infof("[Store][database] abandon leader election in this tick (%s)", le.electKey) 237 return 238 } 239 shouldRelease := le.checkAndClearReleaseSignal() 240 if le.isLeader() { 241 if shouldRelease { 242 log.Infof("[Store][database] release leader election (%s)", le.electKey) 243 le.changeToFollower("") 244 le.setReleaseTickLimit() 245 return 246 } 247 success, err := le.heartbeat() 248 if err == nil && success { 249 return 250 } 251 if err != nil { 252 log.Errorf("[Store][database] leader heartbeat err (%v), change to follower state (%s)", err, le.electKey) 253 } 254 if !success && err == nil { 255 log.Infof("[Store][database] leader heartbeat abort, change to follower state (%s)", le.electKey) 256 } 257 } 258 leader, dead, err := le.checkLeaderDead() 259 if err != nil { 260 log.Errorf("[Store][database] check leader dead err (%s), stay follower state (%s)", 261 err.Error(), le.electKey) 262 return 263 } 264 if !dead { 265 // 自己之前是 leader,并且租期还没过,调整自己为 leader 266 if leader == utils.LocalHost { 267 le.changeToLeader() 268 } 269 // leader 信息出现变化,发布leader信息变化通知 270 if le.leader != leader { 271 le.changeToFollower(leader) 272 } 273 return 274 } 275 success, err := le.elect() 276 if err != nil { 277 log.Errorf("[Store][database] elect leader err (%s), stay follower state (%s)", err.Error(), le.electKey) 278 return 279 } 280 if success { 281 le.changeToLeader() 282 } 283 } 284 285 func (le *leaderElectionStateMachine) publishLeaderChangeEvent() { 286 _ = eventhub.Publish(eventhub.LeaderChangeEventTopic, store.LeaderChangeEvent{ 287 Key: le.electKey, 288 Leader: le.isLeader(), 289 LeaderHost: le.leader, 290 }) 291 } 292 293 // changeToLeader 294 func (le *leaderElectionStateMachine) changeToLeader() { 295 log.Infof("[Store][database] change from follower to leader (%s)", le.electKey) 296 atomic.StoreInt32(&le.leaderFlag, 1) 297 le.leader = utils.LocalHost 298 le.publishLeaderChangeEvent() 299 } 300 301 // changeToFollower 302 func (le *leaderElectionStateMachine) changeToFollower(leader string) { 303 log.Infof("[Store][database] change from leader to follower (%s)", le.electKey) 304 atomic.StoreInt32(&le.leaderFlag, 0) 305 le.leader = leader 306 le.publishLeaderChangeEvent() 307 } 308 309 // checkLeaderDead 310 func (le *leaderElectionStateMachine) checkLeaderDead() (string, bool, error) { 311 return le.leStore.CheckMtimeExpired(le.electKey, LeaseTime) 312 } 313 314 // elect 315 func (le *leaderElectionStateMachine) elect() (bool, error) { 316 curVersion, err := le.leStore.GetVersion(le.electKey) 317 if err != nil { 318 return false, err 319 } 320 le.version = curVersion + 1 321 return le.leStore.CompareAndSwapVersion(le.electKey, curVersion, le.version, utils.LocalHost) 322 } 323 324 // heartbeat 325 func (le *leaderElectionStateMachine) heartbeat() (bool, error) { 326 curVersion := le.version 327 le.version = curVersion + 1 328 return le.leStore.CompareAndSwapVersion(le.electKey, curVersion, le.version, utils.LocalHost) 329 } 330 331 // isLeader 332 func (le *leaderElectionStateMachine) isLeader() bool { 333 return isLeader(le.leaderFlag) 334 } 335 336 // isLeaderAtomic 337 func (le *leaderElectionStateMachine) isLeaderAtomic() bool { 338 return isLeader(atomic.LoadInt32(&le.leaderFlag)) 339 } 340 341 func (le *leaderElectionStateMachine) setReleaseSignal() { 342 atomic.StoreInt32(&le.releaseSignal, 1) 343 } 344 345 func (le *leaderElectionStateMachine) checkAndClearReleaseSignal() bool { 346 return atomic.CompareAndSwapInt32(&le.releaseSignal, 1, 0) 347 } 348 349 func (le *leaderElectionStateMachine) checkReleaseTickLimit() bool { 350 if le.releaseTickLimit > 0 { 351 le.releaseTickLimit = le.releaseTickLimit - 1 352 return true 353 } 354 return false 355 } 356 357 func (le *leaderElectionStateMachine) setReleaseTickLimit() { 358 le.releaseTickLimit = LeaseTime / TickTime * 3 359 } 360 361 // StartLeaderElection start the election procedure 362 func (m *adminStore) StartLeaderElection(key string) error { 363 m.mutex.Lock() 364 defer m.mutex.Unlock() 365 _, ok := m.leMap[key] 366 if ok { 367 return nil 368 } 369 370 ctx, cancel := context.WithCancel(context.TODO()) 371 le := &leaderElectionStateMachine{ 372 electKey: key, 373 leStore: m.leStore, 374 leaderFlag: 0, 375 version: 0, 376 ctx: ctx, 377 cancel: cancel, 378 releaseSignal: 0, 379 releaseTickLimit: 0, 380 } 381 err := le.leStore.CreateLeaderElection(key) 382 if err != nil { 383 return store.Error(err) 384 } 385 386 m.leMap[key] = le 387 go le.mainLoop() 388 return nil 389 } 390 391 // StopLeaderElections stop the election procedure 392 func (m *adminStore) StopLeaderElections() { 393 m.mutex.Lock() 394 defer m.mutex.Unlock() 395 for k, le := range m.leMap { 396 le.cancel() 397 delete(m.leMap, k) 398 } 399 } 400 401 // IsLeader check leader 402 func (m *adminStore) IsLeader(key string) bool { 403 m.mutex.Lock() 404 defer m.mutex.Unlock() 405 le, ok := m.leMap[key] 406 if !ok { 407 return false 408 } 409 return le.isLeaderAtomic() 410 } 411 412 // ListLeaderElections list election records 413 func (m *adminStore) ListLeaderElections() ([]*model.LeaderElection, error) { 414 return m.leStore.ListLeaderElections() 415 } 416 417 // ReleaseLeaderElection release election lock 418 func (m *adminStore) ReleaseLeaderElection(key string) error { 419 m.mutex.Lock() 420 defer m.mutex.Unlock() 421 le, ok := m.leMap[key] 422 if !ok { 423 return fmt.Errorf("LeaderElection(%s) not started", key) 424 } 425 426 le.setReleaseSignal() 427 return nil 428 } 429 430 // BatchCleanDeletedInstances batch clean soft deleted instances 431 func (m *adminStore) BatchCleanDeletedInstances(timeout time.Duration, batchSize uint32) (uint32, error) { 432 log.Infof("[Store][database] batch clean soft deleted instances(%d)", batchSize) 433 var rowsAffected int64 434 err := m.master.processWithTransaction("batchCleanDeletedInstances", func(tx *BaseTx) error { 435 // 查询出需要清理的实例 ID 信息 436 loadWaitDel := "SELECT id FROM instance WHERE flag = 1 AND " + 437 "mtime <= FROM_UNIXTIME(UNIX_TIMESTAMP(SYSDATE()) - ?) LIMIT ?" 438 rows, err := tx.Query(loadWaitDel, int32(timeout.Seconds()), batchSize) 439 if err != nil { 440 log.Errorf("[Store][database] batch clean soft deleted instances(%d), err: %s", batchSize, err.Error()) 441 return store.Error(err) 442 } 443 waitDelIds := make([]interface{}, 0, batchSize) 444 defer func() { 445 _ = rows.Close() 446 }() 447 448 placeholders := make([]string, 0, batchSize) 449 for rows.Next() { 450 var id string 451 if err := rows.Scan(&id); err != nil { 452 log.Errorf("[Store][database] scan deleted instances id, err: %s", err.Error()) 453 return store.Error(err) 454 } 455 waitDelIds = append(waitDelIds, id) 456 placeholders = append(placeholders, "?") 457 } 458 459 if len(waitDelIds) == 0 { 460 return nil 461 } 462 inSql := strings.Join(placeholders, ",") 463 464 cleanMetaStr := fmt.Sprintf("delete from instance_metadata where id in (%s)", inSql) 465 if _, err := tx.Exec(cleanMetaStr, waitDelIds...); err != nil { 466 log.Errorf("[Store][database] batch clean soft deleted instances(%d), err: %s", batchSize, err.Error()) 467 return store.Error(err) 468 } 469 470 cleanCheckStr := fmt.Sprintf("delete from health_check where id in (%s)", inSql) 471 if _, err := tx.Exec(cleanCheckStr, waitDelIds...); err != nil { 472 log.Errorf("[Store][database] batch clean soft deleted instances(%d), err: %s", batchSize, err.Error()) 473 return store.Error(err) 474 } 475 476 cleanInsStr := fmt.Sprintf("delete from instance where flag = 1 and id in (%s)", inSql) 477 result, err := tx.Exec(cleanInsStr, waitDelIds...) 478 if err != nil { 479 log.Errorf("[Store][database] batch clean soft deleted instances(%d), err: %s", batchSize, err.Error()) 480 return store.Error(err) 481 } 482 483 tRows, err := result.RowsAffected() 484 if err != nil { 485 log.Warnf("[Store][database] batch clean soft deleted instances(%d), get RowsAffected err: %s", 486 batchSize, err.Error()) 487 return store.Error(err) 488 } 489 490 if err := tx.Commit(); err != nil { 491 log.Errorf("[Store][database] batch clean soft deleted instances(%d) commit tx err: %s", 492 batchSize, err.Error()) 493 return err 494 } 495 496 rowsAffected = tRows 497 return nil 498 }) 499 return uint32(rowsAffected), err 500 } 501 502 func (m *adminStore) GetUnHealthyInstances(timeout time.Duration, limit uint32) ([]string, error) { 503 log.Infof("[Store][database] get unhealthy instances which mtime timeout %s (%d)", timeout, limit) 504 queryStr := "select id from instance where flag=0 and enable_health_check=1 and health_status=0 " + 505 "and mtime < FROM_UNIXTIME(UNIX_TIMESTAMP(SYSDATE()) - ?) limit ?" 506 rows, err := m.master.Query(queryStr, int32(timeout.Seconds()), limit) 507 if err != nil { 508 log.Errorf("[Store][database] get unhealthy instances, err: %s", err.Error()) 509 return nil, store.Error(err) 510 } 511 512 var instanceIds []string 513 defer rows.Close() 514 for rows.Next() { 515 var id string 516 err := rows.Scan(&id) 517 if err != nil { 518 log.Errorf("[Store][database] fetch unhealthy instance rows, err: %s", err.Error()) 519 return nil, store.Error(err) 520 } 521 instanceIds = append(instanceIds, id) 522 } 523 if err := rows.Err(); err != nil { 524 log.Errorf("[Store][database] fetch unhealthy instance rows next, err: %s", err.Error()) 525 return nil, store.Error(err) 526 } 527 528 return instanceIds, nil 529 } 530 531 // BatchCleanDeletedClients batch clean soft deleted clients 532 func (m *adminStore) BatchCleanDeletedClients(timeout time.Duration, batchSize uint32) (uint32, error) { 533 log.Infof("[Store][database] batch clean soft deleted clients(%d)", batchSize) 534 var rows int64 535 err := m.master.processWithTransaction("batchCleanDeletedClients", func(tx *BaseTx) error { 536 mainStr := "delete from client where flag = 1 and " + 537 "mtime <= FROM_UNIXTIME(UNIX_TIMESTAMP(SYSDATE()) - ?) limit ?" 538 result, err := tx.Exec(mainStr, int32(timeout.Seconds()), batchSize) 539 if err != nil { 540 log.Errorf("[Store][database] batch clean soft deleted clients(%d), err: %s", batchSize, err.Error()) 541 return store.Error(err) 542 } 543 544 tRows, err := result.RowsAffected() 545 if err != nil { 546 log.Warnf("[Store][database] batch clean soft deleted clients(%d), get RowsAffected err: %s", 547 batchSize, err.Error()) 548 return store.Error(err) 549 } 550 551 if err := tx.Commit(); err != nil { 552 log.Errorf("[Store][database] batch clean soft deleted clients(%d) commit tx err: %s", 553 batchSize, err.Error()) 554 return err 555 } 556 557 rows = tRows 558 return nil 559 }) 560 return uint32(rows), err 561 }