github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/dbs/dagger/soliton/dead_table_lock_checker.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package soliton
    15  
    16  import (
    17  	"context"
    18  	"strings"
    19  	"time"
    20  
    21  	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
    22  	"github.com/whtcorpsinc/errors"
    23  	"github.com/whtcorpsinc/milevadb/soliton/logutil"
    24  	"go.etcd.io/etcd/clientv3"
    25  	"go.uber.org/zap"
    26  )
    27  
    28  const (
    29  	defaultRetryCnt      = 5
    30  	defaultRetryInterval = time.Millisecond * 200
    31  	defaultTimeout       = time.Second
    32  )
    33  
// DeadBlockLockChecker is used to check for dead causet locks.
// If a milevadb-server panics or is killed by others, the causet locks held
// by the killed milevadb-server may not have been released.
type DeadBlockLockChecker struct {
	// etcdCli is the etcd client used to list the alive servers. It may be
	// nil, in which case GetDeadLockedBlocks returns no result.
	etcdCli *clientv3.Client
}
    39  
    40  // NewDeadBlockLockChecker creates new DeadLockChecker.
    41  func NewDeadBlockLockChecker(etcdCli *clientv3.Client) DeadBlockLockChecker {
    42  	return DeadBlockLockChecker{
    43  		etcdCli: etcdCli,
    44  	}
    45  }
    46  
    47  func (d *DeadBlockLockChecker) getAliveServers(ctx context.Context) (map[string]struct{}, error) {
    48  	var err error
    49  	var resp *clientv3.GetResponse
    50  	allInfos := make(map[string]struct{})
    51  	for i := 0; i < defaultRetryCnt; i++ {
    52  		select {
    53  		case <-ctx.Done():
    54  			return nil, ctx.Err()
    55  		default:
    56  		}
    57  		childCtx, cancel := context.WithTimeout(ctx, defaultTimeout)
    58  		resp, err = d.etcdCli.Get(childCtx, DBSAllSchemaVersions, clientv3.WithPrefix())
    59  		cancel()
    60  		if err != nil {
    61  			logutil.BgLogger().Info("[dbs] clean dead causet dagger get alive servers failed.", zap.Error(err))
    62  			time.Sleep(defaultRetryInterval)
    63  			continue
    64  		}
    65  		for _, ekv := range resp.Ekvs {
    66  			serverID := strings.TrimPrefix(string(ekv.Key), DBSAllSchemaVersions+"/")
    67  			allInfos[serverID] = struct{}{}
    68  		}
    69  		return allInfos, nil
    70  	}
    71  	return nil, errors.Trace(err)
    72  }
    73  
    74  // GetDeadLockedBlocks gets dead locked blocks.
    75  func (d *DeadBlockLockChecker) GetDeadLockedBlocks(ctx context.Context, schemas []*perceptron.DBInfo) (map[perceptron.StochastikInfo][]perceptron.BlockLockTpInfo, error) {
    76  	if d.etcdCli == nil {
    77  		return nil, nil
    78  	}
    79  	aliveServers, err := d.getAliveServers(ctx)
    80  	if err != nil {
    81  		return nil, err
    82  	}
    83  	deadLockBlocks := make(map[perceptron.StochastikInfo][]perceptron.BlockLockTpInfo)
    84  	for _, schemaReplicant := range schemas {
    85  		select {
    86  		case <-ctx.Done():
    87  			return nil, ctx.Err()
    88  		default:
    89  		}
    90  		for _, tbl := range schemaReplicant.Blocks {
    91  			if tbl.Lock == nil {
    92  				continue
    93  			}
    94  			for _, se := range tbl.Lock.Stochastiks {
    95  				if _, ok := aliveServers[se.ServerID]; !ok {
    96  					deadLockBlocks[se] = append(deadLockBlocks[se], perceptron.BlockLockTpInfo{
    97  						SchemaID: schemaReplicant.ID,
    98  						BlockID:  tbl.ID,
    99  						Tp:       tbl.Lock.Tp,
   100  					})
   101  				}
   102  			}
   103  		}
   104  	}
   105  	return deadLockBlocks, nil
   106  }