github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/lightning/tikv/tikv.go (about)

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package tikv
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"regexp"
    20  	"strings"
    21  
    22  	"github.com/coreos/go-semver/semver"
    23  	"github.com/pingcap/errors"
    24  	"github.com/pingcap/kvproto/pkg/debugpb"
    25  	"github.com/pingcap/kvproto/pkg/import_sstpb"
    26  	"github.com/pingcap/parser/model"
    27  	"go.uber.org/zap"
    28  	"golang.org/x/sync/errgroup"
    29  	"google.golang.org/grpc"
    30  	"google.golang.org/grpc/codes"
    31  	"google.golang.org/grpc/status"
    32  
    33  	"github.com/pingcap/br/pkg/lightning/common"
    34  	"github.com/pingcap/br/pkg/lightning/log"
    35  	"github.com/pingcap/br/pkg/pdutil"
    36  	"github.com/pingcap/br/pkg/version"
    37  )
    38  
    39  // StoreState is the state of a TiKV store. The numerical value is sorted by
    40  // the store's accessibility (Tombstone < Down < Disconnected < Offline < Up).
    41  //
    42  // The meaning of each state can be found from PingCAP's documentation at
    43  // https://pingcap.com/docs/v3.0/how-to/scale/horizontally/#delete-a-node-dynamically-1
    44  type StoreState int
    45  
    46  const (
    47  	// StoreStateUp means the TiKV store is in service.
    48  	StoreStateUp StoreState = -iota
    49  	// StoreStateOffline means the TiKV store is in the process of being taken
    50  	// offline (but is still accessible).
    51  	StoreStateOffline
    52  	// StoreStateDisconnected means the TiKV store does not respond to PD.
    53  	StoreStateDisconnected
    54  	// StoreStateDown means the TiKV store does not respond to PD for a long
    55  	// time (> 30 minutes).
    56  	StoreStateDown
    57  	// StoreStateTombstone means the TiKV store is shut down and the data has
    58  	// been evacuated. Lightning should never interact with stores in this
    59  	// state.
    60  	StoreStateTombstone
    61  )
    62  
    63  var jsonToStoreState = map[string]StoreState{
    64  	`"Up"`:           StoreStateUp,
    65  	`"Offline"`:      StoreStateOffline,
    66  	`"Disconnected"`: StoreStateDisconnected,
    67  	`"Down"`:         StoreStateDown,
    68  	`"Tombstone"`:    StoreStateTombstone,
    69  }
    70  
    71  // UnmarshalJSON implements the json.Unmarshaler interface.
    72  func (ss *StoreState) UnmarshalJSON(content []byte) error {
    73  	if state, ok := jsonToStoreState[string(content)]; ok {
    74  		*ss = state
    75  		return nil
    76  	}
    77  	return errors.New("Unknown store state")
    78  }
    79  
    80  // Store contains metadata about a TiKV store.
    81  type Store struct {
    82  	Address string
    83  	Version string
    84  	State   StoreState `json:"state_name"`
    85  }
    86  
    87  func withTiKVConnection(ctx context.Context, tls *common.TLS, tikvAddr string, action func(import_sstpb.ImportSSTClient) error) error {
    88  	// Connect to the ImportSST service on the given TiKV node.
    89  	// The connection is needed for executing `action` and will be tear down
    90  	// when this function exits.
    91  	conn, err := grpc.DialContext(ctx, tikvAddr, tls.ToGRPCDialOption())
    92  	if err != nil {
    93  		return errors.Trace(err)
    94  	}
    95  	defer conn.Close()
    96  
    97  	client := import_sstpb.NewImportSSTClient(conn)
    98  	return action(client)
    99  }
   100  
   101  // ForAllStores executes `action` in parallel for all TiKV stores connected to
   102  // a PD server given by the HTTPS client `tls`.
   103  //
   104  // Returns the first non-nil error returned in all `action` calls. If all
   105  // `action` returns nil, this method would return nil as well.
   106  //
   107  // The `minState` argument defines the minimum store state to be included in the
   108  // result (Tombstone < Offline < Down < Disconnected < Up).
   109  func ForAllStores(
   110  	ctx context.Context,
   111  	tls *common.TLS,
   112  	minState StoreState,
   113  	action func(c context.Context, store *Store) error,
   114  ) error {
   115  	// Go through the HTTP interface instead of gRPC so we don't need to keep
   116  	// track of the cluster ID.
   117  	var stores struct {
   118  		Stores []struct {
   119  			Store Store
   120  		}
   121  	}
   122  	err := tls.GetJSON(ctx, "/pd/api/v1/stores", &stores)
   123  	if err != nil {
   124  		return err
   125  	}
   126  
   127  	eg, c := errgroup.WithContext(ctx)
   128  	for _, store := range stores.Stores {
   129  		if store.Store.State >= minState {
   130  			s := store.Store
   131  			eg.Go(func() error { return action(c, &s) })
   132  		}
   133  	}
   134  	return eg.Wait()
   135  }
   136  
   137  func ignoreUnimplementedError(err error, logger log.Logger) error {
   138  	if status.Code(err) == codes.Unimplemented {
   139  		logger.Debug("skipping potentially TiFlash store")
   140  		return nil
   141  	}
   142  	return errors.Trace(err)
   143  }
   144  
   145  // SwitchMode changes the TiKV node at the given address to a particular mode.
   146  func SwitchMode(ctx context.Context, tls *common.TLS, tikvAddr string, mode import_sstpb.SwitchMode) error {
   147  	task := log.With(zap.Stringer("mode", mode), zap.String("tikv", tikvAddr)).Begin(zap.DebugLevel, "switch mode")
   148  	err := withTiKVConnection(ctx, tls, tikvAddr, func(client import_sstpb.ImportSSTClient) error {
   149  		_, err := client.SwitchMode(ctx, &import_sstpb.SwitchModeRequest{
   150  			Mode: mode,
   151  		})
   152  		return ignoreUnimplementedError(err, task.Logger)
   153  	})
   154  	task.End(zap.InfoLevel, err)
   155  	return err
   156  }
   157  
   158  // Compact performs a leveled compaction with the given minimum level.
   159  func Compact(ctx context.Context, tls *common.TLS, tikvAddr string, level int32) error {
   160  	task := log.With(zap.Int32("level", level), zap.String("tikv", tikvAddr)).Begin(zap.InfoLevel, "compact cluster")
   161  	err := withTiKVConnection(ctx, tls, tikvAddr, func(client import_sstpb.ImportSSTClient) error {
   162  		_, err := client.Compact(ctx, &import_sstpb.CompactRequest{
   163  			OutputLevel: level,
   164  		})
   165  		return ignoreUnimplementedError(err, task.Logger)
   166  	})
   167  	task.End(zap.ErrorLevel, err)
   168  	return err
   169  }
   170  
   171  var fetchModeRegexp = regexp.MustCompile(`\btikv_config_rocksdb\{cf="default",name="hard_pending_compaction_bytes_limit"\} ([^\n]+)`)
   172  
   173  // FetchMode obtains the import mode status of the TiKV node.
   174  func FetchMode(ctx context.Context, tls *common.TLS, tikvAddr string) (import_sstpb.SwitchMode, error) {
   175  	conn, err := grpc.DialContext(ctx, tikvAddr, tls.ToGRPCDialOption())
   176  	if err != nil {
   177  		return 0, err
   178  	}
   179  	defer conn.Close()
   180  
   181  	client := debugpb.NewDebugClient(conn)
   182  	resp, err := client.GetMetrics(ctx, &debugpb.GetMetricsRequest{All: false})
   183  	if err != nil {
   184  		return 0, errors.Trace(err)
   185  	}
   186  	return FetchModeFromMetrics(resp.Prometheus)
   187  }
   188  
   189  // FetchMode obtains the import mode status from the Prometheus metrics of a TiKV node.
   190  func FetchModeFromMetrics(metrics string) (import_sstpb.SwitchMode, error) {
   191  	m := fetchModeRegexp.FindStringSubmatch(metrics)
   192  	switch {
   193  	case len(m) < 2:
   194  		return 0, errors.New("import mode status is not exposed")
   195  	case m[1] == "0":
   196  		return import_sstpb.SwitchMode_Import, nil
   197  	default:
   198  		return import_sstpb.SwitchMode_Normal, nil
   199  	}
   200  }
   201  
   202  func FetchRemoteTableModelsFromTLS(ctx context.Context, tls *common.TLS, schema string) ([]*model.TableInfo, error) {
   203  	var tables []*model.TableInfo
   204  	err := tls.GetJSON(ctx, "/schema/"+schema, &tables)
   205  	if err != nil {
   206  		return nil, errors.Annotatef(err, "cannot read schema '%s' from remote", schema)
   207  	}
   208  	return tables, nil
   209  }
   210  
   211  func CheckPDVersion(ctx context.Context, tls *common.TLS, pdAddr string, requiredMinVersion, requiredMaxVersion semver.Version) error {
   212  	ver, err := pdutil.FetchPDVersion(ctx, tls, pdAddr)
   213  	if err != nil {
   214  		return errors.Trace(err)
   215  	}
   216  
   217  	return version.CheckVersion("PD", *ver, requiredMinVersion, requiredMaxVersion)
   218  }
   219  
   220  func CheckTiKVVersion(ctx context.Context, tls *common.TLS, pdAddr string, requiredMinVersion, requiredMaxVersion semver.Version) error {
   221  	return ForAllStores(
   222  		ctx,
   223  		tls.WithHost(pdAddr),
   224  		StoreStateDown,
   225  		func(c context.Context, store *Store) error {
   226  			component := fmt.Sprintf("TiKV (at %s)", store.Address)
   227  			ver, err := semver.NewVersion(strings.TrimPrefix(store.Version, "v"))
   228  			if err != nil {
   229  				return errors.Annotate(err, component)
   230  			}
   231  			return version.CheckVersion(component, *ver, requiredMinVersion, requiredMaxVersion)
   232  		},
   233  	)
   234  }