github.com/pingcap/tidb-lightning@v5.0.0-rc.0.20210428090220-84b649866577+incompatible/lightning/backend/tikv.go (about)

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package backend
    15  
    16  import (
    17  	"context"
    18  	"regexp"
    19  
    20  	"github.com/pingcap/errors"
    21  	"github.com/pingcap/kvproto/pkg/debugpb"
    22  	"github.com/pingcap/kvproto/pkg/import_sstpb"
    23  	"go.uber.org/zap"
    24  	"golang.org/x/sync/errgroup"
    25  	"google.golang.org/grpc"
    26  	"google.golang.org/grpc/codes"
    27  	"google.golang.org/grpc/status"
    28  
    29  	"github.com/pingcap/tidb-lightning/lightning/common"
    30  	"github.com/pingcap/tidb-lightning/lightning/log"
    31  )
    32  
    33  // StoreState is the state of a TiKV store. The numerical value is sorted by
    34  // the store's accessibility (Tombstone < Down < Disconnected < Offline < Up).
    35  //
    36  // The meaning of each state can be found from PingCAP's documentation at
    37  // https://pingcap.com/docs/v3.0/how-to/scale/horizontally/#delete-a-node-dynamically-1
    38  type StoreState int
    39  
    40  const (
    41  	// StoreStateUp means the TiKV store is in service.
    42  	StoreStateUp StoreState = -iota
    43  	// StoreStateOffline means the TiKV store is in the process of being taken
    44  	// offline (but is still accessible).
    45  	StoreStateOffline
    46  	// StoreStateDisconnected means the TiKV store does not respond to PD.
    47  	StoreStateDisconnected
    48  	// StoreStateDown means the TiKV store does not respond to PD for a long
    49  	// time (> 30 minutes).
    50  	StoreStateDown
    51  	// StoreStateTombstone means the TiKV store is shut down and the data has
    52  	// been evacuated. Lightning should never interact with stores in this
    53  	// state.
    54  	StoreStateTombstone
    55  )
    56  
    57  var jsonToStoreState = map[string]StoreState{
    58  	`"Up"`:           StoreStateUp,
    59  	`"Offline"`:      StoreStateOffline,
    60  	`"Disconnected"`: StoreStateDisconnected,
    61  	`"Down"`:         StoreStateDown,
    62  	`"Tombstone"`:    StoreStateTombstone,
    63  }
    64  
    65  // UnmarshalJSON implements the json.Unmarshaler interface.
    66  func (ss *StoreState) UnmarshalJSON(content []byte) error {
    67  	if state, ok := jsonToStoreState[string(content)]; ok {
    68  		*ss = state
    69  		return nil
    70  	}
    71  	return errors.New("Unknown store state")
    72  }
    73  
    74  // Store contains metadata about a TiKV store.
    75  type Store struct {
    76  	Address string
    77  	Version string
    78  	State   StoreState `json:"state_name"`
    79  }
    80  
    81  func withTiKVConnection(ctx context.Context, tls *common.TLS, tikvAddr string, action func(import_sstpb.ImportSSTClient) error) error {
    82  	// Connect to the ImportSST service on the given TiKV node.
    83  	// The connection is needed for executing `action` and will be tear down
    84  	// when this function exits.
    85  	conn, err := grpc.DialContext(ctx, tikvAddr, tls.ToGRPCDialOption())
    86  	if err != nil {
    87  		return errors.Trace(err)
    88  	}
    89  	defer conn.Close()
    90  
    91  	client := import_sstpb.NewImportSSTClient(conn)
    92  	return action(client)
    93  }
    94  
    95  // ForAllStores executes `action` in parallel for all TiKV stores connected to
    96  // a PD server given by the HTTPS client `tls`.
    97  //
    98  // Returns the first non-nil error returned in all `action` calls. If all
    99  // `action` returns nil, this method would return nil as well.
   100  //
   101  // The `minState` argument defines the minimum store state to be included in the
   102  // result (Tombstone < Offline < Down < Disconnected < Up).
   103  func ForAllStores(
   104  	ctx context.Context,
   105  	tls *common.TLS,
   106  	minState StoreState,
   107  	action func(c context.Context, store *Store) error,
   108  ) error {
   109  	// Go through the HTTP interface instead of gRPC so we don't need to keep
   110  	// track of the cluster ID.
   111  	var stores struct {
   112  		Stores []struct {
   113  			Store Store
   114  		}
   115  	}
   116  	err := tls.GetJSON(ctx, "/pd/api/v1/stores", &stores)
   117  	if err != nil {
   118  		return err
   119  	}
   120  
   121  	eg, c := errgroup.WithContext(ctx)
   122  	for _, store := range stores.Stores {
   123  		if store.Store.State >= minState {
   124  			s := store.Store
   125  			eg.Go(func() error { return action(c, &s) })
   126  		}
   127  	}
   128  	return eg.Wait()
   129  }
   130  
   131  func ignoreUnimplementedError(err error, logger log.Logger) error {
   132  	if status.Code(err) == codes.Unimplemented {
   133  		logger.Debug("skipping potentially TiFlash store")
   134  		return nil
   135  	}
   136  	return errors.Trace(err)
   137  }
   138  
   139  // SwitchMode changes the TiKV node at the given address to a particular mode.
   140  func SwitchMode(ctx context.Context, tls *common.TLS, tikvAddr string, mode import_sstpb.SwitchMode) error {
   141  	task := log.With(zap.Stringer("mode", mode), zap.String("tikv", tikvAddr)).Begin(zap.DebugLevel, "switch mode")
   142  	err := withTiKVConnection(ctx, tls, tikvAddr, func(client import_sstpb.ImportSSTClient) error {
   143  		_, err := client.SwitchMode(ctx, &import_sstpb.SwitchModeRequest{
   144  			Mode: mode,
   145  		})
   146  		return ignoreUnimplementedError(err, task.Logger)
   147  	})
   148  	task.End(zap.InfoLevel, err)
   149  	return err
   150  }
   151  
   152  // Compact performs a leveled compaction with the given minimum level.
   153  func Compact(ctx context.Context, tls *common.TLS, tikvAddr string, level int32) error {
   154  	task := log.With(zap.Int32("level", level), zap.String("tikv", tikvAddr)).Begin(zap.InfoLevel, "compact cluster")
   155  	err := withTiKVConnection(ctx, tls, tikvAddr, func(client import_sstpb.ImportSSTClient) error {
   156  		_, err := client.Compact(ctx, &import_sstpb.CompactRequest{
   157  			OutputLevel: level,
   158  		})
   159  		return ignoreUnimplementedError(err, task.Logger)
   160  	})
   161  	task.End(zap.ErrorLevel, err)
   162  	return err
   163  }
   164  
   165  var fetchModeRegexp = regexp.MustCompile(`\btikv_config_rocksdb\{cf="default",name="hard_pending_compaction_bytes_limit"\} ([^\n]+)`)
   166  
   167  // FetchMode obtains the import mode status of the TiKV node.
   168  func FetchMode(ctx context.Context, tls *common.TLS, tikvAddr string) (import_sstpb.SwitchMode, error) {
   169  	conn, err := grpc.DialContext(ctx, tikvAddr, tls.ToGRPCDialOption())
   170  	if err != nil {
   171  		return 0, err
   172  	}
   173  	defer conn.Close()
   174  
   175  	client := debugpb.NewDebugClient(conn)
   176  	resp, err := client.GetMetrics(ctx, &debugpb.GetMetricsRequest{All: false})
   177  	if err != nil {
   178  		return 0, errors.Trace(err)
   179  	}
   180  	return FetchModeFromMetrics(resp.Prometheus)
   181  }
   182  
   183  // FetchMode obtains the import mode status from the Prometheus metrics of a TiKV node.
   184  func FetchModeFromMetrics(metrics string) (import_sstpb.SwitchMode, error) {
   185  	m := fetchModeRegexp.FindStringSubmatch(metrics)
   186  	switch {
   187  	case len(m) < 2:
   188  		return 0, errors.New("import mode status is not exposed")
   189  	case m[1] == "0":
   190  		return import_sstpb.SwitchMode_Import, nil
   191  	default:
   192  		return import_sstpb.SwitchMode_Normal, nil
   193  	}
   194  }