github.com/pingcap/tidb-lightning@v5.0.0-rc.0.20210428090220-84b649866577+incompatible/lightning/backend/tikv.go (about) 1 // Copyright 2019 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package backend 15 16 import ( 17 "context" 18 "regexp" 19 20 "github.com/pingcap/errors" 21 "github.com/pingcap/kvproto/pkg/debugpb" 22 "github.com/pingcap/kvproto/pkg/import_sstpb" 23 "go.uber.org/zap" 24 "golang.org/x/sync/errgroup" 25 "google.golang.org/grpc" 26 "google.golang.org/grpc/codes" 27 "google.golang.org/grpc/status" 28 29 "github.com/pingcap/tidb-lightning/lightning/common" 30 "github.com/pingcap/tidb-lightning/lightning/log" 31 ) 32 33 // StoreState is the state of a TiKV store. The numerical value is sorted by 34 // the store's accessibility (Tombstone < Down < Disconnected < Offline < Up). 35 // 36 // The meaning of each state can be found from PingCAP's documentation at 37 // https://pingcap.com/docs/v3.0/how-to/scale/horizontally/#delete-a-node-dynamically-1 38 type StoreState int 39 40 const ( 41 // StoreStateUp means the TiKV store is in service. 42 StoreStateUp StoreState = -iota 43 // StoreStateOffline means the TiKV store is in the process of being taken 44 // offline (but is still accessible). 45 StoreStateOffline 46 // StoreStateDisconnected means the TiKV store does not respond to PD. 47 StoreStateDisconnected 48 // StoreStateDown means the TiKV store does not respond to PD for a long 49 // time (> 30 minutes). 50 StoreStateDown 51 // StoreStateTombstone means the TiKV store is shut down and the data has 52 // been evacuated. Lightning should never interact with stores in this 53 // state. 54 StoreStateTombstone 55 ) 56 57 var jsonToStoreState = map[string]StoreState{ 58 `"Up"`: StoreStateUp, 59 `"Offline"`: StoreStateOffline, 60 `"Disconnected"`: StoreStateDisconnected, 61 `"Down"`: StoreStateDown, 62 `"Tombstone"`: StoreStateTombstone, 63 } 64 65 // UnmarshalJSON implements the json.Unmarshaler interface. 66 func (ss *StoreState) UnmarshalJSON(content []byte) error { 67 if state, ok := jsonToStoreState[string(content)]; ok { 68 *ss = state 69 return nil 70 } 71 return errors.New("Unknown store state") 72 } 73 74 // Store contains metadata about a TiKV store. 75 type Store struct { 76 Address string 77 Version string 78 State StoreState `json:"state_name"` 79 } 80 81 func withTiKVConnection(ctx context.Context, tls *common.TLS, tikvAddr string, action func(import_sstpb.ImportSSTClient) error) error { 82 // Connect to the ImportSST service on the given TiKV node. 83 // The connection is needed for executing `action` and will be tear down 84 // when this function exits. 85 conn, err := grpc.DialContext(ctx, tikvAddr, tls.ToGRPCDialOption()) 86 if err != nil { 87 return errors.Trace(err) 88 } 89 defer conn.Close() 90 91 client := import_sstpb.NewImportSSTClient(conn) 92 return action(client) 93 } 94 95 // ForAllStores executes `action` in parallel for all TiKV stores connected to 96 // a PD server given by the HTTPS client `tls`. 97 // 98 // Returns the first non-nil error returned in all `action` calls. If all 99 // `action` returns nil, this method would return nil as well. 100 // 101 // The `minState` argument defines the minimum store state to be included in the 102 // result (Tombstone < Offline < Down < Disconnected < Up). 103 func ForAllStores( 104 ctx context.Context, 105 tls *common.TLS, 106 minState StoreState, 107 action func(c context.Context, store *Store) error, 108 ) error { 109 // Go through the HTTP interface instead of gRPC so we don't need to keep 110 // track of the cluster ID. 111 var stores struct { 112 Stores []struct { 113 Store Store 114 } 115 } 116 err := tls.GetJSON(ctx, "/pd/api/v1/stores", &stores) 117 if err != nil { 118 return err 119 } 120 121 eg, c := errgroup.WithContext(ctx) 122 for _, store := range stores.Stores { 123 if store.Store.State >= minState { 124 s := store.Store 125 eg.Go(func() error { return action(c, &s) }) 126 } 127 } 128 return eg.Wait() 129 } 130 131 func ignoreUnimplementedError(err error, logger log.Logger) error { 132 if status.Code(err) == codes.Unimplemented { 133 logger.Debug("skipping potentially TiFlash store") 134 return nil 135 } 136 return errors.Trace(err) 137 } 138 139 // SwitchMode changes the TiKV node at the given address to a particular mode. 140 func SwitchMode(ctx context.Context, tls *common.TLS, tikvAddr string, mode import_sstpb.SwitchMode) error { 141 task := log.With(zap.Stringer("mode", mode), zap.String("tikv", tikvAddr)).Begin(zap.DebugLevel, "switch mode") 142 err := withTiKVConnection(ctx, tls, tikvAddr, func(client import_sstpb.ImportSSTClient) error { 143 _, err := client.SwitchMode(ctx, &import_sstpb.SwitchModeRequest{ 144 Mode: mode, 145 }) 146 return ignoreUnimplementedError(err, task.Logger) 147 }) 148 task.End(zap.InfoLevel, err) 149 return err 150 } 151 152 // Compact performs a leveled compaction with the given minimum level. 153 func Compact(ctx context.Context, tls *common.TLS, tikvAddr string, level int32) error { 154 task := log.With(zap.Int32("level", level), zap.String("tikv", tikvAddr)).Begin(zap.InfoLevel, "compact cluster") 155 err := withTiKVConnection(ctx, tls, tikvAddr, func(client import_sstpb.ImportSSTClient) error { 156 _, err := client.Compact(ctx, &import_sstpb.CompactRequest{ 157 OutputLevel: level, 158 }) 159 return ignoreUnimplementedError(err, task.Logger) 160 }) 161 task.End(zap.ErrorLevel, err) 162 return err 163 } 164 165 var fetchModeRegexp = regexp.MustCompile(`\btikv_config_rocksdb\{cf="default",name="hard_pending_compaction_bytes_limit"\} ([^\n]+)`) 166 167 // FetchMode obtains the import mode status of the TiKV node. 168 func FetchMode(ctx context.Context, tls *common.TLS, tikvAddr string) (import_sstpb.SwitchMode, error) { 169 conn, err := grpc.DialContext(ctx, tikvAddr, tls.ToGRPCDialOption()) 170 if err != nil { 171 return 0, err 172 } 173 defer conn.Close() 174 175 client := debugpb.NewDebugClient(conn) 176 resp, err := client.GetMetrics(ctx, &debugpb.GetMetricsRequest{All: false}) 177 if err != nil { 178 return 0, errors.Trace(err) 179 } 180 return FetchModeFromMetrics(resp.Prometheus) 181 } 182 183 // FetchMode obtains the import mode status from the Prometheus metrics of a TiKV node. 184 func FetchModeFromMetrics(metrics string) (import_sstpb.SwitchMode, error) { 185 m := fetchModeRegexp.FindStringSubmatch(metrics) 186 switch { 187 case len(m) < 2: 188 return 0, errors.New("import mode status is not exposed") 189 case m[1] == "0": 190 return import_sstpb.SwitchMode_Import, nil 191 default: 192 return import_sstpb.SwitchMode_Normal, nil 193 } 194 }