github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/lightning/tikv/tikv.go (about) 1 // Copyright 2019 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package tikv 15 16 import ( 17 "context" 18 "fmt" 19 "regexp" 20 "strings" 21 22 "github.com/coreos/go-semver/semver" 23 "github.com/pingcap/errors" 24 "github.com/pingcap/kvproto/pkg/debugpb" 25 "github.com/pingcap/kvproto/pkg/import_sstpb" 26 "github.com/pingcap/parser/model" 27 "go.uber.org/zap" 28 "golang.org/x/sync/errgroup" 29 "google.golang.org/grpc" 30 "google.golang.org/grpc/codes" 31 "google.golang.org/grpc/status" 32 33 "github.com/pingcap/br/pkg/lightning/common" 34 "github.com/pingcap/br/pkg/lightning/log" 35 "github.com/pingcap/br/pkg/pdutil" 36 "github.com/pingcap/br/pkg/version" 37 ) 38 39 // StoreState is the state of a TiKV store. The numerical value is sorted by 40 // the store's accessibility (Tombstone < Down < Disconnected < Offline < Up). 41 // 42 // The meaning of each state can be found from PingCAP's documentation at 43 // https://pingcap.com/docs/v3.0/how-to/scale/horizontally/#delete-a-node-dynamically-1 44 type StoreState int 45 46 const ( 47 // StoreStateUp means the TiKV store is in service. 48 StoreStateUp StoreState = -iota 49 // StoreStateOffline means the TiKV store is in the process of being taken 50 // offline (but is still accessible). 51 StoreStateOffline 52 // StoreStateDisconnected means the TiKV store does not respond to PD. 53 StoreStateDisconnected 54 // StoreStateDown means the TiKV store does not respond to PD for a long 55 // time (> 30 minutes). 56 StoreStateDown 57 // StoreStateTombstone means the TiKV store is shut down and the data has 58 // been evacuated. Lightning should never interact with stores in this 59 // state. 60 StoreStateTombstone 61 ) 62 63 var jsonToStoreState = map[string]StoreState{ 64 `"Up"`: StoreStateUp, 65 `"Offline"`: StoreStateOffline, 66 `"Disconnected"`: StoreStateDisconnected, 67 `"Down"`: StoreStateDown, 68 `"Tombstone"`: StoreStateTombstone, 69 } 70 71 // UnmarshalJSON implements the json.Unmarshaler interface. 72 func (ss *StoreState) UnmarshalJSON(content []byte) error { 73 if state, ok := jsonToStoreState[string(content)]; ok { 74 *ss = state 75 return nil 76 } 77 return errors.New("Unknown store state") 78 } 79 80 // Store contains metadata about a TiKV store. 81 type Store struct { 82 Address string 83 Version string 84 State StoreState `json:"state_name"` 85 } 86 87 func withTiKVConnection(ctx context.Context, tls *common.TLS, tikvAddr string, action func(import_sstpb.ImportSSTClient) error) error { 88 // Connect to the ImportSST service on the given TiKV node. 89 // The connection is needed for executing `action` and will be tear down 90 // when this function exits. 91 conn, err := grpc.DialContext(ctx, tikvAddr, tls.ToGRPCDialOption()) 92 if err != nil { 93 return errors.Trace(err) 94 } 95 defer conn.Close() 96 97 client := import_sstpb.NewImportSSTClient(conn) 98 return action(client) 99 } 100 101 // ForAllStores executes `action` in parallel for all TiKV stores connected to 102 // a PD server given by the HTTPS client `tls`. 103 // 104 // Returns the first non-nil error returned in all `action` calls. If all 105 // `action` returns nil, this method would return nil as well. 106 // 107 // The `minState` argument defines the minimum store state to be included in the 108 // result (Tombstone < Offline < Down < Disconnected < Up). 109 func ForAllStores( 110 ctx context.Context, 111 tls *common.TLS, 112 minState StoreState, 113 action func(c context.Context, store *Store) error, 114 ) error { 115 // Go through the HTTP interface instead of gRPC so we don't need to keep 116 // track of the cluster ID. 117 var stores struct { 118 Stores []struct { 119 Store Store 120 } 121 } 122 err := tls.GetJSON(ctx, "/pd/api/v1/stores", &stores) 123 if err != nil { 124 return err 125 } 126 127 eg, c := errgroup.WithContext(ctx) 128 for _, store := range stores.Stores { 129 if store.Store.State >= minState { 130 s := store.Store 131 eg.Go(func() error { return action(c, &s) }) 132 } 133 } 134 return eg.Wait() 135 } 136 137 func ignoreUnimplementedError(err error, logger log.Logger) error { 138 if status.Code(err) == codes.Unimplemented { 139 logger.Debug("skipping potentially TiFlash store") 140 return nil 141 } 142 return errors.Trace(err) 143 } 144 145 // SwitchMode changes the TiKV node at the given address to a particular mode. 146 func SwitchMode(ctx context.Context, tls *common.TLS, tikvAddr string, mode import_sstpb.SwitchMode) error { 147 task := log.With(zap.Stringer("mode", mode), zap.String("tikv", tikvAddr)).Begin(zap.DebugLevel, "switch mode") 148 err := withTiKVConnection(ctx, tls, tikvAddr, func(client import_sstpb.ImportSSTClient) error { 149 _, err := client.SwitchMode(ctx, &import_sstpb.SwitchModeRequest{ 150 Mode: mode, 151 }) 152 return ignoreUnimplementedError(err, task.Logger) 153 }) 154 task.End(zap.InfoLevel, err) 155 return err 156 } 157 158 // Compact performs a leveled compaction with the given minimum level. 159 func Compact(ctx context.Context, tls *common.TLS, tikvAddr string, level int32) error { 160 task := log.With(zap.Int32("level", level), zap.String("tikv", tikvAddr)).Begin(zap.InfoLevel, "compact cluster") 161 err := withTiKVConnection(ctx, tls, tikvAddr, func(client import_sstpb.ImportSSTClient) error { 162 _, err := client.Compact(ctx, &import_sstpb.CompactRequest{ 163 OutputLevel: level, 164 }) 165 return ignoreUnimplementedError(err, task.Logger) 166 }) 167 task.End(zap.ErrorLevel, err) 168 return err 169 } 170 171 var fetchModeRegexp = regexp.MustCompile(`\btikv_config_rocksdb\{cf="default",name="hard_pending_compaction_bytes_limit"\} ([^\n]+)`) 172 173 // FetchMode obtains the import mode status of the TiKV node. 174 func FetchMode(ctx context.Context, tls *common.TLS, tikvAddr string) (import_sstpb.SwitchMode, error) { 175 conn, err := grpc.DialContext(ctx, tikvAddr, tls.ToGRPCDialOption()) 176 if err != nil { 177 return 0, err 178 } 179 defer conn.Close() 180 181 client := debugpb.NewDebugClient(conn) 182 resp, err := client.GetMetrics(ctx, &debugpb.GetMetricsRequest{All: false}) 183 if err != nil { 184 return 0, errors.Trace(err) 185 } 186 return FetchModeFromMetrics(resp.Prometheus) 187 } 188 189 // FetchMode obtains the import mode status from the Prometheus metrics of a TiKV node. 190 func FetchModeFromMetrics(metrics string) (import_sstpb.SwitchMode, error) { 191 m := fetchModeRegexp.FindStringSubmatch(metrics) 192 switch { 193 case len(m) < 2: 194 return 0, errors.New("import mode status is not exposed") 195 case m[1] == "0": 196 return import_sstpb.SwitchMode_Import, nil 197 default: 198 return import_sstpb.SwitchMode_Normal, nil 199 } 200 } 201 202 func FetchRemoteTableModelsFromTLS(ctx context.Context, tls *common.TLS, schema string) ([]*model.TableInfo, error) { 203 var tables []*model.TableInfo 204 err := tls.GetJSON(ctx, "/schema/"+schema, &tables) 205 if err != nil { 206 return nil, errors.Annotatef(err, "cannot read schema '%s' from remote", schema) 207 } 208 return tables, nil 209 } 210 211 func CheckPDVersion(ctx context.Context, tls *common.TLS, pdAddr string, requiredMinVersion, requiredMaxVersion semver.Version) error { 212 ver, err := pdutil.FetchPDVersion(ctx, tls, pdAddr) 213 if err != nil { 214 return errors.Trace(err) 215 } 216 217 return version.CheckVersion("PD", *ver, requiredMinVersion, requiredMaxVersion) 218 } 219 220 func CheckTiKVVersion(ctx context.Context, tls *common.TLS, pdAddr string, requiredMinVersion, requiredMaxVersion semver.Version) error { 221 return ForAllStores( 222 ctx, 223 tls.WithHost(pdAddr), 224 StoreStateDown, 225 func(c context.Context, store *Store) error { 226 component := fmt.Sprintf("TiKV (at %s)", store.Address) 227 ver, err := semver.NewVersion(strings.TrimPrefix(store.Version, "v")) 228 if err != nil { 229 return errors.Annotate(err, component) 230 } 231 return version.CheckVersion(component, *ver, requiredMinVersion, requiredMaxVersion) 232 }, 233 ) 234 }