
     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    14  package upstream
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"strings"
    20  	"sync"
    21  	"sync/atomic"
    22  	"time"
    24  	""
    25  	dmysql ""
    26  	""
    27  	tidbkv ""
    28  	""
    29  	""
    30  	""
    31  	""
    32  	""
    33  	""
    34  	""
    35  	pmysql ""
    36  	""
    37  	""
    38  	""
    39  	tikvconfig ""
    40  	""
    41  	pd ""
    42  	uatomic ""
    43  	clientv3 ""
    44  	""
    45  	""
    46  	""
    47  	""
    48  )
    50  const (
    51  	// indicate an upstream is created but not initialized.
    52  	uninit int32 = iota
    53  	// indicate an upstream is initialized and can work normally.
    54  	normal
    55  	// indicate an upstream is closing
    56  	closing
    57  	// indicate an upstream is closed.
    58  	closed
    60  	maxIdleDuration = time.Minute * 30
    61  )
// Upstream holds the resources of a TiDB cluster. It can be shared by many
// changefeeds and processors, so all exported fields and methods of an Upstream
// should be thread-safe. Never modify an exported field of an Upstream from
// outside this package.
type Upstream struct {
	// ID identifies the upstream. initUpstream sets it to the cluster ID
	// reported by PDClient and rejects a mismatch with a pre-assigned non-zero ID.
	ID uint64

	// PdEndpoints and SecurityConfig are the connection settings of the cluster.
	// For a non-default upstream, initUpstream creates PDClient and etcdCli and
	// Close releases them. session is the etcd session created by
	// registerTopologyInfo.
	PdEndpoints    []string
	SecurityConfig *security.Credential
	PDClient       pd.Client
	etcdCli        *etcd.Client
	session        *concurrency.Session

	// Cluster-level helpers created by initUpstream and released in Close.
	KVStorage   tidbkv.Storage
	GrpcPool    kv.GrpcPool
	RegionCache *tikv.RegionCache
	PDClock     pdutil.Clock
	GCManager   gc.Manager
	// cancel cancels the context derived in initUpstream; it is only called in Close().
	// NOTE(review): mu presumably guards idleTime and serializes Close — confirm
	// against the methods that lock it.
	cancel func()
	mu     sync.Mutex
	// idleTime records when the upstream became idle; the zero value means it is
	// not idle. shouldClose compares it against maxIdleDuration.
	// NOTE(review): the previous comment referred to an `Upstream.hc` counter
	// that is not a field of this struct.
	idleTime time.Time
	// clock is used instead of the time package to facilitate unit tests.
	// wg tracks the background goroutines started by initUpstream.
	// status holds one of uninit/normal/closing/closed and is accessed atomically.
	clock  clock.Clock
	wg     *sync.WaitGroup
	status int32

	// err stores the error hit while initializing the upstream (see Error).
	// isDefaultUpstream marks the upstream whose PD and etcd clients are shared
	// with the cdc server; Close does not close those clients and shouldClose
	// never reports true for it.
	err               uatomic.Error
	isDefaultUpstream bool
}
    94  func newUpstream(pdEndpoints []string,
    95  	securityConfig *security.Credential,
    96  ) *Upstream {
    97  	return &Upstream{
    98  		PdEndpoints:    pdEndpoints,
    99  		SecurityConfig: securityConfig,
   100  		status:         uninit,
   101  		wg:             new(sync.WaitGroup),
   102  		clock:          clock.New(),
   103  	}
   104  }
   106  // NewUpstream4Test new an upstream for unit test.
   107  func NewUpstream4Test(pdClient pd.Client) *Upstream {
   108  	pdClock := pdutil.NewClock4Test()
   109  	gcManager := gc.NewManager(
   110  		etcd.GcServiceIDForTest(),
   111  		pdClient, pdClock)
   112  	res := &Upstream{
   113  		ID:             testUpstreamID,
   114  		PDClient:       pdClient,
   115  		PDClock:        pdClock,
   116  		GCManager:      gcManager,
   117  		status:         normal,
   118  		wg:             new(sync.WaitGroup),
   119  		clock:          clock.New(),
   120  		SecurityConfig: &security.Credential{},
   121  		cancel:         func() {},
   122  	}
   124  	return res
   125  }
   127  // init initializes the upstream
   128  func initUpstream(ctx context.Context, up *Upstream, cfg CaptureTopologyCfg) error {
   129  	ctx, up.cancel = context.WithCancel(ctx)
   130  	grpcTLSOption, err := up.SecurityConfig.ToGRPCDialOption()
   131  	if err != nil {
   132  		up.err.Store(err)
   133  		return errors.Trace(err)
   134  	}
   135  	// init the tikv client tls global config
   136  	initGlobalConfig(up.SecurityConfig)
   137  	// default upstream always use the pdClient pass from cdc server
   138  	if !up.isDefaultUpstream {
   139  		up.PDClient, err = pd.NewClientWithContext(
   140  			ctx, up.PdEndpoints, up.SecurityConfig.PDSecurityOption(),
   141  			// the default `timeout` is 3s, maybe too small if the pd is busy,
   142  			// set to 10s to avoid frequent timeout.
   143  			pd.WithCustomTimeoutOption(10*time.Second),
   144  			pd.WithGRPCDialOptions(
   145  				grpcTLSOption,
   146  				grpc.WithBlock(),
   147  				grpc.WithConnectParams(grpc.ConnectParams{
   148  					Backoff: backoff.Config{
   149  						BaseDelay:  time.Second,
   150  						Multiplier: 1.1,
   151  						Jitter:     0.1,
   152  						MaxDelay:   3 * time.Second,
   153  					},
   154  					MinConnectTimeout: 3 * time.Second,
   155  				}),
   156  			),
   157  			pd.WithForwardingOption(config.EnablePDForwarding))
   158  		if err != nil {
   159  			up.err.Store(err)
   160  			return errors.Trace(err)
   161  		}
   163  		etcdCli, err := etcd.CreateRawEtcdClient(up.SecurityConfig, grpcTLSOption, up.PdEndpoints...)
   164  		if err != nil {
   165  			return errors.Trace(err)
   166  		}
   167  		up.etcdCli = etcd.Wrap(etcdCli, make(map[string]prometheus.Counter))
   168  	}
   169  	clusterID := up.PDClient.GetClusterID(ctx)
   170  	if up.ID != 0 && up.ID != clusterID {
   171  		err := fmt.Errorf("upstream id missmatch expected %d, actual: %d",
   172  			up.ID, clusterID)
   173  		up.err.Store(err)
   174  		return errors.Trace(err)
   175  	}
   176  	up.ID = clusterID
   178  	// To not block CDC server startup, we need to warn instead of error
   179  	// when TiKV is incompatible.
   180  	errorTiKVIncompatible := false
   181  	err = version.CheckClusterVersion(ctx, up.PDClient,
   182  		up.PdEndpoints, up.SecurityConfig, errorTiKVIncompatible)
   183  	if err != nil {
   184  		up.err.Store(err)
   185  		log.Error("init upstream error", zap.Error(err))
   186  		return errors.Trace(err)
   187  	}
   189  	up.KVStorage, err = kv.CreateTiStore(strings.Join(up.PdEndpoints, ","), up.SecurityConfig)
   190  	if err != nil {
   191  		up.err.Store(err)
   192  		return errors.Trace(err)
   193  	}
   195  	up.GrpcPool = kv.NewGrpcPoolImpl(ctx, up.SecurityConfig)
   197  	up.RegionCache = tikv.NewRegionCache(up.PDClient)
   199  	up.PDClock, err = pdutil.NewClock(ctx, up.PDClient)
   200  	if err != nil {
   201  		up.err.Store(err)
   202  		return errors.Trace(err)
   203  	}
   205  	up.GCManager = gc.NewManager(cfg.GCServiceID, up.PDClient, up.PDClock)
   207  	// Update meta-region label to ensure that meta region isolated from data regions.
   208  	pc, err := pdutil.NewPDAPIClient(up.PDClient, up.SecurityConfig)
   209  	if err != nil {
   210  		log.Error("create pd api client failed", zap.Error(err))
   211  		return errors.Trace(err)
   212  	}
   213  	defer pc.Close()
   215  	err = pc.UpdateMetaLabel(ctx)
   216  	if err != nil {
   217  		log.Warn("Fail to verify region label rule",
   218  			zap.Error(err),
   219  			zap.Uint64("upstreamID", up.ID),
   220  			zap.Strings("upstreamEndpoints", up.PdEndpoints))
   221  	}
   222  	err = up.registerTopologyInfo(ctx, cfg)
   223  	if err != nil {
   224  		return errors.Trace(err)
   225  	}
   227  	up.wg.Add(1)
   228  	go func() {
   229  		defer up.wg.Done()
   230  		up.PDClock.Run(ctx)
   231  	}()
   232  	up.wg.Add(1)
   233  	go func() {
   234  		defer up.wg.Done()
   235  		up.GrpcPool.RecycleConn(ctx)
   236  	}()
   238  	log.Info("upstream initialize successfully", zap.Uint64("upstreamID", up.ID))
   239  	atomic.StoreInt32(&up.status, normal)
   240  	return nil
   241  }
   243  // initGlobalConfig initializes the global config for tikv client tls.
   244  // region cache health check will use the global config.
   245  // TODO: remove this function after tikv client tls is refactored.
   246  func initGlobalConfig(secCfg *security.Credential) {
   247  	if secCfg.CAPath != "" || secCfg.CertPath != "" || secCfg.KeyPath != "" {
   248  		conf := tikvconfig.GetGlobalConfig()
   249  		conf.Security.ClusterSSLCA = secCfg.CAPath
   250  		conf.Security.ClusterSSLCert = secCfg.CertPath
   251  		conf.Security.ClusterSSLKey = secCfg.KeyPath
   252  		conf.Security.ClusterVerifyCN = secCfg.CertAllowedCN
   253  		tikvconfig.StoreGlobalConfig(conf)
   254  	}
   255  }
   257  // Close all resources.
   258  func (up *Upstream) Close() {
   260  	defer
   261  	up.cancel()
   262  	if atomic.LoadInt32(&up.status) == closed ||
   263  		atomic.LoadInt32(&up.status) == closing {
   264  		return
   265  	}
   266  	atomic.StoreInt32(&up.status, closing)
   268  	// should never close default upstream's pdClient and etcdClient here
   269  	// because it's shared in the cdc server
   270  	if !up.isDefaultUpstream {
   271  		if up.PDClient != nil {
   272  			up.PDClient.Close()
   273  		}
   274  		if up.etcdCli != nil {
   275  			err := up.etcdCli.Unwrap().Close()
   276  			if err != nil {
   277  				log.Warn("etcd client close failed", zap.Error(err))
   278  			}
   279  		}
   280  	}
   282  	if up.KVStorage != nil {
   283  		err := up.KVStorage.Close()
   284  		if err != nil {
   285  			log.Warn("kv store close failed", zap.Error(err))
   286  		}
   287  	}
   289  	if up.GrpcPool != nil {
   290  		up.GrpcPool.Close()
   291  	}
   292  	if up.RegionCache != nil {
   293  		up.RegionCache.Close()
   294  	}
   295  	if up.PDClock != nil {
   296  		up.PDClock.Stop()
   297  	}
   298  	if up.session != nil {
   299  		err := up.session.Close()
   300  		if err != nil {
   301  			log.Warn("etcd session close failed", zap.Error(err))
   302  		}
   303  	}
   305  	up.wg.Wait()
   306  	atomic.StoreInt32(&up.status, closed)
   307  	log.Info("upstream closed", zap.Uint64("upstreamID", up.ID))
   308  }
   310  // Error returns the error during init this stream
   311  func (up *Upstream) Error() error {
   312  	return up.err.Load()
   313  }
   315  // IsNormal returns true if the upstream is normal.
   316  func (up *Upstream) IsNormal() bool {
   317  	return atomic.LoadInt32(&up.status) == normal && up.err.Load() == nil
   318  }
   320  // IsClosed returns true if the upstream is closed.
   321  func (up *Upstream) IsClosed() bool {
   322  	return atomic.LoadInt32(&up.status) == closed
   323  }
   325  // resetIdleTime set the upstream idle time to true
   326  func (up *Upstream) resetIdleTime() {
   328  	defer
   330  	if !up.idleTime.IsZero() {
   331  		log.Info("upstream idle time is set to 0",
   332  			zap.Uint64("id", up.ID))
   333  		up.idleTime = time.Time{}
   334  	}
   335  }
   337  // trySetIdleTime set the upstream idle time if it's not zero
   338  func (up *Upstream) trySetIdleTime() {
   340  	defer
   341  	// reset idleTime
   342  	if up.idleTime.IsZero() {
   343  		log.Info("upstream idle time is set to current time",
   344  			zap.Uint64("id", up.ID))
   345  		up.idleTime = up.clock.Now()
   346  	}
   347  }
   349  func (up *Upstream) registerTopologyInfo(ctx context.Context, cfg CaptureTopologyCfg) error {
   350  	lease, err := up.etcdCli.Grant(ctx, cfg.SessionTTL)
   351  	if err != nil {
   352  		return errors.Trace(err)
   353  	}
   354  	up.session, err = concurrency.NewSession(up.etcdCli.Unwrap(), concurrency.WithLease(lease.ID))
   355  	if err != nil {
   356  		return errors.Trace(err)
   357  	}
   358  	// register capture info to upstream pd
   359  	key := fmt.Sprintf(topologyTiCDC, cfg.GCServiceID, cfg.AdvertiseAddr)
   360  	value, err := cfg.CaptureInfo.Marshal()
   361  	if err != nil {
   362  		return errors.Trace(err)
   363  	}
   364  	_, err = up.etcdCli.Put(ctx, key, string(value), clientv3.WithLease(up.session.Lease()))
   365  	return errors.WrapError(errors.ErrPDEtcdAPIError, err)
   366  }
   368  // shouldClose returns true if
   369  // this upstream idleTime reaches maxIdleDuration.
   370  func (up *Upstream) shouldClose() bool {
   371  	// default upstream should never be closed.
   372  	if up.isDefaultUpstream {
   373  		return false
   374  	}
   376  	if !up.idleTime.IsZero() &&
   377  		up.clock.Since(up.idleTime) >= maxIdleDuration {
   378  		return true
   379  	}
   381  	return false
   382  }
   384  // VerifyTiDBUser verify whether the username and password are valid in TiDB. It does the validation via
   385  // the successfully build of a connection with upstream TiDB with the username and password.
   386  func (up *Upstream) VerifyTiDBUser(ctx context.Context, username, password string) error {
   387  	tidbs, err := fetchTiDBTopology(ctx, up.etcdCli.Unwrap())
   388  	if err != nil {
   389  		return errors.Trace(err)
   390  	}
   391  	if len(tidbs) == 0 {
   392  		return errors.New("tidb instance not found in topology, please check if the tidb is running")
   393  	}
   395  	for _, tidb := range tidbs {
   396  		// connect tidb
   397  		host := fmt.Sprintf("%s:%d", tidb.IP, tidb.Port)
   398  		dsnStr := fmt.Sprintf("%s:%s@tcp(%s)/", username, password, host)
   399  		err = up.doVerify(ctx, dsnStr)
   400  		if err == nil {
   401  			return nil
   402  		}
   403  		if errorutil.IsAccessDeniedError(err) {
   404  			// For access denied error, we can return immediately.
   405  			// For other errors, we need to continue to verify the next tidb instance.
   406  			return errors.Trace(err)
   407  		}
   408  	}
   409  	return errors.Trace(err)
   410  }
   412  func (up *Upstream) doVerify(ctx context.Context, dsnStr string) error {
   413  	ctx, cancel := context.WithTimeout(ctx, defaultTimeout)
   414  	defer cancel()
   416  	dsn, err := dmysql.ParseDSN(dsnStr)
   417  	if err != nil {
   418  		return errors.Trace(err)
   419  	}
   420  	// Note: we use "preferred" here to make sure the connection is encrypted if possible. It is the same as the default
   421  	// behavior of mysql client, refer to:
   422  	dsn.TLSConfig = "preferred"
   424  	db, err := pmysql.GetTestDB(ctx, dsn, pmysql.CreateMySQLDBConn)
   425  	if err != nil {
   426  		return errors.Trace(err)
   427  	}
   428  	defer db.Close()
   430  	rows, err := db.Query("SHOW STATUS LIKE '%Ssl_cipher'")
   431  	if err != nil {
   432  		return errors.Trace(err)
   433  	}
   434  	defer func() {
   435  		if err := rows.Close(); err != nil {
   436  			log.Warn("query Ssl_cipher close rows failed", zap.Error(err))
   437  		}
   438  		if rows.Err() != nil {
   439  			log.Warn("query Ssl_cipher rows has error", zap.Error(rows.Err()))
   440  		}
   441  	}()
   443  	var name, value string
   444  	err = rows.Scan(&name, &value)
   445  	if err != nil {
   446  		log.Warn("failed to get ssl cipher", zap.Error(err),
   447  			zap.String("username", dsn.User), zap.Uint64("upstreamID", up.ID))
   448  	}
   449  	log.Info("verify tidb user successfully", zap.String("username", dsn.User),
   450  		zap.String("sslCipherName", name), zap.String("sslCipherValue", value),
   451  		zap.Uint64("upstreamID", up.ID))
   452  	return nil
   453  }