github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/upstream/manager.go

// Copyright 2022 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package upstream

import (
	"context"
	"strings"
	"sync"
	"time"

	"github.com/benbjohnson/clock"
	"github.com/pingcap/log"
	"github.com/pingcap/tiflow/cdc/model"
	cerror "github.com/pingcap/tiflow/pkg/errors"
	"github.com/pingcap/tiflow/pkg/etcd"
	"github.com/pingcap/tiflow/pkg/orchestrator"
	"github.com/pingcap/tiflow/pkg/security"
	pd "github.com/tikv/pd/client"
	"go.uber.org/atomic"
	"go.uber.org/zap"
)

// testUpstreamID is a pseudo upstreamID for now. It will be removed in the future.
const testUpstreamID uint64 = 0

// tickInterval is the minimum interval at which the upstream manager checks upstreams.
var tickInterval = 3 * time.Minute

// CaptureTopologyCfg stores the information of the capture topology.
type CaptureTopologyCfg struct {
	*model.CaptureInfo

	// GCServiceID identifies the gc service id of the cdc cluster.
	GCServiceID string
	SessionTTL  int64
}

// Manager manages all upstreams.
type Manager struct {
	// ups maps upstreamID to *Upstream.
	ups *sync.Map
	// all upstreams should be spawned from this ctx.
	ctx context.Context
	// Only used in Close().
	cancel func()
	// lock this mutex when adding or deleting a value of Manager.ups.
	mu sync.Mutex

	defaultUpstream *Upstream

	lastTickTime atomic.Time

	initUpstreamFunc func(context.Context, *Upstream, CaptureTopologyCfg) error
	captureCfg       CaptureTopologyCfg
}

// NewManager creates a new Manager.
// ctx will be used to initialize upstreams spawned by this Manager.
func NewManager(ctx context.Context, cfg CaptureTopologyCfg) *Manager {
	ctx, cancel := context.WithCancel(ctx)
	return &Manager{
		ups:              new(sync.Map),
		ctx:              ctx,
		cancel:           cancel,
		initUpstreamFunc: initUpstream,
		captureCfg:       cfg,
	}
}

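// The sketch below is not part of the original file; it is a hedged example of
// how a caller (e.g. the capture) might wire up a Manager. captureInfo and
// gcServiceID are assumed to come from the caller's own configuration, and the
// helper name is illustrative only.
func exampleNewManager(ctx context.Context, captureInfo *model.CaptureInfo, gcServiceID string) *Manager {
	// Build the topology config shared by every upstream spawned from this Manager.
	cfg := CaptureTopologyCfg{
		CaptureInfo: captureInfo,
		GCServiceID: gcServiceID,
		SessionTTL:  10, // session TTL in seconds; an illustrative value only
	}
	// All upstreams created later inherit ctx; Close() cancels it.
	return NewManager(ctx, cfg)
}
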
// NewManager4Test returns a Manager for unit tests.
func NewManager4Test(pdClient pd.Client) *Manager {
	up := NewUpstream4Test(pdClient)
	res := &Manager{
		ups: new(sync.Map), ctx: context.Background(),
		defaultUpstream: up,
		cancel:          func() {},
		captureCfg: CaptureTopologyCfg{
			GCServiceID: etcd.GcServiceIDForTest(),
		},
	}
	up.isDefaultUpstream = true
	res.ups.Store(testUpstreamID, up)
	return res
}

// AddDefaultUpstream adds the default upstream.
func (m *Manager) AddDefaultUpstream(
	pdEndpoints []string,
	conf *security.Credential,
	pdClient pd.Client,
	etcdClient *etcd.Client,
) (*Upstream, error) {
	// Use the pdClient and etcdClient passed from the cdc server as the default
	// upstream, to avoid creating extra pdClients and make the cdc server more stable.
	up := &Upstream{
		PdEndpoints:       pdEndpoints,
		SecurityConfig:    conf,
		PDClient:          pdClient,
		etcdCli:           etcdClient,
		isDefaultUpstream: true,
		status:            uninit,
		wg:                new(sync.WaitGroup),
		clock:             clock.New(),
	}
	if err := m.initUpstreamFunc(m.ctx, up, m.captureCfg); err != nil {
		return nil, err
	}
	m.defaultUpstream = up
	m.ups.Store(up.ID, up)
	log.Info("default upstream is added", zap.Uint64("id", up.ID))
	return up, nil
}

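// A hedged usage sketch, not part of the original file: how the cdc server
// might register its own PD/etcd clients as the default upstream right after
// creating the Manager. All parameter values are assumed to come from the
// server's startup code, and the helper name is illustrative only.
func exampleAddDefaultUpstream(
	m *Manager,
	pdEndpoints []string,
	credential *security.Credential,
	pdClient pd.Client,
	etcdClient *etcd.Client,
) (*Upstream, error) {
	// Unlike AddUpstream, this call initializes the upstream synchronously and
	// reports the initialization error to the caller immediately.
	up, err := m.AddDefaultUpstream(pdEndpoints, credential, pdClient, etcdClient)
	if err != nil {
		return nil, err
	}
	return up, nil
}
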
// GetDefaultUpstream returns the default upstream
func (m *Manager) GetDefaultUpstream() (*Upstream, error) {
	if m.defaultUpstream == nil {
		return nil, cerror.ErrUpstreamNotFound
	}
	return m.defaultUpstream, nil
}

func (m *Manager) add(upstreamID uint64,
	pdEndpoints []string, conf *security.Credential,
) *Upstream {
	m.mu.Lock()
	defer m.mu.Unlock()
	v, ok := m.ups.Load(upstreamID)
	if ok {
		up := v.(*Upstream)
		up.resetIdleTime()
		return up
	}
	securityConf := &security.Credential{}
	if conf != nil {
		securityConf = &security.Credential{
			CAPath:        conf.CAPath,
			CertPath:      conf.CertPath,
			KeyPath:       conf.KeyPath,
			CertAllowedCN: conf.CertAllowedCN,
		}
	}
	up := newUpstream(pdEndpoints, securityConf)
	m.ups.Store(upstreamID, up)
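	// Initialize the upstream asynchronously so that add() does not block the
	// caller. Any initialization error is stored in up.err and is later observed
	// via up.Error() in Tick(), which removes the failed upstream.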
	go func() {
		err := m.initUpstreamFunc(m.ctx, up, m.captureCfg)
		up.err.Store(err)
	}()
	up.resetIdleTime()
	log.Info("new upstream is added", zap.Uint64("id", up.ID))
	return up
}

// AddUpstream adds an upstream and initializes it.
func (m *Manager) AddUpstream(info *model.UpstreamInfo) *Upstream {
	return m.add(info.ID,
		strings.Split(info.PDEndpoints, ","),
		&security.Credential{
			CAPath:        info.CAPath,
			CertPath:      info.CertPath,
			KeyPath:       info.KeyPath,
			CertAllowedCN: info.CertAllowedCN,
		})
}

// Get gets an upstream by upstreamID.
func (m *Manager) Get(upstreamID uint64) (*Upstream, bool) {
	v, ok := m.ups.Load(upstreamID)
	if !ok {
		return nil, false
	}
	up := v.(*Upstream)
	return up, true
}

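// A hedged sketch, not part of the original file: how a caller holding a
// changefeed's upstream info might look up an existing upstream first and only
// register a new one on a miss. model.UpstreamInfo is the same type AddUpstream
// accepts; the helper name is illustrative.
func exampleGetOrAddUpstream(m *Manager, info *model.UpstreamInfo) *Upstream {
	// Get never creates anything; it only reports whether the upstream is cached.
	if up, ok := m.Get(info.ID); ok {
		return up
	}
	// AddUpstream stores the upstream immediately and initializes it in the
	// background; callers can check up.Error() to see whether init failed.
	return m.AddUpstream(info)
}
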
// Close closes all upstreams.
// Please make sure it is only called once, when the capture exits.
func (m *Manager) Close() {
	m.cancel()
	m.ups.Range(func(k, v interface{}) bool {
		v.(*Upstream).Close()
		m.ups.Delete(k)
		return true
	})
}

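// A hedged sketch, not part of the original file: since Close is documented to
// be called exactly once when the capture exits, a caller can guard it with a
// sync.Once. The wrapper type below is illustrative only.
type exampleManagerCloser struct {
	m    *Manager
	once sync.Once
}

// Close cancels the Manager's context and closes every cached upstream, but
// only on the first call; subsequent calls are no-ops.
func (c *exampleManagerCloser) Close() {
	c.once.Do(c.m.Close)
}
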
// Visit calls visitor on each upstream, stopping at and returning the first error.
func (m *Manager) Visit(visitor func(up *Upstream) error) error {
	var err error
	m.ups.Range(func(k, v interface{}) bool {
		err = visitor(v.(*Upstream))
		return err == nil
	})
	return err
}

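// A hedged sketch, not part of the original file: Visit stops at the first
// visitor error, so a visitor that never fails can be used to walk every cached
// upstream, e.g. to collect their IDs. The helper name is illustrative.
func exampleListUpstreamIDs(m *Manager) []uint64 {
	ids := make([]uint64, 0)
	// The visitor always returns nil, so Visit walks every upstream and returns nil.
	_ = m.Visit(func(up *Upstream) error {
		ids = append(ids, up.ID)
		return nil
	})
	return ids
}
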
// Tick checks upstreams and frees those that have not been used
// for a long time to save resources.
// It's a thread-safe method.
func (m *Manager) Tick(ctx context.Context,
	globalState *orchestrator.GlobalReactorState,
) error {
	if time.Since(m.lastTickTime.Load()) < tickInterval {
		return nil
	}

	activeUpstreams := make(map[uint64]struct{})
	for _, cf := range globalState.Changefeeds {
		if cf != nil && cf.Info != nil {
			activeUpstreams[cf.Info.UpstreamID] = struct{}{}
		}
	}
	m.mu.Lock()
	defer m.mu.Unlock()

	var err error
	m.ups.Range(func(k, v interface{}) bool {
		select {
		case <-ctx.Done():
			err = ctx.Err()
			return false
		default:
		}
		id := k.(uint64)

		up := v.(*Upstream)
		if up.isDefaultUpstream {
			return true
		}
		// remove failed upstream
		if up.Error() != nil {
			log.Warn("upstream init failed, remove it from manager",
				zap.Uint64("id", up.ID),
				zap.Error(up.Error()))
			go up.Close()
			m.ups.Delete(id)
			return true
		}
		_, ok := activeUpstreams[id]
		if ok {
			return true
		}

		up.trySetIdleTime()
		log.Info("no active changefeed found, try to close upstream",
			zap.Uint64("id", up.ID))
		if up.shouldClose() {
			log.Info("upstream should be closed, remove it from manager",
				zap.Uint64("id", up.ID))
			go up.Close()
			m.ups.Delete(id)
		}
		return true
	})
	m.lastTickTime.Store(time.Now())
	return err
}
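
// A hedged sketch, not part of the original file: Tick is already rate-limited
// by tickInterval internally, so a caller can simply invoke it on every reactor
// tick, or drive it from a timer as below. globalState is assumed to come from
// the caller's orchestrator reactor; the helper name is illustrative.
func exampleRunTickLoop(ctx context.Context, m *Manager, globalState *orchestrator.GlobalReactorState) error {
	ticker := time.NewTicker(time.Minute)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-ticker.C:
			// Tick only does real work when tickInterval has elapsed since the
			// last run; idle and failed upstreams are closed and removed.
			if err := m.Tick(ctx, globalState); err != nil {
				return err
			}
		}
	}
}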