github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/upstream/manager.go

// Copyright 2022 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package upstream

import (
    "context"
    "strings"
    "sync"
    "time"

    "github.com/benbjohnson/clock"
    "github.com/pingcap/log"
    "github.com/pingcap/tiflow/cdc/model"
    cerror "github.com/pingcap/tiflow/pkg/errors"
    "github.com/pingcap/tiflow/pkg/etcd"
    "github.com/pingcap/tiflow/pkg/orchestrator"
    "github.com/pingcap/tiflow/pkg/security"
    pd "github.com/tikv/pd/client"
    "go.uber.org/atomic"
    "go.uber.org/zap"
)

// testUpstreamID is a pseudo upstreamID for now. It will be removed in the future.
const testUpstreamID uint64 = 0

// tickInterval is the minimum interval at which the upstream manager checks its upstreams.
var tickInterval = 3 * time.Minute

// CaptureTopologyCfg stores the information of the capture topology.
type CaptureTopologyCfg struct {
    *model.CaptureInfo

    // GCServiceID identifies the gc service id of the cdc cluster.
    GCServiceID string
    SessionTTL  int64
}

// Manager manages all upstreams.
type Manager struct {
    // ups maps upstreamID to *Upstream.
    ups *sync.Map
    // all upstreams should be spawned from this ctx.
    ctx context.Context
    // Only used in Close().
    cancel func()
    // lock this mutex when adding or deleting a value of Manager.ups.
    mu sync.Mutex

    defaultUpstream *Upstream

    lastTickTime atomic.Time

    initUpstreamFunc func(context.Context, *Upstream, CaptureTopologyCfg) error
    captureCfg       CaptureTopologyCfg
}

// NewManager creates a new Manager.
// ctx will be used to initialize upstreams spawned by this Manager.
func NewManager(ctx context.Context, cfg CaptureTopologyCfg) *Manager {
    ctx, cancel := context.WithCancel(ctx)
    return &Manager{
        ups:              new(sync.Map),
        ctx:              ctx,
        cancel:           cancel,
        initUpstreamFunc: initUpstream,
        captureCfg:       cfg,
    }
}
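
// newManagerSketch is a minimal illustrative sketch of how a caller such as a
// capture might construct a Manager: wrap its own CaptureInfo into a
// CaptureTopologyCfg and spawn the manager from the capture's run context.
// The gc service id and session TTL values below are placeholders, not values
// taken from this repository's callers.
func newManagerSketch(ctx context.Context, info *model.CaptureInfo) *Manager {
    cfg := CaptureTopologyCfg{
        CaptureInfo: info,
        GCServiceID: "cdc-gc-service", // placeholder gc service id
        SessionTTL:  10,               // placeholder session TTL, in seconds
    }
    return NewManager(ctx, cfg)
}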

// NewManager4Test returns a Manager for unit tests.
func NewManager4Test(pdClient pd.Client) *Manager {
    up := NewUpstream4Test(pdClient)
    res := &Manager{
        ups: new(sync.Map), ctx: context.Background(),
        defaultUpstream: up,
        cancel:          func() {},
        captureCfg: CaptureTopologyCfg{
            GCServiceID: etcd.GcServiceIDForTest(),
        },
    }
    up.isDefaultUpstream = true
    res.ups.Store(testUpstreamID, up)
    return res
}

// AddDefaultUpstream adds the default upstream.
func (m *Manager) AddDefaultUpstream(
    pdEndpoints []string,
    conf *security.Credential,
    pdClient pd.Client,
    etcdClient *etcd.Client,
) (*Upstream, error) {
    // Use the pdClient and etcdClient passed from the cdc server as the default
    // upstream, to reduce the number of pdClient creations and make the cdc
    // server more stable.
    up := &Upstream{
        PdEndpoints:       pdEndpoints,
        SecurityConfig:    conf,
        PDClient:          pdClient,
        etcdCli:           etcdClient,
        isDefaultUpstream: true,
        status:            uninit,
        wg:                new(sync.WaitGroup),
        clock:             clock.New(),
    }
    if err := m.initUpstreamFunc(m.ctx, up, m.captureCfg); err != nil {
        return nil, err
    }
    m.defaultUpstream = up
    m.ups.Store(up.ID, up)
    log.Info("default upstream is added", zap.Uint64("id", up.ID))
    return up, nil
}

// GetDefaultUpstream returns the default upstream.
func (m *Manager) GetDefaultUpstream() (*Upstream, error) {
    if m.defaultUpstream == nil {
        return nil, cerror.ErrUpstreamNotFound
    }
    return m.defaultUpstream, nil
}

func (m *Manager) add(upstreamID uint64,
    pdEndpoints []string, conf *security.Credential,
) *Upstream {
    m.mu.Lock()
    defer m.mu.Unlock()
    v, ok := m.ups.Load(upstreamID)
    if ok {
        up := v.(*Upstream)
        up.resetIdleTime()
        return up
    }
    securityConf := &security.Credential{}
    if conf != nil {
        securityConf = &security.Credential{
            CAPath:        conf.CAPath,
            CertPath:      conf.CertPath,
            KeyPath:       conf.KeyPath,
            CertAllowedCN: conf.CertAllowedCN,
        }
    }
    up := newUpstream(pdEndpoints, securityConf)
    m.ups.Store(upstreamID, up)
    go func() {
        err := m.initUpstreamFunc(m.ctx, up, m.captureCfg)
        up.err.Store(err)
    }()
    up.resetIdleTime()
    log.Info("new upstream is added", zap.Uint64("id", up.ID))
    return up
}

// AddUpstream adds an upstream and initializes it.
func (m *Manager) AddUpstream(info *model.UpstreamInfo) *Upstream {
    return m.add(info.ID,
        strings.Split(info.PDEndpoints, ","),
        &security.Credential{
            CAPath:        info.CAPath,
            CertPath:      info.CertPath,
            KeyPath:       info.KeyPath,
            CertAllowedCN: info.CertAllowedCN,
        })
}

// Get gets an upstream by upstreamID.
func (m *Manager) Get(upstreamID uint64) (*Upstream, bool) {
    v, ok := m.ups.Load(upstreamID)
    if !ok {
        return nil, false
    }
    up := v.(*Upstream)
    return up, true
}

// Close closes all upstreams.
// Please make sure it is only called once, when the capture exits.
func (m *Manager) Close() {
    m.cancel()
    m.ups.Range(func(k, v interface{}) bool {
        v.(*Upstream).Close()
        m.ups.Delete(k)
        return true
    })
}

// Visit visits each upstream and returns the first error encountered.
func (m *Manager) Visit(visitor func(up *Upstream) error) error {
    var err error
    m.ups.Range(func(k, v interface{}) bool {
        err = visitor(v.(*Upstream))
        return err == nil
    })
    return err
}
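
// visitAllSketch is a minimal illustrative sketch of a Visit caller: it walks
// every registered upstream and stops at the first one whose asynchronous
// initialization recorded an error. The health-check policy here is an
// assumption, not the behavior of any existing caller in this repository.
func visitAllSketch(m *Manager) error {
    return m.Visit(func(up *Upstream) error {
        if err := up.Error(); err != nil {
            log.Warn("upstream reported an error",
                zap.Uint64("id", up.ID), zap.Error(err))
            return err
        }
        return nil
    })
}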

// Tick checks and frees upstreams that have not been used
// for a long time to save resources.
// It's a thread-safe method.
func (m *Manager) Tick(ctx context.Context,
    globalState *orchestrator.GlobalReactorState,
) error {
    if time.Since(m.lastTickTime.Load()) < tickInterval {
        return nil
    }

    activeUpstreams := make(map[uint64]struct{})
    for _, cf := range globalState.Changefeeds {
        if cf != nil && cf.Info != nil {
            activeUpstreams[cf.Info.UpstreamID] = struct{}{}
        }
    }
    m.mu.Lock()
    defer m.mu.Unlock()

    var err error
    m.ups.Range(func(k, v interface{}) bool {
        select {
        case <-ctx.Done():
            err = ctx.Err()
            return false
        default:
        }
        id := k.(uint64)

        up := v.(*Upstream)
        if up.isDefaultUpstream {
            return true
        }
        // remove failed upstream
        if up.Error() != nil {
            log.Warn("upstream init failed, remove it from manager",
                zap.Uint64("id", up.ID),
                zap.Error(up.Error()))
            go up.Close()
            m.ups.Delete(id)
            return true
        }
        _, ok := activeUpstreams[id]
        if ok {
            return true
        }

        up.trySetIdleTime()
        log.Info("no active changefeed found, try to close upstream",
            zap.Uint64("id", up.ID))
        if up.shouldClose() {
            log.Info("upstream should be closed, remove it from manager",
                zap.Uint64("id", up.ID))
            go up.Close()
            m.ups.Delete(id)
        }
        return true
    })
    m.lastTickTime.Store(time.Now())
    return err
}
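
// tickLoopSketch is a minimal illustrative sketch of how an owner-like caller
// might drive Tick periodically. Tick already rate-limits its own work to at
// most once per tickInterval, so driving it with a short ticker is cheap. The
// one-second drive period is a placeholder, not the cadence used by this
// repository's owner.
func tickLoopSketch(ctx context.Context,
    m *Manager, state *orchestrator.GlobalReactorState,
) error {
    ticker := time.NewTicker(time.Second) // placeholder drive period
    defer ticker.Stop()
    for {
        select {
        case <-ctx.Done():
            return ctx.Err()
        case <-ticker.C:
            if err := m.Tick(ctx, state); err != nil {
                return err
            }
        }
    }
}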