github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdcv2/owner/owner.go (about) 1 // Copyright 2023 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package owner 15 16 import ( 17 "context" 18 "database/sql" 19 "io" 20 "sync" 21 "sync/atomic" 22 "time" 23 24 "github.com/pingcap/log" 25 "github.com/pingcap/tiflow/cdc/model" 26 "github.com/pingcap/tiflow/cdc/owner" 27 "github.com/pingcap/tiflow/cdc/scheduler" 28 "github.com/pingcap/tiflow/cdcv2/metadata" 29 msql "github.com/pingcap/tiflow/cdcv2/metadata/sql" 30 "github.com/pingcap/tiflow/pkg/config" 31 cerror "github.com/pingcap/tiflow/pkg/errors" 32 "github.com/pingcap/tiflow/pkg/upstream" 33 "go.uber.org/zap" 34 "gorm.io/gorm" 35 ) 36 37 // Owner implements the owner interface. 38 type Owner struct { 39 upstreamManager *upstream.Manager 40 captureObservation *msql.CaptureOb[*gorm.DB] 41 cfg *config.SchedulerConfig 42 storage *sql.DB 43 44 liveness *model.Liveness 45 46 ownerJobQueue struct { 47 sync.Mutex 48 queue []*ownerJob 49 } 50 closed int32 51 52 querier metadata.Querier 53 } 54 55 // UpdateChangefeedAndUpstream updates the changefeed info and upstream info. 56 func (o *Owner) UpdateChangefeedAndUpstream(ctx context.Context, 57 upstreamInfo *model.UpstreamInfo, 58 changeFeedInfo *model.ChangeFeedInfo, 59 ) error { 60 panic("implement me") 61 } 62 63 // UpdateChangefeed updates the changefeed info. 64 func (o *Owner) UpdateChangefeed(ctx context.Context, 65 changeFeedInfo *model.ChangeFeedInfo, 66 ) error { 67 panic("implement me") 68 } 69 70 // EnqueueJob enqueues a job to the owner. 71 func (o *Owner) EnqueueJob(adminJob model.AdminJob, 72 done chan<- error, 73 ) { 74 o.pushOwnerJob(&ownerJob{ 75 Tp: ownerJobTypeAdminJob, 76 AdminJob: &adminJob, 77 ChangefeedID: adminJob.CfID, 78 done: done, 79 }) 80 } 81 82 // RebalanceTables rebalances the tables of a changefeed. 83 func (o *Owner) RebalanceTables(cfID model.ChangeFeedID, 84 done chan<- error, 85 ) { 86 o.pushOwnerJob(&ownerJob{ 87 Tp: ownerJobTypeRebalance, 88 ChangefeedID: cfID, 89 done: done, 90 }) 91 } 92 93 // ScheduleTable schedules a table to a capture. 94 func (o *Owner) ScheduleTable(cfID model.ChangeFeedID, 95 toCapture model.CaptureID, 96 tableID model.TableID, done chan<- error, 97 ) { 98 o.pushOwnerJob(&ownerJob{ 99 Tp: ownerJobTypeScheduleTable, 100 ChangefeedID: cfID, 101 TargetCaptureID: toCapture, 102 TableID: tableID, 103 done: done, 104 }) 105 } 106 107 // DrainCapture drains a capture. 108 func (o *Owner) DrainCapture(query *scheduler.Query, 109 done chan<- error, 110 ) { 111 o.pushOwnerJob(&ownerJob{ 112 Tp: ownerJobTypeDrainCapture, 113 scheduleQuery: query, 114 done: done, 115 }) 116 } 117 118 // WriteDebugInfo writes the debug info to the writer. 119 func (o *Owner) WriteDebugInfo(w io.Writer, 120 done chan<- error, 121 ) { 122 o.pushOwnerJob(&ownerJob{ 123 Tp: ownerJobTypeDebugInfo, 124 debugInfoWriter: w, 125 done: done, 126 }) 127 } 128 129 // Query queries owner internal information. 130 func (o *Owner) Query(query *owner.Query, done chan<- error) { 131 o.pushOwnerJob(&ownerJob{ 132 Tp: ownerJobTypeQuery, 133 query: query, 134 done: done, 135 }) 136 } 137 138 // AsyncStop stops the owner asynchronously. 139 func (o *Owner) AsyncStop() { 140 panic("implement me") 141 } 142 143 // NewOwner creates a new owner. 144 func NewOwner( 145 liveness *model.Liveness, 146 upstreamManager *upstream.Manager, 147 cfg *config.SchedulerConfig, 148 captureObservation *msql.CaptureOb[*gorm.DB], 149 querier metadata.Querier, 150 storage *sql.DB, 151 ) *Owner { 152 return &Owner{ 153 upstreamManager: upstreamManager, 154 captureObservation: captureObservation, 155 cfg: cfg, 156 querier: querier, 157 storage: storage, 158 liveness: liveness, 159 } 160 } 161 162 // Run runs the owner. 163 func (o *Owner) Run(ctx context.Context) error { 164 tick := time.NewTicker(time.Millisecond * 100) 165 for { 166 select { 167 case <-ctx.Done(): 168 return nil 169 case <-tick.C: 170 // handleJobs() should be called before clusterVersionConsistent(), because 171 // when there are different versions of cdc nodes in the cluster, 172 // the admin job may not be processed all the time. And http api relies on 173 // admin job, which will cause all http api unavailable. 174 o.handleJobs(ctx) 175 case cf := <-o.captureObservation.OwnerChanges(): 176 switch cf.OwnerState { 177 case metadata.SchedRemoving: 178 case metadata.SchedLaunched: 179 } 180 } 181 } 182 } 183 184 // nolint:unused 185 type ownerInfoClient struct { 186 ownerID model.CaptureID 187 captures []*model.CaptureInfo 188 } 189 190 // nolint:unused 191 func (o *ownerInfoClient) GetOwnerID(context.Context) (model.CaptureID, error) { 192 return o.ownerID, nil 193 } 194 195 // nolint:unused 196 func (o *ownerInfoClient) GetOwnerRevision(context.Context, model.CaptureID) (int64, error) { 197 return 0, nil 198 } 199 200 // nolint:unused 201 func (o *ownerInfoClient) GetCaptures(context.Context) (int64, []*model.CaptureInfo, error) { 202 return 0, o.captures, nil 203 } 204 205 func (o *Owner) handleJobs(_ context.Context) { 206 jobs := o.takeOwnerJobs() 207 for _, job := range jobs { 208 switch job.Tp { 209 case ownerJobTypeAdminJob: 210 case ownerJobTypeScheduleTable: 211 case ownerJobTypeDrainCapture: 212 // todo: drain capture 213 // o.handleDrainCaptures(ctx, job.scheduleQuery, job.done) 214 continue // continue here to prevent close the done channel twice 215 case ownerJobTypeRebalance: 216 // Scheduler is created lazily, it is nil before initialization. 217 case ownerJobTypeQuery: 218 job.done <- o.handleQueries(job.query) 219 case ownerJobTypeDebugInfo: 220 // TODO: implement this function 221 } 222 close(job.done) 223 } 224 } 225 226 // nolint 227 func (o *Owner) handleQueries(query *owner.Query) error { 228 switch query.Tp { 229 case owner.QueryChangeFeedStatuses: 230 case owner.QueryProcessors: 231 case owner.QueryHealth: 232 query.Data = o.isHealthy() 233 case owner.QueryOwner: 234 case owner.QueryChangefeedInfo: 235 } 236 return nil 237 } 238 239 func (o *Owner) isHealthy() bool { 240 return false 241 } 242 243 func (o *Owner) takeOwnerJobs() []*ownerJob { 244 o.ownerJobQueue.Lock() 245 defer o.ownerJobQueue.Unlock() 246 247 jobs := o.ownerJobQueue.queue 248 o.ownerJobQueue.queue = nil 249 return jobs 250 } 251 252 func (o *Owner) pushOwnerJob(job *ownerJob) { 253 o.ownerJobQueue.Lock() 254 defer o.ownerJobQueue.Unlock() 255 if atomic.LoadInt32(&o.closed) != 0 { 256 log.Info("reject owner job as owner has been closed", 257 zap.Int("jobType", int(job.Tp))) 258 select { 259 case job.done <- cerror.ErrOwnerNotFound.GenWithStackByArgs(): 260 default: 261 } 262 close(job.done) 263 return 264 } 265 o.ownerJobQueue.queue = append(o.ownerJobQueue.queue, job) 266 } 267 268 // nolint:unused 269 func (o *Owner) cleanupOwnerJob() { 270 log.Info("cleanup owner jobs as owner has been closed") 271 jobs := o.takeOwnerJobs() 272 for _, job := range jobs { 273 select { 274 case job.done <- cerror.ErrOwnerNotFound.GenWithStackByArgs(): 275 default: 276 } 277 close(job.done) 278 } 279 } 280 281 type ownerJobType int 282 283 // All OwnerJob types 284 const ( 285 ownerJobTypeRebalance ownerJobType = iota 286 ownerJobTypeScheduleTable 287 ownerJobTypeDrainCapture 288 ownerJobTypeAdminJob 289 ownerJobTypeDebugInfo 290 ownerJobTypeQuery 291 ) 292 293 // Export field names for pretty printing. 294 type ownerJob struct { 295 Tp ownerJobType 296 ChangefeedID model.ChangeFeedID 297 298 // for ScheduleTable only 299 TargetCaptureID model.CaptureID 300 // for ScheduleTable only 301 TableID model.TableID 302 303 // for Admin Job only 304 AdminJob *model.AdminJob 305 306 // for debug info only 307 debugInfoWriter io.Writer 308 309 // for status provider 310 query *owner.Query 311 312 // for scheduler related jobs 313 scheduleQuery *scheduler.Query 314 315 done chan<- error 316 }