github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdcv2/owner/owner.go (about)

     1  // Copyright 2023 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package owner
    15  
    16  import (
    17  	"context"
    18  	"database/sql"
    19  	"io"
    20  	"sync"
    21  	"sync/atomic"
    22  	"time"
    23  
    24  	"github.com/pingcap/log"
    25  	"github.com/pingcap/tiflow/cdc/model"
    26  	"github.com/pingcap/tiflow/cdc/owner"
    27  	"github.com/pingcap/tiflow/cdc/scheduler"
    28  	"github.com/pingcap/tiflow/cdcv2/metadata"
    29  	msql "github.com/pingcap/tiflow/cdcv2/metadata/sql"
    30  	"github.com/pingcap/tiflow/pkg/config"
    31  	cerror "github.com/pingcap/tiflow/pkg/errors"
    32  	"github.com/pingcap/tiflow/pkg/upstream"
    33  	"go.uber.org/zap"
    34  	"gorm.io/gorm"
    35  )
    36  
// Owner implements the owner interface.
//
// Requests submitted through the public methods (EnqueueJob, Query, ...) are
// not executed inline: they are appended to ownerJobQueue and later drained by
// handleJobs, which Run invokes on every tick.
type Owner struct {
	// upstreamManager is injected by NewOwner; it is not referenced by the
	// code visible in this file.
	upstreamManager *upstream.Manager
	// captureObservation supplies the OwnerChanges() channel that Run listens on.
	captureObservation *msql.CaptureOb[*gorm.DB]
	// cfg is the scheduler configuration injected by NewOwner.
	cfg *config.SchedulerConfig
	// storage is the SQL handle injected by NewOwner.
	storage *sql.DB

	// liveness is injected by NewOwner.
	// NOTE(review): presumably shared with the capture — confirm with the caller.
	liveness *model.Liveness

	// ownerJobQueue holds the jobs that have been submitted but not yet
	// handled. The embedded mutex guards queue.
	ownerJobQueue struct {
		sync.Mutex
		queue []*ownerJob
	}
	// closed is read atomically by pushOwnerJob; a non-zero value makes new
	// jobs get rejected. Nothing in this file sets it yet.
	closed int32

	// querier is injected by NewOwner.
	querier metadata.Querier
}
    54  
// UpdateChangefeedAndUpstream updates the changefeed info and upstream info.
//
// It is not implemented yet: calling it always panics with "implement me",
// so the declared error result is never returned.
func (o *Owner) UpdateChangefeedAndUpstream(ctx context.Context,
	upstreamInfo *model.UpstreamInfo,
	changeFeedInfo *model.ChangeFeedInfo,
) error {
	panic("implement me")
}
    62  
// UpdateChangefeed updates the changefeed info.
//
// It is not implemented yet: calling it always panics with "implement me",
// so the declared error result is never returned.
func (o *Owner) UpdateChangefeed(ctx context.Context,
	changeFeedInfo *model.ChangeFeedInfo,
) error {
	panic("implement me")
}
    69  
    70  // EnqueueJob enqueues a job to the owner.
    71  func (o *Owner) EnqueueJob(adminJob model.AdminJob,
    72  	done chan<- error,
    73  ) {
    74  	o.pushOwnerJob(&ownerJob{
    75  		Tp:           ownerJobTypeAdminJob,
    76  		AdminJob:     &adminJob,
    77  		ChangefeedID: adminJob.CfID,
    78  		done:         done,
    79  	})
    80  }
    81  
    82  // RebalanceTables rebalances the tables of a changefeed.
    83  func (o *Owner) RebalanceTables(cfID model.ChangeFeedID,
    84  	done chan<- error,
    85  ) {
    86  	o.pushOwnerJob(&ownerJob{
    87  		Tp:           ownerJobTypeRebalance,
    88  		ChangefeedID: cfID,
    89  		done:         done,
    90  	})
    91  }
    92  
    93  // ScheduleTable schedules a table to a capture.
    94  func (o *Owner) ScheduleTable(cfID model.ChangeFeedID,
    95  	toCapture model.CaptureID,
    96  	tableID model.TableID, done chan<- error,
    97  ) {
    98  	o.pushOwnerJob(&ownerJob{
    99  		Tp:              ownerJobTypeScheduleTable,
   100  		ChangefeedID:    cfID,
   101  		TargetCaptureID: toCapture,
   102  		TableID:         tableID,
   103  		done:            done,
   104  	})
   105  }
   106  
   107  // DrainCapture drains a capture.
   108  func (o *Owner) DrainCapture(query *scheduler.Query,
   109  	done chan<- error,
   110  ) {
   111  	o.pushOwnerJob(&ownerJob{
   112  		Tp:            ownerJobTypeDrainCapture,
   113  		scheduleQuery: query,
   114  		done:          done,
   115  	})
   116  }
   117  
   118  // WriteDebugInfo writes the debug info to the writer.
   119  func (o *Owner) WriteDebugInfo(w io.Writer,
   120  	done chan<- error,
   121  ) {
   122  	o.pushOwnerJob(&ownerJob{
   123  		Tp:              ownerJobTypeDebugInfo,
   124  		debugInfoWriter: w,
   125  		done:            done,
   126  	})
   127  }
   128  
   129  // Query queries owner internal information.
   130  func (o *Owner) Query(query *owner.Query, done chan<- error) {
   131  	o.pushOwnerJob(&ownerJob{
   132  		Tp:    ownerJobTypeQuery,
   133  		query: query,
   134  		done:  done,
   135  	})
   136  }
   137  
// AsyncStop stops the owner asynchronously.
//
// It is not implemented yet: calling it always panics with "implement me".
func (o *Owner) AsyncStop() {
	panic("implement me")
}
   142  
   143  // NewOwner creates a new owner.
   144  func NewOwner(
   145  	liveness *model.Liveness,
   146  	upstreamManager *upstream.Manager,
   147  	cfg *config.SchedulerConfig,
   148  	captureObservation *msql.CaptureOb[*gorm.DB],
   149  	querier metadata.Querier,
   150  	storage *sql.DB,
   151  ) *Owner {
   152  	return &Owner{
   153  		upstreamManager:    upstreamManager,
   154  		captureObservation: captureObservation,
   155  		cfg:                cfg,
   156  		querier:            querier,
   157  		storage:            storage,
   158  		liveness:           liveness,
   159  	}
   160  }
   161  
   162  // Run runs the owner.
   163  func (o *Owner) Run(ctx context.Context) error {
   164  	tick := time.NewTicker(time.Millisecond * 100)
   165  	for {
   166  		select {
   167  		case <-ctx.Done():
   168  			return nil
   169  		case <-tick.C:
   170  			// handleJobs() should be called before clusterVersionConsistent(), because
   171  			// when there are different versions of cdc nodes in the cluster,
   172  			// the admin job may not be processed all the time. And http api relies on
   173  			// admin job, which will cause all http api unavailable.
   174  			o.handleJobs(ctx)
   175  		case cf := <-o.captureObservation.OwnerChanges():
   176  			switch cf.OwnerState {
   177  			case metadata.SchedRemoving:
   178  			case metadata.SchedLaunched:
   179  			}
   180  		}
   181  	}
   182  }
   183  
// ownerInfoClient answers owner and capture lookups from values held in
// memory: a fixed owner ID and a fixed list of captures.
//
// nolint:unused
type ownerInfoClient struct {
	// ownerID is the value returned by GetOwnerID.
	ownerID model.CaptureID
	// captures is the list returned by GetCaptures.
	captures []*model.CaptureInfo
}
   189  
// GetOwnerID returns the stored owner ID. The context is ignored and the
// returned error is always nil.
//
// nolint:unused
func (o *ownerInfoClient) GetOwnerID(context.Context) (model.CaptureID, error) {
	return o.ownerID, nil
}
   194  
// GetOwnerRevision always reports revision 0 with a nil error; both arguments
// are ignored.
//
// nolint:unused
func (o *ownerInfoClient) GetOwnerRevision(context.Context, model.CaptureID) (int64, error) {
	return 0, nil
}
   199  
// GetCaptures returns the stored capture list together with a constant 0 as
// the first result and a nil error. The context is ignored.
//
// nolint:unused
func (o *ownerInfoClient) GetCaptures(context.Context) (int64, []*model.CaptureInfo, error) {
	return 0, o.captures, nil
}
   204  
   205  func (o *Owner) handleJobs(_ context.Context) {
   206  	jobs := o.takeOwnerJobs()
   207  	for _, job := range jobs {
   208  		switch job.Tp {
   209  		case ownerJobTypeAdminJob:
   210  		case ownerJobTypeScheduleTable:
   211  		case ownerJobTypeDrainCapture:
   212  			// todo: drain capture
   213  			// o.handleDrainCaptures(ctx, job.scheduleQuery, job.done)
   214  			continue // continue here to prevent close the done channel twice
   215  		case ownerJobTypeRebalance:
   216  			// Scheduler is created lazily, it is nil before initialization.
   217  		case ownerJobTypeQuery:
   218  			job.done <- o.handleQueries(job.query)
   219  		case ownerJobTypeDebugInfo:
   220  			// TODO: implement this function
   221  		}
   222  		close(job.done)
   223  	}
   224  }
   225  
   226  // nolint
   227  func (o *Owner) handleQueries(query *owner.Query) error {
   228  	switch query.Tp {
   229  	case owner.QueryChangeFeedStatuses:
   230  	case owner.QueryProcessors:
   231  	case owner.QueryHealth:
   232  		query.Data = o.isHealthy()
   233  	case owner.QueryOwner:
   234  	case owner.QueryChangefeedInfo:
   235  	}
   236  	return nil
   237  }
   238  
// isHealthy reports whether the owner is healthy. It is a placeholder that
// currently always returns false.
func (o *Owner) isHealthy() bool {
	return false
}
   242  
   243  func (o *Owner) takeOwnerJobs() []*ownerJob {
   244  	o.ownerJobQueue.Lock()
   245  	defer o.ownerJobQueue.Unlock()
   246  
   247  	jobs := o.ownerJobQueue.queue
   248  	o.ownerJobQueue.queue = nil
   249  	return jobs
   250  }
   251  
   252  func (o *Owner) pushOwnerJob(job *ownerJob) {
   253  	o.ownerJobQueue.Lock()
   254  	defer o.ownerJobQueue.Unlock()
   255  	if atomic.LoadInt32(&o.closed) != 0 {
   256  		log.Info("reject owner job as owner has been closed",
   257  			zap.Int("jobType", int(job.Tp)))
   258  		select {
   259  		case job.done <- cerror.ErrOwnerNotFound.GenWithStackByArgs():
   260  		default:
   261  		}
   262  		close(job.done)
   263  		return
   264  	}
   265  	o.ownerJobQueue.queue = append(o.ownerJobQueue.queue, job)
   266  }
   267  
   268  // nolint:unused
   269  func (o *Owner) cleanupOwnerJob() {
   270  	log.Info("cleanup owner jobs as owner has been closed")
   271  	jobs := o.takeOwnerJobs()
   272  	for _, job := range jobs {
   273  		select {
   274  		case job.done <- cerror.ErrOwnerNotFound.GenWithStackByArgs():
   275  		default:
   276  		}
   277  		close(job.done)
   278  	}
   279  }
   280  
// ownerJobType identifies the kind of request carried by an ownerJob.
type ownerJobType int

// All OwnerJob types. The numeric values are assigned by iota, so the order
// of this list determines the integer logged as "jobType" by pushOwnerJob.
const (
	// ownerJobTypeRebalance is created by RebalanceTables.
	ownerJobTypeRebalance ownerJobType = iota
	// ownerJobTypeScheduleTable is created by ScheduleTable.
	ownerJobTypeScheduleTable
	// ownerJobTypeDrainCapture is created by DrainCapture.
	ownerJobTypeDrainCapture
	// ownerJobTypeAdminJob is created by EnqueueJob.
	ownerJobTypeAdminJob
	// ownerJobTypeDebugInfo is created by WriteDebugInfo.
	ownerJobTypeDebugInfo
	// ownerJobTypeQuery is created by Query.
	ownerJobTypeQuery
)
   292  
// ownerJob is one queued request for the owner. Tp selects which of the
// remaining fields are meaningful.
//
// Export field names for pretty printing.
type ownerJob struct {
	// Tp is the kind of job.
	Tp ownerJobType
	// ChangefeedID is set for admin, rebalance and schedule-table jobs.
	ChangefeedID model.ChangeFeedID

	// for ScheduleTable only
	TargetCaptureID model.CaptureID
	// for ScheduleTable only
	TableID model.TableID

	// for Admin Job only
	AdminJob *model.AdminJob

	// for debug info only
	debugInfoWriter io.Writer

	// for status provider
	query *owner.Query

	// for scheduler related jobs
	scheduleQuery *scheduler.Query

	// done is the send-only channel on which the job's outcome is reported.
	// The job queue closes it after handling or rejecting the job, except for
	// drain-capture jobs, which handleJobs currently leaves untouched.
	done chan<- error
}