github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/syncer/binlogstream/stream_modifier.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package binlogstream
    15  
    16  import (
    17  	"bytes"
    18  	"fmt"
    19  	"sort"
    20  	"strings"
    21  
    22  	"github.com/go-mysql-org/go-mysql/mysql"
    23  	"github.com/go-mysql-org/go-mysql/replication"
    24  	"github.com/google/uuid"
    25  	"github.com/pingcap/tiflow/dm/pb"
    26  	"github.com/pingcap/tiflow/dm/pkg/binlog"
    27  	"github.com/pingcap/tiflow/dm/pkg/log"
    28  	"github.com/pingcap/tiflow/dm/pkg/terror"
    29  	"go.uber.org/zap"
    30  )
    31  
// operator contains an operation for a specified binlog position. It is used
// by `handle-error`.
//
// Fields:
//   - uuid identifies the operator in logs; newOperator fills it with a random
//     UUID.
//   - op is the kind of error handling to apply (see Set for the ops that are
//     accepted).
//   - pos is the binlog position the operator is bound to; streamModifier keeps
//     its operators ordered by this field.
//   - events are the binlog events carried by the operator.
//   - originReq is the request the operator was built from; newOperator stores
//     a shallow copy of it.
type operator struct {
	uuid      string // add a UUID, make it more friendly to be traced in log
	op        pb.ErrorOp
	pos       mysql.Position
	events    []*replication.BinlogEvent // ddls -> events
	originReq *pb.HandleWorkerErrorRequest
}
    41  
    42  // newOperator creates a new operator with a random UUID.
    43  func newOperator(
    44  	op pb.ErrorOp,
    45  	pos mysql.Position,
    46  	events []*replication.BinlogEvent,
    47  	originReq *pb.HandleWorkerErrorRequest,
    48  ) *operator {
    49  	reqClone := *originReq
    50  	return &operator{
    51  		uuid:      uuid.New().String(),
    52  		op:        op,
    53  		pos:       pos,
    54  		events:    events,
    55  		originReq: &reqClone,
    56  	}
    57  }
    58  
    59  func (o *operator) String() string {
    60  	events := make([]string, 0)
    61  	for _, e := range o.events {
    62  		buf := new(bytes.Buffer)
    63  		e.Dump(buf)
    64  		events = append(events, buf.String())
    65  	}
    66  	return fmt.Sprintf("uuid: %s, op: %s, events: %s, originReq: %v", o.uuid, o.op, strings.Join(events, "\n"), o.originReq)
    67  }
    68  
// streamModifier keeps the list of operators registered through Set together
// with a cursor (nextOp, nextEventInOp) that tracks which operator, and which
// event inside it, is to be handed out next.
//
// streamModifier is not thread-safe.
type streamModifier struct {
	ops    []*operator // sorted on operator.pos
	nextOp int         // next operator whose location is waiting to be matched

	// next event in current operator, i.e. the index into front().events that
	// getEventFromFrontOp will return. This field can be
	// modified by StreamerController.
	nextEventInOp int

	logger log.Logger
}
    80  
    81  func newStreamModifier(logger log.Logger) *streamModifier {
    82  	return &streamModifier{
    83  		ops:    []*operator{},
    84  		logger: logger,
    85  	}
    86  }
    87  
    88  // Set handles HandleWorkerErrorRequest with ErrorOp_Skip, ErrorOp_Replace, ErrorOp_Inject.
    89  // - ErrorOp_Skip: events will be ignored.
    90  // - ErrorOp_Replace, ErrorOp_Inject: events should be query events generated by caller.
    91  func (m *streamModifier) Set(req *pb.HandleWorkerErrorRequest, events []*replication.BinlogEvent) error {
    92  	// precheck
    93  	switch req.Op {
    94  	case pb.ErrorOp_Skip:
    95  		if len(events) != 0 {
    96  			m.logger.Warn("skip op should not have events", zap.Int("eventLen", len(events)))
    97  		}
    98  	case pb.ErrorOp_Replace, pb.ErrorOp_Inject:
    99  		if len(events) == 0 {
   100  			return terror.ErrSyncerEvent.Generatef("%s op should have non-empty events", req.Op.String())
   101  		}
   102  	default:
   103  		m.logger.DPanic("invalid error op", zap.String("op", req.Op.String()))
   104  		return terror.ErrSyncerEvent.Generatef("invalid error op: %s", req.Op.String())
   105  	}
   106  
   107  	pos, err := binlog.PositionFromPosStr(req.BinlogPos)
   108  	if err != nil {
   109  		return err
   110  	}
   111  
   112  	toInject := newOperator(req.Op, pos, events, req)
   113  	toInsertIndex := m.minIdxLargerOrEqual(pos)
   114  
   115  	if toInsertIndex == len(m.ops) {
   116  		m.ops = append(m.ops, toInject)
   117  		m.logger.Info("set a new operator",
   118  			zap.Stringer("position", pos),
   119  			zap.Stringer("new operator", toInject))
   120  		return nil
   121  	}
   122  
   123  	pre := m.ops[toInsertIndex]
   124  	if pre.pos.Compare(pos) == 0 {
   125  		m.ops[toInsertIndex] = toInject
   126  		m.logger.Warn("overwrite operator",
   127  			zap.Stringer("position", pos),
   128  			zap.Stringer("old operator", pre))
   129  		return nil
   130  	}
   131  
   132  	m.ops = append(m.ops, nil)
   133  	copy(m.ops[toInsertIndex+1:], m.ops[toInsertIndex:])
   134  	m.ops[toInsertIndex] = toInject
   135  	if toInsertIndex < m.nextOp {
   136  		m.nextOp++
   137  	}
   138  	m.logger.Info("set a new operator",
   139  		zap.Stringer("position", pos),
   140  		zap.Stringer("new operator", toInject))
   141  	return nil
   142  }
   143  
   144  // Delete will delete an operator. `posStr` should be in the format of "binlog-file:pos".
   145  func (m *streamModifier) Delete(posStr string) error {
   146  	pos, err := binlog.PositionFromPosStr(posStr)
   147  	if err != nil {
   148  		return err
   149  	}
   150  
   151  	toDeleteIndex := m.minIdxLargerOrEqual(pos)
   152  
   153  	if toDeleteIndex < m.nextOp || toDeleteIndex == len(m.ops) {
   154  		return terror.ErrSyncerOperatorNotExist.Generate(posStr)
   155  	}
   156  	pre := m.ops[toDeleteIndex]
   157  	if pre.pos.Compare(pos) != 0 {
   158  		return terror.ErrSyncerOperatorNotExist.Generate(posStr)
   159  	}
   160  	copy(m.ops[toDeleteIndex:], m.ops[toDeleteIndex+1:])
   161  	m.ops = m.ops[:len(m.ops)-1]
   162  	return nil
   163  }
   164  
   165  // ListEqualAndAfter returns a JSON string of operators equals and after the given
   166  // position.
   167  //   - if argument is "", it returns all operators.
   168  //   - Otherwise caller should make sure the argument in format of "binlog-file:pos"
   169  //     and it returns all operators >= this position.
   170  func (m *streamModifier) ListEqualAndAfter(posStr string) []*pb.HandleWorkerErrorRequest {
   171  	var matchedOps []*operator
   172  	if posStr == "" {
   173  		matchedOps = m.ops
   174  	} else {
   175  		pos, err := binlog.PositionFromPosStr(posStr)
   176  		if err != nil {
   177  			m.logger.DPanic("invalid position, should be verified in caller",
   178  				zap.String("position", posStr))
   179  			return []*pb.HandleWorkerErrorRequest{}
   180  		}
   181  
   182  		newStartIdx := m.minIdxLargerOrEqual(pos)
   183  
   184  		if newStartIdx < len(m.ops) {
   185  			matchedOps = m.ops[newStartIdx:]
   186  		}
   187  	}
   188  
   189  	reqs := make([]*pb.HandleWorkerErrorRequest, 0, len(matchedOps))
   190  	for _, op := range matchedOps {
   191  		reqs = append(reqs, op.originReq)
   192  	}
   193  
   194  	return reqs
   195  }
   196  
   197  // RemoveOutdated removes outdated operators which will not be triggered again after
   198  // upstream binlog streamer reset. A common usage is to use global checkpoint as
   199  // the argument.
   200  // RemoveOutdated will not remove the operator equals or after the `front`.
   201  func (m *streamModifier) RemoveOutdated(pos mysql.Position) {
   202  	newStartIdx := m.minIdxLargerOrEqual(pos)
   203  
   204  	if newStartIdx > m.nextOp {
   205  		newStartIdx = m.nextOp
   206  	}
   207  
   208  	m.ops = m.ops[newStartIdx:]
   209  	m.nextOp -= newStartIdx
   210  }
   211  
   212  func (m *streamModifier) front() *operator {
   213  	if m.nextOp == len(m.ops) {
   214  		return nil
   215  	}
   216  	return m.ops[m.nextOp]
   217  }
   218  
   219  func (m *streamModifier) next() {
   220  	m.nextOp++
   221  	m.nextEventInOp = 0
   222  }
   223  
// getEventFromFrontOpStatus is the status that getEventFromFrontOp returns
// alongside the event.
type getEventFromFrontOpStatus int

const (
	// normal: an event was returned and more events remain in the front
	// operator.
	normal getEventFromFrontOpStatus = iota
	// lastEvent: the returned event is the last one of the front operator.
	lastEvent
	// eventsExhausted: the front operator has no event left at the current
	// index; no event is returned.
	eventsExhausted
)
   231  
   232  // getEventFromFrontOp returns (next event in front op, status). Caller should
   233  // make sure front op is valid.
   234  func (m *streamModifier) getEventFromFrontOp() (*replication.BinlogEvent, getEventFromFrontOpStatus) {
   235  	events := m.front().events
   236  	if m.nextEventInOp >= len(events) {
   237  		return nil, eventsExhausted
   238  	}
   239  	op := normal
   240  	if m.nextEventInOp == len(events)-1 {
   241  		op = lastEvent
   242  	}
   243  	event := events[m.nextEventInOp]
   244  	m.nextEventInOp++
   245  	return event, op
   246  }
   247  
   248  // minIdxLargerOrEqual return an index of m.ops where m.ops[index:] are all equal
   249  // or larger than `pos` since m.ops are monotonous.
   250  func (m *streamModifier) minIdxLargerOrEqual(pos mysql.Position) int {
   251  	return sort.Search(len(m.ops), func(i int) bool {
   252  		return pos.Compare(m.ops[i].pos) <= 0
   253  	})
   254  }
   255  
   256  // reset will also reset nextEventInOp to a correct value.
   257  func (m *streamModifier) reset(loc binlog.Location) {
   258  	m.nextEventInOp = 0
   259  	m.nextOp = m.minIdxLargerOrEqual(loc.Position)
   260  
   261  	if m.nextOp == len(m.ops) {
   262  		return
   263  	}
   264  	op := m.ops[m.nextOp]
   265  	if op.pos.Compare(loc.Position) > 0 {
   266  		return
   267  	}
   268  
   269  	m.nextEventInOp = loc.Suffix
   270  }