github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/changefeedccl/kvfeed/buffer.go

// Copyright 2018 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package kvfeed

import (
	"context"
	"time"

	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/sql/rowcontainer"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util/envutil"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/mon"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
)

// EventBuffer is an interface for communicating kvfeed entries between processors.
type EventBuffer interface {
	EventBufferReader
	EventBufferWriter
}

// EventBufferReader is the read portion of the EventBuffer interface.
type EventBufferReader interface {
	// Get retrieves an entry from the buffer.
	Get(ctx context.Context) (Event, error)
}

// EventBufferWriter is the write portion of the EventBuffer interface.
type EventBufferWriter interface {
	AddKV(ctx context.Context, kv roachpb.KeyValue, prevVal roachpb.Value, backfillTimestamp hlc.Timestamp) error
	AddResolved(ctx context.Context, span roachpb.Span, ts hlc.Timestamp, boundaryReached bool) error
	Close(ctx context.Context)
}
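
// A minimal sketch of the intended single-producer, single-consumer usage.
// The variables ctx, buf, kv, prevVal, span, and ts are placeholders supplied
// by the surrounding code; this is illustrative, not part of the package API:
//
//	// Producer side, e.g. the scan and rangefeed goroutines:
//	_ = buf.AddKV(ctx, kv, prevVal, hlc.Timestamp{} /* backfillTimestamp */)
//	_ = buf.AddResolved(ctx, span, ts, false /* boundaryReached */)
//
//	// Consumer side, the rest of the changefeed pipeline:
//	e, err := buf.Get(ctx)
//	if err != nil {
//		return err
//	}
//	_ = e // dispatch on e.Type()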

// EventType indicates the type of the event.
// Different types indicate which methods will be meaningful.
// Events are implemented this way rather than as an interface to remove the
// need to box the events and to allow events to be used in slices directly.
type EventType int

const (
	// KVEvent indicates that the KV, PrevValue, and BackfillTimestamp methods
	// on the Event will be meaningful.
	KVEvent EventType = iota

	// ResolvedEvent indicates that the Resolved method on the Event will be
	// meaningful.
	ResolvedEvent
)

// Event represents an event emitted by a kvfeed. It is either a KV
// or a resolved timestamp.
type Event struct {
	kv                 roachpb.KeyValue
	prevVal            roachpb.Value
	resolved           *jobspb.ResolvedSpan
	backfillTimestamp  hlc.Timestamp
	bufferGetTimestamp time.Time
}

// Type returns the event's EventType.
func (b *Event) Type() EventType {
	if b.kv.Key != nil {
		return KVEvent
	}
	if b.resolved != nil {
		return ResolvedEvent
	}
	log.Fatalf(context.TODO(), "found event with unknown type: %+v", *b)
	return 0 // unreachable
}
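
// A sketch of how a consumer typically dispatches on the returned type; the
// handleKV and handleResolved helpers are hypothetical, not part of this
// package:
//
//	switch e.Type() {
//	case KVEvent:
//		handleKV(e.KV(), e.PrevValue(), e.BackfillTimestamp())
//	case ResolvedEvent:
//		handleResolved(e.Resolved())
//	}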

// KV returns the KV of this event; it is populated if Type() returns KVEvent.
func (b *Event) KV() roachpb.KeyValue {
	return b.kv
}

// PrevValue returns the previous value for this event. PrevValue is non-zero
// if this is a KV event, the key had a non-tombstone value before the change,
// and the before value of each change was requested (optDiff).
func (b *Event) PrevValue() roachpb.Value {
	return b.prevVal
}

// Resolved will be non-nil if this is a resolved timestamp event (i.e. Type()
// returns ResolvedEvent).
func (b *Event) Resolved() *jobspb.ResolvedSpan {
	return b.resolved
}

// BackfillTimestamp overrides the timestamp of the schema that should be
// used to interpret this KV. If set and prevVal is provided, the previous
// timestamp will be used to interpret the previous value.
//
// If unset (zero-valued), the KV's timestamp will be used to interpret both
// the current and previous values instead.
func (b *Event) BackfillTimestamp() hlc.Timestamp {
	return b.backfillTimestamp
}

// BufferGetTimestamp is the time this event came out of the buffer.
func (b *Event) BufferGetTimestamp() time.Time {
	return b.bufferGetTimestamp
}

// Timestamp returns the timestamp of the write if this is a KV event.
// If there is a non-zero BackfillTimestamp, that is returned instead.
// If this is a resolved timestamp event, the timestamp is the resolved
// timestamp.
func (b *Event) Timestamp() hlc.Timestamp {
	switch b.Type() {
	case ResolvedEvent:
		return b.resolved.Timestamp
	case KVEvent:
		if b.backfillTimestamp != (hlc.Timestamp{}) {
			return b.backfillTimestamp
		}
		return b.kv.Value.Timestamp
	default:
		log.Fatalf(context.TODO(), "unknown event type")
		return hlc.Timestamp{} // unreachable
	}
}
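
// An illustrative sketch with arbitrary values: a KV originally written at
// wall time 10 but re-emitted by a backfill at wall time 20 reports the
// backfill timestamp.
//
//	e := Event{
//		kv:                roachpb.KeyValue{Key: roachpb.Key("k")},
//		backfillTimestamp: hlc.Timestamp{WallTime: 20},
//	}
//	e.kv.Value.Timestamp = hlc.Timestamp{WallTime: 10}
//	_ = e.Timestamp() // hlc.Timestamp{WallTime: 20}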

// chanBuffer mediates between the changed data KVFeed and the rest of the
// changefeed pipeline (which is backpressured all the way to the sink).
type chanBuffer struct {
	entriesCh chan Event
}

// MakeChanBuffer returns an EventBuffer backed by an unbuffered channel.
//
// TODO(ajwerner): Consider adding a buffer here. We know performance of the
// backfill is terrible. Probably some of that is due to every KV being sent
// on a channel. This should all get benchmarked and tuned.
func MakeChanBuffer() EventBuffer {
	return &chanBuffer{entriesCh: make(chan Event)}
}
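
// Because the channel is unbuffered, every Add* call blocks until the consumer
// calls Get (or the context is canceled), which is how sink backpressure
// propagates back to the KV feed. A minimal sketch, assuming ctx and kv are
// supplied by the caller:
//
//	buf := MakeChanBuffer()
//	go func() {
//		// Blocks until the Get below receives the event.
//		_ = buf.AddKV(ctx, kv, roachpb.Value{}, hlc.Timestamp{})
//	}()
//	e, err := buf.Get(ctx)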

// AddKV inserts a changed KV into the buffer. Individual keys must be added in
// increasing mvcc order.
func (b *chanBuffer) AddKV(
	ctx context.Context, kv roachpb.KeyValue, prevVal roachpb.Value, backfillTimestamp hlc.Timestamp,
) error {
	return b.addEvent(ctx, Event{
		kv:                kv,
		prevVal:           prevVal,
		backfillTimestamp: backfillTimestamp,
	})
}

// AddResolved inserts a Resolved timestamp notification in the buffer.
func (b *chanBuffer) AddResolved(
	ctx context.Context, span roachpb.Span, ts hlc.Timestamp, boundaryReached bool,
) error {
	return b.addEvent(ctx, Event{resolved: &jobspb.ResolvedSpan{Span: span, Timestamp: ts, BoundaryReached: boundaryReached}})
}

// Close closes the underlying channel, signaling the reader that no further
// events will be added.
func (b *chanBuffer) Close(_ context.Context) {
	close(b.entriesCh)
}

// addEvent sends e to the reader, blocking until it is received or the context
// is canceled.
func (b *chanBuffer) addEvent(ctx context.Context, e Event) error {
	select {
	case <-ctx.Done():
		return ctx.Err()
	case b.entriesCh <- e:
		return nil
	}
}

// Get returns an entry from the buffer. Entries are handed out in an order
// that (if it is maintained all the way to the sink) meets our external
// guarantees.
func (b *chanBuffer) Get(ctx context.Context) (Event, error) {
	select {
	case <-ctx.Done():
		return Event{}, ctx.Err()
	case e := <-b.entriesCh:
		e.bufferGetTimestamp = timeutil.Now()
		return e, nil
	}
}

// MemBufferDefaultCapacity is the default capacity for a memBuffer for a single
// changefeed.
//
// TODO(dan): It would be better if all changefeeds shared a single capacity
// that was given by the operator at startup, like we do for RocksDB and SQL.
var MemBufferDefaultCapacity = envutil.EnvOrDefaultBytes(
	"COCKROACH_CHANGEFEED_BUFFER_CAPACITY", 1<<30) // 1GB
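
// For example, an operator can lower the per-changefeed buffer capacity by
// starting the node with COCKROACH_CHANGEFEED_BUFFER_CAPACITY set to a smaller
// value (e.g. 268435456 for 256 MiB); the accepted syntax is whatever
// envutil.EnvOrDefaultBytes parses.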

var memBufferColTypes = []*types.T{
	types.Bytes, // KV.Key
	types.Bytes, // KV.Value
	types.Bytes, // KV.PrevValue
	types.Bytes, // span.Key
	types.Bytes, // span.EndKey
	types.Int,   // ts.WallTime
	types.Int,   // ts.Logical
}
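
// Each buffered event occupies one row of this schema: a KV event fills
// columns 0-2 (key, value, and optional previous value) and leaves the span
// columns NULL, while a resolved event fills columns 3-4 (span start and end
// key) and leaves the KV columns NULL; both kinds carry the timestamp in
// columns 5-6. Get relies on the NULL-ness of column 0 to tell them apart.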

// memBuffer is an in-memory buffer for changed KV and Resolved timestamp
// events. Its size is limited only by the BoundAccount passed to the
// constructor. memBuffer is only for single-producer, single-consumer use.
type memBuffer struct {
	metrics *Metrics

	mu struct {
		syncutil.Mutex
		entries rowcontainer.RowContainer
	}
	// signalCh can be selected on to learn when an entry is written to
	// mu.entries.
	signalCh chan struct{}

	allocMu struct {
		syncutil.Mutex
		a sqlbase.DatumAlloc
	}
}

func makeMemBuffer(acc mon.BoundAccount, metrics *Metrics) *memBuffer {
	b := &memBuffer{
		metrics:  metrics,
		signalCh: make(chan struct{}, 1),
	}
	b.mu.entries.Init(acc, sqlbase.ColTypeInfoFromColTypes(memBufferColTypes), 0 /* rowCapacity */)
	return b
}
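
// A construction sketch, assuming the caller already has a *mon.BytesMonitor
// (parentMon, e.g. bounded by MemBufferDefaultCapacity) and a *Metrics value
// (metrics); both names are placeholders:
//
//	acc := parentMon.MakeBoundAccount()
//	buf := makeMemBuffer(acc, metrics)
//	defer buf.Close(ctx)
//	// buf satisfies EventBuffer and charges its entries to acc.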

func (b *memBuffer) Close(ctx context.Context) {
	b.mu.Lock()
	b.mu.entries.Close(ctx)
	b.mu.Unlock()
}

// AddKV inserts a changed KV into the buffer. Individual keys must be added in
// increasing mvcc order.
func (b *memBuffer) AddKV(
	ctx context.Context, kv roachpb.KeyValue, prevVal roachpb.Value, backfillTimestamp hlc.Timestamp,
) error {
	b.allocMu.Lock()
	prevValDatum := tree.DNull
	if prevVal.IsPresent() {
		prevValDatum = b.allocMu.a.NewDBytes(tree.DBytes(prevVal.RawBytes))
	}
	row := tree.Datums{
		b.allocMu.a.NewDBytes(tree.DBytes(kv.Key)),
		b.allocMu.a.NewDBytes(tree.DBytes(kv.Value.RawBytes)),
		prevValDatum,
		tree.DNull,
		tree.DNull,
		b.allocMu.a.NewDInt(tree.DInt(kv.Value.Timestamp.WallTime)),
		b.allocMu.a.NewDInt(tree.DInt(kv.Value.Timestamp.Logical)),
	}
	b.allocMu.Unlock()
	return b.addRow(ctx, row)
}

// AddResolved inserts a Resolved timestamp notification in the buffer.
func (b *memBuffer) AddResolved(
	ctx context.Context, span roachpb.Span, ts hlc.Timestamp, boundaryReached bool,
) error {
	b.allocMu.Lock()
	row := tree.Datums{
		tree.DNull,
		tree.DNull,
		tree.DNull,
		b.allocMu.a.NewDBytes(tree.DBytes(span.Key)),
		b.allocMu.a.NewDBytes(tree.DBytes(span.EndKey)),
		b.allocMu.a.NewDInt(tree.DInt(ts.WallTime)),
		b.allocMu.a.NewDInt(tree.DInt(ts.Logical)),
	}
	b.allocMu.Unlock()
	return b.addRow(ctx, row)
}

// Get returns an entry from the buffer. Entries are handed out in an order
// that (if it is maintained all the way to the sink) meets our external
// guarantees.
func (b *memBuffer) Get(ctx context.Context) (Event, error) {
	row, err := b.getRow(ctx)
	if err != nil {
		return Event{}, err
	}
	e := Event{bufferGetTimestamp: timeutil.Now()}
	ts := hlc.Timestamp{
		WallTime: int64(*row[5].(*tree.DInt)),
		Logical:  int32(*row[6].(*tree.DInt)),
	}
	if row[2] != tree.DNull {
		e.prevVal = roachpb.Value{
			RawBytes: []byte(*row[2].(*tree.DBytes)),
		}
	}
	if row[0] != tree.DNull {
		e.kv = roachpb.KeyValue{
			Key: []byte(*row[0].(*tree.DBytes)),
			Value: roachpb.Value{
				RawBytes:  []byte(*row[1].(*tree.DBytes)),
				Timestamp: ts,
			},
		}
		return e, nil
	}
	e.resolved = &jobspb.ResolvedSpan{
		Span: roachpb.Span{
			Key:    []byte(*row[3].(*tree.DBytes)),
			EndKey: []byte(*row[4].(*tree.DBytes)),
		},
		Timestamp: ts,
	}
	return e, nil
}

// addRow adds row to the container and, if necessary, signals a reader blocked
// in getRow.
func (b *memBuffer) addRow(ctx context.Context, row tree.Datums) error {
	b.mu.Lock()
	_, err := b.mu.entries.AddRow(ctx, row)
	b.mu.Unlock()
	b.metrics.BufferEntriesIn.Inc(1)
	select {
	case b.signalCh <- struct{}{}:
	default:
		// Already signaled, don't need to signal again.
	}
	return err
}

// getRow blocks until a row is available or the context is canceled, returning
// rows in insertion order.
func (b *memBuffer) getRow(ctx context.Context) (tree.Datums, error) {
	for {
		var row tree.Datums
		b.mu.Lock()
		if b.mu.entries.Len() > 0 {
			row = b.mu.entries.At(0)
			b.mu.entries.PopFirst()
		}
		b.mu.Unlock()
		if row != nil {
			b.metrics.BufferEntriesOut.Inc(1)
			return row, nil
		}

		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		case <-b.signalCh:
		}
	}
}