github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/changefeedccl/kvfeed/buffer.go

// Copyright 2018 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package kvfeed

import (
	"context"
	"time"

	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/sql/rowcontainer"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util/envutil"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/mon"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
)

// EventBuffer is an interface for communicating kvfeed entries between
// processors.
type EventBuffer interface {
	EventBufferReader
	EventBufferWriter
}

// EventBufferReader is the read portion of the EventBuffer interface.
type EventBufferReader interface {
	// Get retrieves an entry from the buffer.
	Get(ctx context.Context) (Event, error)
}

// EventBufferWriter is the write portion of the EventBuffer interface.
type EventBufferWriter interface {
	AddKV(ctx context.Context, kv roachpb.KeyValue, prevVal roachpb.Value, backfillTimestamp hlc.Timestamp) error
	AddResolved(ctx context.Context, span roachpb.Span, ts hlc.Timestamp, boundaryReached bool) error
	Close(ctx context.Context)
}

// EventType indicates the type of the event.
// Different types indicate which methods will be meaningful.
// Events are implemented this way rather than as an interface to remove the
// need to box the events and to allow events to be used in slices directly.
type EventType int

const (
	// KVEvent indicates that the KV, PrevValue, and BackfillTimestamp methods
	// on the Event will be meaningful.
	KVEvent EventType = iota

	// ResolvedEvent indicates that the Resolved method on the Event will be
	// meaningful.
	ResolvedEvent
)

// Event represents an event emitted by a kvfeed. It is either a KV or a
// resolved timestamp.
type Event struct {
	kv                 roachpb.KeyValue
	prevVal            roachpb.Value
	resolved           *jobspb.ResolvedSpan
	backfillTimestamp  hlc.Timestamp
	bufferGetTimestamp time.Time
}

// Type returns the event's EventType.
func (b *Event) Type() EventType {
	if b.kv.Key != nil {
		return KVEvent
	}
	if b.resolved != nil {
		return ResolvedEvent
	}
	log.Fatalf(context.TODO(), "found event with unknown type: %+v", *b)
	return 0 // unreachable
}

// KV returns the KeyValue for this event. It is populated only if Type()
// returns KVEvent.
func (b *Event) KV() roachpb.KeyValue {
	return b.kv
}
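// consumeOne is an illustrative sketch, not part of the upstream file: it
// shows how a consumer of an EventBufferReader might drain one entry and
// dispatch on Event.Type. The handleKV and handleResolved callbacks are
// hypothetical names for caller-supplied handlers.
func consumeOne(
	ctx context.Context,
	reader EventBufferReader,
	handleKV func(kv roachpb.KeyValue, prevVal roachpb.Value, backfillTS hlc.Timestamp) error,
	handleResolved func(resolved *jobspb.ResolvedSpan) error,
) error {
	e, err := reader.Get(ctx)
	if err != nil {
		return err
	}
	switch e.Type() {
	case KVEvent:
		return handleKV(e.KV(), e.PrevValue(), e.BackfillTimestamp())
	case ResolvedEvent:
		return handleResolved(e.Resolved())
	}
	return nil // unreachable: Type() fatals on events of unknown type.
}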
// PrevValue returns the previous value for this event. PrevValue is non-zero
// if this is a KV event, the key had a non-tombstone value before the change,
// and the before value of each change was requested (optDiff).
func (b *Event) PrevValue() roachpb.Value {
	return b.prevVal
}

// Resolved will be non-nil if this is a resolved timestamp event (i.e. Type()
// returns ResolvedEvent).
func (b *Event) Resolved() *jobspb.ResolvedSpan {
	return b.resolved
}

// BackfillTimestamp overrides the timestamp of the schema that should be
// used to interpret this KV. If set and prevVal is provided, the previous
// timestamp will be used to interpret the previous value.
//
// If unset (zero-valued), the KV's timestamp will be used to interpret both
// the current and previous values instead.
func (b *Event) BackfillTimestamp() hlc.Timestamp {
	return b.backfillTimestamp
}

// BufferGetTimestamp is the time this event came out of the buffer.
func (b *Event) BufferGetTimestamp() time.Time {
	return b.bufferGetTimestamp
}

// Timestamp returns the timestamp of the write if this is a KV event; if
// there is a non-zero BackfillTimestamp, that is returned instead. If this is
// a resolved timestamp event, the resolved timestamp is returned.
func (b *Event) Timestamp() hlc.Timestamp {
	switch b.Type() {
	case ResolvedEvent:
		return b.resolved.Timestamp
	case KVEvent:
		if b.backfillTimestamp != (hlc.Timestamp{}) {
			return b.backfillTimestamp
		}
		return b.kv.Value.Timestamp
	default:
		log.Fatalf(context.TODO(), "unknown event type")
		return hlc.Timestamp{} // unreachable
	}
}

// chanBuffer mediates between the changed data KVFeed and the rest of the
// changefeed pipeline (which is backpressured all the way to the sink).
type chanBuffer struct {
	entriesCh chan Event
}

// MakeChanBuffer returns an EventBuffer backed by an unbuffered channel.
//
// TODO(ajwerner): Consider adding a buffer here. We know performance of the
// backfill is terrible. Probably some of that is due to every KV being sent
// on a channel. This should all get benchmarked and tuned.
func MakeChanBuffer() EventBuffer {
	return &chanBuffer{entriesCh: make(chan Event)}
}

// AddKV inserts a changed KV into the buffer. Individual keys must be added in
// increasing mvcc order.
func (b *chanBuffer) AddKV(
	ctx context.Context, kv roachpb.KeyValue, prevVal roachpb.Value, backfillTimestamp hlc.Timestamp,
) error {
	return b.addEvent(ctx, Event{
		kv:                kv,
		prevVal:           prevVal,
		backfillTimestamp: backfillTimestamp,
	})
}

// AddResolved inserts a Resolved timestamp notification in the buffer.
func (b *chanBuffer) AddResolved(
	ctx context.Context, span roachpb.Span, ts hlc.Timestamp, boundaryReached bool,
) error {
	return b.addEvent(ctx, Event{resolved: &jobspb.ResolvedSpan{Span: span, Timestamp: ts, BoundaryReached: boundaryReached}})
}

func (b *chanBuffer) Close(_ context.Context) {
	close(b.entriesCh)
}

func (b *chanBuffer) addEvent(ctx context.Context, e Event) error {
	select {
	case <-ctx.Done():
		return ctx.Err()
	case b.entriesCh <- e:
		return nil
	}
}
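// roundTripKV is an illustrative sketch, not part of the upstream file: it
// shows the unbuffered chanBuffer in use. Because the channel has no
// capacity, the writing goroutine blocks until the reader calls Get, which is
// how backpressure propagates back toward the kvfeed. The function returns
// the timestamp a consumer would use to interpret the KV: backfillTS when
// non-zero, otherwise the KV's own MVCC timestamp. The kv argument is assumed
// to carry a non-nil Key.
func roundTripKV(
	ctx context.Context, kv roachpb.KeyValue, backfillTS hlc.Timestamp,
) (hlc.Timestamp, error) {
	buf := MakeChanBuffer()
	errCh := make(chan error, 1)
	go func() {
		errCh <- buf.AddKV(ctx, kv, roachpb.Value{}, backfillTS)
	}()
	e, err := buf.Get(ctx)
	if err != nil {
		return hlc.Timestamp{}, err
	}
	if err := <-errCh; err != nil {
		return hlc.Timestamp{}, err
	}
	return e.Timestamp(), nil
}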
// Get returns an entry from the buffer. Entries are handed out in an order
// that (if it is maintained all the way to the sink) meets our external
// guarantees.
func (b *chanBuffer) Get(ctx context.Context) (Event, error) {
	select {
	case <-ctx.Done():
		return Event{}, ctx.Err()
	case e := <-b.entriesCh:
		e.bufferGetTimestamp = timeutil.Now()
		return e, nil
	}
}

// MemBufferDefaultCapacity is the default capacity for a memBuffer for a
// single changefeed.
//
// TODO(dan): It would be better if all changefeeds shared a single capacity
// that was given by the operator at startup, like we do for RocksDB and SQL.
var MemBufferDefaultCapacity = envutil.EnvOrDefaultBytes(
	"COCKROACH_CHANGEFEED_BUFFER_CAPACITY", 1<<30) // 1GB

var memBufferColTypes = []*types.T{
	types.Bytes, // KV.Key
	types.Bytes, // KV.Value
	types.Bytes, // KV.PrevValue
	types.Bytes, // span.Key
	types.Bytes, // span.EndKey
	types.Int,   // ts.WallTime
	types.Int,   // ts.Logical
}

// memBuffer is an in-memory buffer for changed KV and Resolved timestamp
// events. Its size is limited only by the BoundAccount passed to the
// constructor. memBuffer is only for single-producer, single-consumer use.
type memBuffer struct {
	metrics *Metrics

	mu struct {
		syncutil.Mutex
		entries rowcontainer.RowContainer
	}
	// signalCh can be selected on to learn when an entry is written to
	// mu.entries.
	signalCh chan struct{}

	allocMu struct {
		syncutil.Mutex
		a sqlbase.DatumAlloc
	}
}

func makeMemBuffer(acc mon.BoundAccount, metrics *Metrics) *memBuffer {
	b := &memBuffer{
		metrics:  metrics,
		signalCh: make(chan struct{}, 1),
	}
	b.mu.entries.Init(acc, sqlbase.ColTypeInfoFromColTypes(memBufferColTypes), 0 /* rowCapacity */)
	return b
}

func (b *memBuffer) Close(ctx context.Context) {
	b.mu.Lock()
	b.mu.entries.Close(ctx)
	b.mu.Unlock()
}

// AddKV inserts a changed KV into the buffer. Individual keys must be added in
// increasing mvcc order.
func (b *memBuffer) AddKV(
	ctx context.Context, kv roachpb.KeyValue, prevVal roachpb.Value, backfillTimestamp hlc.Timestamp,
) error {
	b.allocMu.Lock()
	prevValDatum := tree.DNull
	if prevVal.IsPresent() {
		prevValDatum = b.allocMu.a.NewDBytes(tree.DBytes(prevVal.RawBytes))
	}
	row := tree.Datums{
		b.allocMu.a.NewDBytes(tree.DBytes(kv.Key)),
		b.allocMu.a.NewDBytes(tree.DBytes(kv.Value.RawBytes)),
		prevValDatum,
		tree.DNull,
		tree.DNull,
		b.allocMu.a.NewDInt(tree.DInt(kv.Value.Timestamp.WallTime)),
		b.allocMu.a.NewDInt(tree.DInt(kv.Value.Timestamp.Logical)),
	}
	b.allocMu.Unlock()
	return b.addRow(ctx, row)
}

// AddResolved inserts a Resolved timestamp notification in the buffer.
func (b *memBuffer) AddResolved(
	ctx context.Context, span roachpb.Span, ts hlc.Timestamp, boundaryReached bool,
) error {
	b.allocMu.Lock()
	row := tree.Datums{
		tree.DNull,
		tree.DNull,
		tree.DNull,
		b.allocMu.a.NewDBytes(tree.DBytes(span.Key)),
		b.allocMu.a.NewDBytes(tree.DBytes(span.EndKey)),
		b.allocMu.a.NewDInt(tree.DInt(ts.WallTime)),
		b.allocMu.a.NewDInt(tree.DInt(ts.Logical)),
	}
	b.allocMu.Unlock()
	return b.addRow(ctx, row)
}
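// newBoundedMemBuffer is an illustrative sketch, not part of the upstream
// file: it shows how a memBuffer is tied to memory accounting. The caller
// opens a BoundAccount against a parent mon.BytesMonitor (assumed here to
// have been started with a budget such as MemBufferDefaultCapacity), so that
// adding rows can return an error once the budget is exhausted rather than
// letting the buffer grow without bound.
func newBoundedMemBuffer(parent *mon.BytesMonitor, metrics *Metrics) *memBuffer {
	return makeMemBuffer(parent.MakeBoundAccount(), metrics)
}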
// Get returns an entry from the buffer. Entries are handed out in an order
// that (if it is maintained all the way to the sink) meets our external
// guarantees.
func (b *memBuffer) Get(ctx context.Context) (Event, error) {
	row, err := b.getRow(ctx)
	if err != nil {
		return Event{}, err
	}
	e := Event{bufferGetTimestamp: timeutil.Now()}
	ts := hlc.Timestamp{
		WallTime: int64(*row[5].(*tree.DInt)),
		Logical:  int32(*row[6].(*tree.DInt)),
	}
	if row[2] != tree.DNull {
		e.prevVal = roachpb.Value{
			RawBytes: []byte(*row[2].(*tree.DBytes)),
		}
	}
	if row[0] != tree.DNull {
		e.kv = roachpb.KeyValue{
			Key: []byte(*row[0].(*tree.DBytes)),
			Value: roachpb.Value{
				RawBytes:  []byte(*row[1].(*tree.DBytes)),
				Timestamp: ts,
			},
		}
		return e, nil
	}
	e.resolved = &jobspb.ResolvedSpan{
		Span: roachpb.Span{
			Key:    []byte(*row[3].(*tree.DBytes)),
			EndKey: []byte(*row[4].(*tree.DBytes)),
		},
		Timestamp: ts,
	}
	return e, nil
}

func (b *memBuffer) addRow(ctx context.Context, row tree.Datums) error {
	b.mu.Lock()
	_, err := b.mu.entries.AddRow(ctx, row)
	b.mu.Unlock()
	b.metrics.BufferEntriesIn.Inc(1)
	select {
	case b.signalCh <- struct{}{}:
	default:
		// Already signaled, don't need to signal again.
	}
	return err
}

func (b *memBuffer) getRow(ctx context.Context) (tree.Datums, error) {
	for {
		var row tree.Datums
		b.mu.Lock()
		if b.mu.entries.Len() > 0 {
			row = b.mu.entries.At(0)
			b.mu.entries.PopFirst()
		}
		b.mu.Unlock()
		if row != nil {
			b.metrics.BufferEntriesOut.Inc(1)
			return row, nil
		}

		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		case <-b.signalCh:
		}
	}
}
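// drainMemBuffer is an illustrative sketch, not part of the upstream file: it
// runs the single-consumer side of a memBuffer, calling Get in a loop and
// handing each decoded Event to a caller-supplied (hypothetical) emit
// callback. It returns when Get fails, typically with ctx.Err() once the
// context is canceled.
func drainMemBuffer(ctx context.Context, b *memBuffer, emit func(Event) error) error {
	for {
		e, err := b.Get(ctx)
		if err != nil {
			return err
		}
		if err := emit(e); err != nil {
			return err
		}
	}
}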