github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/sink/codec/craft/model.go (about) 1 // Copyright 2021 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License") 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.orglicensesLICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package craft 15 16 import ( 17 "github.com/pingcap/errors" 18 "github.com/pingcap/tiflow/cdc/model" 19 cerror "github.com/pingcap/tiflow/pkg/errors" 20 ) 21 22 const ( 23 // Version1 represents the version of craft format 24 Version1 uint64 = 1 25 26 // DefaultBufferCapacity is default buffer size 27 DefaultBufferCapacity = 1024 28 29 // Column group types 30 columnGroupTypeOld = 0x2 31 columnGroupTypeNew = 0x1 32 33 // Size tables index 34 metaSizeTableIndex = 0 35 bodySizeTableIndex = 1 36 columnGroupSizeTableStartIndex = 2 37 38 // meta size table index 39 headerSizeIndex = 0 40 termDictionarySizeIndex = 1 41 maxMetaSizeIndex = termDictionarySizeIndex 42 43 nullInt64 = -1 44 ) 45 46 var ( 47 oneNullInt64Slice = []int64{nullInt64} 48 oneNullStringSlice = []*string{nil} 49 emptyDecodingTermDictionary = &termDictionary{ 50 id: make([]string, 0), 51 } 52 ) 53 54 type termDictionary struct { 55 term map[string]int 56 id []string 57 } 58 59 func newEncodingTermDictionaryWithSize(size int) *termDictionary { 60 return &termDictionary{ 61 term: make(map[string]int), 62 id: make([]string, 0, size), 63 } 64 } 65 66 func newEncodingTermDictionary() *termDictionary { 67 return newEncodingTermDictionaryWithSize(8) // TODO, this number should be evaluated 68 } 69 70 func (d *termDictionary) encodeNullable(s *string) int64 { 71 if s == nil { 72 return nullInt64 73 } 74 return d.encode(*s) 75 } 76 77 func (d *termDictionary) encode(s string) int64 { 78 id, ok := d.term[s] 79 if !ok { 80 id := len(d.id) 81 d.term[s] = id 82 d.id = append(d.id, s) 83 return int64(id) 84 } 85 return int64(id) 86 } 87 88 func (d *termDictionary) encodeNullableChunk(array []*string) []int64 { 89 result := make([]int64, len(array)) 90 for idx, s := range array { 91 result[idx] = d.encodeNullable(s) 92 } 93 return result 94 } 95 96 func (d *termDictionary) encodeChunk(array []string) []int64 { 97 result := make([]int64, len(array)) 98 for idx, s := range array { 99 result[idx] = d.encode(s) 100 } 101 return result 102 } 103 104 func (d *termDictionary) decode(id int64) (string, error) { 105 i := int(id) 106 if len(d.id) <= i || i < 0 { 107 return "", cerror.ErrCraftCodecInvalidData.GenWithStack("invalid term id") 108 } 109 return d.id[i], nil 110 } 111 112 func (d *termDictionary) decodeNullable(id int64) (*string, error) { 113 if id == nullInt64 { 114 return nil, nil 115 } 116 if id < nullInt64 { 117 return nil, cerror.ErrCraftCodecInvalidData.GenWithStack("invalid term id") 118 } 119 s, err := d.decode(id) 120 if err != nil { 121 return nil, err 122 } 123 return &s, nil 124 } 125 126 func (d *termDictionary) decodeChunk(array []int64) ([]string, error) { 127 result := make([]string, len(array)) 128 for idx, id := range array { 129 t, err := d.decode(id) 130 if err != nil { 131 return nil, err 132 } 133 result[idx] = t 134 } 135 return result, nil 136 } 137 138 func (d *termDictionary) decodeNullableChunk(array []int64) ([]*string, error) { 139 result := make([]*string, len(array)) 140 for idx, id := range array { 141 t, err := d.decodeNullable(id) 142 if err != nil { 143 return nil, err 144 } 145 result[idx] = t 146 } 147 return result, nil 148 } 149 150 func encodeTermDictionary(bits []byte, dict *termDictionary) []byte { 151 if len(dict.id) == 0 { 152 return bits 153 } 154 bits = encodeUvarint(bits, uint64(len(dict.id))) 155 bits = encodeStringChunk(bits, dict.id) 156 return bits 157 } 158 159 func decodeTermDictionary(bits []byte, allocator *SliceAllocator) ([]byte, *termDictionary, error) { 160 newBits, l, err := decodeUvarint(bits) 161 if err != nil { 162 return bits, nil, err 163 } 164 newBits, id, err := decodeStringChunk(newBits, int(l), allocator) 165 if err != nil { 166 return bits, nil, err 167 } 168 return newBits, &termDictionary{id: id}, nil 169 } 170 171 // Headers in columnar layout 172 type Headers struct { 173 ts []uint64 174 ty []uint64 175 partition []int64 176 schema []*string 177 table []*string 178 179 count int 180 } 181 182 // Count returns number of headers 183 func (h *Headers) Count() int { 184 return h.count 185 } 186 187 func (h *Headers) encode(bits []byte, dict *termDictionary) []byte { 188 bits = encodeDeltaUvarintChunk(bits, h.ts[:h.count]) 189 bits = encodeUvarintChunk(bits, h.ty[:h.count]) 190 bits = encodeDeltaVarintChunk(bits, h.partition[:h.count]) 191 bits = encodeDeltaVarintChunk(bits, dict.encodeNullableChunk(h.schema[:h.count])) 192 bits = encodeDeltaVarintChunk(bits, dict.encodeNullableChunk(h.table[:h.count])) 193 return bits 194 } 195 196 func (h *Headers) appendHeader(allocator *SliceAllocator, ts, ty uint64, partition int64, schema, table *string) int { 197 idx := h.count 198 if idx+1 > len(h.ty) { 199 size := newBufferSize(idx) 200 h.ts = allocator.resizeUint64Slice(h.ts, size) 201 h.ty = allocator.resizeUint64Slice(h.ty, size) 202 h.partition = allocator.resizeInt64Slice(h.partition, size) 203 h.schema = allocator.resizeNullableStringSlice(h.schema, size) 204 h.table = allocator.resizeNullableStringSlice(h.table, size) 205 } 206 h.ts[idx] = ts 207 h.ty[idx] = ty 208 h.partition[idx] = partition 209 h.schema[idx] = schema 210 h.table[idx] = table 211 h.count++ 212 213 return 32 + len(*schema) + len(*table) /* 4 64-bits integers and two bytes array */ 214 } 215 216 func (h *Headers) reset() { 217 h.count = 0 218 } 219 220 // GetType returns type of event at given index 221 func (h *Headers) GetType(index int) model.MessageType { 222 return model.MessageType(h.ty[index]) 223 } 224 225 // GetTs returns timestamp of event at given index 226 func (h *Headers) GetTs(index int) uint64 { 227 return h.ts[index] 228 } 229 230 // GetPartition returns partition of event at given index 231 func (h *Headers) GetPartition(index int) int64 { 232 return h.partition[index] 233 } 234 235 // GetSchema returns schema of event at given index 236 func (h *Headers) GetSchema(index int) string { 237 if h.schema[index] != nil { 238 return *h.schema[index] 239 } 240 return "" 241 } 242 243 // GetTable returns table of event at given index 244 func (h *Headers) GetTable(index int) string { 245 if h.table[index] != nil { 246 return *h.table[index] 247 } 248 return "" 249 } 250 251 func decodeHeaders(bits []byte, numHeaders int, allocator *SliceAllocator, dict *termDictionary) (*Headers, error) { 252 var ts, ty []uint64 253 var partition, tmp []int64 254 var schema, table []*string 255 var err error 256 if bits, ts, err = decodeDeltaUvarintChunk(bits, numHeaders, allocator); err != nil { 257 return nil, errors.Trace(err) 258 } 259 if bits, ty, err = decodeUvarintChunk(bits, numHeaders, allocator); err != nil { 260 return nil, errors.Trace(err) 261 } 262 if bits, partition, err = decodeDeltaVarintChunk(bits, numHeaders, allocator); err != nil { 263 return nil, errors.Trace(err) 264 } 265 if bits, tmp, err = decodeDeltaVarintChunk(bits, numHeaders, allocator); err != nil { 266 return nil, errors.Trace(err) 267 } 268 if schema, err = dict.decodeNullableChunk(tmp); err != nil { 269 return nil, errors.Trace(err) 270 } 271 if _, tmp, err = decodeDeltaVarintChunk(bits, numHeaders, allocator); err != nil { 272 return nil, errors.Trace(err) 273 } 274 if table, err = dict.decodeNullableChunk(tmp); err != nil { 275 return nil, errors.Trace(err) 276 } 277 return &Headers{ 278 ts: ts, 279 ty: ty, 280 partition: partition, 281 schema: schema, 282 table: table, 283 count: numHeaders, 284 }, nil 285 } 286 287 // Column group in columnar layout 288 type columnGroup struct { 289 ty byte 290 names []string 291 types []uint64 292 flags []uint64 293 values [][]byte 294 } 295 296 func (g *columnGroup) encode(bits []byte, dict *termDictionary) []byte { 297 bits = append(bits, g.ty) 298 bits = encodeUvarint(bits, uint64(len(g.names))) 299 bits = encodeDeltaVarintChunk(bits, dict.encodeChunk(g.names)) 300 bits = encodeUvarintChunk(bits, g.types) 301 bits = encodeUvarintChunk(bits, g.flags) 302 bits = encodeNullableBytesChunk(bits, g.values) 303 return bits 304 } 305 306 // ToModel converts column group into model 307 func (g *columnGroup) ToModel() ([]*model.Column, error) { 308 columns := make([]*model.Column, len(g.names)) 309 for i, name := range g.names { 310 ty := byte(g.types[i]) 311 flag := model.ColumnFlagType(g.flags[i]) 312 value, err := DecodeTiDBType(ty, flag, g.values[i]) 313 if err != nil { 314 return nil, errors.Trace(err) 315 } 316 columns[i] = &model.Column{ 317 Name: name, 318 Type: ty, 319 Flag: flag, 320 Value: value, 321 } 322 } 323 return columns, nil 324 } 325 326 func decodeColumnGroup(bits []byte, allocator *SliceAllocator, dict *termDictionary) (*columnGroup, error) { 327 var numColumns int 328 bits, ty, err := decodeUint8(bits) 329 if err != nil { 330 return nil, errors.Trace(err) 331 } 332 bits, numColumns, err = decodeUvarintLength(bits) 333 if err != nil { 334 return nil, errors.Trace(err) 335 } 336 var names []string 337 var tmp []int64 338 var values [][]byte 339 var types, flags []uint64 340 bits, tmp, err = decodeDeltaVarintChunk(bits, numColumns, allocator) 341 if err != nil { 342 return nil, errors.Trace(err) 343 } 344 names, err = dict.decodeChunk(tmp) 345 if err != nil { 346 return nil, errors.Trace(err) 347 } 348 bits, types, err = decodeUvarintChunk(bits, numColumns, allocator) 349 if err != nil { 350 return nil, errors.Trace(err) 351 } 352 bits, flags, err = decodeUvarintChunk(bits, numColumns, allocator) 353 if err != nil { 354 return nil, errors.Trace(err) 355 } 356 _, values, err = decodeNullableBytesChunk(bits, numColumns, allocator) 357 if err != nil { 358 return nil, errors.Trace(err) 359 } 360 return &columnGroup{ 361 ty: ty, 362 names: names, 363 types: types, 364 flags: flags, 365 values: values, 366 }, nil 367 } 368 369 func newColumnGroup(allocator *SliceAllocator, ty byte, columns []*model.Column, onlyHandleKeyColumns bool) (int, *columnGroup) { 370 l := len(columns) 371 if l == 0 { 372 return 0, nil 373 } 374 values := allocator.bytesSlice(l) 375 names := allocator.stringSlice(l) 376 types := allocator.uint64Slice(l) 377 flags := allocator.uint64Slice(l) 378 estimatedSize := 0 379 idx := 0 380 for _, col := range columns { 381 if col == nil { 382 continue 383 } 384 if onlyHandleKeyColumns && !col.Flag.IsHandleKey() { 385 continue 386 } 387 names[idx] = col.Name 388 types[idx] = uint64(col.Type) 389 flags[idx] = uint64(col.Flag) 390 value := EncodeTiDBType(allocator, col.Type, col.Flag, col.Value) 391 values[idx] = value 392 estimatedSize += len(col.Name) + len(value) + 16 /* two 64-bits integers */ 393 idx++ 394 } 395 if idx > 0 { 396 return estimatedSize, &columnGroup{ 397 ty: ty, 398 names: names[:idx], 399 types: types[:idx], 400 flags: flags[:idx], 401 values: values[:idx], 402 } 403 } 404 return estimatedSize, nil 405 } 406 407 // Row changed message is basically an array of column groups 408 type rowChangedEvent = []*columnGroup 409 410 func newRowChangedMessage(allocator *SliceAllocator, ev *model.RowChangedEvent, onlyHandleKeyColumns bool) (int, rowChangedEvent) { 411 numGroups := 0 412 if ev.PreColumns != nil { 413 numGroups++ 414 } 415 if ev.Columns != nil { 416 numGroups++ 417 } 418 groups := allocator.columnGroupSlice(numGroups) 419 estimatedSize := 0 420 idx := 0 421 if size, group := newColumnGroup( 422 allocator, 423 columnGroupTypeNew, 424 ev.GetColumns(), 425 false); group != nil { 426 groups[idx] = group 427 idx++ 428 estimatedSize += size 429 } 430 onlyHandleKeyColumns = onlyHandleKeyColumns && ev.IsDelete() 431 if size, group := newColumnGroup( 432 allocator, 433 columnGroupTypeOld, 434 ev.GetPreColumns(), 435 onlyHandleKeyColumns); group != nil { 436 groups[idx] = group 437 estimatedSize += size 438 } 439 return estimatedSize, groups 440 } 441 442 // RowChangedEventBuffer is a buffer to save row changed events in batch 443 type RowChangedEventBuffer struct { 444 headers *Headers 445 446 events []rowChangedEvent 447 eventsCount int 448 estimatedSize int 449 450 allocator *SliceAllocator 451 } 452 453 // NewRowChangedEventBuffer creates new row changed event buffer with given allocator 454 func NewRowChangedEventBuffer(allocator *SliceAllocator) *RowChangedEventBuffer { 455 return &RowChangedEventBuffer{ 456 headers: &Headers{}, 457 allocator: allocator, 458 } 459 } 460 461 // Encode row changed event buffer into bits 462 func (b *RowChangedEventBuffer) Encode() []byte { 463 bits := NewMessageEncoder(b.allocator).encodeHeaders(b.headers).encodeRowChangeEvents(b.events[:b.eventsCount]).Encode() 464 b.Reset() 465 return bits 466 } 467 468 // AppendRowChangedEvent append a new event to buffer 469 func (b *RowChangedEventBuffer) AppendRowChangedEvent(ev *model.RowChangedEvent, onlyHandleKeyColumns bool) (rows, size int) { 470 var partition int64 = -1 471 if ev.TableInfo.IsPartitionTable() { 472 partition = ev.PhysicalTableID 473 } 474 475 var schema, table *string 476 if len(ev.TableInfo.GetSchemaName()) > 0 { 477 schema = ev.TableInfo.GetSchemaNamePtr() 478 } 479 if len(ev.TableInfo.GetTableName()) > 0 { 480 table = ev.TableInfo.GetTableNamePtr() 481 } 482 483 b.estimatedSize += b.headers.appendHeader( 484 b.allocator, 485 ev.CommitTs, 486 uint64(model.MessageTypeRow), 487 partition, 488 schema, 489 table, 490 ) 491 if b.eventsCount+1 > len(b.events) { 492 b.events = b.allocator.resizeRowChangedEventSlice(b.events, newBufferSize(b.eventsCount)) 493 } 494 size, message := newRowChangedMessage(b.allocator, ev, onlyHandleKeyColumns) 495 b.events[b.eventsCount] = message 496 b.eventsCount++ 497 b.estimatedSize += size 498 return b.eventsCount, b.estimatedSize 499 } 500 501 // Reset buffer 502 func (b *RowChangedEventBuffer) Reset() { 503 b.headers.reset() 504 b.eventsCount = 0 505 b.estimatedSize = 0 506 } 507 508 // Size of buffer 509 func (b *RowChangedEventBuffer) Size() int { 510 return b.estimatedSize 511 } 512 513 // RowsCount returns number of rows batched in this buffer. 514 func (b *RowChangedEventBuffer) RowsCount() int { 515 return b.eventsCount 516 } 517 518 // GetHeaders returns headers of buffer 519 func (b *RowChangedEventBuffer) GetHeaders() *Headers { 520 return b.headers 521 }