github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/sink/cloudstorage/table_definition.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //	http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  package cloudstorage
    14  
    15  import (
    16  	"encoding/json"
    17  	"sort"
    18  	"strconv"
    19  	"strings"
    20  
    21  	"github.com/pingcap/log"
    22  	"github.com/pingcap/tidb/pkg/parser/charset"
    23  	timodel "github.com/pingcap/tidb/pkg/parser/model"
    24  	"github.com/pingcap/tidb/pkg/parser/mysql"
    25  	"github.com/pingcap/tidb/pkg/parser/types"
    26  	"github.com/pingcap/tiflow/cdc/model"
    27  	"github.com/pingcap/tiflow/pkg/errors"
    28  	"github.com/pingcap/tiflow/pkg/hash"
    29  	"go.uber.org/zap"
    30  )
    31  
    32  const (
    33  	defaultTableDefinitionVersion = 1
    34  	marshalPrefix                 = ""
    35  	marshalIndent                 = "    "
    36  )
    37  
    38  // TableCol denotes the column info for a table definition.
    39  type TableCol struct {
    40  	ID        string      `json:"ColumnId,omitempty"`
    41  	Name      string      `json:"ColumnName" `
    42  	Tp        string      `json:"ColumnType"`
    43  	Default   interface{} `json:"ColumnDefault,omitempty"`
    44  	Precision string      `json:"ColumnPrecision,omitempty"`
    45  	Scale     string      `json:"ColumnScale,omitempty"`
    46  	Nullable  string      `json:"ColumnNullable,omitempty"`
    47  	IsPK      string      `json:"ColumnIsPk,omitempty"`
    48  }
    49  
    50  // FromTiColumnInfo converts from TiDB ColumnInfo to TableCol.
    51  func (t *TableCol) FromTiColumnInfo(col *timodel.ColumnInfo, outputColumnID bool) {
    52  	defaultFlen, defaultDecimal := mysql.GetDefaultFieldLengthAndDecimal(col.GetType())
    53  	isDecimalNotDefault := col.GetDecimal() != defaultDecimal &&
    54  		col.GetDecimal() != 0 &&
    55  		col.GetDecimal() != types.UnspecifiedLength
    56  
    57  	displayFlen, displayDecimal := col.GetFlen(), col.GetDecimal()
    58  	if displayFlen == types.UnspecifiedLength {
    59  		displayFlen = defaultFlen
    60  	}
    61  	if displayDecimal == types.UnspecifiedLength {
    62  		displayDecimal = defaultDecimal
    63  	}
    64  
    65  	if outputColumnID {
    66  		t.ID = strconv.FormatInt(col.ID, 10)
    67  	}
    68  	t.Name = col.Name.O
    69  	t.Tp = strings.ToUpper(types.TypeToStr(col.GetType(), col.GetCharset()))
    70  	if mysql.HasUnsignedFlag(col.GetFlag()) {
    71  		t.Tp += " UNSIGNED"
    72  	}
    73  	if mysql.HasPriKeyFlag(col.GetFlag()) {
    74  		t.IsPK = "true"
    75  	}
    76  	if mysql.HasNotNullFlag(col.GetFlag()) {
    77  		t.Nullable = "false"
    78  	}
    79  	t.Default = model.GetColumnDefaultValue(col)
    80  
    81  	switch col.GetType() {
    82  	case mysql.TypeTimestamp, mysql.TypeDatetime, mysql.TypeDuration:
    83  		if isDecimalNotDefault {
    84  			t.Scale = strconv.Itoa(displayDecimal)
    85  		}
    86  	case mysql.TypeDouble, mysql.TypeFloat:
    87  		t.Precision = strconv.Itoa(displayFlen)
    88  		if isDecimalNotDefault {
    89  			t.Scale = strconv.Itoa(displayDecimal)
    90  		}
    91  	case mysql.TypeNewDecimal:
    92  		t.Precision = strconv.Itoa(displayFlen)
    93  		t.Scale = strconv.Itoa(displayDecimal)
    94  	case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong, mysql.TypeLonglong,
    95  		mysql.TypeBit, mysql.TypeVarchar, mysql.TypeString, mysql.TypeVarString, mysql.TypeBlob,
    96  		mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob:
    97  		t.Precision = strconv.Itoa(displayFlen)
    98  	case mysql.TypeYear:
    99  		t.Precision = strconv.Itoa(displayFlen)
   100  	}
   101  }
   102  
   103  // ToTiColumnInfo converts from TableCol to TiDB ColumnInfo.
   104  func (t *TableCol) ToTiColumnInfo(colID int64) (*timodel.ColumnInfo, error) {
   105  	col := new(timodel.ColumnInfo)
   106  
   107  	if t.ID != "" {
   108  		var err error
   109  		col.ID, err = strconv.ParseInt(t.ID, 10, 64)
   110  		if err != nil {
   111  			return nil, errors.Trace(err)
   112  		}
   113  	}
   114  
   115  	col.ID = colID
   116  	col.Name = timodel.NewCIStr(t.Name)
   117  	tp := types.StrToType(strings.ToLower(strings.TrimSuffix(t.Tp, " UNSIGNED")))
   118  	col.FieldType = *types.NewFieldType(tp)
   119  	if strings.Contains(t.Tp, "UNSIGNED") {
   120  		col.AddFlag(mysql.UnsignedFlag)
   121  	}
   122  	if t.IsPK == "true" {
   123  		col.AddFlag(mysql.PriKeyFlag)
   124  	}
   125  	if t.Nullable == "false" {
   126  		col.AddFlag(mysql.NotNullFlag)
   127  	}
   128  	col.DefaultValue = t.Default
   129  	if strings.Contains(t.Tp, "BLOB") || strings.Contains(t.Tp, "BINARY") {
   130  		col.SetCharset(charset.CharsetBin)
   131  	} else {
   132  		col.SetCharset(charset.CharsetUTF8MB4)
   133  	}
   134  	setFlen := func(precision string) error {
   135  		if len(precision) > 0 {
   136  			flen, err := strconv.Atoi(precision)
   137  			if err != nil {
   138  				return errors.Trace(err)
   139  			}
   140  			col.SetFlen(flen)
   141  		}
   142  		return nil
   143  	}
   144  	setDecimal := func(scale string) error {
   145  		if len(scale) > 0 {
   146  			decimal, err := strconv.Atoi(scale)
   147  			if err != nil {
   148  				return errors.Trace(err)
   149  			}
   150  			col.SetDecimal(decimal)
   151  		}
   152  		return nil
   153  	}
   154  	switch col.GetType() {
   155  	case mysql.TypeTimestamp, mysql.TypeDatetime, mysql.TypeDuration:
   156  		err := setDecimal(t.Scale)
   157  		if err != nil {
   158  			return nil, errors.Trace(err)
   159  		}
   160  	case mysql.TypeDouble, mysql.TypeFloat, mysql.TypeNewDecimal:
   161  		err := setFlen(t.Precision)
   162  		if err != nil {
   163  			return nil, errors.Trace(err)
   164  		}
   165  		err = setDecimal(t.Scale)
   166  		if err != nil {
   167  			return nil, errors.Trace(err)
   168  		}
   169  	case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong, mysql.TypeLonglong,
   170  		mysql.TypeBit, mysql.TypeVarchar, mysql.TypeString, mysql.TypeVarString, mysql.TypeBlob,
   171  		mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeYear:
   172  		err := setFlen(t.Precision)
   173  		if err != nil {
   174  			return nil, errors.Trace(err)
   175  		}
   176  	}
   177  
   178  	return col, nil
   179  }
   180  
   181  // TableDefinition is the detailed table definition used for cloud storage sink.
   182  // TODO: find a better name for this struct.
   183  type TableDefinition struct {
   184  	Table        string             `json:"Table"`
   185  	Schema       string             `json:"Schema"`
   186  	Version      uint64             `json:"Version"`
   187  	TableVersion uint64             `json:"TableVersion"`
   188  	Query        string             `json:"Query"`
   189  	Type         timodel.ActionType `json:"Type"`
   190  	Columns      []TableCol         `json:"TableColumns"`
   191  	TotalColumns int                `json:"TableColumnsTotal"`
   192  }
   193  
   194  // tableDefWithoutQuery is the table definition without query, which ignores the
   195  // Query, Type and TableVersion field.
   196  type tableDefWithoutQuery struct {
   197  	Table        string     `json:"Table"`
   198  	Schema       string     `json:"Schema"`
   199  	Version      uint64     `json:"Version"`
   200  	Columns      []TableCol `json:"TableColumns"`
   201  	TotalColumns int        `json:"TableColumnsTotal"`
   202  }
   203  
   204  // FromDDLEvent converts from DDLEvent to TableDefinition.
   205  func (t *TableDefinition) FromDDLEvent(event *model.DDLEvent, outputColumnID bool) {
   206  	if event.CommitTs != event.TableInfo.Version {
   207  		log.Panic("commit ts and table info version should be equal",
   208  			zap.Any("event", event), zap.Any("tableInfo", event.TableInfo),
   209  		)
   210  	}
   211  	t.FromTableInfo(event.TableInfo, event.TableInfo.Version, outputColumnID)
   212  	t.Query = event.Query
   213  	t.Type = event.Type
   214  }
   215  
   216  // ToDDLEvent converts from TableDefinition to DDLEvent.
   217  func (t *TableDefinition) ToDDLEvent() (*model.DDLEvent, error) {
   218  	tableInfo, err := t.ToTableInfo()
   219  	if err != nil {
   220  		return nil, err
   221  	}
   222  
   223  	return &model.DDLEvent{
   224  		TableInfo: tableInfo,
   225  		CommitTs:  t.TableVersion,
   226  		Type:      t.Type,
   227  		Query:     t.Query,
   228  	}, nil
   229  }
   230  
   231  // FromTableInfo converts from TableInfo to TableDefinition.
   232  func (t *TableDefinition) FromTableInfo(
   233  	info *model.TableInfo, tableInfoVersion model.Ts, outputColumnID bool,
   234  ) {
   235  	t.Version = defaultTableDefinitionVersion
   236  	t.TableVersion = tableInfoVersion
   237  
   238  	t.Schema = info.TableName.Schema
   239  	if info.TableInfo == nil {
   240  		return
   241  	}
   242  	t.Table = info.TableName.Table
   243  	t.TotalColumns = len(info.Columns)
   244  	for _, col := range info.Columns {
   245  		var tableCol TableCol
   246  		tableCol.FromTiColumnInfo(col, outputColumnID)
   247  		t.Columns = append(t.Columns, tableCol)
   248  	}
   249  }
   250  
   251  // ToTableInfo converts from TableDefinition to DDLEvent.
   252  func (t *TableDefinition) ToTableInfo() (*model.TableInfo, error) {
   253  	tidbTableInfo := &timodel.TableInfo{
   254  		Name: timodel.NewCIStr(t.Table),
   255  	}
   256  	nextMockID := int64(100) // 100 is an arbitrary number
   257  	for _, col := range t.Columns {
   258  		tiCol, err := col.ToTiColumnInfo(nextMockID)
   259  		if err != nil {
   260  			return nil, err
   261  		}
   262  		if mysql.HasPriKeyFlag(tiCol.GetFlag()) {
   263  			// use PKIsHandle to make sure that the primary keys can be detected by `WrapTableInfo`
   264  			tidbTableInfo.PKIsHandle = true
   265  		}
   266  		tidbTableInfo.Columns = append(tidbTableInfo.Columns, tiCol)
   267  		nextMockID += 1
   268  	}
   269  	info := model.WrapTableInfo(100, t.Schema, 100, tidbTableInfo)
   270  
   271  	return info, nil
   272  }
   273  
   274  // IsTableSchema returns whether the TableDefinition is a table schema.
   275  func (t *TableDefinition) IsTableSchema() bool {
   276  	if len(t.Columns) != t.TotalColumns {
   277  		log.Panic("invalid table definition", zap.Any("tableDef", t))
   278  	}
   279  	return t.TotalColumns != 0
   280  }
   281  
   282  // MarshalWithQuery marshals TableDefinition with Query field.
   283  func (t *TableDefinition) MarshalWithQuery() ([]byte, error) {
   284  	data, err := json.MarshalIndent(t, marshalPrefix, marshalIndent)
   285  	if err != nil {
   286  		return nil, errors.WrapError(errors.ErrMarshalFailed, err)
   287  	}
   288  	return data, nil
   289  }
   290  
   291  // marshalWithoutQuery marshals TableDefinition without Query field.
   292  func (t *TableDefinition) marshalWithoutQuery() ([]byte, error) {
   293  	// sort columns by name
   294  	sortedColumns := make([]TableCol, len(t.Columns))
   295  	copy(sortedColumns, t.Columns)
   296  	sort.Slice(sortedColumns, func(i, j int) bool {
   297  		return sortedColumns[i].Name < sortedColumns[j].Name
   298  	})
   299  
   300  	defWithoutQuery := tableDefWithoutQuery{
   301  		Table:        t.Table,
   302  		Schema:       t.Schema,
   303  		Columns:      sortedColumns,
   304  		TotalColumns: t.TotalColumns,
   305  	}
   306  
   307  	data, err := json.MarshalIndent(defWithoutQuery, marshalPrefix, marshalIndent)
   308  	if err != nil {
   309  		return nil, errors.WrapError(errors.ErrMarshalFailed, err)
   310  	}
   311  	return data, nil
   312  }
   313  
   314  // Sum32 returns the 32-bits hash value of TableDefinition.
   315  func (t *TableDefinition) Sum32(hasher *hash.PositionInertia) (uint32, error) {
   316  	if hasher == nil {
   317  		hasher = hash.NewPositionInertia()
   318  	}
   319  	hasher.Reset()
   320  	data, err := t.marshalWithoutQuery()
   321  	if err != nil {
   322  		return 0, err
   323  	}
   324  
   325  	hasher.Write(data)
   326  	return hasher.Sum32(), nil
   327  }
   328  
   329  // GenerateSchemaFilePath generates the schema file path for TableDefinition.
   330  func (t *TableDefinition) GenerateSchemaFilePath() (string, error) {
   331  	checksum, err := t.Sum32(nil)
   332  	if err != nil {
   333  		return "", err
   334  	}
   335  	if !t.IsTableSchema() && t.Table != "" {
   336  		log.Panic("invalid table definition", zap.Any("tableDef", t))
   337  	}
   338  	return generateSchemaFilePath(t.Schema, t.Table, t.TableVersion, checksum), nil
   339  }