github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/sink/cloudstorage/path_key.go (about)

     1  // Copyright 2023 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //	http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package cloudstorage
    15  
    16  import (
    17  	"fmt"
    18  	"regexp"
    19  	"strconv"
    20  	"strings"
    21  
    22  	"github.com/pingcap/tiflow/pkg/config"
    23  	"github.com/pingcap/tiflow/pkg/errors"
    24  	"github.com/pingcap/tiflow/pkg/quotes"
    25  )
    26  
// SchemaPathKey is the key of schema path. It identifies one version of a
// schema/table pair under the storage path layout.
type SchemaPathKey struct {
	// Schema is the schema name; when parsed, it is the first path segment.
	Schema string
	// Table is the table name; it is left empty when the parsed schema path
	// has no table segment (i.e. <schema>/meta/<file>).
	Table string
	// TableVersion is the version number carried in the schema file name
	// or in the table-version segment of a DML path.
	TableVersion uint64
}
    33  
    34  // GetKey returns the key of schema path.
    35  func (s *SchemaPathKey) GetKey() string {
    36  	return quotes.QuoteSchema(s.Schema, s.Table)
    37  }
    38  
    39  // ParseSchemaFilePath parses the schema file path and returns the table version and checksum.
    40  func (s *SchemaPathKey) ParseSchemaFilePath(path string) (uint32, error) {
    41  	// For <schema>/<table>/meta/schema_{tableVersion}_{checksum}.json, the parts
    42  	// should be ["<schema>", "<table>", "meta", "schema_{tableVersion}_{checksum}.json"].
    43  	matches := strings.Split(path, "/")
    44  
    45  	var schema, table string
    46  	schema = matches[0]
    47  	switch len(matches) {
    48  	case 3:
    49  		table = ""
    50  	case 4:
    51  		table = matches[1]
    52  	default:
    53  		return 0, errors.Trace(fmt.Errorf("cannot match schema path pattern for %s", path))
    54  	}
    55  
    56  	if matches[len(matches)-2] != "meta" {
    57  		return 0, errors.Trace(fmt.Errorf("cannot match schema path pattern for %s", path))
    58  	}
    59  
    60  	schemaFileName := matches[len(matches)-1]
    61  	version, checksum := mustParseSchemaName(schemaFileName)
    62  
    63  	*s = SchemaPathKey{
    64  		Schema:       schema,
    65  		Table:        table,
    66  		TableVersion: version,
    67  	}
    68  	return checksum, nil
    69  }
    70  
// DmlPathKey is the key of dml path. It extends SchemaPathKey with the
// optional partition and date segments of a DML file directory.
type DmlPathKey struct {
	SchemaPathKey
	// PartitionNum is the partition segment of the path; a value of 0 means
	// the segment is omitted when the path is generated.
	PartitionNum int64
	// Date is the date segment of the path; an empty string means the
	// segment is omitted when the path is generated.
	Date string
}
    77  
    78  // GenerateDMLFilePath generates the dml file path.
    79  func (d *DmlPathKey) GenerateDMLFilePath(
    80  	idx uint64, extension string, fileIndexWidth int,
    81  ) string {
    82  	var elems []string
    83  
    84  	elems = append(elems, d.Schema)
    85  	elems = append(elems, d.Table)
    86  	elems = append(elems, fmt.Sprintf("%d", d.TableVersion))
    87  
    88  	if d.PartitionNum != 0 {
    89  		elems = append(elems, fmt.Sprintf("%d", d.PartitionNum))
    90  	}
    91  	if len(d.Date) != 0 {
    92  		elems = append(elems, d.Date)
    93  	}
    94  	elems = append(elems, generateDataFileName(idx, extension, fileIndexWidth))
    95  
    96  	return strings.Join(elems, "/")
    97  }
    98  
    99  // ParseDMLFilePath parses the dml file path and returns the max file index.
   100  // DML file path pattern is as follows:
   101  // {schema}/{table}/{table-version-separator}/{partition-separator}/{date-separator}/, where
   102  // partition-separator and date-separator could be empty.
   103  // DML file name pattern is as follows: CDC{num}.extension.
   104  func (d *DmlPathKey) ParseDMLFilePath(dateSeparator, path string) (uint64, error) {
   105  	var partitionNum int64
   106  
   107  	str := `(\w+)\/(\w+)\/(\d+)\/(\d+)?\/*`
   108  	switch dateSeparator {
   109  	case config.DateSeparatorNone.String():
   110  		str += `(\d{4})*`
   111  	case config.DateSeparatorYear.String():
   112  		str += `(\d{4})\/`
   113  	case config.DateSeparatorMonth.String():
   114  		str += `(\d{4}-\d{2})\/`
   115  	case config.DateSeparatorDay.String():
   116  		str += `(\d{4}-\d{2}-\d{2})\/`
   117  	}
   118  	str += `CDC(\d+).\w+`
   119  	pathRE, err := regexp.Compile(str)
   120  	if err != nil {
   121  		return 0, err
   122  	}
   123  
   124  	matches := pathRE.FindStringSubmatch(path)
   125  	if len(matches) != 7 {
   126  		return 0, fmt.Errorf("cannot match dml path pattern for %s", path)
   127  	}
   128  
   129  	version, err := strconv.ParseUint(matches[3], 10, 64)
   130  	if err != nil {
   131  		return 0, err
   132  	}
   133  
   134  	if len(matches[4]) > 0 {
   135  		partitionNum, err = strconv.ParseInt(matches[4], 10, 64)
   136  		if err != nil {
   137  			return 0, err
   138  		}
   139  	}
   140  	fileIdx, err := strconv.ParseUint(strings.TrimLeft(matches[6], "0"), 10, 64)
   141  	if err != nil {
   142  		return 0, err
   143  	}
   144  
   145  	*d = DmlPathKey{
   146  		SchemaPathKey: SchemaPathKey{
   147  			Schema:       matches[1],
   148  			Table:        matches[2],
   149  			TableVersion: version,
   150  		},
   151  		PartitionNum: partitionNum,
   152  		Date:         matches[5],
   153  	}
   154  
   155  	return fileIdx, nil
   156  }