github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/sink/cloudstorage/path_key.go (about) 1 // Copyright 2023 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package cloudstorage 15 16 import ( 17 "fmt" 18 "regexp" 19 "strconv" 20 "strings" 21 22 "github.com/pingcap/tiflow/pkg/config" 23 "github.com/pingcap/tiflow/pkg/errors" 24 "github.com/pingcap/tiflow/pkg/quotes" 25 ) 26 27 // SchemaPathKey is the key of schema path. 28 type SchemaPathKey struct { 29 Schema string 30 Table string 31 TableVersion uint64 32 } 33 34 // GetKey returns the key of schema path. 35 func (s *SchemaPathKey) GetKey() string { 36 return quotes.QuoteSchema(s.Schema, s.Table) 37 } 38 39 // ParseSchemaFilePath parses the schema file path and returns the table version and checksum. 40 func (s *SchemaPathKey) ParseSchemaFilePath(path string) (uint32, error) { 41 // For <schema>/<table>/meta/schema_{tableVersion}_{checksum}.json, the parts 42 // should be ["<schema>", "<table>", "meta", "schema_{tableVersion}_{checksum}.json"]. 43 matches := strings.Split(path, "/") 44 45 var schema, table string 46 schema = matches[0] 47 switch len(matches) { 48 case 3: 49 table = "" 50 case 4: 51 table = matches[1] 52 default: 53 return 0, errors.Trace(fmt.Errorf("cannot match schema path pattern for %s", path)) 54 } 55 56 if matches[len(matches)-2] != "meta" { 57 return 0, errors.Trace(fmt.Errorf("cannot match schema path pattern for %s", path)) 58 } 59 60 schemaFileName := matches[len(matches)-1] 61 version, checksum := mustParseSchemaName(schemaFileName) 62 63 *s = SchemaPathKey{ 64 Schema: schema, 65 Table: table, 66 TableVersion: version, 67 } 68 return checksum, nil 69 } 70 71 // DmlPathKey is the key of dml path. 72 type DmlPathKey struct { 73 SchemaPathKey 74 PartitionNum int64 75 Date string 76 } 77 78 // GenerateDMLFilePath generates the dml file path. 79 func (d *DmlPathKey) GenerateDMLFilePath( 80 idx uint64, extension string, fileIndexWidth int, 81 ) string { 82 var elems []string 83 84 elems = append(elems, d.Schema) 85 elems = append(elems, d.Table) 86 elems = append(elems, fmt.Sprintf("%d", d.TableVersion)) 87 88 if d.PartitionNum != 0 { 89 elems = append(elems, fmt.Sprintf("%d", d.PartitionNum)) 90 } 91 if len(d.Date) != 0 { 92 elems = append(elems, d.Date) 93 } 94 elems = append(elems, generateDataFileName(idx, extension, fileIndexWidth)) 95 96 return strings.Join(elems, "/") 97 } 98 99 // ParseDMLFilePath parses the dml file path and returns the max file index. 100 // DML file path pattern is as follows: 101 // {schema}/{table}/{table-version-separator}/{partition-separator}/{date-separator}/, where 102 // partition-separator and date-separator could be empty. 103 // DML file name pattern is as follows: CDC{num}.extension. 104 func (d *DmlPathKey) ParseDMLFilePath(dateSeparator, path string) (uint64, error) { 105 var partitionNum int64 106 107 str := `(\w+)\/(\w+)\/(\d+)\/(\d+)?\/*` 108 switch dateSeparator { 109 case config.DateSeparatorNone.String(): 110 str += `(\d{4})*` 111 case config.DateSeparatorYear.String(): 112 str += `(\d{4})\/` 113 case config.DateSeparatorMonth.String(): 114 str += `(\d{4}-\d{2})\/` 115 case config.DateSeparatorDay.String(): 116 str += `(\d{4}-\d{2}-\d{2})\/` 117 } 118 str += `CDC(\d+).\w+` 119 pathRE, err := regexp.Compile(str) 120 if err != nil { 121 return 0, err 122 } 123 124 matches := pathRE.FindStringSubmatch(path) 125 if len(matches) != 7 { 126 return 0, fmt.Errorf("cannot match dml path pattern for %s", path) 127 } 128 129 version, err := strconv.ParseUint(matches[3], 10, 64) 130 if err != nil { 131 return 0, err 132 } 133 134 if len(matches[4]) > 0 { 135 partitionNum, err = strconv.ParseInt(matches[4], 10, 64) 136 if err != nil { 137 return 0, err 138 } 139 } 140 fileIdx, err := strconv.ParseUint(strings.TrimLeft(matches[6], "0"), 10, 64) 141 if err != nil { 142 return 0, err 143 } 144 145 *d = DmlPathKey{ 146 SchemaPathKey: SchemaPathKey{ 147 Schema: matches[1], 148 Table: matches[2], 149 TableVersion: version, 150 }, 151 PartitionNum: partitionNum, 152 Date: matches[5], 153 } 154 155 return fileIdx, nil 156 }