github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/changefeedccl/name.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Licensed as a CockroachDB Enterprise file under the Cockroach Community 4 // License (the "License"); you may not use this file except in compliance with 5 // the License. You may obtain a copy of the License at 6 // 7 // https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt 8 9 package changefeedccl 10 11 import ( 12 "fmt" 13 "regexp" 14 "strconv" 15 "strings" 16 "unicode/utf8" 17 ) 18 19 var escapeRE = regexp.MustCompile(`_u[0-9a-fA-F]{2,8}_`) 20 var kafkaDisallowedRE = regexp.MustCompile(`[^a-zA-Z0-9\._\-]`) 21 var avroDisallowedRE = regexp.MustCompile(`[^A-Za-z0-9_]`) 22 23 func escapeRune(r rune) string { 24 if r <= 1<<16 { 25 return fmt.Sprintf(`_u%04x_`, r) 26 } 27 return fmt.Sprintf(`_u%08x_`, r) 28 } 29 30 // SQLNameToKafkaName escapes a sql table name into a valid kafka topic name. 31 // This is reversible by KafkaNameToSQLName except when the escaped string is 32 // longer than kafka's length limit. 33 // 34 // Kafka allows names matching `[a-zA-Z0-9\._\-]{1,249}` excepting `.` and `..`. 35 // 36 // Runes are escaped with _u<hex>_ in an attempt to look like U+0021. For 37 // example `!` escapes to `_u0021_`. 38 func SQLNameToKafkaName(s string) string { 39 if s == `.` { 40 return escapeRune('.') 41 } else if s == `..` { 42 return escapeRune('.') + escapeRune('.') 43 } 44 s = escapeSQLName(s, kafkaDisallowedRE) 45 if len(s) > 249 { 46 // Not going to roundtrip, but not much we can do about that. 47 return s[:249] 48 } 49 return s 50 } 51 52 // KafkaNameToSQLName is the inverse of SQLNameToKafkaName except when 53 // SQLNameToKafkaName had to truncate. 54 func KafkaNameToSQLName(s string) string { 55 return unescapeSQLName(s) 56 } 57 58 // SQLNameToAvroName escapes a sql table name into a valid avro record or field 59 // name. This is reversible by AvroNameToSQLName. 60 // 61 // Avro allows names matching `[a-zA-Z_][a-zA-Z0-9_]*`. 62 // 63 // Runes are escaped with _u<hex>_ in an attempt to look like U+0021. For 64 // example `!` escapes to `_u0021_`. 65 func SQLNameToAvroName(s string) string { 66 r, firstSize := utf8.DecodeRuneInString(s) 67 if r == utf8.RuneError { 68 // Invalid or empty string. Not much we can do here. 69 return s 70 } 71 // Avro disallows a leading 0-9, but allows them otherwise. 72 if r >= '0' && r <= '9' { 73 return escapeRune(r) + escapeSQLName(s[firstSize:], avroDisallowedRE) 74 } 75 return escapeSQLName(s, avroDisallowedRE) 76 } 77 78 // AvroNameToSQLName is the inverse of SQLNameToAvroName. 79 func AvroNameToSQLName(s string) string { 80 return unescapeSQLName(s) 81 } 82 83 func escapeSQLName(s string, disallowedRE *regexp.Regexp) string { 84 // First replace anything that looks like an escape, so we can roundtrip. 85 s = escapeRE.ReplaceAllStringFunc(s, func(match string) string { 86 var ret strings.Builder 87 for _, r := range match { 88 ret.WriteString(escapeRune(r)) 89 } 90 return ret.String() 91 }) 92 // Then replace anything disallowed. 93 s = disallowedRE.ReplaceAllStringFunc(s, func(match string) string { 94 var ret strings.Builder 95 for _, r := range match { 96 ret.WriteString(escapeRune(r)) 97 } 98 return ret.String() 99 }) 100 return s 101 } 102 103 func unescapeSQLName(s string) string { 104 var buf [utf8.UTFMax]byte 105 s = escapeRE.ReplaceAllStringFunc(s, func(match string) string { 106 // Cut off the `_u` prefix and the `_` suffix. 107 hex := match[2 : len(match)-1] 108 r, err := strconv.ParseInt(hex, 16, 32) 109 if err != nil { 110 // Should be unreachable. 111 return match 112 } 113 n := utf8.EncodeRune(buf[:utf8.UTFMax], rune(r)) 114 return string(buf[:n]) 115 }) 116 return s 117 }