github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/changefeedccl/name.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Licensed as a CockroachDB Enterprise file under the Cockroach Community
     4  // License (the "License"); you may not use this file except in compliance with
     5  // the License. You may obtain a copy of the License at
     6  //
     7  //     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
     8  
     9  package changefeedccl
    10  
    11  import (
    12  	"fmt"
    13  	"regexp"
    14  	"strconv"
    15  	"strings"
    16  	"unicode/utf8"
    17  )
    18  
    19  var escapeRE = regexp.MustCompile(`_u[0-9a-fA-F]{2,8}_`)
    20  var kafkaDisallowedRE = regexp.MustCompile(`[^a-zA-Z0-9\._\-]`)
    21  var avroDisallowedRE = regexp.MustCompile(`[^A-Za-z0-9_]`)
    22  
    23  func escapeRune(r rune) string {
    24  	if r <= 1<<16 {
    25  		return fmt.Sprintf(`_u%04x_`, r)
    26  	}
    27  	return fmt.Sprintf(`_u%08x_`, r)
    28  }
    29  
    30  // SQLNameToKafkaName escapes a sql table name into a valid kafka topic name.
    31  // This is reversible by KafkaNameToSQLName except when the escaped string is
    32  // longer than kafka's length limit.
    33  //
    34  // Kafka allows names matching `[a-zA-Z0-9\._\-]{1,249}` excepting `.` and `..`.
    35  //
    36  // Runes are escaped with _u<hex>_ in an attempt to look like U+0021. For
    37  // example `!` escapes to `_u0021_`.
    38  func SQLNameToKafkaName(s string) string {
    39  	if s == `.` {
    40  		return escapeRune('.')
    41  	} else if s == `..` {
    42  		return escapeRune('.') + escapeRune('.')
    43  	}
    44  	s = escapeSQLName(s, kafkaDisallowedRE)
    45  	if len(s) > 249 {
    46  		// Not going to roundtrip, but not much we can do about that.
    47  		return s[:249]
    48  	}
    49  	return s
    50  }
    51  
    52  // KafkaNameToSQLName is the inverse of SQLNameToKafkaName except when
    53  // SQLNameToKafkaName had to truncate.
    54  func KafkaNameToSQLName(s string) string {
    55  	return unescapeSQLName(s)
    56  }
    57  
    58  // SQLNameToAvroName escapes a sql table name into a valid avro record or field
    59  // name. This is reversible by AvroNameToSQLName.
    60  //
    61  // Avro allows names matching `[a-zA-Z_][a-zA-Z0-9_]*`.
    62  //
    63  // Runes are escaped with _u<hex>_ in an attempt to look like U+0021. For
    64  // example `!` escapes to `_u0021_`.
    65  func SQLNameToAvroName(s string) string {
    66  	r, firstSize := utf8.DecodeRuneInString(s)
    67  	if r == utf8.RuneError {
    68  		// Invalid or empty string. Not much we can do here.
    69  		return s
    70  	}
    71  	// Avro disallows a leading 0-9, but allows them otherwise.
    72  	if r >= '0' && r <= '9' {
    73  		return escapeRune(r) + escapeSQLName(s[firstSize:], avroDisallowedRE)
    74  	}
    75  	return escapeSQLName(s, avroDisallowedRE)
    76  }
    77  
    78  // AvroNameToSQLName is the inverse of SQLNameToAvroName.
    79  func AvroNameToSQLName(s string) string {
    80  	return unescapeSQLName(s)
    81  }
    82  
    83  func escapeSQLName(s string, disallowedRE *regexp.Regexp) string {
    84  	// First replace anything that looks like an escape, so we can roundtrip.
    85  	s = escapeRE.ReplaceAllStringFunc(s, func(match string) string {
    86  		var ret strings.Builder
    87  		for _, r := range match {
    88  			ret.WriteString(escapeRune(r))
    89  		}
    90  		return ret.String()
    91  	})
    92  	// Then replace anything disallowed.
    93  	s = disallowedRE.ReplaceAllStringFunc(s, func(match string) string {
    94  		var ret strings.Builder
    95  		for _, r := range match {
    96  			ret.WriteString(escapeRune(r))
    97  		}
    98  		return ret.String()
    99  	})
   100  	return s
   101  }
   102  
   103  func unescapeSQLName(s string) string {
   104  	var buf [utf8.UTFMax]byte
   105  	s = escapeRE.ReplaceAllStringFunc(s, func(match string) string {
   106  		// Cut off the `_u` prefix and the `_` suffix.
   107  		hex := match[2 : len(match)-1]
   108  		r, err := strconv.ParseInt(hex, 16, 32)
   109  		if err != nil {
   110  			// Should be unreachable.
   111  			return match
   112  		}
   113  		n := utf8.EncodeRune(buf[:utf8.UTFMax], rune(r))
   114  		return string(buf[:n])
   115  	})
   116  	return s
   117  }