// Copyright 2012, Google Inc. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in licenses/BSD-vitess.txt.

// Portions of this file are additionally subject to the following
// license and copyright.
//
// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

// This code was derived from https://github.com/youtube/vitess.

// This is its own package so it can be shared among packages that parser
// depends on.

var (
	// DontEscape is a sentinel value for characters that don't need to be escaped.
	DontEscape = byte(255)
	// EncodeMap specifies how to escape binary data with '\'. Entry b holds the
	// character to write after the backslash, or DontEscape when b has no
	// backslash escape.
	EncodeMap [256]byte
	// HexMap is a mapping from each byte to the `\x%%` hex form as a []byte.
	// The hex digits are lowercase. Entries are views into one shared buffer
	// and must be treated as read-only.
	HexMap [256][]byte
	// RawHexMap is a mapping from each byte to the `%%` hex form as a []byte.
	// The hex digits are lowercase. Entries are views into the same shared
	// buffer as HexMap and must be treated as read-only.
	RawHexMap [256][]byte
)

// init populates EncodeMap, HexMap and RawHexMap.
func init() {
	// Every byte defaults to "no backslash escape"; the few bytes that do have
	// one are overridden below.
	for i := range EncodeMap {
		EncodeMap[i] = DontEscape
	}
	for from, to := range map[byte]byte{
		'\b': 'b',
		'\f': 'f',
		'\n': 'n',
		'\r': 'r',
		'\t': 't',
		'\\': '\\',
	} {
		EncodeMap[from] = to
	}

	// underlyingHexMap contains the string "\x00\x01\x02..." which HexMap and
	// RawHexMap then index into.
	const entryLen = 4 // len(`\xNN`)
	var underlyingHexMap bytes.Buffer
	underlyingHexMap.Grow(len(HexMap) * entryLen)

	for i := 0; i < len(HexMap); i++ {
		underlyingHexMap.WriteString(`\x`)
		writeHexDigit(&underlyingHexMap, i/16)
		writeHexDigit(&underlyingHexMap, i%16)
	}

	underlyingHexBytes := underlyingHexMap.Bytes()

	for i := range HexMap {
		start, end := i*entryLen, (i+1)*entryLen
		// Full slice expressions cap each entry at its own end. Without the
		// cap, append(HexMap[i], ...) would write into the shared buffer and
		// corrupt the entries that follow.
		HexMap[i] = underlyingHexBytes[start:end:end]
		RawHexMap[i] = underlyingHexBytes[start+2 : end : end]
	}
}

// EncodeEscapedChar is used internally to write out a character from a larger
// string that needs to be escaped to a buffer.
//
// Parameters:
//   - buf: destination buffer; the escaped form is appended to it.
//   - entireString: the string the character was taken from.
//   - currentRune: the rune to escape.
//   - currentByte: the byte to escape when currentRune is a single-byte rune.
//   - currentIdx: byte offset of the character within entireString.
//   - quoteChar: the quote byte, which is written as a backslash followed by
//     itself.
//
// The output has one of three shapes:
//   - currentRune == utf8.RuneError: the byte at currentIdx, plus the bytes
//     that follow it within the width reported by utf8.RuneLen, are written in
//     `\xNN` form (lowercase hex).
//   - single-byte rune: a backslash escape from EncodeMap, a backslash-escaped
//     quoteChar, or otherwise the `\xNN` form.
//   - any other rune: `\uXXXX` for two-byte runes and `\UXXXXXXXX` for the
//     rest (uppercase hex).
//
// It panics if currentIdx is out of range for entireString in the
// utf8.RuneError case.
func EncodeEscapedChar(
	buf *bytes.Buffer,
	entireString string,
	currentRune rune,
	currentByte byte,
	currentIdx int,
	quoteChar byte,
) {
	ln := utf8.RuneLen(currentRune)
	switch {
	case currentRune == utf8.RuneError:
		// Errors are due to invalid unicode points, so escape the bytes.
		// utf8.RuneLen(utf8.RuneError) is 3, so up to two bytes after
		// currentIdx are escaped as well, bounded by the end of the string.
		// NOTE(review): presumably callers skip the same number of input
		// bytes afterwards - confirm against the call sites.
		buf.Write(HexMap[entireString[currentIdx]])
		for ri := 1; ri < ln; ri++ {
			if currentIdx+ri < len(entireString) {
				buf.Write(HexMap[entireString[currentIdx+ri]])
			}
		}
	case ln == 1:
		// For single-byte runes, do the same as encodeSQLBytes.
		if encodedChar := EncodeMap[currentByte]; encodedChar != DontEscape {
			buf.WriteByte('\\')
			buf.WriteByte(encodedChar)
		} else if currentByte == quoteChar {
			buf.WriteByte('\\')
			buf.WriteByte(quoteChar)
		} else {
			// Escape non-printable characters.
			buf.Write(HexMap[currentByte])
		}
	default:
		writeMultibyteRuneAsHex(buf, currentRune, ln)
	}
}

// uppercaseHex maps a nibble value to its uppercase hexadecimal digit.
const uppercaseHex = `0123456789ABCDEF`

// writeMultibyteRuneAsHex is equivalent to either
// fmt.Fprintf(`\u%04X`) or fmt.Fprintf(`\U%08X`).
// We can't quite just use strconv since we need uppercase hex.
//
// The short `\u` form is used only when ln == 2; every other ln gets the
// `\U` form.
func writeMultibyteRuneAsHex(buf *bytes.Buffer, r rune, ln int) {
	// Write an all-zero placeholder first, then patch the hex digits in from
	// the right so that the leading zero padding comes for free.
	if ln == 2 {
		buf.WriteString(`\u0000`)
	} else {
		buf.WriteString(`\U00000000`)
	}
	out := buf.Bytes()
	for pos := len(out) - 1; r > 0; r >>= 4 {
		out[pos] = uppercaseHex[r&0x0f]
		pos--
	}
}

// writeHexDigit appends the lowercase hexadecimal digit for v to buf.
// v is expected to be in the range [0, 15].
func writeHexDigit(buf *bytes.Buffer, v int) {
	if v < 10 {
		buf.WriteByte('0' + byte(v))
	} else {
		buf.WriteByte('a' + byte(v-10))
	}
}

// NeedEscape returns whether the given byte needs to be escaped, that is,
// whether EncodeMap holds a backslash escape for it. Quote characters and
// bytes that are only representable in `\xNN` form are not reported.
func NeedEscape(ch byte) bool {
	return EncodeMap[ch] != DontEscape
}