github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/util/stringencoding/string_encoding.go (about) 1 // Copyright 2012, Google Inc. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in licenses/BSD-vitess.txt. 4 5 // Portions of this file are additionally subject to the following 6 // license and copyright. 7 // 8 // Copyright 2017 The Cockroach Authors. 9 // 10 // Use of this software is governed by the Business Source License 11 // included in the file licenses/BSL.txt. 12 // 13 // As of the Change Date specified in that file, in accordance with 14 // the Business Source License, use of this software will be governed 15 // by the Apache License, Version 2.0, included in the file 16 // licenses/APL.txt. 17 18 // This code was derived from https://github.com/youtube/vitess. 19 20 package stringencoding 21 22 import ( 23 "bytes" 24 "fmt" 25 "unicode/utf8" 26 ) 27 28 // This is its own package so it can be shared among packages that parser 29 // depends on. 30 31 var ( 32 // DontEscape is a sentinel value for characters that don't need to be escaped. 33 DontEscape = byte(255) 34 // EncodeMap specifies how to escape binary data with '\'. 35 EncodeMap [256]byte 36 // HexMap is a mapping from each byte to the `\x%%` hex form as a []byte. 37 HexMap [256][]byte 38 // RawHexMap is a mapping from each byte to the `%%` hex form as a []byte. 39 RawHexMap [256][]byte 40 ) 41 42 func init() { 43 encodeRef := map[byte]byte{ 44 '\b': 'b', 45 '\f': 'f', 46 '\n': 'n', 47 '\r': 'r', 48 '\t': 't', 49 '\\': '\\', 50 } 51 52 for i := range EncodeMap { 53 EncodeMap[i] = DontEscape 54 } 55 for i := range EncodeMap { 56 if to, ok := encodeRef[byte(i)]; ok { 57 EncodeMap[byte(i)] = to 58 } 59 } 60 61 // underlyingHexMap contains the string "\x00\x01\x02..." which HexMap and 62 // RawHexMap then index into. 63 var underlyingHexMap bytes.Buffer 64 underlyingHexMap.Grow(1024) 65 66 for i := 0; i < 256; i++ { 67 underlyingHexMap.WriteString("\\x") 68 writeHexDigit(&underlyingHexMap, i/16) 69 writeHexDigit(&underlyingHexMap, i%16) 70 } 71 72 underlyingHexBytes := underlyingHexMap.Bytes() 73 74 for i := 0; i < 256; i++ { 75 HexMap[i] = underlyingHexBytes[i*4 : i*4+4] 76 RawHexMap[i] = underlyingHexBytes[i*4+2 : i*4+4] 77 } 78 } 79 80 // EncodeEscapedChar is used internally to write out a character from a larger 81 // string that needs to be escaped to a buffer. 82 func EncodeEscapedChar( 83 buf *bytes.Buffer, 84 entireString string, 85 currentRune rune, 86 currentByte byte, 87 currentIdx int, 88 quoteChar byte, 89 ) { 90 ln := utf8.RuneLen(currentRune) 91 if currentRune == utf8.RuneError { 92 // Errors are due to invalid unicode points, so escape the bytes. 93 // Make sure this is run at least once in case ln == -1. 94 buf.Write(HexMap[entireString[currentIdx]]) 95 for ri := 1; ri < ln; ri++ { 96 if currentIdx+ri < len(entireString) { 97 buf.Write(HexMap[entireString[currentIdx+ri]]) 98 } 99 } 100 } else if ln == 1 { 101 // For single-byte runes, do the same as encodeSQLBytes. 102 if encodedChar := EncodeMap[currentByte]; encodedChar != DontEscape { 103 buf.WriteByte('\\') 104 buf.WriteByte(encodedChar) 105 } else if currentByte == quoteChar { 106 buf.WriteByte('\\') 107 buf.WriteByte(quoteChar) 108 } else { 109 // Escape non-printable characters. 110 buf.Write(HexMap[currentByte]) 111 } 112 } else if ln == 2 { 113 // For multi-byte runes, print them based on their width. 114 fmt.Fprintf(buf, `\u%04X`, currentRune) 115 } else { 116 fmt.Fprintf(buf, `\U%08X`, currentRune) 117 } 118 } 119 120 func writeHexDigit(buf *bytes.Buffer, v int) { 121 if v < 10 { 122 buf.WriteByte('0' + byte(v)) 123 } else { 124 buf.WriteByte('a' + byte(v-10)) 125 } 126 } 127 128 // NeedEscape returns whether the given byte needs to be escaped. 129 func NeedEscape(ch byte) bool { 130 return EncodeMap[ch] != DontEscape 131 }