github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/sql/lexbase/encode.go (about) 1 // Copyright 2012, Google Inc. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in licenses/BSD-vitess.txt. 4 5 // Portions of this file are additionally subject to the following 6 // license and copyright. 7 // 8 // Copyright 2020 The Cockroach Authors. 9 // 10 // Use of this software is governed by the Business Source License 11 // included in the file licenses/BSL.txt. 12 // 13 // As of the Change Date specified in that file, in accordance with 14 // the Business Source License, use of this software will be governed 15 // by the Apache License, Version 2.0, included in the file 16 // licenses/APL.txt. 17 18 // This code was derived from https://github.com/youtube/vitess. 19 20 // Package lexbase contains utilities for lexing sql. 21 package lexbase 22 23 import ( 24 "bytes" 25 "unicode/utf8" 26 27 "github.com/cockroachdb/cockroachdb-parser/pkg/util/stringencoding" 28 ) 29 30 // EncodeFlags influence the formatting of strings and identifiers. 31 type EncodeFlags int 32 33 // HasFlags tests whether the given flags are set. 34 func (f EncodeFlags) HasFlags(subset EncodeFlags) bool { 35 return f&subset == subset 36 } 37 38 const ( 39 // EncNoFlags indicates nothing special should happen while encoding. 40 EncNoFlags EncodeFlags = 0 41 42 // EncBareStrings indicates that strings will be rendered without 43 // wrapping quotes if they contain no special characters. 44 EncBareStrings EncodeFlags = 1 << iota 45 46 // EncBareIdentifiers indicates that identifiers will be rendered 47 // without wrapping quotes. 48 EncBareIdentifiers 49 50 // EncFirstFreeFlagBit needs to remain unused; it is used as base 51 // bit offset for tree.FmtFlags. 52 EncFirstFreeFlagBit 53 ) 54 55 // EncodeRestrictedSQLIdent writes the identifier in s to buf. The 56 // identifier is quoted if either the flags ask for it, the identifier 57 // contains special characters, or the identifier is a reserved SQL 58 // keyword. 59 func EncodeRestrictedSQLIdent(buf *bytes.Buffer, s string, flags EncodeFlags) { 60 if flags.HasFlags(EncBareIdentifiers) || (!isReservedKeyword(s) && IsBareIdentifier(s)) { 61 buf.WriteString(s) 62 return 63 } 64 EncodeEscapedSQLIdent(buf, s) 65 } 66 67 // EncodeUnrestrictedSQLIdent writes the identifier in s to buf. 68 // The identifier is only quoted if the flags don't tell otherwise and 69 // the identifier contains special characters. 70 func EncodeUnrestrictedSQLIdent(buf *bytes.Buffer, s string, flags EncodeFlags) { 71 if flags.HasFlags(EncBareIdentifiers) || IsBareIdentifier(s) { 72 buf.WriteString(s) 73 return 74 } 75 EncodeEscapedSQLIdent(buf, s) 76 } 77 78 // EscapeSQLIdent ensures that the potential identifier in s is fully 79 // quoted, so that any special character it contains is not at risk 80 // of "spilling" in the surrounding syntax. 81 func EscapeSQLIdent(s string) string { 82 var buf bytes.Buffer 83 EncodeEscapedSQLIdent(&buf, s) 84 return buf.String() 85 } 86 87 // EncodeEscapedSQLIdent writes the identifier in s to buf. The 88 // identifier is always quoted. Double quotes inside the identifier 89 // are escaped. 90 func EncodeEscapedSQLIdent(buf *bytes.Buffer, s string) { 91 buf.WriteByte('"') 92 start := 0 93 for i, n := 0, len(s); i < n; i++ { 94 ch := s[i] 95 // The only character that requires escaping is a double quote. 96 if ch == '"' { 97 if start != i { 98 buf.WriteString(s[start:i]) 99 } 100 start = i + 1 101 buf.WriteByte(ch) 102 buf.WriteByte(ch) // add extra copy of ch 103 } 104 } 105 if start < len(s) { 106 buf.WriteString(s[start:]) 107 } 108 buf.WriteByte('"') 109 } 110 111 var mustQuoteMap = map[byte]bool{ 112 ' ': true, 113 ',': true, 114 '{': true, 115 '}': true, 116 } 117 118 // EncodeSQLString writes a string literal to buf. All unicode and 119 // non-printable characters are escaped. 120 func EncodeSQLString(buf *bytes.Buffer, in string) { 121 EncodeSQLStringWithFlags(buf, in, EncNoFlags) 122 } 123 124 // EscapeSQLString returns an escaped SQL representation of the given 125 // string. This is suitable for safely producing a SQL string valid 126 // for input to the parser. 127 func EscapeSQLString(in string) string { 128 var buf bytes.Buffer 129 EncodeSQLString(&buf, in) 130 return buf.String() 131 } 132 133 // EncodeSQLStringWithFlags writes a string literal to buf. All 134 // unicode and non-printable characters are escaped. flags controls 135 // the output format: if encodeBareString is set, the output string 136 // will not be wrapped in quotes if the strings contains no special 137 // characters. 138 func EncodeSQLStringWithFlags(buf *bytes.Buffer, in string, flags EncodeFlags) { 139 // See http://www.postgresql.org/docs/9.4/static/sql-syntax-lexical.html 140 start := 0 141 escapedString := false 142 bareStrings := flags.HasFlags(EncBareStrings) 143 // Loop through each unicode code point. 144 for i, r := range in { 145 if i < start { 146 continue 147 } 148 ch := byte(r) 149 if r >= 0x20 && r < 0x7F { 150 if mustQuoteMap[ch] { 151 // We have to quote this string - ignore bareStrings setting 152 bareStrings = false 153 } 154 if !stringencoding.NeedEscape(ch) && ch != '\'' { 155 continue 156 } 157 } 158 159 if !escapedString { 160 buf.WriteString("e'") // begin e'xxx' string 161 escapedString = true 162 } 163 buf.WriteString(in[start:i]) 164 ln := utf8.RuneLen(r) 165 if ln < 0 { 166 start = i + 1 167 } else { 168 start = i + ln 169 } 170 stringencoding.EncodeEscapedChar(buf, in, r, ch, i, '\'') 171 } 172 173 quote := !escapedString && !bareStrings 174 if quote { 175 buf.WriteByte('\'') // begin 'xxx' string if nothing was escaped 176 } 177 if start < len(in) { 178 buf.WriteString(in[start:]) 179 } 180 if escapedString || quote { 181 buf.WriteByte('\'') 182 } 183 } 184 185 // EncodeSQLBytes encodes the SQL byte array in 'in' to buf, to a 186 // format suitable for re-scanning. We don't use a straightforward hex 187 // encoding here with x'...' because the result would be less 188 // compact. We are trading a little more time during the encoding to 189 // have a little less bytes on the wire. 190 func EncodeSQLBytes(buf *bytes.Buffer, in string) { 191 buf.WriteString("b'") 192 EncodeSQLBytesInner(buf, in) 193 buf.WriteByte('\'') 194 } 195 196 // EncodeSQLBytesInner is like EncodeSQLBytes but does not include the 197 // outer quote delimiter and the 'b' prefix. 198 func EncodeSQLBytesInner(buf *bytes.Buffer, in string) { 199 start := 0 200 // Loop over the bytes of the string (i.e., don't use range over unicode 201 // code points). 202 for i, n := 0, len(in); i < n; i++ { 203 ch := in[i] 204 if encodedChar := stringencoding.EncodeMap[ch]; encodedChar != stringencoding.DontEscape { 205 buf.WriteString(in[start:i]) 206 buf.WriteByte('\\') 207 buf.WriteByte(encodedChar) 208 start = i + 1 209 } else if ch == '\'' { 210 // We can't just fold this into stringencoding.EncodeMap because 211 // stringencoding.EncodeMap is also used for strings which 212 // aren't quoted with single-quotes 213 buf.WriteString(in[start:i]) 214 buf.WriteByte('\\') 215 buf.WriteByte(ch) 216 start = i + 1 217 } else if ch < 0x20 || ch >= 0x7F { 218 buf.WriteString(in[start:i]) 219 // Escape non-printable characters. 220 buf.Write(stringencoding.HexMap[ch]) 221 start = i + 1 222 } 223 } 224 buf.WriteString(in[start:]) 225 }