github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/sql/lexbase/encode.go (about)

     1  // Copyright 2012, Google Inc. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in licenses/BSD-vitess.txt.
     4  
     5  // Portions of this file are additionally subject to the following
     6  // license and copyright.
     7  //
     8  // Copyright 2020 The Cockroach Authors.
     9  //
    10  // Use of this software is governed by the Business Source License
    11  // included in the file licenses/BSL.txt.
    12  //
    13  // As of the Change Date specified in that file, in accordance with
    14  // the Business Source License, use of this software will be governed
    15  // by the Apache License, Version 2.0, included in the file
    16  // licenses/APL.txt.
    17  
    18  // This code was derived from https://github.com/youtube/vitess.
    19  
    20  // Package lexbase contains utilities for lexing sql.
    21  package lexbase
    22  
    23  import (
    24  	"bytes"
    25  	"unicode/utf8"
    26  
    27  	"github.com/cockroachdb/cockroachdb-parser/pkg/util/stringencoding"
    28  )
    29  
    30  // EncodeFlags influence the formatting of strings and identifiers.
    31  type EncodeFlags int
    32  
    33  // HasFlags tests whether the given flags are set.
    34  func (f EncodeFlags) HasFlags(subset EncodeFlags) bool {
    35  	return f&subset == subset
    36  }
    37  
    38  const (
    39  	// EncNoFlags indicates nothing special should happen while encoding.
    40  	EncNoFlags EncodeFlags = 0
    41  
    42  	// EncBareStrings indicates that strings will be rendered without
    43  	// wrapping quotes if they contain no special characters.
    44  	EncBareStrings EncodeFlags = 1 << iota
    45  
    46  	// EncBareIdentifiers indicates that identifiers will be rendered
    47  	// without wrapping quotes.
    48  	EncBareIdentifiers
    49  
    50  	// EncFirstFreeFlagBit needs to remain unused; it is used as base
    51  	// bit offset for tree.FmtFlags.
    52  	EncFirstFreeFlagBit
    53  )
    54  
    55  // EncodeRestrictedSQLIdent writes the identifier in s to buf. The
    56  // identifier is quoted if either the flags ask for it, the identifier
    57  // contains special characters, or the identifier is a reserved SQL
    58  // keyword.
    59  func EncodeRestrictedSQLIdent(buf *bytes.Buffer, s string, flags EncodeFlags) {
    60  	if flags.HasFlags(EncBareIdentifiers) || (!isReservedKeyword(s) && IsBareIdentifier(s)) {
    61  		buf.WriteString(s)
    62  		return
    63  	}
    64  	EncodeEscapedSQLIdent(buf, s)
    65  }
    66  
    67  // EncodeUnrestrictedSQLIdent writes the identifier in s to buf.
    68  // The identifier is only quoted if the flags don't tell otherwise and
    69  // the identifier contains special characters.
    70  func EncodeUnrestrictedSQLIdent(buf *bytes.Buffer, s string, flags EncodeFlags) {
    71  	if flags.HasFlags(EncBareIdentifiers) || IsBareIdentifier(s) {
    72  		buf.WriteString(s)
    73  		return
    74  	}
    75  	EncodeEscapedSQLIdent(buf, s)
    76  }
    77  
    78  // EscapeSQLIdent ensures that the potential identifier in s is fully
    79  // quoted, so that any special character it contains is not at risk
    80  // of "spilling" in the surrounding syntax.
    81  func EscapeSQLIdent(s string) string {
    82  	var buf bytes.Buffer
    83  	EncodeEscapedSQLIdent(&buf, s)
    84  	return buf.String()
    85  }
    86  
    87  // EncodeEscapedSQLIdent writes the identifier in s to buf. The
    88  // identifier is always quoted. Double quotes inside the identifier
    89  // are escaped.
    90  func EncodeEscapedSQLIdent(buf *bytes.Buffer, s string) {
    91  	buf.WriteByte('"')
    92  	start := 0
    93  	for i, n := 0, len(s); i < n; i++ {
    94  		ch := s[i]
    95  		// The only character that requires escaping is a double quote.
    96  		if ch == '"' {
    97  			if start != i {
    98  				buf.WriteString(s[start:i])
    99  			}
   100  			start = i + 1
   101  			buf.WriteByte(ch)
   102  			buf.WriteByte(ch) // add extra copy of ch
   103  		}
   104  	}
   105  	if start < len(s) {
   106  		buf.WriteString(s[start:])
   107  	}
   108  	buf.WriteByte('"')
   109  }
   110  
   111  var mustQuoteMap = map[byte]bool{
   112  	' ': true,
   113  	',': true,
   114  	'{': true,
   115  	'}': true,
   116  }
   117  
   118  // EncodeSQLString writes a string literal to buf. All unicode and
   119  // non-printable characters are escaped.
   120  func EncodeSQLString(buf *bytes.Buffer, in string) {
   121  	EncodeSQLStringWithFlags(buf, in, EncNoFlags)
   122  }
   123  
   124  // EscapeSQLString returns an escaped SQL representation of the given
   125  // string. This is suitable for safely producing a SQL string valid
   126  // for input to the parser.
   127  func EscapeSQLString(in string) string {
   128  	var buf bytes.Buffer
   129  	EncodeSQLString(&buf, in)
   130  	return buf.String()
   131  }
   132  
// EncodeSQLStringWithFlags writes a string literal to buf. All
// unicode and non-printable characters are escaped. flags controls
// the output format: if EncBareStrings is set, the output string
// will not be wrapped in quotes if the string contains no special
// characters.
//
// Three output shapes are possible:
//   - e'...' as soon as at least one character had to be escaped;
//   - '...' when nothing was escaped and quoting is in effect;
//   - the input as-is when nothing was escaped, EncBareStrings is set
//     and no character listed in mustQuoteMap was seen.
func EncodeSQLStringWithFlags(buf *bytes.Buffer, in string, flags EncodeFlags) {
	// See http://www.postgresql.org/docs/9.4/static/sql-syntax-lexical.html
	// start is the offset of the first byte of in not yet written to buf.
	start := 0
	// escapedString becomes true once the e' prefix has been written.
	escapedString := false
	// bareStrings is cleared below if a must-quote character is found.
	bareStrings := flags.HasFlags(EncBareStrings)
	// Loop through each unicode code point.
	for i, r := range in {
		// Positions before start were already accounted for by a
		// previous escape; do not process them again.
		if i < start {
			continue
		}
		// ch is r truncated to its low 8 bits. It is only consulted for
		// the printable ASCII range below, and is also handed to
		// EncodeEscapedChar.
		ch := byte(r)
		if r >= 0x20 && r < 0x7F {
			if mustQuoteMap[ch] {
				// We have to quote this string - ignore bareStrings setting
				bareStrings = false
			}
			// Printable ASCII that needs no escaping is left in place; it
			// is copied later as part of in[start:...].
			if !stringencoding.NeedEscape(ch) && ch != '\'' {
				continue
			}
		}

		// From here on the current code point must be escaped.
		if !escapedString {
			buf.WriteString("e'") // begin e'xxx' string
			escapedString = true
		}
		// Flush the unescaped run that precedes the current code point.
		buf.WriteString(in[start:i])
		// Advance start past the current code point. utf8.RuneLen
		// returns -1 for a rune that has no valid UTF-8 encoding, in
		// which case a single byte is skipped.
		// NOTE(review): for malformed input, range yields
		// utf8.RuneError, whose RuneLen is 3, while consuming only one
		// byte. start can then move past bytes that do not belong to
		// this code point (and past len(in)). Presumably
		// EncodeEscapedChar emits those bytes itself - confirm against
		// the stringencoding package.
		ln := utf8.RuneLen(r)
		if ln < 0 {
			start = i + 1
		} else {
			start = i + ln
		}
		stringencoding.EncodeEscapedChar(buf, in, r, ch, i, '\'')
	}

	// If nothing was escaped, the opening quote has not been written
	// yet; write it unless bare output is still allowed.
	quote := !escapedString && !bareStrings
	if quote {
		buf.WriteByte('\'') // begin 'xxx' string if nothing was escaped
	}
	// Write the trailing unescaped run. The guard matters because start
	// may exceed len(in) after the last escape.
	if start < len(in) {
		buf.WriteString(in[start:])
	}
	// Close the literal for both the e'...' and the '...' forms.
	if escapedString || quote {
		buf.WriteByte('\'')
	}
}
   184  
   185  // EncodeSQLBytes encodes the SQL byte array in 'in' to buf, to a
   186  // format suitable for re-scanning. We don't use a straightforward hex
   187  // encoding here with x'...'  because the result would be less
   188  // compact. We are trading a little more time during the encoding to
   189  // have a little less bytes on the wire.
   190  func EncodeSQLBytes(buf *bytes.Buffer, in string) {
   191  	buf.WriteString("b'")
   192  	EncodeSQLBytesInner(buf, in)
   193  	buf.WriteByte('\'')
   194  }
   195  
   196  // EncodeSQLBytesInner is like EncodeSQLBytes but does not include the
   197  // outer quote delimiter and the 'b' prefix.
   198  func EncodeSQLBytesInner(buf *bytes.Buffer, in string) {
   199  	start := 0
   200  	// Loop over the bytes of the string (i.e., don't use range over unicode
   201  	// code points).
   202  	for i, n := 0, len(in); i < n; i++ {
   203  		ch := in[i]
   204  		if encodedChar := stringencoding.EncodeMap[ch]; encodedChar != stringencoding.DontEscape {
   205  			buf.WriteString(in[start:i])
   206  			buf.WriteByte('\\')
   207  			buf.WriteByte(encodedChar)
   208  			start = i + 1
   209  		} else if ch == '\'' {
   210  			// We can't just fold this into stringencoding.EncodeMap because
   211  			// stringencoding.EncodeMap is also used for strings which
   212  			// aren't quoted with single-quotes
   213  			buf.WriteString(in[start:i])
   214  			buf.WriteByte('\\')
   215  			buf.WriteByte(ch)
   216  			start = i + 1
   217  		} else if ch < 0x20 || ch >= 0x7F {
   218  			buf.WriteString(in[start:i])
   219  			// Escape non-printable characters.
   220  			buf.Write(stringencoding.HexMap[ch])
   221  			start = i + 1
   222  		}
   223  	}
   224  	buf.WriteString(in[start:])
   225  }