github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/sql/sem/tree/parse_tuple.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package tree
    12  
    13  import (
    14  	"bytes"
    15  	"unicode"
    16  	"unicode/utf8"
    17  
    18  	"github.com/cockroachdb/cockroachdb-parser/pkg/sql/pgwire/pgcode"
    19  	"github.com/cockroachdb/cockroachdb-parser/pkg/sql/pgwire/pgerror"
    20  	"github.com/cockroachdb/cockroachdb-parser/pkg/sql/types"
    21  	"github.com/cockroachdb/errors"
    22  )
    23  
    24  var enclosingRecordError = pgerror.Newf(pgcode.InvalidTextRepresentation, "record must be enclosed in ( and )")
    25  var extraTextRecordError = pgerror.Newf(pgcode.InvalidTextRepresentation, "extra text after closing right paren")
    26  var malformedRecordError = pgerror.Newf(pgcode.InvalidTextRepresentation, "malformed record literal")
    27  var unsupportedRecordError = pgerror.Newf(pgcode.FeatureNotSupported, "cannot parse anonymous record type")
    28  
    29  var isTupleControlChar = func(ch byte) bool {
    30  	return ch == '(' || ch == ')' || ch == ','
    31  }
    32  
    33  var isTupleElementChar = func(r rune) bool {
    34  	return r != '(' && r != ')' && r != ','
    35  }
    36  
    37  // gobbleString advances the parser for the remainder of the current string
    38  // until it sees a non-escaped termination character, as specified by
    39  // isTerminatingChar, returning the resulting string, not including the
    40  // termination character.
    41  func (p *tupleParseState) gobbleString() (out string, err error) {
    42  	isTerminatingChar := func(inQuote bool, ch byte) bool {
    43  		if inQuote {
    44  			return isQuoteChar(ch)
    45  		}
    46  		return isTupleControlChar(ch)
    47  	}
    48  	var result bytes.Buffer
    49  	start := 0
    50  	i := 0
    51  	inQuote := false
    52  	for i < len(p.s) && (!isTerminatingChar(inQuote, p.s[i]) || inQuote) {
    53  		// In these strings, we just encode directly the character following a
    54  		// '\', even if it would normally be an escape sequence.
    55  		if i < len(p.s) && p.s[i] == '\\' {
    56  			result.WriteString(p.s[start:i])
    57  			i++
    58  			if i < len(p.s) {
    59  				result.WriteByte(p.s[i])
    60  				i++
    61  			}
    62  			start = i
    63  		} else if i < len(p.s) && p.s[i] == '"' {
    64  			result.WriteString(p.s[start:i])
    65  			i++
    66  			if inQuote && i < len(p.s) && p.s[i] == '"' {
    67  				// If we are inQuote and the following character is also a double quote,
    68  				// then the two characters are treated as an escape sequence for one
    69  				// double quote.
    70  				result.WriteByte(p.s[i])
    71  				i++
    72  			} else {
    73  				// Otherwise, to match Postgres, double quotes are allowed in the middle
    74  				// of an unquoted string, but they are just ignored, even though the
    75  				// quotes do need to be balanced!
    76  				inQuote = !inQuote
    77  			}
    78  
    79  			start = i
    80  		} else {
    81  			i++
    82  		}
    83  	}
    84  	if i >= len(p.s) {
    85  		return "", malformedRecordError
    86  	}
    87  	if inQuote {
    88  		return "", malformedRecordError
    89  	}
    90  	result.WriteString(p.s[start:i])
    91  	p.s = p.s[i:]
    92  	return result.String(), nil
    93  }
    94  
// tupleParseState holds the working state used while parsing a single record
// literal such as `(1,2,3)` into a DTuple of type t.
type tupleParseState struct {
	s                string       // input that has not been consumed yet
	tupleIdx         int          // index of the next element to fill in result.D
	ctx              ParseContext // handed to ParseAndRequireString for each element
	dependsOnContext bool         // set once any element's parse reports a context dependency
	result           *DTuple      // tuple being populated
	t                *types.T     // tuple type; TupleContents gives the element types
}
   103  
   104  func (p *tupleParseState) advance() {
   105  	_, l := utf8.DecodeRuneInString(p.s)
   106  	p.s = p.s[l:]
   107  }
   108  
   109  func (p *tupleParseState) eatWhitespace() {
   110  	for unicode.IsSpace(p.peek()) {
   111  		p.advance()
   112  	}
   113  }
   114  
   115  func (p *tupleParseState) peek() rune {
   116  	r, _ := utf8.DecodeRuneInString(p.s)
   117  	return r
   118  }
   119  
   120  func (p *tupleParseState) eof() bool {
   121  	return len(p.s) == 0
   122  }
   123  
   124  func (p *tupleParseState) parseString() (string, error) {
   125  	out, err := p.gobbleString()
   126  	if err != nil {
   127  		return "", err
   128  	}
   129  	// Unlike arrays, we don't trim whitespace here.
   130  	return out, nil
   131  }
   132  
   133  func (p *tupleParseState) parseElement() error {
   134  	if p.tupleIdx >= len(p.t.TupleContents()) {
   135  		return errors.WithDetail(malformedRecordError, "Too many columns.")
   136  	}
   137  	var next string
   138  	var err error
   139  	r := p.peek()
   140  	switch r {
   141  	case ')', ',':
   142  		// NULLs are represented by an unquoted empty string.
   143  		p.result.D[p.tupleIdx] = DNull
   144  		p.tupleIdx++
   145  		return nil
   146  	default:
   147  		if !isTupleElementChar(r) {
   148  			return malformedRecordError
   149  		}
   150  		next, err = p.parseString()
   151  		if err != nil {
   152  			return err
   153  		}
   154  	}
   155  
   156  	d, dependsOnContext, err := ParseAndRequireString(
   157  		p.t.TupleContents()[p.tupleIdx],
   158  		next,
   159  		p.ctx,
   160  	)
   161  	if err != nil {
   162  		return err
   163  	}
   164  	if dependsOnContext {
   165  		p.dependsOnContext = true
   166  	}
   167  	p.result.D[p.tupleIdx] = d
   168  	p.tupleIdx++
   169  	return nil
   170  }
   171  
   172  // ParseDTupleFromString parses the string-form of constructing tuples, handling
   173  // cases such as `'(1,2,3)'::record`. The input type t is the type of the
   174  // tuple to parse.
   175  //
   176  // The dependsOnContext return value indicates if we had to consult the
   177  // ParseContext (either for the time or the local timezone).
   178  func ParseDTupleFromString(
   179  	ctx ParseContext, s string, t *types.T,
   180  ) (_ *DTuple, dependsOnContext bool, _ error) {
   181  	ret, dependsOnContext, err := doParseDTupleFromString(ctx, s, t)
   182  	if err != nil {
   183  		return ret, false, MakeParseError(s, t, err)
   184  	}
   185  	return ret, dependsOnContext, nil
   186  }
   187  
   188  // doParseDTupleFromString does most of the work of ParseDTupleFromString,
   189  // except the error it returns isn't prettified as a parsing error.
   190  //
   191  // The dependsOnContext return value indicates if we had to consult the
   192  // ParseContext (either for the time or the local timezone).
   193  func doParseDTupleFromString(
   194  	ctx ParseContext, s string, t *types.T,
   195  ) (_ *DTuple, dependsOnContext bool, _ error) {
   196  	if t.TupleContents() == nil {
   197  		return nil, false, errors.AssertionFailedf("not a tuple type %s", t.SQLStringForError())
   198  	}
   199  	if t == types.AnyTuple {
   200  		return nil, false, unsupportedRecordError
   201  	}
   202  	parser := tupleParseState{
   203  		s:      s,
   204  		ctx:    ctx,
   205  		result: NewDTupleWithLen(t, len(t.TupleContents())),
   206  		t:      t,
   207  	}
   208  
   209  	parser.eatWhitespace()
   210  	if parser.peek() != '(' {
   211  		return nil, false, enclosingRecordError
   212  	}
   213  	parser.advance()
   214  	if parser.peek() != ')' || len(t.TupleContents()) > 0 {
   215  		if err := parser.parseElement(); err != nil {
   216  			return nil, false, err
   217  		}
   218  		parser.eatWhitespace()
   219  		for parser.peek() == ',' {
   220  			parser.advance()
   221  			if err := parser.parseElement(); err != nil {
   222  				return nil, false, err
   223  			}
   224  		}
   225  	}
   226  	parser.eatWhitespace()
   227  	if parser.eof() {
   228  		return nil, false, enclosingRecordError
   229  	}
   230  	if parser.peek() != ')' {
   231  		return nil, false, malformedRecordError
   232  	}
   233  	if parser.tupleIdx < len(parser.t.TupleContents()) {
   234  		return nil, false, errors.WithDetail(malformedRecordError, "Too few columns.")
   235  	}
   236  	parser.advance()
   237  	parser.eatWhitespace()
   238  	if !parser.eof() {
   239  		return nil, false, extraTextRecordError
   240  	}
   241  
   242  	return parser.result, parser.dependsOnContext, nil
   243  }