github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/sql/sem/tree/parse_array.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package tree
    12  
    13  import (
    14  	"bytes"
    15  	"strings"
    16  	"unicode"
    17  	"unicode/utf8"
    18  
    19  	"github.com/cockroachdb/cockroachdb-parser/pkg/sql/pgwire/pgcode"
    20  	"github.com/cockroachdb/cockroachdb-parser/pkg/sql/pgwire/pgerror"
    21  	"github.com/cockroachdb/cockroachdb-parser/pkg/sql/types"
    22  	"github.com/cockroachdb/cockroachdb-parser/pkg/util/errorutil/unimplemented"
    23  )
    24  
    25  var enclosingError = pgerror.Newf(pgcode.InvalidTextRepresentation, "array must be enclosed in { and }")
    26  var extraTextError = pgerror.Newf(pgcode.InvalidTextRepresentation, "extra text after closing right brace")
    27  var nestedArraysNotSupportedError = unimplemented.NewWithIssueDetail(32552, "strcast", "nested arrays not supported")
    28  var malformedError = pgerror.Newf(pgcode.InvalidTextRepresentation, "malformed array")
    29  
    30  func isQuoteChar(ch byte) bool {
    31  	return ch == '"'
    32  }
    33  
    34  func isControlChar(ch byte) bool {
    35  	return ch == '{' || ch == '}' || ch == ',' || ch == '"'
    36  }
    37  
    38  func isElementChar(r rune) bool {
    39  	return r != '{' && r != '}' && r != ','
    40  }
    41  
    42  // isSpaceInParseArray returns true if the rune is a space. To match Postgres,
    43  // 0x85 and 0xA0 are not treated as whitespace.
    44  func isSpaceInParseArray(r rune) bool {
    45  	if r != 0x85 && r != 0xA0 && unicode.IsSpace(r) {
    46  		return true
    47  	}
    48  	return false
    49  }
    50  
    51  var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
    52  
    53  // trimSpaceInParseArray returns a slice of the string s, with all leading
    54  // and trailing white space removed, as defined by Postgres COPY. This is a
    55  // reimplementation of strings.TrimSpace from the standard library.
    56  func trimSpaceInParseArray(s string) string {
    57  	// Fast path for ASCII: look for the first ASCII non-space byte
    58  	start := 0
    59  	for ; start < len(s); start++ {
    60  		c := s[start]
    61  		if c >= utf8.RuneSelf {
    62  			// If we run into a non-ASCII byte, fall back to the
    63  			// slower unicode-aware method on the remaining bytes
    64  			return strings.TrimFunc(s[start:], isSpaceInParseArray)
    65  		}
    66  		if asciiSpace[c] == 0 {
    67  			break
    68  		}
    69  	}
    70  
    71  	// Now look for the first ASCII non-space byte from the end
    72  	stop := len(s)
    73  	for ; stop > start; stop-- {
    74  		c := s[stop-1]
    75  		if c >= utf8.RuneSelf {
    76  			return strings.TrimFunc(s[start:stop], isSpaceInParseArray)
    77  		}
    78  		if asciiSpace[c] == 0 {
    79  			break
    80  		}
    81  	}
    82  
    83  	// At this point s[start:stop] starts and ends with an ASCII
    84  	// non-space bytes, so we're done. Non-ASCII cases have already
    85  	// been handled above.
    86  	return s[start:stop]
    87  }
    88  
    89  // gobbleString advances the parser for the remainder of the current string
    90  // until it sees a non-escaped termination character, as specified by
    91  // isTerminatingChar, returning the resulting string, not including the
    92  // termination character.
    93  func (p *parseState) gobbleString(isTerminatingChar func(ch byte) bool) (out string, err error) {
    94  	var result bytes.Buffer
    95  	start := 0
    96  	i := 0
    97  	for i < len(p.s) && !isTerminatingChar(p.s[i]) {
    98  		// In these strings, we just encode directly the character following a
    99  		// '\', even if it would normally be an escape sequence.
   100  		if i < len(p.s) && p.s[i] == '\\' {
   101  			result.WriteString(p.s[start:i])
   102  			i++
   103  			if i < len(p.s) {
   104  				result.WriteByte(p.s[i])
   105  				i++
   106  			}
   107  			start = i
   108  		} else {
   109  			i++
   110  		}
   111  	}
   112  	if i >= len(p.s) {
   113  		return "", malformedError
   114  	}
   115  	result.WriteString(p.s[start:i])
   116  	p.s = p.s[i:]
   117  	return result.String(), nil
   118  }
   119  
   120  type parseState struct {
   121  	s                string
   122  	ctx              ParseContext
   123  	dependsOnContext bool
   124  	result           *DArray
   125  	t                *types.T
   126  }
   127  
   128  func (p *parseState) advance() {
   129  	_, l := utf8.DecodeRuneInString(p.s)
   130  	p.s = p.s[l:]
   131  }
   132  
   133  func (p *parseState) eatWhitespace() {
   134  	for isSpaceInParseArray(p.peek()) {
   135  		p.advance()
   136  	}
   137  }
   138  
   139  func (p *parseState) peek() rune {
   140  	r, _ := utf8.DecodeRuneInString(p.s)
   141  	return r
   142  }
   143  
   144  func (p *parseState) eof() bool {
   145  	return len(p.s) == 0
   146  }
   147  
   148  func (p *parseState) parseQuotedString() (string, error) {
   149  	return p.gobbleString(isQuoteChar)
   150  }
   151  
   152  func (p *parseState) parseUnquotedString() (string, error) {
   153  	out, err := p.gobbleString(isControlChar)
   154  	if err != nil {
   155  		return "", err
   156  	}
   157  	return trimSpaceInParseArray(out), nil
   158  }
   159  
   160  func (p *parseState) parseElement() error {
   161  	var next string
   162  	var err error
   163  	r := p.peek()
   164  	switch r {
   165  	case '{':
   166  		return nestedArraysNotSupportedError
   167  	case '"':
   168  		p.advance()
   169  		next, err = p.parseQuotedString()
   170  		if err != nil {
   171  			return err
   172  		}
   173  		p.advance()
   174  	default:
   175  		if !isElementChar(r) {
   176  			return malformedError
   177  		}
   178  		next, err = p.parseUnquotedString()
   179  		if err != nil {
   180  			return err
   181  		}
   182  		if strings.EqualFold(next, "null") {
   183  			return p.result.Append(DNull)
   184  		}
   185  	}
   186  
   187  	d, dependsOnContext, err := ParseAndRequireString(p.t, next, p.ctx)
   188  	if err != nil {
   189  		return err
   190  	}
   191  	if dependsOnContext {
   192  		p.dependsOnContext = true
   193  	}
   194  	return p.result.Append(d)
   195  }
   196  
   197  // ParseDArrayFromString parses the string-form of constructing arrays, handling
   198  // cases such as `'{1,2,3}'::INT[]`. The input type t is the type of the
   199  // parameter of the array to parse.
   200  //
   201  // The dependsOnContext return value indicates if we had to consult the
   202  // ParseContext (either for the time or the local timezone).
   203  func ParseDArrayFromString(
   204  	ctx ParseContext, s string, t *types.T,
   205  ) (_ *DArray, dependsOnContext bool, _ error) {
   206  	ret, dependsOnContext, err := doParseDArrayFromString(ctx, s, t)
   207  	if err != nil {
   208  		return ret, false, MakeParseError(s, types.MakeArray(t), err)
   209  	}
   210  	return ret, dependsOnContext, nil
   211  }
   212  
   213  // doParseDArrayFromString does most of the work of ParseDArrayFromString,
   214  // except the error it returns isn't prettified as a parsing error.
   215  //
   216  // The dependsOnContext return value indicates if we had to consult the
   217  // ParseContext (either for the time or the local timezone).
   218  func doParseDArrayFromString(
   219  	ctx ParseContext, s string, t *types.T,
   220  ) (_ *DArray, dependsOnContext bool, _ error) {
   221  	parser := parseState{
   222  		s:      s,
   223  		ctx:    ctx,
   224  		result: NewDArray(t),
   225  		t:      t,
   226  	}
   227  
   228  	parser.eatWhitespace()
   229  	if parser.peek() != '{' {
   230  		return nil, false, enclosingError
   231  	}
   232  	parser.advance()
   233  	parser.eatWhitespace()
   234  	if parser.peek() != '}' {
   235  		if err := parser.parseElement(); err != nil {
   236  			return nil, false, err
   237  		}
   238  		parser.eatWhitespace()
   239  		for string(parser.peek()) == t.Delimiter() {
   240  			parser.advance()
   241  			parser.eatWhitespace()
   242  			if err := parser.parseElement(); err != nil {
   243  				return nil, false, err
   244  			}
   245  		}
   246  	}
   247  	parser.eatWhitespace()
   248  	if parser.eof() {
   249  		return nil, false, enclosingError
   250  	}
   251  	if parser.peek() != '}' {
   252  		return nil, false, malformedError
   253  	}
   254  	parser.advance()
   255  	parser.eatWhitespace()
   256  	if !parser.eof() {
   257  		return nil, false, extraTextError
   258  	}
   259  
   260  	return parser.result, parser.dependsOnContext, nil
   261  }