github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/sql/sem/tree/parse_tuple.go (about) 1 // Copyright 2016 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package tree 12 13 import ( 14 "bytes" 15 "unicode" 16 "unicode/utf8" 17 18 "github.com/cockroachdb/cockroachdb-parser/pkg/sql/pgwire/pgcode" 19 "github.com/cockroachdb/cockroachdb-parser/pkg/sql/pgwire/pgerror" 20 "github.com/cockroachdb/cockroachdb-parser/pkg/sql/types" 21 "github.com/cockroachdb/errors" 22 ) 23 24 var enclosingRecordError = pgerror.Newf(pgcode.InvalidTextRepresentation, "record must be enclosed in ( and )") 25 var extraTextRecordError = pgerror.Newf(pgcode.InvalidTextRepresentation, "extra text after closing right paren") 26 var malformedRecordError = pgerror.Newf(pgcode.InvalidTextRepresentation, "malformed record literal") 27 var unsupportedRecordError = pgerror.Newf(pgcode.FeatureNotSupported, "cannot parse anonymous record type") 28 29 var isTupleControlChar = func(ch byte) bool { 30 return ch == '(' || ch == ')' || ch == ',' 31 } 32 33 var isTupleElementChar = func(r rune) bool { 34 return r != '(' && r != ')' && r != ',' 35 } 36 37 // gobbleString advances the parser for the remainder of the current string 38 // until it sees a non-escaped termination character, as specified by 39 // isTerminatingChar, returning the resulting string, not including the 40 // termination character. 41 func (p *tupleParseState) gobbleString() (out string, err error) { 42 isTerminatingChar := func(inQuote bool, ch byte) bool { 43 if inQuote { 44 return isQuoteChar(ch) 45 } 46 return isTupleControlChar(ch) 47 } 48 var result bytes.Buffer 49 start := 0 50 i := 0 51 inQuote := false 52 for i < len(p.s) && (!isTerminatingChar(inQuote, p.s[i]) || inQuote) { 53 // In these strings, we just encode directly the character following a 54 // '\', even if it would normally be an escape sequence. 55 if i < len(p.s) && p.s[i] == '\\' { 56 result.WriteString(p.s[start:i]) 57 i++ 58 if i < len(p.s) { 59 result.WriteByte(p.s[i]) 60 i++ 61 } 62 start = i 63 } else if i < len(p.s) && p.s[i] == '"' { 64 result.WriteString(p.s[start:i]) 65 i++ 66 if inQuote && i < len(p.s) && p.s[i] == '"' { 67 // If we are inQuote and the following character is also a double quote, 68 // then the two characters are treated as an escape sequence for one 69 // double quote. 70 result.WriteByte(p.s[i]) 71 i++ 72 } else { 73 // Otherwise, to match Postgres, double quotes are allowed in the middle 74 // of an unquoted string, but they are just ignored, even though the 75 // quotes do need to be balanced! 76 inQuote = !inQuote 77 } 78 79 start = i 80 } else { 81 i++ 82 } 83 } 84 if i >= len(p.s) { 85 return "", malformedRecordError 86 } 87 if inQuote { 88 return "", malformedRecordError 89 } 90 result.WriteString(p.s[start:i]) 91 p.s = p.s[i:] 92 return result.String(), nil 93 } 94 95 type tupleParseState struct { 96 s string 97 tupleIdx int 98 ctx ParseContext 99 dependsOnContext bool 100 result *DTuple 101 t *types.T 102 } 103 104 func (p *tupleParseState) advance() { 105 _, l := utf8.DecodeRuneInString(p.s) 106 p.s = p.s[l:] 107 } 108 109 func (p *tupleParseState) eatWhitespace() { 110 for unicode.IsSpace(p.peek()) { 111 p.advance() 112 } 113 } 114 115 func (p *tupleParseState) peek() rune { 116 r, _ := utf8.DecodeRuneInString(p.s) 117 return r 118 } 119 120 func (p *tupleParseState) eof() bool { 121 return len(p.s) == 0 122 } 123 124 func (p *tupleParseState) parseString() (string, error) { 125 out, err := p.gobbleString() 126 if err != nil { 127 return "", err 128 } 129 // Unlike arrays, we don't trim whitespace here. 130 return out, nil 131 } 132 133 func (p *tupleParseState) parseElement() error { 134 if p.tupleIdx >= len(p.t.TupleContents()) { 135 return errors.WithDetail(malformedRecordError, "Too many columns.") 136 } 137 var next string 138 var err error 139 r := p.peek() 140 switch r { 141 case ')', ',': 142 // NULLs are represented by an unquoted empty string. 143 p.result.D[p.tupleIdx] = DNull 144 p.tupleIdx++ 145 return nil 146 default: 147 if !isTupleElementChar(r) { 148 return malformedRecordError 149 } 150 next, err = p.parseString() 151 if err != nil { 152 return err 153 } 154 } 155 156 d, dependsOnContext, err := ParseAndRequireString( 157 p.t.TupleContents()[p.tupleIdx], 158 next, 159 p.ctx, 160 ) 161 if err != nil { 162 return err 163 } 164 if dependsOnContext { 165 p.dependsOnContext = true 166 } 167 p.result.D[p.tupleIdx] = d 168 p.tupleIdx++ 169 return nil 170 } 171 172 // ParseDTupleFromString parses the string-form of constructing tuples, handling 173 // cases such as `'(1,2,3)'::record`. The input type t is the type of the 174 // tuple to parse. 175 // 176 // The dependsOnContext return value indicates if we had to consult the 177 // ParseContext (either for the time or the local timezone). 178 func ParseDTupleFromString( 179 ctx ParseContext, s string, t *types.T, 180 ) (_ *DTuple, dependsOnContext bool, _ error) { 181 ret, dependsOnContext, err := doParseDTupleFromString(ctx, s, t) 182 if err != nil { 183 return ret, false, MakeParseError(s, t, err) 184 } 185 return ret, dependsOnContext, nil 186 } 187 188 // doParseDTupleFromString does most of the work of ParseDTupleFromString, 189 // except the error it returns isn't prettified as a parsing error. 190 // 191 // The dependsOnContext return value indicates if we had to consult the 192 // ParseContext (either for the time or the local timezone). 193 func doParseDTupleFromString( 194 ctx ParseContext, s string, t *types.T, 195 ) (_ *DTuple, dependsOnContext bool, _ error) { 196 if t.TupleContents() == nil { 197 return nil, false, errors.AssertionFailedf("not a tuple type %s", t.SQLStringForError()) 198 } 199 if t == types.AnyTuple { 200 return nil, false, unsupportedRecordError 201 } 202 parser := tupleParseState{ 203 s: s, 204 ctx: ctx, 205 result: NewDTupleWithLen(t, len(t.TupleContents())), 206 t: t, 207 } 208 209 parser.eatWhitespace() 210 if parser.peek() != '(' { 211 return nil, false, enclosingRecordError 212 } 213 parser.advance() 214 if parser.peek() != ')' || len(t.TupleContents()) > 0 { 215 if err := parser.parseElement(); err != nil { 216 return nil, false, err 217 } 218 parser.eatWhitespace() 219 for parser.peek() == ',' { 220 parser.advance() 221 if err := parser.parseElement(); err != nil { 222 return nil, false, err 223 } 224 } 225 } 226 parser.eatWhitespace() 227 if parser.eof() { 228 return nil, false, enclosingRecordError 229 } 230 if parser.peek() != ')' { 231 return nil, false, malformedRecordError 232 } 233 if parser.tupleIdx < len(parser.t.TupleContents()) { 234 return nil, false, errors.WithDetail(malformedRecordError, "Too few columns.") 235 } 236 parser.advance() 237 parser.eatWhitespace() 238 if !parser.eof() { 239 return nil, false, extraTextRecordError 240 } 241 242 return parser.result, parser.dependsOnContext, nil 243 }