github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/sql/sem/tree/parse_array.go (about) 1 // Copyright 2016 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package tree 12 13 import ( 14 "bytes" 15 "strings" 16 "unicode" 17 "unicode/utf8" 18 19 "github.com/cockroachdb/cockroachdb-parser/pkg/sql/pgwire/pgcode" 20 "github.com/cockroachdb/cockroachdb-parser/pkg/sql/pgwire/pgerror" 21 "github.com/cockroachdb/cockroachdb-parser/pkg/sql/types" 22 "github.com/cockroachdb/cockroachdb-parser/pkg/util/errorutil/unimplemented" 23 ) 24 25 var enclosingError = pgerror.Newf(pgcode.InvalidTextRepresentation, "array must be enclosed in { and }") 26 var extraTextError = pgerror.Newf(pgcode.InvalidTextRepresentation, "extra text after closing right brace") 27 var nestedArraysNotSupportedError = unimplemented.NewWithIssueDetail(32552, "strcast", "nested arrays not supported") 28 var malformedError = pgerror.Newf(pgcode.InvalidTextRepresentation, "malformed array") 29 30 func isQuoteChar(ch byte) bool { 31 return ch == '"' 32 } 33 34 func isControlChar(ch byte) bool { 35 return ch == '{' || ch == '}' || ch == ',' || ch == '"' 36 } 37 38 func isElementChar(r rune) bool { 39 return r != '{' && r != '}' && r != ',' 40 } 41 42 // isSpaceInParseArray returns true if the rune is a space. To match Postgres, 43 // 0x85 and 0xA0 are not treated as whitespace. 44 func isSpaceInParseArray(r rune) bool { 45 if r != 0x85 && r != 0xA0 && unicode.IsSpace(r) { 46 return true 47 } 48 return false 49 } 50 51 var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} 52 53 // trimSpaceInParseArray returns a slice of the string s, with all leading 54 // and trailing white space removed, as defined by Postgres COPY. This is a 55 // reimplementation of strings.TrimSpace from the standard library. 56 func trimSpaceInParseArray(s string) string { 57 // Fast path for ASCII: look for the first ASCII non-space byte 58 start := 0 59 for ; start < len(s); start++ { 60 c := s[start] 61 if c >= utf8.RuneSelf { 62 // If we run into a non-ASCII byte, fall back to the 63 // slower unicode-aware method on the remaining bytes 64 return strings.TrimFunc(s[start:], isSpaceInParseArray) 65 } 66 if asciiSpace[c] == 0 { 67 break 68 } 69 } 70 71 // Now look for the first ASCII non-space byte from the end 72 stop := len(s) 73 for ; stop > start; stop-- { 74 c := s[stop-1] 75 if c >= utf8.RuneSelf { 76 return strings.TrimFunc(s[start:stop], isSpaceInParseArray) 77 } 78 if asciiSpace[c] == 0 { 79 break 80 } 81 } 82 83 // At this point s[start:stop] starts and ends with an ASCII 84 // non-space bytes, so we're done. Non-ASCII cases have already 85 // been handled above. 86 return s[start:stop] 87 } 88 89 // gobbleString advances the parser for the remainder of the current string 90 // until it sees a non-escaped termination character, as specified by 91 // isTerminatingChar, returning the resulting string, not including the 92 // termination character. 93 func (p *parseState) gobbleString(isTerminatingChar func(ch byte) bool) (out string, err error) { 94 var result bytes.Buffer 95 start := 0 96 i := 0 97 for i < len(p.s) && !isTerminatingChar(p.s[i]) { 98 // In these strings, we just encode directly the character following a 99 // '\', even if it would normally be an escape sequence. 100 if i < len(p.s) && p.s[i] == '\\' { 101 result.WriteString(p.s[start:i]) 102 i++ 103 if i < len(p.s) { 104 result.WriteByte(p.s[i]) 105 i++ 106 } 107 start = i 108 } else { 109 i++ 110 } 111 } 112 if i >= len(p.s) { 113 return "", malformedError 114 } 115 result.WriteString(p.s[start:i]) 116 p.s = p.s[i:] 117 return result.String(), nil 118 } 119 120 type parseState struct { 121 s string 122 ctx ParseContext 123 dependsOnContext bool 124 result *DArray 125 t *types.T 126 } 127 128 func (p *parseState) advance() { 129 _, l := utf8.DecodeRuneInString(p.s) 130 p.s = p.s[l:] 131 } 132 133 func (p *parseState) eatWhitespace() { 134 for isSpaceInParseArray(p.peek()) { 135 p.advance() 136 } 137 } 138 139 func (p *parseState) peek() rune { 140 r, _ := utf8.DecodeRuneInString(p.s) 141 return r 142 } 143 144 func (p *parseState) eof() bool { 145 return len(p.s) == 0 146 } 147 148 func (p *parseState) parseQuotedString() (string, error) { 149 return p.gobbleString(isQuoteChar) 150 } 151 152 func (p *parseState) parseUnquotedString() (string, error) { 153 out, err := p.gobbleString(isControlChar) 154 if err != nil { 155 return "", err 156 } 157 return trimSpaceInParseArray(out), nil 158 } 159 160 func (p *parseState) parseElement() error { 161 var next string 162 var err error 163 r := p.peek() 164 switch r { 165 case '{': 166 return nestedArraysNotSupportedError 167 case '"': 168 p.advance() 169 next, err = p.parseQuotedString() 170 if err != nil { 171 return err 172 } 173 p.advance() 174 default: 175 if !isElementChar(r) { 176 return malformedError 177 } 178 next, err = p.parseUnquotedString() 179 if err != nil { 180 return err 181 } 182 if strings.EqualFold(next, "null") { 183 return p.result.Append(DNull) 184 } 185 } 186 187 d, dependsOnContext, err := ParseAndRequireString(p.t, next, p.ctx) 188 if err != nil { 189 return err 190 } 191 if dependsOnContext { 192 p.dependsOnContext = true 193 } 194 return p.result.Append(d) 195 } 196 197 // ParseDArrayFromString parses the string-form of constructing arrays, handling 198 // cases such as `'{1,2,3}'::INT[]`. The input type t is the type of the 199 // parameter of the array to parse. 200 // 201 // The dependsOnContext return value indicates if we had to consult the 202 // ParseContext (either for the time or the local timezone). 203 func ParseDArrayFromString( 204 ctx ParseContext, s string, t *types.T, 205 ) (_ *DArray, dependsOnContext bool, _ error) { 206 ret, dependsOnContext, err := doParseDArrayFromString(ctx, s, t) 207 if err != nil { 208 return ret, false, MakeParseError(s, types.MakeArray(t), err) 209 } 210 return ret, dependsOnContext, nil 211 } 212 213 // doParseDArrayFromString does most of the work of ParseDArrayFromString, 214 // except the error it returns isn't prettified as a parsing error. 215 // 216 // The dependsOnContext return value indicates if we had to consult the 217 // ParseContext (either for the time or the local timezone). 218 func doParseDArrayFromString( 219 ctx ParseContext, s string, t *types.T, 220 ) (_ *DArray, dependsOnContext bool, _ error) { 221 parser := parseState{ 222 s: s, 223 ctx: ctx, 224 result: NewDArray(t), 225 t: t, 226 } 227 228 parser.eatWhitespace() 229 if parser.peek() != '{' { 230 return nil, false, enclosingError 231 } 232 parser.advance() 233 parser.eatWhitespace() 234 if parser.peek() != '}' { 235 if err := parser.parseElement(); err != nil { 236 return nil, false, err 237 } 238 parser.eatWhitespace() 239 for string(parser.peek()) == t.Delimiter() { 240 parser.advance() 241 parser.eatWhitespace() 242 if err := parser.parseElement(); err != nil { 243 return nil, false, err 244 } 245 } 246 } 247 parser.eatWhitespace() 248 if parser.eof() { 249 return nil, false, enclosingError 250 } 251 if parser.peek() != '}' { 252 return nil, false, malformedError 253 } 254 parser.advance() 255 parser.eatWhitespace() 256 if !parser.eof() { 257 return nil, false, extraTextError 258 } 259 260 return parser.result, parser.dependsOnContext, nil 261 }