github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/parser/lexer.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package parser 12 13 import ( 14 "bytes" 15 "fmt" 16 "strings" 17 18 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" 19 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" 20 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 21 "github.com/cockroachdb/cockroach/pkg/sql/types" 22 unimp "github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented" 23 "github.com/cockroachdb/errors" 24 ) 25 26 type lexer struct { 27 in string 28 // tokens contains tokens generated by the scanner. 29 tokens []sqlSymType 30 31 // The type that should be used when an INT or SERIAL is encountered. 32 nakedIntType *types.T 33 34 // lastPos is the position into the tokens slice of the last 35 // token returned by Lex(). 36 lastPos int 37 38 stmt tree.Statement 39 // numPlaceholders is 1 + the highest placeholder index encountered. 40 numPlaceholders int 41 numAnnotations tree.AnnotationIdx 42 43 lastError error 44 } 45 46 func (l *lexer) init(sql string, tokens []sqlSymType, nakedIntType *types.T) { 47 l.in = sql 48 l.tokens = tokens 49 l.lastPos = -1 50 l.stmt = nil 51 l.numPlaceholders = 0 52 l.numAnnotations = 0 53 l.lastError = nil 54 55 l.nakedIntType = nakedIntType 56 } 57 58 // cleanup is used to avoid holding on to memory unnecessarily (for the cases 59 // where we reuse a scanner). 60 func (l *lexer) cleanup() { 61 l.tokens = nil 62 l.stmt = nil 63 l.lastError = nil 64 } 65 66 // Lex lexes a token from input. 67 func (l *lexer) Lex(lval *sqlSymType) int { 68 l.lastPos++ 69 // The core lexing takes place in the scanner. Here we do a small bit of post 70 // processing of the lexical tokens so that the grammar only requires 71 // one-token lookahead despite SQL requiring multi-token lookahead in some 72 // cases. These special cases are handled below and the returned tokens are 73 // adjusted to reflect the lookahead (LA) that occurred. 74 if l.lastPos >= len(l.tokens) { 75 lval.id = 0 76 lval.pos = int32(len(l.in)) 77 lval.str = "EOF" 78 return 0 79 } 80 *lval = l.tokens[l.lastPos] 81 82 switch lval.id { 83 case NOT, WITH, AS, GENERATED: 84 nextID := int32(0) 85 if l.lastPos+1 < len(l.tokens) { 86 nextID = l.tokens[l.lastPos+1].id 87 } 88 89 // If you update these cases, update lex.lookaheadKeywords. 90 switch lval.id { 91 case AS: 92 switch nextID { 93 case OF: 94 lval.id = AS_LA 95 } 96 case NOT: 97 switch nextID { 98 case BETWEEN, IN, LIKE, ILIKE, SIMILAR: 99 lval.id = NOT_LA 100 } 101 case GENERATED: 102 switch nextID { 103 case ALWAYS: 104 lval.id = GENERATED_ALWAYS 105 } 106 107 case WITH: 108 switch nextID { 109 case TIME, ORDINALITY: 110 lval.id = WITH_LA 111 } 112 } 113 } 114 115 return int(lval.id) 116 } 117 118 func (l *lexer) lastToken() sqlSymType { 119 if l.lastPos < 0 { 120 return sqlSymType{} 121 } 122 123 if l.lastPos >= len(l.tokens) { 124 return sqlSymType{ 125 id: 0, 126 pos: int32(len(l.in)), 127 str: "EOF", 128 } 129 } 130 return l.tokens[l.lastPos] 131 } 132 133 // NewAnnotation returns a new annotation index. 134 func (l *lexer) NewAnnotation() tree.AnnotationIdx { 135 l.numAnnotations++ 136 return l.numAnnotations 137 } 138 139 // SetStmt is called from the parser when the statement is constructed. 140 func (l *lexer) SetStmt(stmt tree.Statement) { 141 l.stmt = stmt 142 } 143 144 // UpdateNumPlaceholders is called from the parser when a placeholder is constructed. 145 func (l *lexer) UpdateNumPlaceholders(p *tree.Placeholder) { 146 if n := int(p.Idx) + 1; l.numPlaceholders < n { 147 l.numPlaceholders = n 148 } 149 } 150 151 // Unimplemented wraps Error, setting lastUnimplementedError. 152 func (l *lexer) Unimplemented(feature string) { 153 l.lastError = unimp.New(feature, "this syntax") 154 l.populateErrorDetails() 155 } 156 157 // UnimplementedWithIssue wraps Error, setting lastUnimplementedError. 158 func (l *lexer) UnimplementedWithIssue(issue int) { 159 l.lastError = unimp.NewWithIssue(issue, "this syntax") 160 l.populateErrorDetails() 161 } 162 163 // UnimplementedWithIssueDetail wraps Error, setting lastUnimplementedError. 164 func (l *lexer) UnimplementedWithIssueDetail(issue int, detail string) { 165 l.lastError = unimp.NewWithIssueDetail(issue, detail, "this syntax") 166 l.populateErrorDetails() 167 } 168 169 // PurposelyUnimplemented wraps Error, setting lastUnimplementedError. 170 func (l *lexer) PurposelyUnimplemented(feature string, reason string) { 171 // We purposely do not use unimp here, as it appends hints to suggest that 172 // the error may be actively tracked as a bug. 173 l.lastError = errors.WithHint( 174 errors.WithTelemetry( 175 pgerror.Newf(pgcode.Syntax, "unimplemented: this syntax"), 176 fmt.Sprintf("sql.purposely_unimplemented.%s", feature), 177 ), 178 reason, 179 ) 180 l.populateErrorDetails() 181 } 182 183 // setErr is called from parsing action rules to register an error observed 184 // while running the action. That error becomes the actual "cause" of the 185 // syntax error. 186 func (l *lexer) setErr(err error) { 187 err = pgerror.WithCandidateCode(err, pgcode.Syntax) 188 l.lastError = err 189 l.populateErrorDetails() 190 } 191 192 func (l *lexer) Error(e string) { 193 e = strings.TrimPrefix(e, "syntax error: ") // we'll add it again below. 194 l.lastError = pgerror.WithCandidateCode(errors.Newf("%s", e), pgcode.Syntax) 195 l.populateErrorDetails() 196 } 197 198 func (l *lexer) populateErrorDetails() { 199 lastTok := l.lastToken() 200 201 if lastTok.id == ERROR { 202 // This is a tokenizer (lexical) error: the scanner 203 // will have stored the error message in the string field. 204 err := pgerror.WithCandidateCode(errors.Newf("lexical error: %s", lastTok.str), pgcode.Syntax) 205 l.lastError = errors.WithSecondaryError(err, l.lastError) 206 } else { 207 // This is a contextual error. Print the provided error message 208 // and the error context. 209 if !strings.Contains(l.lastError.Error(), "syntax error") { 210 // "syntax error" is already prepended when the yacc-generated 211 // parser encounters a parsing error. 212 l.lastError = errors.Wrap(l.lastError, "syntax error") 213 } 214 l.lastError = errors.Wrapf(l.lastError, "at or near \"%s\"", lastTok.str) 215 } 216 217 // Find the end of the line containing the last token. 218 i := strings.IndexByte(l.in[lastTok.pos:], '\n') 219 if i == -1 { 220 i = len(l.in) 221 } else { 222 i += int(lastTok.pos) 223 } 224 // Find the beginning of the line containing the last token. Note that 225 // LastIndexByte returns -1 if '\n' could not be found. 226 j := strings.LastIndexByte(l.in[:lastTok.pos], '\n') + 1 227 // Output everything up to and including the line containing the last token. 228 var buf bytes.Buffer 229 fmt.Fprintf(&buf, "source SQL:\n%s\n", l.in[:i]) 230 // Output a caret indicating where the last token starts. 231 fmt.Fprintf(&buf, "%s^", strings.Repeat(" ", int(lastTok.pos)-j)) 232 l.lastError = errors.WithDetail(l.lastError, buf.String()) 233 } 234 235 // SetHelp marks the "last error" field in the lexer to become a 236 // help text. This method is invoked in the error action of the 237 // parser, so the help text is only produced if the last token 238 // encountered was HELPTOKEN -- other cases are just syntax errors, 239 // and in that case we do not want the help text to overwrite the 240 // lastError field, which was set earlier to contain details about the 241 // syntax error. 242 func (l *lexer) SetHelp(msg HelpMessage) { 243 if l.lastError == nil { 244 l.lastError = pgerror.WithCandidateCode(errors.New("help request"), pgcode.Syntax) 245 } 246 247 if lastTok := l.lastToken(); lastTok.id == HELPTOKEN { 248 l.populateHelpMsg(msg.String()) 249 } else { 250 if msg.Command != "" { 251 l.lastError = errors.WithHintf(l.lastError, `try \h %s`, msg.Command) 252 } else { 253 l.lastError = errors.WithHintf(l.lastError, `try \hf %s`, msg.Function) 254 } 255 } 256 } 257 258 func (l *lexer) populateHelpMsg(msg string) { 259 l.lastError = errors.WithHint(errors.Wrap(l.lastError, "help token in input"), msg) 260 }