github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/parser/lexer.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package parser
    12  
    13  import (
    14  	"bytes"
    15  	"fmt"
    16  	"strings"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    22  	unimp "github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
    23  	"github.com/cockroachdb/errors"
    24  )
    25  
type lexer struct {
	// in is the original SQL input string being lexed.
	in string
	// tokens contains tokens generated by the scanner.
	tokens []sqlSymType

	// The type that should be used when an INT or SERIAL is encountered.
	nakedIntType *types.T

	// lastPos is the position into the tokens slice of the last
	// token returned by Lex().
	lastPos int

	// stmt is the statement AST produced by the parser; set via SetStmt.
	stmt tree.Statement
	// numPlaceholders is 1 + the highest placeholder index encountered.
	numPlaceholders int
	// numAnnotations is the count of annotation indexes handed out by
	// NewAnnotation so far.
	numAnnotations  tree.AnnotationIdx

	// lastError is the most recent error recorded during lexing/parsing;
	// it is enriched with position context by populateErrorDetails.
	lastError error
}
    45  
    46  func (l *lexer) init(sql string, tokens []sqlSymType, nakedIntType *types.T) {
    47  	l.in = sql
    48  	l.tokens = tokens
    49  	l.lastPos = -1
    50  	l.stmt = nil
    51  	l.numPlaceholders = 0
    52  	l.numAnnotations = 0
    53  	l.lastError = nil
    54  
    55  	l.nakedIntType = nakedIntType
    56  }
    57  
    58  // cleanup is used to avoid holding on to memory unnecessarily (for the cases
    59  // where we reuse a scanner).
    60  func (l *lexer) cleanup() {
    61  	l.tokens = nil
    62  	l.stmt = nil
    63  	l.lastError = nil
    64  }
    65  
    66  // Lex lexes a token from input.
    67  func (l *lexer) Lex(lval *sqlSymType) int {
    68  	l.lastPos++
    69  	// The core lexing takes place in the scanner. Here we do a small bit of post
    70  	// processing of the lexical tokens so that the grammar only requires
    71  	// one-token lookahead despite SQL requiring multi-token lookahead in some
    72  	// cases. These special cases are handled below and the returned tokens are
    73  	// adjusted to reflect the lookahead (LA) that occurred.
    74  	if l.lastPos >= len(l.tokens) {
    75  		lval.id = 0
    76  		lval.pos = int32(len(l.in))
    77  		lval.str = "EOF"
    78  		return 0
    79  	}
    80  	*lval = l.tokens[l.lastPos]
    81  
    82  	switch lval.id {
    83  	case NOT, WITH, AS, GENERATED:
    84  		nextID := int32(0)
    85  		if l.lastPos+1 < len(l.tokens) {
    86  			nextID = l.tokens[l.lastPos+1].id
    87  		}
    88  
    89  		// If you update these cases, update lex.lookaheadKeywords.
    90  		switch lval.id {
    91  		case AS:
    92  			switch nextID {
    93  			case OF:
    94  				lval.id = AS_LA
    95  			}
    96  		case NOT:
    97  			switch nextID {
    98  			case BETWEEN, IN, LIKE, ILIKE, SIMILAR:
    99  				lval.id = NOT_LA
   100  			}
   101  		case GENERATED:
   102  			switch nextID {
   103  			case ALWAYS:
   104  				lval.id = GENERATED_ALWAYS
   105  			}
   106  
   107  		case WITH:
   108  			switch nextID {
   109  			case TIME, ORDINALITY:
   110  				lval.id = WITH_LA
   111  			}
   112  		}
   113  	}
   114  
   115  	return int(lval.id)
   116  }
   117  
   118  func (l *lexer) lastToken() sqlSymType {
   119  	if l.lastPos < 0 {
   120  		return sqlSymType{}
   121  	}
   122  
   123  	if l.lastPos >= len(l.tokens) {
   124  		return sqlSymType{
   125  			id:  0,
   126  			pos: int32(len(l.in)),
   127  			str: "EOF",
   128  		}
   129  	}
   130  	return l.tokens[l.lastPos]
   131  }
   132  
   133  // NewAnnotation returns a new annotation index.
   134  func (l *lexer) NewAnnotation() tree.AnnotationIdx {
   135  	l.numAnnotations++
   136  	return l.numAnnotations
   137  }
   138  
// SetStmt is called from the parser when the statement is constructed.
// The stored AST is later retrieved by the caller driving the parse.
func (l *lexer) SetStmt(stmt tree.Statement) {
	l.stmt = stmt
}
   143  
   144  // UpdateNumPlaceholders is called from the parser when a placeholder is constructed.
   145  func (l *lexer) UpdateNumPlaceholders(p *tree.Placeholder) {
   146  	if n := int(p.Idx) + 1; l.numPlaceholders < n {
   147  		l.numPlaceholders = n
   148  	}
   149  }
   150  
// Unimplemented wraps Error, setting lastUnimplementedError.
// feature names the unsupported construct for telemetry purposes.
func (l *lexer) Unimplemented(feature string) {
	l.lastError = unimp.New(feature, "this syntax")
	l.populateErrorDetails()
}
   156  
// UnimplementedWithIssue wraps Error, setting lastUnimplementedError.
// issue refers to the GitHub issue tracking the missing feature.
func (l *lexer) UnimplementedWithIssue(issue int) {
	l.lastError = unimp.NewWithIssue(issue, "this syntax")
	l.populateErrorDetails()
}
   162  
// UnimplementedWithIssueDetail wraps Error, setting lastUnimplementedError.
// issue refers to the tracking GitHub issue; detail disambiguates between
// multiple unimplemented constructs covered by the same issue.
func (l *lexer) UnimplementedWithIssueDetail(issue int, detail string) {
	l.lastError = unimp.NewWithIssueDetail(issue, detail, "this syntax")
	l.populateErrorDetails()
}
   168  
   169  // PurposelyUnimplemented wraps Error, setting lastUnimplementedError.
   170  func (l *lexer) PurposelyUnimplemented(feature string, reason string) {
   171  	// We purposely do not use unimp here, as it appends hints to suggest that
   172  	// the error may be actively tracked as a bug.
   173  	l.lastError = errors.WithHint(
   174  		errors.WithTelemetry(
   175  			pgerror.Newf(pgcode.Syntax, "unimplemented: this syntax"),
   176  			fmt.Sprintf("sql.purposely_unimplemented.%s", feature),
   177  		),
   178  		reason,
   179  	)
   180  	l.populateErrorDetails()
   181  }
   182  
   183  // setErr is called from parsing action rules to register an error observed
   184  // while running the action. That error becomes the actual "cause" of the
   185  // syntax error.
   186  func (l *lexer) setErr(err error) {
   187  	err = pgerror.WithCandidateCode(err, pgcode.Syntax)
   188  	l.lastError = err
   189  	l.populateErrorDetails()
   190  }
   191  
   192  func (l *lexer) Error(e string) {
   193  	e = strings.TrimPrefix(e, "syntax error: ") // we'll add it again below.
   194  	l.lastError = pgerror.WithCandidateCode(errors.Newf("%s", e), pgcode.Syntax)
   195  	l.populateErrorDetails()
   196  }
   197  
// populateErrorDetails decorates l.lastError with the position of the last
// token: the error is wrapped with an "at or near" message and given a
// detail payload that quotes the SQL up to and including the offending
// line, with a caret marking where the token starts.
func (l *lexer) populateErrorDetails() {
	lastTok := l.lastToken()

	if lastTok.id == ERROR {
		// This is a tokenizer (lexical) error: the scanner
		// will have stored the error message in the string field.
		err := pgerror.WithCandidateCode(errors.Newf("lexical error: %s", lastTok.str), pgcode.Syntax)
		l.lastError = errors.WithSecondaryError(err, l.lastError)
	} else {
		// This is a contextual error. Print the provided error message
		// and the error context.
		if !strings.Contains(l.lastError.Error(), "syntax error") {
			// "syntax error" is already prepended when the yacc-generated
			// parser encounters a parsing error.
			l.lastError = errors.Wrap(l.lastError, "syntax error")
		}
		l.lastError = errors.Wrapf(l.lastError, "at or near \"%s\"", lastTok.str)
	}

	// Find the end of the line containing the last token.
	i := strings.IndexByte(l.in[lastTok.pos:], '\n')
	if i == -1 {
		i = len(l.in)
	} else {
		// IndexByte searched a suffix of l.in; rebase the result to an
		// absolute offset into l.in.
		i += int(lastTok.pos)
	}
	// Find the beginning of the line containing the last token. Note that
	// LastIndexByte returns -1 if '\n' could not be found.
	j := strings.LastIndexByte(l.in[:lastTok.pos], '\n') + 1
	// Output everything up to and including the line containing the last token.
	var buf bytes.Buffer
	fmt.Fprintf(&buf, "source SQL:\n%s\n", l.in[:i])
	// Output a caret indicating where the last token starts.
	fmt.Fprintf(&buf, "%s^", strings.Repeat(" ", int(lastTok.pos)-j))
	l.lastError = errors.WithDetail(l.lastError, buf.String())
}
   234  
   235  // SetHelp marks the "last error" field in the lexer to become a
   236  // help text. This method is invoked in the error action of the
   237  // parser, so the help text is only produced if the last token
   238  // encountered was HELPTOKEN -- other cases are just syntax errors,
   239  // and in that case we do not want the help text to overwrite the
   240  // lastError field, which was set earlier to contain details about the
   241  // syntax error.
   242  func (l *lexer) SetHelp(msg HelpMessage) {
   243  	if l.lastError == nil {
   244  		l.lastError = pgerror.WithCandidateCode(errors.New("help request"), pgcode.Syntax)
   245  	}
   246  
   247  	if lastTok := l.lastToken(); lastTok.id == HELPTOKEN {
   248  		l.populateHelpMsg(msg.String())
   249  	} else {
   250  		if msg.Command != "" {
   251  			l.lastError = errors.WithHintf(l.lastError, `try \h %s`, msg.Command)
   252  		} else {
   253  			l.lastError = errors.WithHintf(l.lastError, `try \hf %s`, msg.Function)
   254  		}
   255  	}
   256  }
   257  
   258  func (l *lexer) populateHelpMsg(msg string) {
   259  	l.lastError = errors.WithHint(errors.Wrap(l.lastError, "help token in input"), msg)
   260  }