github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/internal/rsg/yacc/lex.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in licenses/BSD-golang.txt. 4 5 // Portions of this file are additionally subject to the following 6 // license and copyright. 7 // 8 // Copyright 2016 The Cockroach Authors. 9 // 10 // Use of this software is governed by the Business Source License 11 // included in the file licenses/BSL.txt. 12 // 13 // As of the Change Date specified in that file, in accordance with 14 // the Business Source License, use of this software will be governed 15 // by the Apache License, Version 2.0, included in the file 16 // licenses/APL.txt. 17 18 // Copied from Go's text/template/parse package and modified for yacc. 19 20 package yacc 21 22 import ( 23 "fmt" 24 "strings" 25 "unicode" 26 "unicode/utf8" 27 ) 28 29 // item represents a token or text string returned from the scanner. 30 type item struct { 31 typ itemType // The type of this item. 32 pos Pos // The starting position, in bytes, of this item in the input string. 33 val string // The value of this item. 34 } 35 36 func (i item) String() string { 37 switch { 38 case i.typ == itemEOF: 39 return "EOF" 40 case i.typ == itemError: 41 return i.val 42 case len(i.val) > 10: 43 return fmt.Sprintf("%.10q...", i.val) 44 } 45 return fmt.Sprintf("%q", i.val) 46 } 47 48 // itemType identifies the type of lex items. 49 type itemType int 50 51 const ( 52 itemError itemType = iota // error occurred; value is text of error 53 itemEOF 54 itemComment 55 itemPct 56 itemDoublePct 57 itemIdent 58 itemColon 59 itemLiteral 60 itemExpr 61 itemPipe 62 itemNL 63 ) 64 65 const eof = -1 66 67 // stateFn represents the state of the scanner as a function that returns the next state. 68 type stateFn func(*lexer) stateFn 69 70 // lexer holds the state of the scanner. 71 type lexer struct { 72 name string // the name of the input; used only for error reports 73 input string // the string being scanned 74 state stateFn // the next lexing function to enter 75 pos Pos // current position in the input 76 start Pos // start position of this item 77 width Pos // width of last rune read from input 78 lastPos Pos // position of most recent item returned by nextItem 79 items chan item // channel of scanned items 80 } 81 82 // next returns the next rune in the input. 83 func (l *lexer) next() rune { 84 if int(l.pos) >= len(l.input) { 85 l.width = 0 86 return eof 87 } 88 r, w := utf8.DecodeRuneInString(l.input[l.pos:]) 89 l.width = Pos(w) 90 l.pos += l.width 91 return r 92 } 93 94 // peek returns but does not consume the next rune in the input. 95 func (l *lexer) peek() rune { 96 r := l.next() 97 l.backup() 98 return r 99 } 100 101 // backup steps back one rune. Can only be called once per call of next. 102 func (l *lexer) backup() { 103 l.pos -= l.width 104 } 105 106 // emit passes an item back to the client. 107 func (l *lexer) emit(t itemType) { 108 l.items <- item{t, l.start, l.input[l.start:l.pos]} 109 l.start = l.pos 110 } 111 112 // ignore skips over the pending input before this point. 113 func (l *lexer) ignore() { 114 l.start = l.pos 115 } 116 117 // lineNumber reports which line we're on, based on the position of 118 // the previous item returned by nextItem. Doing it this way 119 // means we don't have to worry about peek double counting. 120 func (l *lexer) lineNumber() int { 121 return 1 + strings.Count(l.input[:l.lastPos], "\n") 122 } 123 124 // errorf returns an error token and terminates the scan by passing 125 // back a nil pointer that will be the next state, terminating l.nextItem. 126 func (l *lexer) errorf(format string, args ...interface{}) stateFn { 127 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)} 128 return nil 129 } 130 131 // nextItem returns the next item from the input. 132 func (l *lexer) nextItem() item { 133 i := <-l.items 134 l.lastPos = i.pos 135 return i 136 } 137 138 // lex creates a new scanner for the input string. 139 func lex(name, input string) *lexer { 140 l := &lexer{ 141 name: name, 142 input: input, 143 items: make(chan item), 144 } 145 go l.run() 146 return l 147 } 148 149 // run runs the state machine for the lexer. 150 func (l *lexer) run() { 151 for l.state = lexStart; l.state != nil; { 152 l.state = l.state(l) 153 } 154 } 155 156 // state functions 157 158 func lexStart(l *lexer) stateFn { 159 Loop: 160 for { 161 switch r := l.next(); { 162 case r == '/': 163 return lexComment 164 case r == '%': 165 return lexPct 166 case r == '\n': 167 l.emit(itemNL) 168 case r == ':': 169 l.emit(itemColon) 170 case r == '|': 171 l.emit(itemPipe) 172 case r == '{': 173 return lexExpr 174 case isSpace(r): 175 l.ignore() 176 case isIdent(r): 177 return lexIdent 178 case r == '\'': 179 return lexLiteral 180 case r == eof: 181 l.emit(itemEOF) 182 break Loop 183 default: 184 return l.errorf("invalid character: %v", string(r)) 185 } 186 } 187 return nil 188 } 189 190 func lexLiteral(l *lexer) stateFn { 191 for { 192 switch l.next() { 193 case '\'': 194 l.emit(itemLiteral) 195 return lexStart 196 } 197 } 198 } 199 200 func lexExpr(l *lexer) stateFn { 201 ct := 1 202 for { 203 switch l.next() { 204 case '{': 205 ct++ 206 case '}': 207 ct-- 208 if ct == 0 { 209 l.emit(itemExpr) 210 return lexStart 211 } 212 } 213 } 214 } 215 216 func lexComment(l *lexer) stateFn { 217 switch r := l.next(); r { 218 case '/': 219 for { 220 switch l.next() { 221 case '\n': 222 l.backup() 223 l.emit(itemComment) 224 return lexStart 225 } 226 } 227 case '*': 228 for { 229 switch l.next() { 230 case '*': 231 if l.peek() == '/' { 232 l.next() 233 l.emit(itemComment) 234 return lexStart 235 } 236 } 237 } 238 default: 239 return l.errorf("expected comment: %c", r) 240 } 241 } 242 243 func lexPct(l *lexer) stateFn { 244 switch l.next() { 245 case '%': 246 l.emit(itemDoublePct) 247 return lexStart 248 case '{': 249 for { 250 switch l.next() { 251 case '%': 252 if l.peek() == '}' { 253 l.next() 254 l.emit(itemPct) 255 return lexStart 256 } 257 } 258 } 259 case 'p': 260 if l.next() != 'r' || l.next() != 'e' || l.next() != 'c' || l.next() != ' ' { 261 l.errorf("expected %%prec") 262 } 263 for { 264 switch r := l.next(); { 265 case isIdent(r): 266 // absorb 267 default: 268 l.backup() 269 l.emit(itemPct) 270 return lexStart 271 } 272 } 273 default: 274 ct := 0 275 for { 276 switch l.next() { 277 case ' ': 278 case '{': 279 ct++ 280 case '}': 281 ct-- 282 if ct == 0 { 283 l.emit(itemPct) 284 return lexStart 285 } 286 case '\n': 287 if ct == 0 { 288 l.backup() 289 l.emit(itemPct) 290 return lexStart 291 } 292 } 293 } 294 } 295 } 296 297 func lexIdent(l *lexer) stateFn { 298 for { 299 switch r := l.next(); { 300 case isIdent(r): 301 // absorb 302 default: 303 l.backup() 304 l.emit(itemIdent) 305 return lexStart 306 } 307 } 308 } 309 310 func isSpace(r rune) bool { 311 return r == ' ' || r == '\t' 312 } 313 314 func isIdent(r rune) bool { 315 return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) 316 }