github.com/arnodel/golua@v0.0.0-20230215163904-e0b5347eaaa1/scanner/scanner.go (about) 1 // Package scanner implements a tokeniser for lua. 2 // Inspired by https://talks.golang.org/2011/lex.slide#1 3 package scanner 4 5 import ( 6 "fmt" 7 "strings" 8 "unicode/utf8" 9 10 "github.com/arnodel/golua/token" 11 ) 12 13 // Scanner holds the state of the scanner. 14 type Scanner struct { 15 name string // used only for error reports. 16 input []byte // the string being scanned. 17 start, last, pos token.Pos 18 items chan *token.Token // channel of scanned items. 19 state stateFn 20 errorMsg string 21 } 22 23 type Option func(*Scanner) 24 25 // Specializes in scanning a number, used in file:read("n") 26 func ForNumber() Option { 27 return func(s *Scanner) { 28 s.state = scanNumberPrefix 29 } 30 } 31 32 func WithStartLine(l int) Option { 33 return func(s *Scanner) { 34 pos := token.Pos{Line: l, Column: 1} 35 s.start = pos 36 s.pos = pos 37 } 38 } 39 40 // New creates a new scanner for the input string. 41 func New(name string, input []byte, opts ...Option) *Scanner { 42 l := &Scanner{ 43 name: name, 44 input: input, 45 state: scanToken, 46 items: make(chan *token.Token, 2), // Two items sufficient. 47 pos: token.Pos{Line: 1, Column: 1}, 48 start: token.Pos{Line: 1, Column: 1}, 49 } 50 for _, opt := range opts { 51 opt(l) 52 } 53 return l 54 } 55 56 // stateFn represents the state of the scanner 57 // as a function that returns the next state. 58 type stateFn func(*Scanner) stateFn 59 60 // emit passes an item back to the client. 61 func (l *Scanner) emit(tp token.Type) { 62 lit := l.lit() 63 if tp == token.INVALID { 64 fmt.Println("Cannot emit", string(lit)) 65 panic("emit bails out") 66 } 67 l.items <- &token.Token{ 68 Type: tp, 69 Lit: lit, 70 Pos: l.start, 71 } 72 l.start = l.pos 73 } 74 75 func (l *Scanner) lit() []byte { 76 return l.input[l.start.Offset:l.pos.Offset] 77 } 78 79 // next returns the next rune in the input. 80 func (l *Scanner) next() rune { 81 i := l.pos.Offset 82 if i >= len(l.input) { 83 l.last = l.pos 84 // fmt.Println("NEXT EOF") 85 return -1 86 } 87 c, width := utf8.DecodeRune(l.input[i:]) 88 l.last = l.pos 89 l.pos.Offset += width 90 i += width 91 if c == '\n' { 92 if i < len(l.input) && l.input[i] == '\r' { 93 l.pos.Offset++ 94 } 95 l.pos.Line++ 96 l.pos.Column = 1 97 } else if c == '\r' { 98 if i < len(l.input) && l.input[i] == '\n' { 99 l.pos.Offset++ 100 } 101 l.pos.Line++ 102 l.pos.Column = 1 103 c = '\n' 104 } else { 105 l.pos.Column++ 106 } 107 // fmt.Println("NEXT", strconv.QuoteRune(c)) 108 return c 109 } 110 111 // ignore skips over the pending input before this point. 112 func (l *Scanner) ignore() { 113 l.start = l.pos 114 l.last = token.Pos{} 115 } 116 117 // backup steps back one rune. 118 // Can be called only once per call of next. 119 func (l *Scanner) backup() { 120 l.pos = l.last 121 } 122 123 // peek returns but does not consume 124 // the next rune in the input. 125 func (l *Scanner) peek() rune { 126 next := l.next() 127 l.backup() 128 return next 129 } 130 131 // accept consumes the next rune 132 // if it's from the valid set. 133 func (l *Scanner) accept(valid string) bool { 134 if strings.ContainsRune(valid, l.next()) { 135 return true 136 } 137 l.backup() 138 return false 139 } 140 141 func (l *Scanner) acceptRune(r rune) bool { 142 if l.next() == r { 143 return true 144 } 145 l.backup() 146 return false 147 } 148 149 // errorf returns an error token and terminates the scan 150 // by passing back a nil pointer that will be the next 151 // state, terminating l.run. 152 func (l *Scanner) errorf(tp token.Type, format string, args ...interface{}) stateFn { 153 l.errorMsg = fmt.Sprintf(format, args...) 154 l.items <- &token.Token{ 155 Type: tp, 156 Lit: l.lit(), 157 Pos: l.start, 158 } 159 return nil 160 } 161 162 // Scan returns the next item from the input (or nil) 163 func (l *Scanner) Scan() *token.Token { 164 for { 165 select { 166 case item := <-l.items: 167 return item 168 default: 169 if l.state == nil { 170 return nil 171 } 172 l.state = l.state(l) 173 } 174 } 175 } 176 177 // ErrorMsg returns the current error message or an empty string if there is none. 178 func (l *Scanner) ErrorMsg() string { 179 return l.errorMsg 180 }