github.com/arnodel/golua@v0.0.0-20230215163904-e0b5347eaaa1/scanner/scanner.go (about)

     1  // Package scanner implements a tokeniser for lua.
     2  // Inspired by https://talks.golang.org/2011/lex.slide#1
     3  package scanner
     4  
     5  import (
     6  	"fmt"
     7  	"strings"
     8  	"unicode/utf8"
     9  
    10  	"github.com/arnodel/golua/token"
    11  )
    12  
    13  // Scanner holds the state of the scanner.
    14  type Scanner struct {
    15  	name             string // used only for error reports.
    16  	input            []byte // the string being scanned.
    17  	start, last, pos token.Pos
    18  	items            chan *token.Token // channel of scanned items.
    19  	state            stateFn
    20  	errorMsg         string
    21  }
    22  
// Option customises a Scanner. New applies the options it is given, in order,
// to the scanner it has just built.
type Option func(*Scanner)
    24  
    25  // Specializes in scanning a number, used in file:read("n")
    26  func ForNumber() Option {
    27  	return func(s *Scanner) {
    28  		s.state = scanNumberPrefix
    29  	}
    30  }
    31  
    32  func WithStartLine(l int) Option {
    33  	return func(s *Scanner) {
    34  		pos := token.Pos{Line: l, Column: 1}
    35  		s.start = pos
    36  		s.pos = pos
    37  	}
    38  }
    39  
    40  // New creates a new scanner for the input string.
    41  func New(name string, input []byte, opts ...Option) *Scanner {
    42  	l := &Scanner{
    43  		name:  name,
    44  		input: input,
    45  		state: scanToken,
    46  		items: make(chan *token.Token, 2), // Two items sufficient.
    47  		pos:   token.Pos{Line: 1, Column: 1},
    48  		start: token.Pos{Line: 1, Column: 1},
    49  	}
    50  	for _, opt := range opts {
    51  		opt(l)
    52  	}
    53  	return l
    54  }
    55  
// stateFn represents the state of the scanner as a function that returns the
// next state. A nil stateFn means there is nothing left to run: once the
// already emitted items have been handed out, Scan returns nil.
type stateFn func(*Scanner) stateFn
    59  
    60  // emit passes an item back to the client.
    61  func (l *Scanner) emit(tp token.Type) {
    62  	lit := l.lit()
    63  	if tp == token.INVALID {
    64  		fmt.Println("Cannot emit", string(lit))
    65  		panic("emit bails out")
    66  	}
    67  	l.items <- &token.Token{
    68  		Type: tp,
    69  		Lit:  lit,
    70  		Pos:  l.start,
    71  	}
    72  	l.start = l.pos
    73  }
    74  
    75  func (l *Scanner) lit() []byte {
    76  	return l.input[l.start.Offset:l.pos.Offset]
    77  }
    78  
    79  // next returns the next rune in the input.
    80  func (l *Scanner) next() rune {
    81  	i := l.pos.Offset
    82  	if i >= len(l.input) {
    83  		l.last = l.pos
    84  		// fmt.Println("NEXT EOF")
    85  		return -1
    86  	}
    87  	c, width := utf8.DecodeRune(l.input[i:])
    88  	l.last = l.pos
    89  	l.pos.Offset += width
    90  	i += width
    91  	if c == '\n' {
    92  		if i < len(l.input) && l.input[i] == '\r' {
    93  			l.pos.Offset++
    94  		}
    95  		l.pos.Line++
    96  		l.pos.Column = 1
    97  	} else if c == '\r' {
    98  		if i < len(l.input) && l.input[i] == '\n' {
    99  			l.pos.Offset++
   100  		}
   101  		l.pos.Line++
   102  		l.pos.Column = 1
   103  		c = '\n'
   104  	} else {
   105  		l.pos.Column++
   106  	}
   107  	// fmt.Println("NEXT", strconv.QuoteRune(c))
   108  	return c
   109  }
   110  
   111  // ignore skips over the pending input before this point.
   112  func (l *Scanner) ignore() {
   113  	l.start = l.pos
   114  	l.last = token.Pos{}
   115  }
   116  
   117  // backup steps back one rune.
   118  // Can be called only once per call of next.
   119  func (l *Scanner) backup() {
   120  	l.pos = l.last
   121  }
   122  
   123  // peek returns but does not consume
   124  // the next rune in the input.
   125  func (l *Scanner) peek() rune {
   126  	next := l.next()
   127  	l.backup()
   128  	return next
   129  }
   130  
   131  // accept consumes the next rune
   132  // if it's from the valid set.
   133  func (l *Scanner) accept(valid string) bool {
   134  	if strings.ContainsRune(valid, l.next()) {
   135  		return true
   136  	}
   137  	l.backup()
   138  	return false
   139  }
   140  
   141  func (l *Scanner) acceptRune(r rune) bool {
   142  	if l.next() == r {
   143  		return true
   144  	}
   145  	l.backup()
   146  	return false
   147  }
   148  
   149  // errorf returns an error token and terminates the scan
   150  // by passing back a nil pointer that will be the next
   151  // state, terminating l.run.
   152  func (l *Scanner) errorf(tp token.Type, format string, args ...interface{}) stateFn {
   153  	l.errorMsg = fmt.Sprintf(format, args...)
   154  	l.items <- &token.Token{
   155  		Type: tp,
   156  		Lit:  l.lit(),
   157  		Pos:  l.start,
   158  	}
   159  	return nil
   160  }
   161  
   162  // Scan returns the next item from the input (or nil)
   163  func (l *Scanner) Scan() *token.Token {
   164  	for {
   165  		select {
   166  		case item := <-l.items:
   167  			return item
   168  		default:
   169  			if l.state == nil {
   170  				return nil
   171  			}
   172  			l.state = l.state(l)
   173  		}
   174  	}
   175  }
   176  
   177  // ErrorMsg returns the current error message or an empty string if there is none.
   178  func (l *Scanner) ErrorMsg() string {
   179  	return l.errorMsg
   180  }