github.com/authzed/spicedb@v1.32.1-0.20240520085336-ebda56537386/pkg/schemadsl/lexer/lex.go (about)

     1  // Based on design first introduced in: http://blog.golang.org/two-go-talks-lexical-scanning-in-go-and
     2  // Portions copied and modified from: https://github.com/golang/go/blob/master/src/text/template/parse/lex.go
     3  
     4  package lexer
     5  
     6  import (
     7  	"fmt"
     8  	"strings"
     9  	"sync"
    10  	"unicode/utf8"
    11  
    12  	"github.com/authzed/spicedb/pkg/schemadsl/input"
    13  )
    14  
// EOFRUNE is the sentinel rune returned by next when the input has been
// fully consumed.
const EOFRUNE = -1
    16  
    17  // createLexer creates a new scanner for the input string.
    18  func createLexer(source input.Source, input string) *Lexer {
    19  	l := &Lexer{
    20  		source: source,
    21  		input:  input,
    22  		tokens: make(chan Lexeme),
    23  		closed: make(chan struct{}),
    24  	}
    25  	go l.run()
    26  	return l
    27  }
    28  
    29  // run runs the state machine for the lexer.
    30  func (l *Lexer) run() {
    31  	defer func() {
    32  		close(l.tokens)
    33  	}()
    34  	l.withLock(func() {
    35  		l.state = lexSource
    36  	})
    37  	var state stateFn
    38  	for {
    39  		l.withRLock(func() {
    40  			state = l.state
    41  		})
    42  		if state == nil {
    43  			break
    44  		}
    45  		next := state(l)
    46  		l.withLock(func() {
    47  			l.state = next
    48  		})
    49  	}
    50  }
    51  
    52  // Close stops the lexer from running.
    53  func (l *Lexer) Close() {
    54  	close(l.closed)
    55  	l.withLock(func() {
    56  		l.state = nil
    57  	})
    58  }
    59  
// withLock runs f protected by l's write lock. The unlock is deferred so
// the lock is released even if f panics.
func (l *Lexer) withLock(f func()) {
	l.Lock()
	defer l.Unlock()
	f()
}
    66  
// withRLock runs f protected by l's read lock. The unlock is deferred so
// the lock is released even if f panics.
func (l *Lexer) withRLock(f func()) {
	l.RLock()
	defer l.RUnlock()
	f()
}
    73  
// Lexeme represents a token returned from scanning the contents of a file.
type Lexeme struct {
	Kind     TokenType          // The type of this lexeme.
	Position input.BytePosition // The starting position of this token in the input string.
	Value    string             // The textual value of this token.
	Error    string             // The error associated with the lexeme, if any. Only set for TokenTypeError.
}
    81  
// stateFn represents the state of the scanner as a function that returns the
// next state; returning nil terminates the run loop.
type stateFn func(*Lexer) stateFn
    84  
// Lexer holds the state of the scanner. The embedded RWMutex guards the
// state field, which is read and written concurrently by run and Close.
type Lexer struct {
	sync.RWMutex
	source              input.Source       // the name of the input; used only for error reports
	input               string             // the string being scanned
	state               stateFn            // the next lexing function to enter (guarded by the RWMutex)
	pos                 input.BytePosition // current position in the input
	start               input.BytePosition // start position of this token
	width               input.BytePosition // width of last rune read from input (0 after EOFRUNE)
	lastPos             input.BytePosition // position of most recent token returned by nextToken
	tokens              chan Lexeme        // channel of scanned lexemes
	currentToken        Lexeme             // The current token if any
	lastNonIgnoredToken Lexeme             // The last token returned that is non-whitespace and non-comment
	closed              chan struct{}      // Holds the closed channel
}
   100  
// nextToken returns the next token from the input, blocking until one is
// available. If the tokens channel has been closed (lexing finished), the
// receive yields a zero-valued Lexeme.
func (l *Lexer) nextToken() Lexeme {
	token := <-l.tokens
	l.lastPos = token.Position
	return token
}
   107  
   108  // next returns the next rune in the input.
   109  func (l *Lexer) next() rune {
   110  	if int(l.pos) >= len(l.input) {
   111  		l.width = 0
   112  		return EOFRUNE
   113  	}
   114  	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
   115  	l.width = input.BytePosition(w)
   116  	l.pos += l.width
   117  	return r
   118  }
   119  
   120  // peek returns but does not consume the next rune in the input.
   121  func (l *Lexer) peek() rune {
   122  	r := l.next()
   123  	l.backup()
   124  	return r
   125  }
   126  
// backup steps back one rune. Can only be called once per call of next,
// since it subtracts the width recorded by the most recent next (which is
// zero after EOFRUNE, making backup a no-op in that case).
func (l *Lexer) backup() {
	l.pos -= l.width
}
   131  
// value returns the current value of the token in the lexer: the input
// slice from the token's start position to the current position.
func (l *Lexer) value() string {
	return l.input[l.start:l.pos]
}
   136  
// emit passes a token back to the client over the tokens channel.
//
// Whitespace and comment tokens are still sent but are excluded from
// lastNonIgnoredToken. The send is raced against the closed channel so that
// a lexer which has been Close()d does not block forever on an abandoned
// tokens channel; on that path the token is dropped and start is not
// advanced.
func (l *Lexer) emit(t TokenType) {
	currentToken := Lexeme{t, l.start, l.value(), ""}

	if t != TokenTypeWhitespace && t != TokenTypeMultilineComment && t != TokenTypeSinglelineComment {
		l.lastNonIgnoredToken = currentToken
	}

	select {
	case l.tokens <- currentToken:
		l.currentToken = currentToken
		// The next token begins where this one ended.
		l.start = l.pos

	case <-l.closed:
		return
	}
}
   154  
   155  // errorf returns an error token and terminates the scan by passing
   156  // back a nil pointer that will be the next state, terminating l.nexttoken.
   157  func (l *Lexer) errorf(currentRune rune, format string, args ...interface{}) stateFn {
   158  	l.tokens <- Lexeme{TokenTypeError, l.start, string(currentRune), fmt.Sprintf(format, args...)}
   159  	return nil
   160  }
   161  
   162  // peekValue looks forward for the given value string. If found, returns true.
   163  func (l *Lexer) peekValue(value string) bool {
   164  	for index, runeValue := range value {
   165  		r := l.next()
   166  		if r != runeValue {
   167  			for j := 0; j <= index; j++ {
   168  				l.backup()
   169  			}
   170  			return false
   171  		}
   172  	}
   173  
   174  	for i := 0; i < len(value); i++ {
   175  		l.backup()
   176  	}
   177  
   178  	return true
   179  }
   180  
   181  // accept consumes the next rune if it's from the valid set.
   182  func (l *Lexer) accept(valid string) bool {
   183  	if nextRune := l.next(); strings.ContainsRune(valid, nextRune) {
   184  		return true
   185  	}
   186  	l.backup()
   187  	return false
   188  }
   189  
   190  // acceptString consumes the full given string, if the next tokens in the stream.
   191  func (l *Lexer) acceptString(value string) bool {
   192  	for index, runeValue := range value {
   193  		if l.next() != runeValue {
   194  			for i := 0; i <= index; i++ {
   195  				l.backup()
   196  			}
   197  
   198  			return false
   199  		}
   200  	}
   201  
   202  	return true
   203  }
   204  
// lexSource scans until EOFRUNE. It is the initial (and recurring) state of
// the state machine, delegating to lexerEntrypoint (defined elsewhere in
// this package).
func lexSource(l *Lexer) stateFn {
	return lexerEntrypoint(l)
}
   209  
// checkFn returns whether a rune matches for continue looping; a non-nil
// error aborts the scan with an error token.
type checkFn func(r rune) (bool, error)
   212  
   213  func buildLexUntil(findType TokenType, checker checkFn) stateFn {
   214  	return func(l *Lexer) stateFn {
   215  		for {
   216  			r := l.next()
   217  			isValid, err := checker(r)
   218  			if err != nil {
   219  				return l.errorf(r, "%v", err)
   220  			}
   221  			if !isValid {
   222  				l.backup()
   223  				break
   224  			}
   225  		}
   226  
   227  		l.emit(findType)
   228  		return lexSource
   229  	}
   230  }