github.com/amarpal/go-tools@v0.0.0-20240422043104-40142f59f616/pattern/lexer.go (about)

     1  package pattern
     2  
     3  import (
     4  	"fmt"
     5  	"go/token"
     6  	"unicode"
     7  	"unicode/utf8"
     8  )
     9  
    10  type lexer struct {
    11  	f *token.File
    12  
    13  	input string
    14  	start int
    15  	pos   int
    16  	width int
    17  	items chan item
    18  }
    19  
    20  type itemType int
    21  
    22  const eof = -1
    23  
    24  const (
    25  	itemError itemType = iota
    26  	itemLeftParen
    27  	itemRightParen
    28  	itemLeftBracket
    29  	itemRightBracket
    30  	itemTypeName
    31  	itemVariable
    32  	itemAt
    33  	itemColon
    34  	itemBlank
    35  	itemString
    36  	itemEOF
    37  )
    38  
    39  func (typ itemType) String() string {
    40  	switch typ {
    41  	case itemError:
    42  		return "ERROR"
    43  	case itemLeftParen:
    44  		return "("
    45  	case itemRightParen:
    46  		return ")"
    47  	case itemLeftBracket:
    48  		return "["
    49  	case itemRightBracket:
    50  		return "]"
    51  	case itemTypeName:
    52  		return "TYPE"
    53  	case itemVariable:
    54  		return "VAR"
    55  	case itemAt:
    56  		return "@"
    57  	case itemColon:
    58  		return ":"
    59  	case itemBlank:
    60  		return "_"
    61  	case itemString:
    62  		return "STRING"
    63  	case itemEOF:
    64  		return "EOF"
    65  	default:
    66  		return fmt.Sprintf("itemType(%d)", typ)
    67  	}
    68  }
    69  
    70  type item struct {
    71  	typ itemType
    72  	val string
    73  	pos int
    74  }
    75  
    76  type stateFn func(*lexer) stateFn
    77  
    78  func (l *lexer) run() {
    79  	for state := lexStart; state != nil; {
    80  		state = state(l)
    81  	}
    82  	close(l.items)
    83  }
    84  
    85  func (l *lexer) emitValue(t itemType, value string) {
    86  	l.items <- item{t, value, l.start}
    87  	l.start = l.pos
    88  }
    89  
    90  func (l *lexer) emit(t itemType) {
    91  	l.items <- item{t, l.input[l.start:l.pos], l.start}
    92  	l.start = l.pos
    93  }
    94  
    95  func lexStart(l *lexer) stateFn {
    96  	switch r := l.next(); {
    97  	case r == eof:
    98  		l.emit(itemEOF)
    99  		return nil
   100  	case unicode.IsSpace(r):
   101  		l.ignore()
   102  	case r == '(':
   103  		l.emit(itemLeftParen)
   104  	case r == ')':
   105  		l.emit(itemRightParen)
   106  	case r == '[':
   107  		l.emit(itemLeftBracket)
   108  	case r == ']':
   109  		l.emit(itemRightBracket)
   110  	case r == '@':
   111  		l.emit(itemAt)
   112  	case r == ':':
   113  		l.emit(itemColon)
   114  	case r == '_':
   115  		l.emit(itemBlank)
   116  	case r == '"':
   117  		l.backup()
   118  		return lexString
   119  	case unicode.IsUpper(r):
   120  		l.backup()
   121  		return lexType
   122  	case unicode.IsLower(r):
   123  		l.backup()
   124  		return lexVariable
   125  	default:
   126  		return l.errorf("unexpected character %c", r)
   127  	}
   128  	return lexStart
   129  }
   130  
   131  func (l *lexer) next() (r rune) {
   132  	if l.pos >= len(l.input) {
   133  		l.width = 0
   134  		return eof
   135  	}
   136  	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
   137  
   138  	if r == '\n' {
   139  		l.f.AddLine(l.pos)
   140  	}
   141  
   142  	l.pos += l.width
   143  
   144  	return r
   145  }
   146  
   147  func (l *lexer) ignore() {
   148  	l.start = l.pos
   149  }
   150  
   151  func (l *lexer) backup() {
   152  	l.pos -= l.width
   153  }
   154  
   155  func (l *lexer) errorf(format string, args ...interface{}) stateFn {
   156  	// TODO(dh): emit position information in errors
   157  	l.items <- item{
   158  		itemError,
   159  		fmt.Sprintf(format, args...),
   160  		l.start,
   161  	}
   162  	return nil
   163  }
   164  
   165  func isAlphaNumeric(r rune) bool {
   166  	return r >= '0' && r <= '9' ||
   167  		r >= 'a' && r <= 'z' ||
   168  		r >= 'A' && r <= 'Z'
   169  }
   170  
   171  func lexString(l *lexer) stateFn {
   172  	l.next() // skip quote
   173  	escape := false
   174  
   175  	var runes []rune
   176  	for {
   177  		switch r := l.next(); r {
   178  		case eof:
   179  			return l.errorf("unterminated string")
   180  		case '"':
   181  			if !escape {
   182  				l.emitValue(itemString, string(runes))
   183  				return lexStart
   184  			} else {
   185  				runes = append(runes, '"')
   186  				escape = false
   187  			}
   188  		case '\\':
   189  			if escape {
   190  				runes = append(runes, '\\')
   191  				escape = false
   192  			} else {
   193  				escape = true
   194  			}
   195  		default:
   196  			runes = append(runes, r)
   197  		}
   198  	}
   199  }
   200  
   201  func lexType(l *lexer) stateFn {
   202  	l.next()
   203  	for {
   204  		if !isAlphaNumeric(l.next()) {
   205  			l.backup()
   206  			l.emit(itemTypeName)
   207  			return lexStart
   208  		}
   209  	}
   210  }
   211  
   212  func lexVariable(l *lexer) stateFn {
   213  	l.next()
   214  	for {
   215  		if !isAlphaNumeric(l.next()) {
   216  			l.backup()
   217  			l.emit(itemVariable)
   218  			return lexStart
   219  		}
   220  	}
   221  }