github.com/amarpal/go-tools@v0.0.0-20240422043104-40142f59f616/pattern/lexer.go (about) 1 package pattern 2 3 import ( 4 "fmt" 5 "go/token" 6 "unicode" 7 "unicode/utf8" 8 ) 9 10 type lexer struct { 11 f *token.File 12 13 input string 14 start int 15 pos int 16 width int 17 items chan item 18 } 19 20 type itemType int 21 22 const eof = -1 23 24 const ( 25 itemError itemType = iota 26 itemLeftParen 27 itemRightParen 28 itemLeftBracket 29 itemRightBracket 30 itemTypeName 31 itemVariable 32 itemAt 33 itemColon 34 itemBlank 35 itemString 36 itemEOF 37 ) 38 39 func (typ itemType) String() string { 40 switch typ { 41 case itemError: 42 return "ERROR" 43 case itemLeftParen: 44 return "(" 45 case itemRightParen: 46 return ")" 47 case itemLeftBracket: 48 return "[" 49 case itemRightBracket: 50 return "]" 51 case itemTypeName: 52 return "TYPE" 53 case itemVariable: 54 return "VAR" 55 case itemAt: 56 return "@" 57 case itemColon: 58 return ":" 59 case itemBlank: 60 return "_" 61 case itemString: 62 return "STRING" 63 case itemEOF: 64 return "EOF" 65 default: 66 return fmt.Sprintf("itemType(%d)", typ) 67 } 68 } 69 70 type item struct { 71 typ itemType 72 val string 73 pos int 74 } 75 76 type stateFn func(*lexer) stateFn 77 78 func (l *lexer) run() { 79 for state := lexStart; state != nil; { 80 state = state(l) 81 } 82 close(l.items) 83 } 84 85 func (l *lexer) emitValue(t itemType, value string) { 86 l.items <- item{t, value, l.start} 87 l.start = l.pos 88 } 89 90 func (l *lexer) emit(t itemType) { 91 l.items <- item{t, l.input[l.start:l.pos], l.start} 92 l.start = l.pos 93 } 94 95 func lexStart(l *lexer) stateFn { 96 switch r := l.next(); { 97 case r == eof: 98 l.emit(itemEOF) 99 return nil 100 case unicode.IsSpace(r): 101 l.ignore() 102 case r == '(': 103 l.emit(itemLeftParen) 104 case r == ')': 105 l.emit(itemRightParen) 106 case r == '[': 107 l.emit(itemLeftBracket) 108 case r == ']': 109 l.emit(itemRightBracket) 110 case r == '@': 111 l.emit(itemAt) 112 case r == ':': 113 l.emit(itemColon) 114 case r == '_': 115 l.emit(itemBlank) 116 case r == '"': 117 l.backup() 118 return lexString 119 case unicode.IsUpper(r): 120 l.backup() 121 return lexType 122 case unicode.IsLower(r): 123 l.backup() 124 return lexVariable 125 default: 126 return l.errorf("unexpected character %c", r) 127 } 128 return lexStart 129 } 130 131 func (l *lexer) next() (r rune) { 132 if l.pos >= len(l.input) { 133 l.width = 0 134 return eof 135 } 136 r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) 137 138 if r == '\n' { 139 l.f.AddLine(l.pos) 140 } 141 142 l.pos += l.width 143 144 return r 145 } 146 147 func (l *lexer) ignore() { 148 l.start = l.pos 149 } 150 151 func (l *lexer) backup() { 152 l.pos -= l.width 153 } 154 155 func (l *lexer) errorf(format string, args ...interface{}) stateFn { 156 // TODO(dh): emit position information in errors 157 l.items <- item{ 158 itemError, 159 fmt.Sprintf(format, args...), 160 l.start, 161 } 162 return nil 163 } 164 165 func isAlphaNumeric(r rune) bool { 166 return r >= '0' && r <= '9' || 167 r >= 'a' && r <= 'z' || 168 r >= 'A' && r <= 'Z' 169 } 170 171 func lexString(l *lexer) stateFn { 172 l.next() // skip quote 173 escape := false 174 175 var runes []rune 176 for { 177 switch r := l.next(); r { 178 case eof: 179 return l.errorf("unterminated string") 180 case '"': 181 if !escape { 182 l.emitValue(itemString, string(runes)) 183 return lexStart 184 } else { 185 runes = append(runes, '"') 186 escape = false 187 } 188 case '\\': 189 if escape { 190 runes = append(runes, '\\') 191 escape = false 192 } else { 193 escape = true 194 } 195 default: 196 runes = append(runes, r) 197 } 198 } 199 } 200 201 func lexType(l *lexer) stateFn { 202 l.next() 203 for { 204 if !isAlphaNumeric(l.next()) { 205 l.backup() 206 l.emit(itemTypeName) 207 return lexStart 208 } 209 } 210 } 211 212 func lexVariable(l *lexer) stateFn { 213 l.next() 214 for { 215 if !isAlphaNumeric(l.next()) { 216 l.backup() 217 l.emit(itemVariable) 218 return lexStart 219 } 220 } 221 }