github.com/movsb/taorm@v0.0.0-20201209183410-91bafb0b22a6/filter/tokenizer.go

package filter

import (
	"bytes"
	"container/list"
	"fmt"
	"strings"
	"unicode/utf8"
)

// TokenType is the type of a token.
type TokenType uint

// TokenType list
const (
	_ TokenType = iota

	TokenTypeInvalid
	TokenTypeEOF

	TokenTypeAnd
	TokenTypeOr

	TokenTypeIdent

	TokenTypeEqual
	TokenTypeNotEqual
	TokenTypeInclude
	TokenTypeNotInclude
	TokenTypeStartsWith
	TokenTypeEndsWith
	TokenTypeMatch
	TokenTypeNotMatch

	TokenTypeGreaterThan
	TokenTypeLessThan
	TokenTypeGreaterThanOrEqual
	TokenTypeLessThanOrEqual
)

// Token is a token.
type Token struct {
	TokenType  TokenType
	TokenValue string
}

// Tokenizer tokenizes an input string into tokens.
type Tokenizer struct {
	sr  *strings.Reader
	buf *list.List // tokens pushed back by Undo, returned before reading more input
}

// NewTokenizer creates a new tokenizer.
func NewTokenizer(input string) *Tokenizer {
	return &Tokenizer{
		sr:  strings.NewReader(input),
		buf: list.New(),
	}
}

// Next returns the next token. Tokens pushed back by Undo are returned
// first, in LIFO order.
func (t *Tokenizer) Next() (token Token, err error) {
	if t.buf.Len() > 0 {
		e := t.buf.Front()
		t.buf.Remove(e)
		return e.Value.(Token), nil
	}

	// next may panic on internal errors; convert a panic into a returned
	// error instead of asserting it is an error (which would re-panic on
	// non-error panic values).
	defer func() {
		if er := recover(); er != nil {
			if e, ok := er.(error); ok {
				err = e
			} else {
				err = fmt.Errorf("%v", er)
			}
		}
	}()

	token = t.next()
	err = nil
	return
}

// Undo pushes a token back; it will be returned by the next call to Next.
func (t *Tokenizer) Undo(token Token) {
	t.buf.PushFront(token)
}

// read returns the next rune, or 0 (NUL) at the end of input.
func (t *Tokenizer) read() rune {
	ch, _, _ := t.sr.ReadRune()
	return ch
}

// unread steps back one rune; it is a no-op for the 0 end-of-input sentinel.
func (t *Tokenizer) unread(r rune) {
	if r != 0 {
		t.sr.UnreadRune()
	}
}

func (t *Tokenizer) next() (token Token) {
	var ch rune

	for {
		ch = t.read()
		if ch == '=' {
			ch = t.read()
			switch ch {
			case '=':
				token.TokenType = TokenTypeEqual
				token.TokenValue = "=="
			case '@':
				token.TokenType = TokenTypeInclude
				token.TokenValue = "=@"
			case '~':
				token.TokenType = TokenTypeMatch
				token.TokenValue = "=~"
			default:
				t.unread(ch)
				token.TokenType = TokenTypeInvalid
				token.TokenValue = fmt.Sprintf("%c", ch)
			}
			return
		} else if ch == '!' {
			ch = t.read()
			switch ch {
			case '=':
				token.TokenType = TokenTypeNotEqual
				token.TokenValue = "!="
			case '@':
				token.TokenType = TokenTypeNotInclude
				token.TokenValue = "!@"
			case '~':
				token.TokenType = TokenTypeNotMatch
				token.TokenValue = "!~"
			default:
				// Report the unexpected rune as invalid, as the '=' branch does.
				t.unread(ch)
				token.TokenType = TokenTypeInvalid
				token.TokenValue = fmt.Sprintf("%c", ch)
			}
			return
		} else if ch == '^' {
			ch = t.read()
			switch ch {
			case '=':
				token.TokenType = TokenTypeStartsWith
				token.TokenValue = "^="
			default:
				t.unread(ch)
				token.TokenType = TokenTypeInvalid
				token.TokenValue = fmt.Sprintf("%c", ch)
			}
			return
		} else if ch == '$' {
			ch = t.read()
			switch ch {
			case '=':
				token.TokenType = TokenTypeEndsWith
				token.TokenValue = "$="
			default:
				t.unread(ch)
				token.TokenType = TokenTypeInvalid
				token.TokenValue = fmt.Sprintf("%c", ch)
			}
			return
		} else if ch == '>' {
			ch = t.read()
			if ch == '=' {
				token.TokenType = TokenTypeGreaterThanOrEqual
				token.TokenValue = ">="
			} else {
				t.unread(ch)
				token.TokenType = TokenTypeGreaterThan
				token.TokenValue = ">"
			}
			return
		} else if ch == '<' {
			ch = t.read()
			if ch == '=' {
				token.TokenType = TokenTypeLessThanOrEqual
				token.TokenValue = "<="
			} else {
				t.unread(ch)
				token.TokenType = TokenTypeLessThan
				token.TokenValue = "<"
			}
			return
		} else if ch == ';' {
			token.TokenType = TokenTypeAnd
			token.TokenValue = " AND "
			return
		} else if ch == ',' {
			token.TokenType = TokenTypeOr
			token.TokenValue = " OR "
			return
		} else if ch == ' ' {
			continue
		} else if ch == 0 {
			token.TokenType = TokenTypeEOF
			token.TokenValue = ""
			return
		} else {
			// Anything else starts an identifier: letters, digits, a few
			// punctuation runes, and all non-ASCII runes are accepted;
			// '\' escapes ',' and ';' so they can appear inside values.
			t.unread(ch)
			sw := bytes.NewBuffer(nil)
			for {
				ch = t.read()
				if ch >= '0' && ch <= '9' ||
					ch >= 'a' && ch <= 'z' ||
					ch >= 'A' && ch <= 'Z' ||
					ch == ':' || ch == '.' ||
					ch == ' ' || ch == '/' ||
					ch == '_' || ch == '-' ||
					ch == '*' || ch == '+' ||
					ch == '\t' || ch >= utf8.RuneSelf {
					sw.WriteRune(ch)
				} else if ch == '\\' {
					ch = t.read()
					if ch == ',' || ch == ';' {
						sw.WriteRune(ch)
					} else {
						t.unread(ch)
						sw.WriteRune('\\')
					}
				} else {
					// Trim surrounding blanks; an all-blank identifier is invalid.
					s := strings.Trim(sw.String(), "\t ")
					if len(s) > 0 {
						token.TokenType = TokenTypeIdent
						token.TokenValue = s
						t.unread(ch)
					} else {
						token.TokenType = TokenTypeInvalid
						token.TokenValue = fmt.Sprintf("%c", ch)
					}
					return
				}
			}
		}
	}
}
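
// A minimal usage sketch, assuming only the exported API above; the sample
// expression is illustrative and uses the operators this tokenizer
// recognizes (';' is AND, ',' is OR, and '\' escapes ',' or ';' inside
// identifier values):
//
//	tz := NewTokenizer(`created_at >= 2020-12-09; title =@ hello\, world`)
//	for {
//		tok, err := tz.Next()
//		if err != nil || tok.TokenType == TokenTypeEOF {
//			break
//		}
//		fmt.Printf("%d %q\n", tok.TokenType, tok.TokenValue)
//	}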
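
// Undo makes one-token lookahead cheap: read a token, inspect it, and push
// it back if the caller should see it again. A sketch; the helper name peek
// is hypothetical and not part of this package:
//
//	func peek(tz *Tokenizer) (Token, error) {
//		tok, err := tz.Next()
//		if err == nil {
//			tz.Undo(tok)
//		}
//		return tok, err
//	}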