github.com/movsb/taorm@v0.0.0-20201209183410-91bafb0b22a6/filter/tokenizer.go

package filter

import (
	"bytes"
	"container/list"
	"fmt"
	"strings"
	"unicode/utf8"
)

// TokenType is the type of a token.
type TokenType uint

// TokenType list
const (
	_ TokenType = iota

	TokenTypeInvalid // unrecognized input
	TokenTypeEOF     // end of input

	TokenTypeAnd // ";"
	TokenTypeOr  // ","

	TokenTypeIdent // a bare word: a field name or value

	TokenTypeEqual      // "=="
	TokenTypeNotEqual   // "!="
	TokenTypeInclude    // "=@"
	TokenTypeNotInclude // "!@"
	TokenTypeStartsWith // "^="
	TokenTypeEndsWith   // "$="
	TokenTypeMatch      // "=~"
	TokenTypeNotMatch   // "!~"

	TokenTypeGreaterThan        // ">"
	TokenTypeLessThan           // "<"
	TokenTypeGreaterThanOrEqual // ">="
	TokenTypeLessThanOrEqual    // "<="
)

// Token is a single token produced by the Tokenizer.
type Token struct {
	TokenType  TokenType
	TokenValue string
}

// Tokenizer tokenizes an input string into tokens.
type Tokenizer struct {
	sr  *strings.Reader // remaining input
	buf *list.List      // tokens pushed back by Undo
}

// NewTokenizer creates a new tokenizer.
func NewTokenizer(input string) *Tokenizer {
	return &Tokenizer{
		sr:  strings.NewReader(input),
		buf: list.New(),
	}
}

// Next returns the next token from the input.
func (t *Tokenizer) Next() (token Token, err error) {
	// Return a token pushed back by Undo first, if any.
	if t.buf.Len() > 0 {
		e := t.buf.Front()
		t.buf.Remove(e)
		return e.Value.(Token), nil
	}

	// Convert a panic raised while scanning into an error.
	defer func() {
		if er := recover(); er != nil {
			if e, ok := er.(error); ok {
				err = e
			} else {
				err = fmt.Errorf("%v", er)
			}
		}
	}()

	token = t.next()
	err = nil
	return
}

// Undo pushes a token back so that the next call to Next returns it.
func (t *Tokenizer) Undo(token Token) {
	t.buf.PushFront(token)
}

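// exampleTokenizeLoop is a hypothetical usage sketch (not part of the taorm
// package) showing how the tokenizer is typically driven: call Next until
// TokenTypeEOF, and use Undo to push a token back when one-token look-ahead
// is needed. The filter expression used here is only an assumed sample input.
func exampleTokenizeLoop() {
	tz := NewTokenizer("views>=100;title=@go")

	// Look-ahead: read one token, then push it back so the loop still sees it.
	if tok, err := tz.Next(); err == nil {
		tz.Undo(tok)
	}

	for {
		tok, err := tz.Next()
		if err != nil || tok.TokenType == TokenTypeEOF {
			break
		}
		fmt.Printf("%v %q\n", tok.TokenType, tok.TokenValue)
	}
}
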
// read returns the next rune of the input, or 0 once the input is exhausted.
func (t *Tokenizer) read() rune {
	ch, _, _ := t.sr.ReadRune()
	return ch
}

// unread puts r back into the reader. The EOF sentinel 0 is never unread.
func (t *Tokenizer) unread(r rune) {
	if r != 0 {
		t.sr.UnreadRune()
	}
}

// next scans and returns a single token. The zero rune returned by read is
// treated as end of input.
func (t *Tokenizer) next() (token Token) {
	var ch rune

	for {
		ch = t.read()
		if ch == '=' {
			ch = t.read()
			switch ch {
			case '=':
				token.TokenType = TokenTypeEqual
				token.TokenValue = "=="
			case '@':
				token.TokenType = TokenTypeInclude
				token.TokenValue = "=@"
			case '~':
				token.TokenType = TokenTypeMatch
				token.TokenValue = "=~"
			default:
				t.unread(ch)
				token.TokenType = TokenTypeInvalid
				token.TokenValue = fmt.Sprintf("%c", ch)
			}
			return
		} else if ch == '!' {
			ch = t.read()
			switch ch {
			case '=':
				token.TokenType = TokenTypeNotEqual
				token.TokenValue = "!="
			case '@':
				token.TokenType = TokenTypeNotInclude
				token.TokenValue = "!@"
			case '~':
				token.TokenType = TokenTypeNotMatch
				token.TokenValue = "!~"
			default:
				// A lone '!' does not form an operator.
				t.unread(ch)
				token.TokenType = TokenTypeInvalid
				token.TokenValue = fmt.Sprintf("%c", ch)
			}
			return
		} else if ch == '^' {
			ch = t.read()
			switch ch {
			case '=':
				token.TokenType = TokenTypeStartsWith
				token.TokenValue = "^="
			default:
				// A lone '^' does not form an operator.
				t.unread(ch)
				token.TokenType = TokenTypeInvalid
				token.TokenValue = fmt.Sprintf("%c", ch)
			}
			return
		} else if ch == '$' {
			ch = t.read()
			switch ch {
			case '=':
				token.TokenType = TokenTypeEndsWith
				token.TokenValue = "$="
			default:
				// A lone '$' does not form an operator.
				t.unread(ch)
				token.TokenType = TokenTypeInvalid
				token.TokenValue = fmt.Sprintf("%c", ch)
			}
			return
		} else if ch == '>' {
			ch = t.read()
			if ch == '=' {
				token.TokenType = TokenTypeGreaterThanOrEqual
				token.TokenValue = ">="
			} else {
				t.unread(ch)
				token.TokenType = TokenTypeGreaterThan
				token.TokenValue = ">"
			}
			return
		} else if ch == '<' {
			ch = t.read()
			if ch == '=' {
				token.TokenType = TokenTypeLessThanOrEqual
				token.TokenValue = "<="
			} else {
				t.unread(ch)
				token.TokenType = TokenTypeLessThan
				token.TokenValue = "<"
			}
			return
		} else if ch == ';' {
			token.TokenType = TokenTypeAnd
			token.TokenValue = " AND "
			return
		} else if ch == ',' {
			token.TokenType = TokenTypeOr
			token.TokenValue = " OR "
			return
		} else if ch == ' ' {
			continue
		} else if ch == 0 {
			token.TokenType = TokenTypeEOF
			token.TokenValue = ""
			return
		} else {
			// Anything else starts an identifier (a field name or value).
			t.unread(ch)
			sw := bytes.NewBuffer(nil)
			for {
				ch = t.read()
				if ch >= '0' && ch <= '9' ||
					ch >= 'a' && ch <= 'z' ||
					ch >= 'A' && ch <= 'Z' ||
					ch == ':' || ch == '.' ||
					ch == ' ' || ch == '/' ||
					ch == '_' || ch == '-' ||
					ch == '*' || ch == '+' ||
					ch == '\t' || ch >= utf8.RuneSelf {
					sw.WriteRune(ch)
				} else if ch == '\\' {
					// A backslash escapes the separators ',' and ';' so they
					// can appear inside a value; otherwise it is kept literally.
					ch = t.read()
					if ch == ',' || ch == ';' {
						sw.WriteRune(ch)
					} else {
						t.unread(ch)
						sw.WriteRune('\\')
					}
				} else {
					// Any other rune terminates the identifier. Trim the
					// surrounding tabs and spaces; an empty result means no
					// identifier was found, so the rune is reported as invalid.
					s := strings.Trim(sw.String(), "\t ")
					if len(s) > 0 {
						token.TokenType = TokenTypeIdent
						token.TokenValue = s
						t.unread(ch)
					} else {
						token.TokenType = TokenTypeInvalid
						token.TokenValue = fmt.Sprintf("%c", ch)
					}
					return
				}
			}
		}
	}
}
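
// exampleEscapedSeparators is a hypothetical sketch (not from the taorm
// sources) showing how a backslash keeps the separators ',' and ';' inside a
// value: the assumed input below scans to Ident("title"), Include("=@"),
// Ident("a,b"), EOF, because the identifier branch of next above unescapes
// "\," and "\;".
func exampleEscapedSeparators() {
	tz := NewTokenizer(`title=@a\,b`)
	for {
		tok, err := tz.Next()
		if err != nil || tok.TokenType == TokenTypeEOF {
			break
		}
		fmt.Println(tok.TokenValue) // prints "title", "=@", then "a,b" on separate lines
	}
}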