github.com/walf443/mgr@v0.0.0-20150203144449-6f7a3a548462/sqlparser/mysql/lexer.go (about)

     1  package mysql
     2  
     3  //go:generate go tool yacc -o parser.go -v parser.output parser.go.y
     4  import (
     5  	"errors"
     6  	"fmt"
     7  	"strings"
     8  )
     9  
    10  const (
    11  	EOF     = -1
    12  	UNKNOWN = 0
    13  )
    14  
    15  var keywords = map[string]int{
    16  	"ADD":               ADD,
    17  	"DROP":              DROP,
    18  	"CREATE":            CREATE,
    19  	"ALTER":             ALTER,
    20  	"COLUMN":            COLUMN,
    21  	"TABLE":             TABLE,
    22  	"INDEX":             INDEX,
    23  	"KEY":               KEY,
    24  	"DATABASE":          DATABASE,
    25  	"NULL":              NULL,
    26  	"NOT":               NOT,
    27  	"AUTO_INCREMENT":    AUTO_INCREMENT,
    28  	"DEFAULT":           DEFAULT,
    29  	"CURRENT_TIMESTAMP": CURRENT_TIMESTAMP,
    30  	"ON":                ON,
    31  	"UPDATE":            UPDATE,
    32  	"PRIMARY":           PRIMARY,
    33  	"UNIQUE":            UNIQUE,
    34  	"USING":             USING,
    35  	"HASH":              HASH,
    36  	"BTREE":             BTREE,
    37  	"ENGINE":            ENGINE,
    38  	"CHARSET":           CHARSET,
    39  	"CHARACTER":         CHARACTER,
    40  	"COLLATE":           COLLATE,
    41  	"SET":               SET,
    42  	"AVG_ROW_LENGTH":    AVG_ROW_LENGTH,
    43  	"CHECKSUM":          CHECKSUM,
    44  	"COMMENT":           COMMENT,
    45  	"KEY_BLOCK_SIZE":    KEY_BLOCK_SIZE,
    46  	"MAX_ROWS":          MAX_ROWS,
    47  	"MIN_ROWS":          MIN_ROWS,
    48  	"ROW_FORMAT":        ROW_FORMAT,
    49  
    50  	// datatypes
    51  	"BIT":        BIT,
    52  	"TINYINT":    TINYINT,
    53  	"SMALLINT":   SMALLINT,
    54  	"MEDIUMINT":  MEDIUMINT,
    55  	"INT":        INT,
    56  	"INTEGER":    INTEGER,
    57  	"BIGINT":     BIGINT,
    58  	"REAL":       REAL,
    59  	"DOUBLE":     DOUBLE,
    60  	"FLOAT":      FLOAT,
    61  	"DECIMAL":    DECIMAL,
    62  	"NUMERIC":    NUMERIC,
    63  	"DATE":       DATE,
    64  	"TIME":       TIME,
    65  	"TIMESTAMP":  TIMESTAMP,
    66  	"DATETIME":   DATETIME,
    67  	"YEAR":       YEAR,
    68  	"CHAR":       CHAR,
    69  	"VARCHAR":    VARCHAR,
    70  	"BINARY":     BINARY,
    71  	"VARBINARY":  VARBINARY,
    72  	"TINYBLOB":   TINYBLOB,
    73  	"BLOB":       BLOB,
    74  	"MEDIUMBLOB": MEDIUMBLOB,
    75  	"LONGBLOB":   LONGBLOB,
    76  	"TINYTEXT":   TINYTEXT,
    77  	"TEXT":       TEXT,
    78  	"MEDIUMTEXT": MEDIUMTEXT,
    79  	"LONGTEXT":   LONGTEXT,
    80  
    81  	// datatype options
    82  	"UNSIGNED": UNSIGNED,
    83  	"ZEROFILL": ZEROFILL,
    84  }
    85  
    86  type Position struct {
    87  	Line   int
    88  	Column int
    89  }
    90  
    91  type Scanner struct {
    92  	src          []rune
    93  	offset       int
    94  	lineHead     int
    95  	line         int
    96  	markRawUntil []rune
    97  	nextLiteral  string
    98  }
    99  
   100  func (s *Scanner) Init(src string) {
   101  	s.src = []rune(src)
   102  }
   103  
   104  func (s *Scanner) Scan() (tok int, lit string, pos Position) {
   105  	if s.nextLiteral != "" {
   106  		switch s.nextLiteral {
   107  		case "*/":
   108  			tok = COMMENT_FINISH
   109  		case "`":
   110  			tok = int('`')
   111  		case "'":
   112  			tok = int('\'')
   113  		case "\"":
   114  			tok = int('"')
   115  		}
   116  		pos = s.position()
   117  		for i := 0; i < len(s.nextLiteral); i++ {
   118  			s.next()
   119  		}
   120  		lit = s.nextLiteral
   121  		s.nextLiteral = ""
   122  		return
   123  	}
   124  	if len(s.markRawUntil) == 0 {
   125  		s.skipWhiteSpace()
   126  		pos = s.position()
   127  		switch ch := s.peek(); {
   128  		case ch == '/' && s.readAhead(1) == '*':
   129  			s.next()
   130  			s.next()
   131  			tok = COMMENT_START
   132  			lit = "/*"
   133  			s.markRawUntil = []rune{'*', '/'}
   134  		case isLetter(ch):
   135  			lit = s.scanIdentifier()
   136  			if keyword, ok := keywords[strings.ToUpper(lit)]; ok {
   137  				tok = keyword
   138  			} else {
   139  				tok = IDENT
   140  			}
   141  		case isNumber(ch):
   142  			lit = s.scanNumber()
   143  			tok = NUMBER
   144  		case ch == '`':
   145  			s.markRawUntil = []rune{'`'}
   146  			tok = int(ch)
   147  			lit = string(ch)
   148  			s.next()
   149  		case ch == '\'':
   150  			s.markRawUntil = []rune{'\''}
   151  			tok = int(ch)
   152  			lit = string(ch)
   153  			s.next()
   154  		case ch == '"':
   155  			s.markRawUntil = []rune{'"'}
   156  			tok = int(ch)
   157  			lit = string(ch)
   158  			s.next()
   159  		default:
   160  			switch ch {
   161  			case -1:
   162  				tok = EOF
   163  			case ';', ',', '`', '.', '(', ')', '=':
   164  				tok = int(ch)
   165  				lit = string(ch)
   166  			}
   167  			s.next()
   168  		}
   169  	} else {
   170  		var err error
   171  		lit, err = s.scanUntil(s.markRawUntil)
   172  		if err != nil {
   173  			panic(err)
   174  		}
   175  		tok = RAW
   176  		s.nextLiteral = string(s.markRawUntil)
   177  		s.markRawUntil = []rune{}
   178  	}
   179  	return
   180  }
   181  
   182  func (s *Scanner) peek() rune {
   183  	if !s.reachEOF(0) {
   184  		return s.src[s.offset]
   185  	} else {
   186  		return -1
   187  	}
   188  }
   189  
   190  func (s *Scanner) readAhead(offset int) rune {
   191  	if !s.reachEOF(offset) {
   192  		return s.src[s.offset+offset]
   193  	} else {
   194  		return -1
   195  	}
   196  }
   197  
   198  func (s *Scanner) next() {
   199  	if !s.reachEOF(0) {
   200  		if s.peek() == '\n' {
   201  			s.lineHead = s.offset + 1
   202  			s.line++
   203  		}
   204  		s.offset++
   205  	}
   206  }
   207  
   208  func (s *Scanner) CurrentLine() string {
   209  	cursor := s.lineHead
   210  	var bytes []rune
   211  	for {
   212  		ch := s.src[cursor]
   213  
   214  		if ch == '\n' {
   215  			break
   216  		}
   217  		bytes = append(bytes, ch)
   218  		cursor++
   219  		if len(s.src) <= cursor {
   220  			break
   221  		}
   222  	}
   223  	return string(bytes)
   224  }
   225  
   226  func isLetter(ch rune) bool {
   227  	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
   228  }
   229  
   230  func isNumber(ch rune) bool {
   231  	return '0' <= ch && ch <= '9'
   232  }
   233  
   234  func isWhiteSpace(ch rune) bool {
   235  	return ch == ' ' || ch == '\t' || ch == '\n'
   236  }
   237  
   238  func (s *Scanner) reachEOF(offset int) bool {
   239  	return len(s.src) <= s.offset+offset
   240  }
   241  
   242  func (s *Scanner) position() Position {
   243  	return Position{Line: s.line + 1, Column: s.offset - s.lineHead + 1}
   244  }
   245  
   246  func (s *Scanner) skipWhiteSpace() {
   247  	for isWhiteSpace(s.peek()) {
   248  		s.next()
   249  	}
   250  }
   251  
   252  func (s *Scanner) scanIdentifier() string {
   253  	var ret []rune
   254  	for isLetter(s.peek()) || isNumber(s.peek()) {
   255  		ret = append(ret, s.peek())
   256  		s.next()
   257  	}
   258  
   259  	return string(ret)
   260  }
   261  
   262  func (s *Scanner) scanUntil(finish []rune) (string, error) {
   263  	cursor := 0
   264  	finish_pos := len(finish) - 1
   265  	var ret []rune
   266  	for {
   267  		ch := s.peek()
   268  		if ch == finish[cursor] {
   269  			for {
   270  				cursor++
   271  				if cursor > finish_pos {
   272  					return string(ret), nil
   273  				}
   274  				ch2 := s.readAhead(cursor)
   275  				if ch2 != finish[cursor] {
   276  					cursor = 0
   277  					break
   278  				}
   279  				if ch2 == -1 {
   280  					return "", errors.New(fmt.Sprintf("unexpected EOF string. exptected \"%s\"", finish))
   281  				}
   282  			}
   283  		}
   284  
   285  		ret = append(ret, s.peek())
   286  		s.next()
   287  	}
   288  
   289  	return string(ret), nil
   290  }
   291  
   292  func (s *Scanner) scanNumber() string {
   293  	var ret []rune
   294  	for isNumber(s.peek()) {
   295  		ret = append(ret, s.peek())
   296  		s.next()
   297  	}
   298  	return string(ret)
   299  }