github.com/DQNEO/babygo@v0.0.3/scanner.go (about) 1 package main 2 3 import ( 4 "github.com/DQNEO/babygo/lib/mylib" 5 "github.com/DQNEO/babygo/lib/strconv" 6 ) 7 8 type scanner struct { 9 src []uint8 10 ch uint8 11 offset int 12 nextOffset int 13 insertSemi bool 14 } 15 16 func (s *scanner) next() { 17 if s.nextOffset < len(s.src) { 18 s.offset = s.nextOffset 19 s.ch = s.src[s.offset] 20 s.nextOffset++ 21 } else { 22 s.offset = len(s.src) 23 s.ch = 1 //EOF 24 } 25 } 26 27 var keywords []string 28 29 func (s *scanner) Init(src []uint8) { 30 // https://golang.org/ref/spec#Keywords 31 keywords = []string{ 32 "break", "default", "func", "interface", "select", 33 "case", "defer", "go", "map", "struct", 34 "chan", "else", "goto", "package", "switch", 35 "const", "fallthrough", "if", "range", "type", 36 "continue", "for", "import", "return", "var", 37 } 38 s.src = src 39 s.offset = 0 40 s.ch = ' ' 41 s.nextOffset = 0 42 s.insertSemi = false 43 s.next() 44 } 45 46 func isLetter(ch uint8) bool { 47 if ch == '_' { 48 return true 49 } 50 return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z') 51 } 52 53 func isDecimal(ch uint8) bool { 54 return '0' <= ch && ch <= '9' 55 } 56 57 func (s *scanner) scanIdentifier() string { 58 var offset = s.offset 59 for isLetter(s.ch) || isDecimal(s.ch) { 60 s.next() 61 } 62 return string(s.src[offset:s.offset]) 63 } 64 65 func (s *scanner) scanNumber() string { 66 var offset = s.offset 67 for isDecimal(s.ch) { 68 s.next() 69 } 70 return string(s.src[offset:s.offset]) 71 } 72 73 func (s *scanner) scanString() string { 74 var offset = s.offset - 1 75 var escaped bool 76 for !escaped && s.ch != '"' { 77 if s.ch == '\\' { 78 escaped = true 79 s.next() 80 s.next() 81 escaped = false 82 continue 83 } 84 s.next() 85 } 86 s.next() // consume ending '"" 87 return string(s.src[offset:s.offset]) 88 } 89 90 func (s *scanner) scanChar() string { 91 // '\'' opening already consumed 92 var offset = s.offset - 1 93 var ch uint8 94 for { 95 ch = s.ch 96 s.next() 97 if ch == '\'' { 98 break 99 } 100 if ch == '\\' { 101 s.next() 102 } 103 } 104 105 return string(s.src[offset:s.offset]) 106 } 107 108 func (s *scanner) scanComment() string { 109 var offset = s.offset - 1 110 for s.ch != '\n' { 111 s.next() 112 } 113 return string(s.src[offset:s.offset]) 114 } 115 116 type TokenContainer struct { 117 pos int // what's this ? 118 tok string // token.Token 119 lit string // raw data 120 } 121 122 // https://golang.org/ref/spec#Tokens 123 func (s *scanner) skipWhitespace() { 124 for s.ch == ' ' || s.ch == '\t' || (s.ch == '\n' && !s.insertSemi) || s.ch == '\r' { 125 s.next() 126 } 127 } 128 129 func (s *scanner) Scan() *TokenContainer { 130 s.skipWhitespace() 131 var tc = &TokenContainer{} 132 var lit string 133 var tok string 134 var insertSemi bool 135 var ch = s.ch 136 if isLetter(ch) { 137 lit = s.scanIdentifier() 138 if mylib.InArray(lit, keywords) { 139 tok = lit 140 switch tok { 141 case "break", "continue", "fallthrough", "return": 142 insertSemi = true 143 } 144 } else { 145 insertSemi = true 146 tok = "IDENT" 147 } 148 } else if isDecimal(ch) { 149 insertSemi = true 150 lit = s.scanNumber() 151 tok = "INT" 152 } else { 153 s.next() 154 switch ch { 155 case '\n': 156 tok = ";" 157 lit = "\n" 158 insertSemi = false 159 case '"': // double quote 160 insertSemi = true 161 lit = s.scanString() 162 tok = "STRING" 163 case '\'': // single quote 164 insertSemi = true 165 lit = s.scanChar() 166 tok = "CHAR" 167 // https://golang.org/ref/spec#Operators_and_punctuation 168 // + & += &= && == != ( ) 169 // - | -= |= || < <= [ ] 170 // * ^ *= ^= <- > >= { } 171 // / << /= <<= ++ = := , ; 172 // % >> %= >>= -- ! ... . : 173 // &^ &^= 174 case ':': // :=, : 175 if s.ch == '=' { 176 s.next() 177 tok = ":=" 178 } else { 179 tok = ":" 180 } 181 case '.': // ..., . 182 var peekCh = s.src[s.nextOffset] 183 if s.ch == '.' && peekCh == '.' { 184 s.next() 185 s.next() 186 tok = "..." 187 } else { 188 tok = "." 189 } 190 case ',': 191 tok = "," 192 case ';': 193 tok = ";" 194 lit = ";" 195 case '(': 196 tok = "(" 197 case ')': 198 insertSemi = true 199 tok = ")" 200 case '[': 201 tok = "[" 202 case ']': 203 insertSemi = true 204 tok = "]" 205 case '{': 206 tok = "{" 207 case '}': 208 insertSemi = true 209 tok = "}" 210 case '+': // +=, ++, + 211 switch s.ch { 212 case '=': 213 s.next() 214 tok = "+=" 215 case '+': 216 s.next() 217 tok = "++" 218 insertSemi = true 219 default: 220 tok = "+" 221 } 222 case '-': // -= -- - 223 switch s.ch { 224 case '-': 225 s.next() 226 tok = "--" 227 insertSemi = true 228 case '=': 229 s.next() 230 tok = "-=" 231 default: 232 tok = "-" 233 } 234 case '*': // *= * 235 if s.ch == '=' { 236 s.next() 237 tok = "*=" 238 } else { 239 tok = "*" 240 } 241 case '/': 242 if s.ch == '/' { 243 // comment 244 // @TODO block comment 245 if s.insertSemi { 246 s.ch = '/' 247 s.offset = s.offset - 1 248 s.nextOffset = s.offset + 1 249 tc.lit = "\n" 250 tc.tok = ";" 251 s.insertSemi = false 252 return tc 253 } 254 lit = s.scanComment() 255 tok = "COMMENT" 256 } else if s.ch == '=' { 257 tok = "/=" 258 } else { 259 tok = "/" 260 } 261 case '%': // %= % 262 if s.ch == '=' { 263 s.next() 264 tok = "%=" 265 } else { 266 tok = "%" 267 } 268 case '^': // ^= ^ 269 if s.ch == '=' { 270 s.next() 271 tok = "^=" 272 } else { 273 tok = "^" 274 } 275 case '<': // <= <- <<= << 276 switch s.ch { 277 case '-': 278 s.next() 279 tok = "<-" 280 case '=': 281 s.next() 282 tok = "<=" 283 case '<': 284 var peekCh = s.src[s.nextOffset] 285 if peekCh == '=' { 286 s.next() 287 s.next() 288 tok = "<<=" 289 } else { 290 s.next() 291 tok = "<<" 292 } 293 default: 294 tok = "<" 295 } 296 case '>': // >= >>= >> > 297 switch s.ch { 298 case '=': 299 s.next() 300 tok = ">=" 301 case '>': 302 var peekCh = s.src[s.nextOffset] 303 if peekCh == '=' { 304 s.next() 305 s.next() 306 tok = ">>=" 307 } else { 308 s.next() 309 tok = ">>" 310 } 311 default: 312 tok = ">" 313 } 314 case '=': // == = 315 if s.ch == '=' { 316 s.next() 317 tok = "==" 318 } else { 319 tok = "=" 320 } 321 case '!': // !=, ! 322 if s.ch == '=' { 323 s.next() 324 tok = "!=" 325 } else { 326 tok = "!" 327 } 328 case '&': // & &= && &^ &^= 329 switch s.ch { 330 case '=': 331 s.next() 332 tok = "&=" 333 case '&': 334 s.next() 335 tok = "&&" 336 case '^': 337 var peekCh = s.src[s.nextOffset] 338 if peekCh == '=' { 339 s.next() 340 s.next() 341 tok = "&^=" 342 } else { 343 s.next() 344 tok = "&^" 345 } 346 default: 347 tok = "&" 348 } 349 case '|': // |= || | 350 switch s.ch { 351 case '|': 352 s.next() 353 tok = "||" 354 case '=': 355 s.next() 356 tok = "|=" 357 default: 358 tok = "|" 359 } 360 case 1: 361 tok = "EOF" 362 default: 363 panic2(__func__, "unknown char:"+string([]uint8{ch})+":"+strconv.Itoa(int(ch))) 364 tok = "UNKNOWN" 365 } 366 } 367 tc.lit = lit 368 tc.pos = 0 369 tc.tok = tok 370 s.insertSemi = insertSemi 371 return tc 372 }