github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/flosch/pongo2.v3/lexer.go

package pongo2

import (
	"fmt"
	"strings"
	"unicode/utf8"
)

const (
	TokenError = iota
	EOF

	TokenHTML

	TokenKeyword
	TokenIdentifier
	TokenString
	TokenNumber
	TokenSymbol
)

var (
	tokenSpaceChars                = " \n\r\t"
	tokenIdentifierChars           = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"
	tokenIdentifierCharsWithDigits = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789"
	tokenDigits                    = "0123456789"

	// Available symbols in pongo2 (within filters/tags)
	TokenSymbols = []string{
		// 3-Char symbols

		// 2-Char symbols
		"==", ">=", "<=", "&&", "||", "{{", "}}", "{%", "%}", "!=", "<>",

		// 1-Char symbols
		"(", ")", "+", "-", "*", "<", ">", "/", "^", ",", ".", "!", "|", ":", "=", "%",
	}

	// Available keywords in pongo2
	TokenKeywords = []string{"in", "and", "or", "not", "true", "false", "as", "export"}
)

type TokenType int

type Token struct {
	Filename string
	Typ      TokenType
	Val      string
	Line     int
	Col      int
}

type lexerStateFn func() lexerStateFn

type lexer struct {
	name      string
	input     string
	start     int // start pos of the item
	pos       int // current pos
	width     int // width of last rune
	tokens    []*Token
	errored   bool
	startline int
	startcol  int
	line      int
	col       int

	in_verbatim   bool
	verbatim_name string
}

func (t *Token) String() string {
	val := t.Val
	if len(val) > 1000 {
		val = fmt.Sprintf("%s...%s", val[:10], val[len(val)-5:])
	}

	typ := ""
	switch t.Typ {
	case TokenHTML:
		typ = "HTML"
	case TokenError:
		typ = "Error"
	case TokenIdentifier:
		typ = "Identifier"
	case TokenKeyword:
		typ = "Keyword"
	case TokenNumber:
		typ = "Number"
	case TokenString:
		typ = "String"
	case TokenSymbol:
		typ = "Symbol"
	default:
		typ = "Unknown"
	}

	return fmt.Sprintf("<Token Typ=%s (%d) Val='%s' Line=%d Col=%d>",
		typ, t.Typ, val, t.Line, t.Col)
}

func lex(name string, input string) ([]*Token, *Error) {
	l := &lexer{
		name:      name,
		input:     input,
		tokens:    make([]*Token, 0, 100),
		line:      1,
		col:       1,
		startline: 1,
		startcol:  1,
	}
	l.run()
	if l.errored {
		errtoken := l.tokens[len(l.tokens)-1]
		return nil, &Error{
			Filename: name,
			Line:     errtoken.Line,
			Column:   errtoken.Col,
			Sender:   "lexer",
			ErrorMsg: errtoken.Val,
		}
	}
	return l.tokens, nil
}
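
// Usage sketch (illustrative; the exact stream is inferred from the state
// functions below): the lexer interleaves raw HTML tokens with tag/variable
// tokens, so
//
//	tokens, err := lex("greeting.html", "Hello {{ name }}!")
//
// is expected to return err == nil and the tokens
// HTML("Hello "), Symbol("{{"), Identifier("name"), Symbol("}}"), HTML("!").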

func (l *lexer) value() string {
	return l.input[l.start:l.pos]
}

func (l *lexer) length() int {
	return l.pos - l.start
}

func (l *lexer) emit(t TokenType) {
	tok := &Token{
		Filename: l.name,
		Typ:      t,
		Val:      l.value(),
		Line:     l.startline,
		Col:      l.startcol,
	}

	if t == TokenString {
		// Escape sequence \" in strings
		tok.Val = strings.Replace(tok.Val, `\"`, `"`, -1)
		tok.Val = strings.Replace(tok.Val, `\\`, `\`, -1)
	}

	l.tokens = append(l.tokens, tok)
	l.start = l.pos
	l.startline = l.line
	l.startcol = l.col
}

func (l *lexer) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return EOF
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = w
	l.pos += l.width
	l.col += l.width
	return r
}

func (l *lexer) backup() {
	l.pos -= l.width
	l.col -= l.width
}

func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

func (l *lexer) ignore() {
	l.start = l.pos
	l.startline = l.line
	l.startcol = l.col
}

func (l *lexer) accept(what string) bool {
	if strings.IndexRune(what, l.next()) >= 0 {
		return true
	}
	l.backup()
	return false
}

func (l *lexer) acceptRun(what string) {
	for strings.IndexRune(what, l.next()) >= 0 {
	}
	l.backup()
}

func (l *lexer) errorf(format string, args ...interface{}) lexerStateFn {
	t := &Token{
		Filename: l.name,
		Typ:      TokenError,
		Val:      fmt.Sprintf(format, args...),
		Line:     l.startline,
		Col:      l.startcol,
	}
	l.tokens = append(l.tokens, t)
	l.errored = true
	l.startline = l.line
	l.startcol = l.col
	return nil
}

func (l *lexer) eof() bool {
	return l.start >= len(l.input)-1
}

func (l *lexer) run() {
	for {
		// TODO: Support verbatim tag names
		// https://docs.djangoproject.com/en/dev/ref/templates/builtins/#verbatim
		if l.in_verbatim {
			name := l.verbatim_name
			if name != "" {
				name += " "
			}
			if strings.HasPrefix(l.input[l.pos:], fmt.Sprintf("{%% endverbatim %s%%}", name)) { // end verbatim
				if l.pos > l.start {
					l.emit(TokenHTML)
				}
				w := len("{% endverbatim %}")
				l.pos += w
				l.col += w
				l.ignore()
				l.in_verbatim = false
			}
		} else if strings.HasPrefix(l.input[l.pos:], "{% verbatim %}") { // tag
			if l.pos > l.start {
				l.emit(TokenHTML)
			}
			l.in_verbatim = true
			w := len("{% verbatim %}")
			l.pos += w
			l.col += w
			l.ignore()
		}

		if !l.in_verbatim {
			// Ignore single-line comments {# ... #}
			if strings.HasPrefix(l.input[l.pos:], "{#") {
				if l.pos > l.start {
					l.emit(TokenHTML)
				}

				l.pos += 2 // pass '{#'
				l.col += 2

				for {
					switch l.peek() {
					case EOF:
						l.errorf("Single-line comment not closed.")
						return
					case '\n':
						l.errorf("Newline not permitted in a single-line comment.")
						return
					}

					if strings.HasPrefix(l.input[l.pos:], "#}") {
						l.pos += 2 // pass '#}'
						l.col += 2
						break
					}

					l.next()
				}
				l.ignore() // ignore whole comment

				// Comment skipped
				continue // next token
			}

			if strings.HasPrefix(l.input[l.pos:], "{{") || // variable
				strings.HasPrefix(l.input[l.pos:], "{%") { // tag
				if l.pos > l.start {
					l.emit(TokenHTML)
				}
				l.tokenize()
				if l.errored {
					return
				}
				continue
			}
		}

		switch l.peek() {
		case '\n':
			l.line++
			l.col = 0
		}
		if l.next() == EOF {
			break
		}
	}

	if l.pos > l.start {
		l.emit(TokenHTML)
	}

	if l.in_verbatim {
		l.errorf("verbatim-tag not closed, got EOF.")
	}
}

func (l *lexer) tokenize() {
	for state := l.stateCode; state != nil; {
		state = state()
	}
}
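
// The state functions below form a lexer state machine in the style of Rob
// Pike's template lexer: tokenize runs the current lexerStateFn until one
// returns nil, and each state consumes input, emits tokens, and hands back
// the next state. For "{{ name }}" the states run
// stateCode -> stateIdentifier -> stateCode -> nil.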

func (l *lexer) stateCode() lexerStateFn {
outer_loop:
	for {
		switch {
		case l.accept(tokenSpaceChars):
			if l.value() == "\n" {
				return l.errorf("Newline not allowed within tag/variable.")
			}
			l.ignore()
			continue
		case l.accept(tokenIdentifierChars):
			return l.stateIdentifier
		case l.accept(tokenDigits):
			return l.stateNumber
		case l.accept(`"`):
			return l.stateString
		}

		// Check for symbol
		for _, sym := range TokenSymbols {
			if strings.HasPrefix(l.input[l.start:], sym) {
				l.pos += len(sym)
				l.col += l.length()
				l.emit(TokenSymbol)

				if sym == "%}" || sym == "}}" {
					// Tag/variable end, return after emit
					return nil
				}

				continue outer_loop
			}
		}

		if l.pos < len(l.input) {
			return l.errorf("Unknown character: %q (%d)", l.peek(), l.peek())
		}

		break
	}

	// Normal shutdown
	return nil
}

func (l *lexer) stateIdentifier() lexerStateFn {
	l.acceptRun(tokenIdentifierChars)
	l.acceptRun(tokenIdentifierCharsWithDigits)
	for _, kw := range TokenKeywords {
		if kw == l.value() {
			l.emit(TokenKeyword)
			return l.stateCode
		}
	}
	l.emit(TokenIdentifier)
	return l.stateCode
}

func (l *lexer) stateNumber() lexerStateFn {
	l.acceptRun(tokenDigits)
	/*
		Maybe context-sensitive number lexing?
		* comments.0.Text // first comment
		* usercomments.1.0 // second user, first comment
		* if (score >= 8.5) // 8.5 as a number

		if l.peek() == '.' {
			l.accept(".")
			if !l.accept(tokenDigits) {
				return l.errorf("Malformed number.")
			}
			l.acceptRun(tokenDigits)
		}
	*/
	l.emit(TokenNumber)
	return l.stateCode
}

func (l *lexer) stateString() lexerStateFn {
	l.ignore()
	l.startcol-- // we're starting the position at the first "
	for !l.accept(`"`) {
		switch l.next() {
		case '\\':
			// escape sequence
			switch l.peek() {
			case '"', '\\':
				l.next()
			default:
				return l.errorf("Unknown escape sequence: \\%c", l.peek())
			}
		case EOF:
			return l.errorf("Unexpected EOF, string not closed.")
		case '\n':
			return l.errorf("Newline in string is not allowed.")
		}
	}
	l.backup()
	l.emit(TokenString)

	l.next() // skip the closing "
	l.ignore()

	return l.stateCode
}
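
// Escape-handling sketch (illustrative): lexing the block {{ "say \"hi\"" }}
// should produce a TokenString whose Val is `say "hi"`; stateString steps over
// the escaped quotes and emit() rewrites \" and \\ into their literal forms.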