github.com/searKing/golang/go@v1.2.117/go/token/consume.go (about) 1 // Copyright 2020 The searKing Author. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package token 6 7 import ( 8 "bytes" 9 "regexp" 10 "strings" 11 "unicode" 12 ) 13 14 // A mode value is a set of flags (or 0). 15 // They control scanner behavior. 16 type Mode uint 17 18 const ( 19 ModeCaseSensitive Mode = 1 << iota 20 ModeRegexpPerl 21 ModeRegexpPosix 22 ) 23 24 func ConsumeIdentifier(inputs []rune, current int, runeType Type) (token Token, next int) { 25 posBegin := current 26 if current < 0 { 27 current = 0 28 } 29 30 if current >= len(inputs) { 31 return Token{ 32 Typ: TypeEOF, 33 Value: "", 34 }, len(inputs) 35 } 36 37 char := inputs[current] 38 var value bytes.Buffer 39 40 // identifier = letter { letter | unicode_digit } . 41 // letter = unicode_letter | "_" . 42 // decimal_digit = "0" … "9" . 43 // octal_digit = "0" … "7" . 44 // hex_digit = "0" … "9" | "A" … "F" | "a" … "f" . 45 // newline = /* the Unicode code point U+000A */ . 46 // unicode_char = /* an arbitrary Unicode code point except newline */ . 47 // unicode_letter = /* a Unicode code point classified as "Letter" */ . 48 // unicode_digit = /* a Unicode code point classified as "Number, decimal digit" */ . 49 if unicode.IsLetter(char) || char == '_' { 50 for unicode.IsLetter(char) || char == '_' || unicode.IsNumber(char) || char == '.' { 51 value.WriteRune(char) 52 current++ 53 if current >= len(inputs) { 54 break 55 } 56 char = inputs[current] 57 } 58 59 return Token{ 60 Typ: runeType, 61 Value: value.String(), 62 }, current 63 } 64 // restore pos 65 return Token{Typ: TypeILLEGAL}, posBegin 66 } 67 68 func ComsumeRunesAny(inputs []rune, current int, runeType Type, expectRunes ...rune) (token Token, next int) { 69 posBegin := current 70 if current < 0 { 71 current = 0 72 } 73 74 if current >= len(inputs) { 75 return Token{ 76 Typ: TypeEOF, 77 Value: "", 78 }, len(inputs) 79 } 80 81 char := inputs[current] 82 current++ 83 84 for _, expect := range expectRunes { 85 if char == expect { 86 return Token{ 87 Typ: runeType, 88 Value: "", 89 }, current 90 } 91 } 92 // restore pos 93 return Token{Typ: TypeILLEGAL}, posBegin 94 } 95 96 func ComsumeStringsAny(inputs []rune, current int, runeType Type, mode Mode, expectStrs ...string) (token Token, next int) { 97 posBegin := current 98 if current < 0 { 99 current = 0 100 } 101 102 if current >= len(inputs) { 103 return Token{ 104 Typ: TypeEOF, 105 Value: "", 106 }, len(inputs) 107 } 108 109 // regex mode 110 if mode&(ModeRegexpPerl|ModeRegexpPosix) != 0 { 111 for _, expect := range expectStrs { 112 var reg *regexp.Regexp 113 if mode&ModeRegexpPosix != 0 { 114 reg = regexp.MustCompilePOSIX(expect) 115 } else { 116 reg = regexp.MustCompile(expect) 117 } 118 119 matches := reg.FindStringSubmatch(string(inputs[current:])) 120 if len(matches) == 0 { 121 continue 122 } 123 124 current = current + len(matches[0]) 125 return Token{ 126 Typ: runeType, 127 Value: string(matches[0]), 128 }, current 129 } 130 // restore pos 131 return Token{Typ: TypeILLEGAL}, posBegin 132 } 133 134 // none regexp 135 for _, expect := range expectStrs { 136 137 endPos := current + len(expect) 138 if endPos > len(inputs) { 139 continue 140 } 141 selected := inputs[current:endPos] 142 143 if ((mode&ModeCaseSensitive != 0) && strings.EqualFold(string(selected), expect)) || 144 string(selected) == expect { 145 return Token{ 146 Typ: runeType, 147 Value: string(selected), 148 }, endPos 149 } 150 } 151 // restore pos 152 return Token{Typ: TypeILLEGAL}, posBegin 153 }