github.com/searKing/golang/go@v1.2.74/go/token/consume.go (about) 1 // Copyright 2020 The searKing Author. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package token 6 7 import ( 8 "bytes" 9 "regexp" 10 "strings" 11 "unicode" 12 ) 13 14 // A mode value is a set of flags (or 0). 15 // They control scanner behavior. 16 // 17 type Mode uint 18 19 const ( 20 ModeCaseSensitive Mode = 1 << iota 21 ModeRegexpPerl 22 ModeRegexpPosix 23 ) 24 25 func ConsumeIdentifier(inputs []rune, current int, runeType Type) (token Token, next int) { 26 posBegin := current 27 if current < 0 { 28 current = 0 29 } 30 31 if current >= len(inputs) { 32 return Token{ 33 Typ: TypeEOF, 34 Value: "", 35 }, len(inputs) 36 } 37 38 char := inputs[current] 39 var value bytes.Buffer 40 41 // identifier = letter { letter | unicode_digit } . 42 // letter = unicode_letter | "_" . 43 // decimal_digit = "0" … "9" . 44 // octal_digit = "0" … "7" . 45 // hex_digit = "0" … "9" | "A" … "F" | "a" … "f" . 46 // newline = /* the Unicode code point U+000A */ . 47 // unicode_char = /* an arbitrary Unicode code point except newline */ . 48 // unicode_letter = /* a Unicode code point classified as "Letter" */ . 49 // unicode_digit = /* a Unicode code point classified as "Number, decimal digit" */ . 50 if unicode.IsLetter(char) || char == '_' { 51 for unicode.IsLetter(char) || char == '_' || unicode.IsNumber(char) || char == '.' { 52 value.WriteRune(char) 53 current++ 54 if current >= len(inputs) { 55 break 56 } 57 char = inputs[current] 58 } 59 60 return Token{ 61 Typ: runeType, 62 Value: value.String(), 63 }, current 64 } 65 // restore pos 66 return Token{Typ: TypeILLEGAL}, posBegin 67 } 68 69 func ComsumeRunesAny(inputs []rune, current int, runeType Type, expectRunes ...rune) (token Token, next int) { 70 posBegin := current 71 if current < 0 { 72 current = 0 73 } 74 75 if current >= len(inputs) { 76 return Token{ 77 Typ: TypeEOF, 78 Value: "", 79 }, len(inputs) 80 } 81 82 char := inputs[current] 83 current++ 84 85 for _, expect := range expectRunes { 86 if char == expect { 87 return Token{ 88 Typ: runeType, 89 Value: "", 90 }, current 91 } 92 } 93 // restore pos 94 return Token{Typ: TypeILLEGAL}, posBegin 95 } 96 97 func ComsumeStringsAny(inputs []rune, current int, runeType Type, mode Mode, expectStrs ...string) (token Token, next int) { 98 posBegin := current 99 if current < 0 { 100 current = 0 101 } 102 103 if current >= len(inputs) { 104 return Token{ 105 Typ: TypeEOF, 106 Value: "", 107 }, len(inputs) 108 } 109 110 // regex mode 111 if mode&(ModeRegexpPerl|ModeRegexpPosix) != 0 { 112 for _, expect := range expectStrs { 113 var reg *regexp.Regexp 114 if mode&ModeRegexpPosix != 0 { 115 reg = regexp.MustCompilePOSIX(expect) 116 } else { 117 reg = regexp.MustCompile(expect) 118 } 119 120 matches := reg.FindStringSubmatch(string(inputs[current:])) 121 if len(matches) == 0 { 122 continue 123 } 124 125 current = current + len(matches[0]) 126 return Token{ 127 Typ: runeType, 128 Value: string(matches[0]), 129 }, current 130 } 131 // restore pos 132 return Token{Typ: TypeILLEGAL}, posBegin 133 } 134 135 // none regexp 136 for _, expect := range expectStrs { 137 138 endPos := current + len(expect) 139 if endPos > len(inputs) { 140 continue 141 } 142 selected := inputs[current:endPos] 143 144 if ((mode&ModeCaseSensitive != 0) && strings.EqualFold(string(selected), expect)) || 145 string(selected) == expect { 146 return Token{ 147 Typ: runeType, 148 Value: string(selected), 149 }, endPos 150 } 151 } 152 // restore pos 153 return Token{Typ: TypeILLEGAL}, posBegin 154 }