github.com/informationsea/shellflow@v0.1.3/flowscript/flowscript_tokeninzer.go (about) 1 package flowscript 2 3 import ( 4 "bufio" 5 "errors" 6 "io" 7 "regexp" 8 "strings" 9 "unicode/utf8" 10 ) 11 12 var whiteSpaceRegexp = regexp.MustCompile("\\s") 13 var digitRegexp = regexp.MustCompile("\\d") 14 var wordCharacterRegexp = regexp.MustCompile("\\w") 15 16 func checkMatch(ch rune, exp *regexp.Regexp) bool { 17 var data [5]byte 18 var p = data[:] 19 utf8.EncodeRune(p, ch) 20 return exp.Match(p) 21 } 22 23 func IsDigit(ch rune) bool { 24 return checkMatch(ch, digitRegexp) 25 } 26 27 func IsWhiteSpace(ch rune) bool { 28 return checkMatch(ch, whiteSpaceRegexp) 29 } 30 31 func IsWordCharacter(ch rune) bool { 32 return checkMatch(ch, wordCharacterRegexp) 33 } 34 35 type RuneCheck func(ch rune) (match bool) 36 37 func takeRuneWhile(data []byte, checker RuneCheck) (length int) { 38 length = 0 39 for { 40 ch, l := utf8.DecodeRune(data) 41 if checker(ch) { 42 length += l 43 data = data[l:] 44 } else { 45 break 46 } 47 } 48 return 49 } 50 51 func SplitToken(data []byte, atEOF bool) (advance int, token []byte, err error) { 52 advance = 0 53 token = nil 54 err = nil 55 56 l := takeRuneWhile(data, IsWhiteSpace) 57 advance += l 58 data = data[l:] 59 60 firstChar, l3 := utf8.DecodeRune(data) 61 62 if IsDigit(firstChar) { 63 l2 := takeRuneWhile(data, IsDigit) 64 if len(data) != l2 || atEOF { 65 advance += l2 66 token = data[:l2] 67 err = nil 68 } 69 } else if IsWordCharacter(firstChar) { 70 l2 := takeRuneWhile(data, IsWordCharacter) 71 if len(data) != l2 || atEOF { 72 advance += l2 73 token = data[:l2] 74 err = nil 75 } 76 } else if firstChar == rune('"') { 77 escaping := false 78 currentLength := l3 79 for { 80 ch, l2 := utf8.DecodeRune(data[currentLength:]) 81 currentLength += l2 82 83 if (l2 > 0 && ch == rune('"') && !escaping) || (l2 == 0 && atEOF) { 84 advance += currentLength 85 token = data[:currentLength] 86 err = nil 87 return 88 } else if l2 == 0 { 89 break 90 } else if ch == rune('\\') && !escaping { 91 escaping = true 92 } else { 93 escaping = false 94 } 95 } 96 } else if l3 > 0 { 97 advance += l3 98 token = data[:l3] 99 err = nil 100 } 101 102 if atEOF && token == nil { 103 err = errors.New("FAIL") 104 } 105 106 return 107 } 108 109 func NewTokenizer(r io.Reader) *LookAheadScanner { 110 scanner := bufio.NewScanner(r) 111 scanner.Split(SplitToken) 112 return NewLookAheadScanner(scanner) 113 } 114 115 func NewTokenizerFromText(text string) *LookAheadScanner { 116 reader := strings.NewReader(text) 117 return NewTokenizer(reader) 118 }