github.com/informationsea/shellflow@v0.1.3/flowscript/flowscript_tokeninzer.go (about)

     1  package flowscript
     2  
     3  import (
     4  	"bufio"
     5  	"errors"
     6  	"io"
     7  	"regexp"
     8  	"strings"
     9  	"unicode/utf8"
    10  )
    11  
    12  var whiteSpaceRegexp = regexp.MustCompile("\\s")
    13  var digitRegexp = regexp.MustCompile("\\d")
    14  var wordCharacterRegexp = regexp.MustCompile("\\w")
    15  
    16  func checkMatch(ch rune, exp *regexp.Regexp) bool {
    17  	var data [5]byte
    18  	var p = data[:]
    19  	utf8.EncodeRune(p, ch)
    20  	return exp.Match(p)
    21  }
    22  
    23  func IsDigit(ch rune) bool {
    24  	return checkMatch(ch, digitRegexp)
    25  }
    26  
    27  func IsWhiteSpace(ch rune) bool {
    28  	return checkMatch(ch, whiteSpaceRegexp)
    29  }
    30  
    31  func IsWordCharacter(ch rune) bool {
    32  	return checkMatch(ch, wordCharacterRegexp)
    33  }
    34  
    35  type RuneCheck func(ch rune) (match bool)
    36  
    37  func takeRuneWhile(data []byte, checker RuneCheck) (length int) {
    38  	length = 0
    39  	for {
    40  		ch, l := utf8.DecodeRune(data)
    41  		if checker(ch) {
    42  			length += l
    43  			data = data[l:]
    44  		} else {
    45  			break
    46  		}
    47  	}
    48  	return
    49  }
    50  
    51  func SplitToken(data []byte, atEOF bool) (advance int, token []byte, err error) {
    52  	advance = 0
    53  	token = nil
    54  	err = nil
    55  
    56  	l := takeRuneWhile(data, IsWhiteSpace)
    57  	advance += l
    58  	data = data[l:]
    59  
    60  	firstChar, l3 := utf8.DecodeRune(data)
    61  
    62  	if IsDigit(firstChar) {
    63  		l2 := takeRuneWhile(data, IsDigit)
    64  		if len(data) != l2 || atEOF {
    65  			advance += l2
    66  			token = data[:l2]
    67  			err = nil
    68  		}
    69  	} else if IsWordCharacter(firstChar) {
    70  		l2 := takeRuneWhile(data, IsWordCharacter)
    71  		if len(data) != l2 || atEOF {
    72  			advance += l2
    73  			token = data[:l2]
    74  			err = nil
    75  		}
    76  	} else if firstChar == rune('"') {
    77  		escaping := false
    78  		currentLength := l3
    79  		for {
    80  			ch, l2 := utf8.DecodeRune(data[currentLength:])
    81  			currentLength += l2
    82  
    83  			if (l2 > 0 && ch == rune('"') && !escaping) || (l2 == 0 && atEOF) {
    84  				advance += currentLength
    85  				token = data[:currentLength]
    86  				err = nil
    87  				return
    88  			} else if l2 == 0 {
    89  				break
    90  			} else if ch == rune('\\') && !escaping {
    91  				escaping = true
    92  			} else {
    93  				escaping = false
    94  			}
    95  		}
    96  	} else if l3 > 0 {
    97  		advance += l3
    98  		token = data[:l3]
    99  		err = nil
   100  	}
   101  
   102  	if atEOF && token == nil {
   103  		err = errors.New("FAIL")
   104  	}
   105  
   106  	return
   107  }
   108  
   109  func NewTokenizer(r io.Reader) *LookAheadScanner {
   110  	scanner := bufio.NewScanner(r)
   111  	scanner.Split(SplitToken)
   112  	return NewLookAheadScanner(scanner)
   113  }
   114  
   115  func NewTokenizerFromText(text string) *LookAheadScanner {
   116  	reader := strings.NewReader(text)
   117  	return NewTokenizer(reader)
   118  }