github.com/grahambrereton-form3/tilt@v0.10.18/internal/rty/tokenizer.go (about) 1 package rty 2 3 import ( 4 "io" 5 "unicode" 6 ) 7 8 // A tokenizer that breaks a string up by spaces. 9 // 10 // Ideally, we'd use the table-based algorithm defined in: 11 // http://www.unicode.org/reports/tr14/ 12 // like this package does: 13 // https://godoc.org/github.com/gorilla/i18n/linebreak 14 // but I didn't find a good implementation of that algorithm in Go 15 // (the one above is half-implemented and doesn't work for 16 // the most basic things). 17 // 18 // This is a half-assed implementation that should have a similar interface 19 // to a "real" implementation. 20 type Tokenizer struct { 21 runes []rune 22 pos int 23 } 24 25 func NewTokenizer(s string) *Tokenizer { 26 return &Tokenizer{ 27 runes: []rune(s), 28 pos: 0, 29 } 30 } 31 32 func (t *Tokenizer) Next() ([]rune, error) { 33 if t.pos >= len(t.runes) { 34 return nil, io.EOF 35 } 36 37 firstRune := t.runes[t.pos] 38 isSpace := unicode.IsSpace(firstRune) 39 result := []rune{t.runes[t.pos]} 40 t.pos++ 41 42 for t.pos < len(t.runes) { 43 nextRune := t.runes[t.pos] 44 isNextSpace := unicode.IsSpace(nextRune) 45 if isNextSpace || isSpace { 46 return result, nil 47 } 48 49 result = append(result, nextRune) 50 t.pos++ 51 } 52 53 return result, nil 54 }