github.com/grahambrereton-form3/tilt@v0.10.18/internal/rty/tokenizer.go (about)

     1  package rty
     2  
     3  import (
     4  	"io"
     5  	"unicode"
     6  )
     7  
     8  // A tokenizer that breaks a string up by spaces.
     9  //
    10  // Ideally, we'd use the table-based algorithm defined in:
    11  // http://www.unicode.org/reports/tr14/
    12  // like this package does:
    13  // https://godoc.org/github.com/gorilla/i18n/linebreak
    14  // but I didn't find a good implementation of that algorithm in Go
    15  // (the one above is half-implemented and doesn't work for
    16  // the most basic things).
    17  //
    18  // This is a half-assed implementation that should have a similar interface
    19  // to a "real" implementation.
    20  type Tokenizer struct {
    21  	runes []rune
    22  	pos   int
    23  }
    24  
    25  func NewTokenizer(s string) *Tokenizer {
    26  	return &Tokenizer{
    27  		runes: []rune(s),
    28  		pos:   0,
    29  	}
    30  }
    31  
    32  func (t *Tokenizer) Next() ([]rune, error) {
    33  	if t.pos >= len(t.runes) {
    34  		return nil, io.EOF
    35  	}
    36  
    37  	firstRune := t.runes[t.pos]
    38  	isSpace := unicode.IsSpace(firstRune)
    39  	result := []rune{t.runes[t.pos]}
    40  	t.pos++
    41  
    42  	for t.pos < len(t.runes) {
    43  		nextRune := t.runes[t.pos]
    44  		isNextSpace := unicode.IsSpace(nextRune)
    45  		if isNextSpace || isSpace {
    46  			return result, nil
    47  		}
    48  
    49  		result = append(result, nextRune)
    50  		t.pos++
    51  	}
    52  
    53  	return result, nil
    54  }