github.com/yunabe/lgo@v0.0.0-20190709125917-42c42d410fdf/cmd/lgo-internal/utf8_reader.go (about)

     1  package main
     2  
     3  import (
     4  	"errors"
     5  	"io"
     6  	"unicode/utf8"
     7  )
     8  
     9  var errBufTooSmall = errors.New("buf is too small")
    10  
    11  type utf8AwareReader struct {
    12  	reader     io.Reader
    13  	residual   []byte
    14  	pendingErr error
    15  }
    16  
    17  func newUTF8AwareReader(r io.Reader) *utf8AwareReader {
    18  	return &utf8AwareReader{
    19  		reader:   r,
    20  		residual: make([]byte, 0, utf8.UTFMax-1),
    21  	}
    22  }
    23  
    24  func (r *utf8AwareReader) Read(p []byte) (int, error) {
    25  	if r.pendingErr != nil {
    26  		err := r.pendingErr
    27  		r.pendingErr = nil
    28  		return 0, err
    29  	}
    30  	if len(p) < utf8.UTFMax*2 {
    31  		return 0, errBufTooSmall
    32  	}
    33  	if len(p) <= len(r.residual) {
    34  		panic("r.residual must be smaller than utf8.UTFMax")
    35  	}
    36  	copy(p, r.residual)
    37  	n, err := r.reader.Read(p[len(r.residual):])
    38  
    39  	if n == 0 && err != nil && len(r.residual) > 0 {
    40  		r.pendingErr = err
    41  		copy(p, r.residual)
    42  		n = len(r.residual)
    43  		r.residual = r.residual[:0]
    44  		return n, nil
    45  	}
    46  	n += len(r.residual)
    47  	if err != nil {
    48  		// e.g. io.EOF
    49  		r.residual = r.residual[:0]
    50  		return n, err
    51  	}
    52  	for i := 0; i < utf8.UTFMax && i < n; i++ {
    53  		ru, _ := utf8.DecodeLastRune(p[:n-i])
    54  		if ru != utf8.RuneError {
    55  			r.residual = r.residual[:i]
    56  			copy(r.residual, p[n-i:])
    57  			return n - i, nil
    58  		}
    59  	}
    60  	// The last utf8.UTFMax bytes are invalid as UTF8. It means the data is not valid UTF8 string.
    61  	// Return everything.
    62  	return n, nil
    63  }