package main

// Origin: github.com/yunabe/lgo, cmd/lgo-internal/utf8_reader.go

import (
	"errors"
	"io"
	"unicode/utf8"
)

// errBufTooSmall is returned by utf8AwareReader.Read when the caller's buffer
// is shorter than 2*utf8.UTFMax bytes.
var errBufTooSmall = errors.New("buf is too small")

// utf8AwareReader wraps an io.Reader so that a Read call does not end in the
// middle of a multi-byte UTF-8 sequence: the bytes of an incomplete trailing
// rune are held back and returned together with the rest of the rune by a
// later call.
type utf8AwareReader struct {
	// reader is the underlying byte source.
	reader io.Reader
	// residual holds the bytes (at most utf8.UTFMax-1) of an incomplete
	// trailing rune that were read but not yet handed to the caller.
	residual []byte
	// pendingErr is an error from the underlying reader that is reported by
	// the next Read call, after the held-back bytes have been delivered.
	pendingErr error
}

// newUTF8AwareReader returns a utf8AwareReader that reads from r.
func newUTF8AwareReader(r io.Reader) *utf8AwareReader {
	return &utf8AwareReader{
		reader:   r,
		residual: make([]byte, 0, utf8.UTFMax-1),
	}
}

// Read reads from the underlying reader into p and returns the number of bytes
// written to p.
//
// p must be at least 2*utf8.UTFMax bytes long, otherwise errBufTooSmall is
// returned. When the data read so far ends with the first bytes of a
// multi-byte UTF-8 sequence, those bytes are withheld and prepended to the
// data of the next call. Bytes that are not valid UTF-8 are passed through
// unchanged.
//
// An error from the underlying reader that arrives together with data is
// returned together with that data (including any withheld bytes). An error
// that arrives with no new data while bytes are still withheld is deferred:
// the withheld bytes are returned with a nil error and the error is reported
// by the following call.
func (r *utf8AwareReader) Read(p []byte) (int, error) {
	if r.pendingErr != nil {
		err := r.pendingErr
		r.pendingErr = nil
		return 0, err
	}
	if len(p) < utf8.UTFMax*2 {
		return 0, errBufTooSmall
	}

	// Bytes withheld by the previous call go first. There are at most
	// utf8.UTFMax-1 of them, so p always has room left for new data.
	n := copy(p, r.residual)
	r.residual = r.residual[:0]

	for {
		m, err := r.reader.Read(p[n:])
		n += m
		if err != nil {
			if m == 0 && n > 0 {
				// Nothing new arrived: flush what is buffered now and
				// report the error (e.g. io.EOF) on the next call.
				r.pendingErr = err
				return n, nil
			}
			return n, err
		}

		tail := incompleteTailLen(p[:n])
		if tail < n {
			r.residual = append(r.residual, p[n-tail:n]...)
			return n - tail, nil
		}

		// Everything buffered so far is one incomplete rune (or nothing).
		if m == 0 {
			// The underlying reader made no progress; mirror its (0, nil)
			// result and keep the partial rune for the next call.
			r.residual = append(r.residual, p[:n]...)
			return 0, nil
		}
		// Otherwise keep reading until the rune completes. tail is at most
		// utf8.UTFMax-1, so this loop ends once a few more bytes arrive.
	}
}

// incompleteTailLen returns the number of trailing bytes of b that form the
// beginning of a multi-byte UTF-8 sequence whose remaining bytes are missing.
// It returns 0 when b ends on a rune boundary or ends in bytes that cannot be
// completed into a valid sequence.
func incompleteTailLen(b []byte) int {
	for i := 1; i < utf8.UTFMax && i <= len(b); i++ {
		start := len(b) - i
		if !utf8.RuneStart(b[start]) {
			continue // continuation byte; the lead byte is further back
		}
		// FullRune also reports true for invalid encodings, so only a
		// sequence that is genuinely waiting for more bytes is held back.
		if utf8.FullRune(b[start:]) {
			return 0
		}
		return i
	}
	return 0
}