github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/third_party/code.google.com/p/go-charset/charset/utf8.go (about) 1 package charset 2 3 import ( 4 "unicode/utf8" 5 ) 6 7 func init() { 8 registerClass("utf8", toUTF8, toUTF8) 9 } 10 11 type translateToUTF8 struct { 12 scratch []byte 13 } 14 15 var errorBytes = []byte(string(utf8.RuneError)) 16 17 const errorRuneLen = len(string(utf8.RuneError)) 18 19 func (p *translateToUTF8) Translate(data []byte, eof bool) (int, []byte, error) { 20 p.scratch = ensureCap(p.scratch, (len(data))*errorRuneLen) 21 buf := p.scratch[:0] 22 for i := 0; i < len(data); { 23 // fast path for ASCII 24 if b := data[i]; b < utf8.RuneSelf { 25 buf = append(buf, b) 26 i++ 27 continue 28 } 29 _, size := utf8.DecodeRune(data[i:]) 30 if size == 1 { 31 if !eof && !utf8.FullRune(data) { 32 // When DecodeRune has converted only a single 33 // byte, we know there must be some kind of error 34 // because we know the byte's not ASCII. 35 // If we aren't at EOF, and it's an incomplete 36 // rune encoding, then we return to process 37 // the final bytes in a subsequent call. 38 return i, buf, nil 39 } 40 buf = append(buf, errorBytes...) 41 } else { 42 buf = append(buf, data[i:i+size]...) 43 } 44 i += size 45 } 46 return len(data), buf, nil 47 } 48 49 func toUTF8(arg string) (Translator, error) { 50 return new(translateToUTF8), nil 51 }