github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/third_party/code.google.com/p/go-charset/charset/utf8.go (about)

     1  package charset
     2  
     3  import (
     4  	"unicode/utf8"
     5  )
     6  
// init registers the translator from this file under the class name "utf8".
// toUTF8 is passed for both function arguments of registerClass.
// NOTE(review): registerClass is defined elsewhere; presumably the two
// arguments are the "from" and "to" translator factories — confirm.
func init() {
	registerClass("utf8", toUTF8, toUTF8)
}
    10  
// translateToUTF8 is the translator returned by toUTF8. Its Translate method
// copies well-formed UTF-8 through unchanged and substitutes the encoding of
// utf8.RuneError for bytes that do not form a valid encoding.
type translateToUTF8 struct {
	// scratch is the output buffer that Translate re-sizes via ensureCap
	// and builds its result in on every call.
	scratch []byte
}
    14  
// errorBytes is the UTF-8 encoding of utf8.RuneError. Translate appends it to
// the output in place of each input byte that is not part of a valid encoding.
var errorBytes = []byte(string(utf8.RuneError))

// errorRuneLen is the length in bytes of the UTF-8 encoding of utf8.RuneError.
// Translate multiplies the input length by it when sizing its scratch buffer.
const errorRuneLen = len(string(utf8.RuneError))
    18  
    19  func (p *translateToUTF8) Translate(data []byte, eof bool) (int, []byte, error) {
    20  	p.scratch = ensureCap(p.scratch, (len(data))*errorRuneLen)
    21  	buf := p.scratch[:0]
    22  	for i := 0; i < len(data); {
    23  		// fast path for ASCII
    24  		if b := data[i]; b < utf8.RuneSelf {
    25  			buf = append(buf, b)
    26  			i++
    27  			continue
    28  		}
    29  		_, size := utf8.DecodeRune(data[i:])
    30  		if size == 1 {
    31  			if !eof && !utf8.FullRune(data) {
    32  				// When DecodeRune has converted only a single
    33  				// byte, we know there must be some kind of error
    34  				// because we know the byte's not ASCII.
    35  				// If we aren't at EOF, and it's an incomplete
    36  				// rune encoding, then we return to process
    37  				// the final bytes in a subsequent call.
    38  				return i, buf, nil
    39  			}
    40  			buf = append(buf, errorBytes...)
    41  		} else {
    42  			buf = append(buf, data[i:i+size]...)
    43  		}
    44  		i += size
    45  	}
    46  	return len(data), buf, nil
    47  }
    48  
    49  func toUTF8(arg string) (Translator, error) {
    50  	return new(translateToUTF8), nil
    51  }