github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/third_party/code.google.com/p/go-charset/charset/utf16.go (about) 1 package charset 2 3 import ( 4 "encoding/binary" 5 "errors" 6 "unicode/utf8" 7 ) 8 9 func init() { 10 registerClass("utf16", fromUTF16, toUTF16) 11 } 12 13 type translateFromUTF16 struct { 14 first bool 15 endian binary.ByteOrder 16 scratch []byte 17 } 18 19 func (p *translateFromUTF16) Translate(data []byte, eof bool) (int, []byte, error) { 20 data = data[0 : len(data)&^1] // round to even number of bytes. 21 if len(data) < 2 { 22 return 0, nil, nil 23 } 24 n := 0 25 if p.first && p.endian == nil { 26 switch binary.BigEndian.Uint16(data) { 27 case 0xfeff: 28 p.endian = binary.BigEndian 29 data = data[2:] 30 n += 2 31 case 0xfffe: 32 p.endian = binary.LittleEndian 33 data = data[2:] 34 n += 2 35 default: 36 p.endian = guessEndian(data) 37 } 38 p.first = false 39 } 40 41 p.scratch = p.scratch[:0] 42 for ; len(data) > 0; data = data[2:] { 43 p.scratch = appendRune(p.scratch, rune(p.endian.Uint16(data))) 44 n += 2 45 } 46 return n, p.scratch, nil 47 } 48 49 func guessEndian(data []byte) binary.ByteOrder { 50 // XXX TODO 51 return binary.LittleEndian 52 } 53 54 type translateToUTF16 struct { 55 first bool 56 endian binary.ByteOrder 57 scratch []byte 58 } 59 60 func (p *translateToUTF16) Translate(data []byte, eof bool) (int, []byte, error) { 61 p.scratch = ensureCap(p.scratch[:0], (len(data)+1)*2) 62 if p.first { 63 p.scratch = p.scratch[0:2] 64 p.endian.PutUint16(p.scratch, 0xfeff) 65 p.first = false 66 } 67 n := 0 68 for len(data) > 0 { 69 if !utf8.FullRune(data) && !eof { 70 break 71 } 72 r, size := utf8.DecodeRune(data) 73 // TODO if r > 65535? 74 75 slen := len(p.scratch) 76 p.scratch = p.scratch[0 : slen+2] 77 p.endian.PutUint16(p.scratch[slen:], uint16(r)) 78 data = data[size:] 79 n += size 80 } 81 return n, p.scratch, nil 82 } 83 84 func getEndian(arg string) (binary.ByteOrder, error) { 85 switch arg { 86 case "le": 87 return binary.LittleEndian, nil 88 case "be": 89 return binary.BigEndian, nil 90 case "": 91 return nil, nil 92 } 93 return nil, errors.New("charset: unknown utf16 endianness") 94 } 95 96 func fromUTF16(arg string) (Translator, error) { 97 endian, err := getEndian(arg) 98 if err != nil { 99 return nil, err 100 } 101 return &translateFromUTF16{first: true, endian: endian}, nil 102 } 103 104 func toUTF16(arg string) (Translator, error) { 105 endian, err := getEndian(arg) 106 if err != nil { 107 return nil, err 108 } 109 return &translateToUTF16{first: false, endian: endian}, nil 110 }