github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/third_party/code.google.com/p/go-charset/charset/utf16.go (about)

     1  package charset
     2  
import (
	"encoding/binary"
	"errors"
	"unicode/utf16"
	"unicode/utf8"
)
     8  
     9  func init() {
    10  	registerClass("utf16", fromUTF16, toUTF16)
    11  }
    12  
    13  type translateFromUTF16 struct {
    14  	first   bool
    15  	endian  binary.ByteOrder
    16  	scratch []byte
    17  }
    18  
    19  func (p *translateFromUTF16) Translate(data []byte, eof bool) (int, []byte, error) {
    20  	data = data[0 : len(data)&^1] // round to even number of bytes.
    21  	if len(data) < 2 {
    22  		return 0, nil, nil
    23  	}
    24  	n := 0
    25  	if p.first && p.endian == nil {
    26  		switch binary.BigEndian.Uint16(data) {
    27  		case 0xfeff:
    28  			p.endian = binary.BigEndian
    29  			data = data[2:]
    30  			n += 2
    31  		case 0xfffe:
    32  			p.endian = binary.LittleEndian
    33  			data = data[2:]
    34  			n += 2
    35  		default:
    36  			p.endian = guessEndian(data)
    37  		}
    38  		p.first = false
    39  	}
    40  
    41  	p.scratch = p.scratch[:0]
    42  	for ; len(data) > 0; data = data[2:] {
    43  		p.scratch = appendRune(p.scratch, rune(p.endian.Uint16(data)))
    44  		n += 2
    45  	}
    46  	return n, p.scratch, nil
    47  }
    48  
    49  func guessEndian(data []byte) binary.ByteOrder {
    50  	// XXX TODO
    51  	return binary.LittleEndian
    52  }
    53  
    54  type translateToUTF16 struct {
    55  	first   bool
    56  	endian  binary.ByteOrder
    57  	scratch []byte
    58  }
    59  
    60  func (p *translateToUTF16) Translate(data []byte, eof bool) (int, []byte, error) {
    61  	p.scratch = ensureCap(p.scratch[:0], (len(data)+1)*2)
    62  	if p.first {
    63  		p.scratch = p.scratch[0:2]
    64  		p.endian.PutUint16(p.scratch, 0xfeff)
    65  		p.first = false
    66  	}
    67  	n := 0
    68  	for len(data) > 0 {
    69  		if !utf8.FullRune(data) && !eof {
    70  			break
    71  		}
    72  		r, size := utf8.DecodeRune(data)
    73  		// TODO if r > 65535?
    74  
    75  		slen := len(p.scratch)
    76  		p.scratch = p.scratch[0 : slen+2]
    77  		p.endian.PutUint16(p.scratch[slen:], uint16(r))
    78  		data = data[size:]
    79  		n += size
    80  	}
    81  	return n, p.scratch, nil
    82  }
    83  
    84  func getEndian(arg string) (binary.ByteOrder, error) {
    85  	switch arg {
    86  	case "le":
    87  		return binary.LittleEndian, nil
    88  	case "be":
    89  		return binary.BigEndian, nil
    90  	case "":
    91  		return nil, nil
    92  	}
    93  	return nil, errors.New("charset: unknown utf16 endianness")
    94  }
    95  
    96  func fromUTF16(arg string) (Translator, error) {
    97  	endian, err := getEndian(arg)
    98  	if err != nil {
    99  		return nil, err
   100  	}
   101  	return &translateFromUTF16{first: true, endian: endian}, nil
   102  }
   103  
   104  func toUTF16(arg string) (Translator, error) {
   105  	endian, err := getEndian(arg)
   106  	if err != nil {
   107  		return nil, err
   108  	}
   109  	return &translateToUTF16{first: false, endian: endian}, nil
   110  }