github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/third_party/code.google.com/p/go-charset/charset/codepage.go (about)

     1  package charset
     2  
     3  import (
     4  	"fmt"
     5  	"unicode/utf8"
     6  )
     7  
     8  func init() {
     9  	registerClass("cp", fromCodePage, toCodePage)
    10  }
    11  
    12  type translateFromCodePage struct {
    13  	byte2rune *[256]rune
    14  	scratch   []byte
    15  }
    16  
    17  type cpKeyFrom string
    18  type cpKeyTo string
    19  
    20  func (p *translateFromCodePage) Translate(data []byte, eof bool) (int, []byte, error) {
    21  	p.scratch = ensureCap(p.scratch, len(data)*utf8.UTFMax)[:0]
    22  	buf := p.scratch
    23  	for _, x := range data {
    24  		r := p.byte2rune[x]
    25  		if r < utf8.RuneSelf {
    26  			buf = append(buf, byte(r))
    27  			continue
    28  		}
    29  		size := utf8.EncodeRune(buf[len(buf):cap(buf)], r)
    30  		buf = buf[0 : len(buf)+size]
    31  	}
    32  	return len(data), buf, nil
    33  }
    34  
    35  type toCodePageInfo struct {
    36  	rune2byte map[rune]byte
    37  	// same gives the number of runes at start of code page that map exactly to
    38  	// unicode.
    39  	same rune
    40  }
    41  
    42  type translateToCodePage struct {
    43  	toCodePageInfo
    44  	scratch []byte
    45  }
    46  
    47  func (p *translateToCodePage) Translate(data []byte, eof bool) (int, []byte, error) {
    48  	p.scratch = ensureCap(p.scratch, len(data))
    49  	buf := p.scratch[:0]
    50  
    51  	for i := 0; i < len(data); {
    52  		r := rune(data[i])
    53  		size := 1
    54  		if r >= utf8.RuneSelf {
    55  			r, size = utf8.DecodeRune(data[i:])
    56  			if size == 1 && !eof && !utf8.FullRune(data[i:]) {
    57  				return i, buf, nil
    58  			}
    59  		}
    60  
    61  		var b byte
    62  		if r < p.same {
    63  			b = byte(r)
    64  		} else {
    65  			var ok bool
    66  			b, ok = p.rune2byte[r]
    67  			if !ok {
    68  				b = '?'
    69  			}
    70  		}
    71  		buf = append(buf, b)
    72  		i += size
    73  	}
    74  	return len(data), buf, nil
    75  }
    76  
    77  func fromCodePage(arg string) (Translator, error) {
    78  	runes, err := cache(cpKeyFrom(arg), func() (interface{}, error) {
    79  		data, err := readFile(arg)
    80  		if err != nil {
    81  			return nil, err
    82  		}
    83  		runes := []rune(string(data))
    84  		if len(runes) != 256 {
    85  			return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, len(runes))
    86  		}
    87  		r := new([256]rune)
    88  		copy(r[:], runes)
    89  		return r, nil
    90  	})
    91  	if err != nil {
    92  		return nil, err
    93  	}
    94  	return &translateFromCodePage{byte2rune: runes.(*[256]rune)}, nil
    95  }
    96  
    97  func toCodePage(arg string) (Translator, error) {
    98  	m, err := cache(cpKeyTo(arg), func() (interface{}, error) {
    99  		data, err := readFile(arg)
   100  		if err != nil {
   101  			return nil, err
   102  		}
   103  
   104  		info := toCodePageInfo{
   105  			rune2byte: make(map[rune]byte),
   106  			same:      256,
   107  		}
   108  		atStart := true
   109  		i := rune(0)
   110  		for _, r := range string(data) {
   111  			if atStart {
   112  				if r == i {
   113  					i++
   114  					continue
   115  				}
   116  				info.same = i
   117  				atStart = false
   118  			}
   119  			info.rune2byte[r] = byte(i)
   120  			i++
   121  		}
   122  		// TODO fix tables
   123  		// fmt.Printf("%s, same = %d\n", arg, info.same)
   124  		if i != 256 {
   125  			return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, i)
   126  		}
   127  		return info, nil
   128  	})
   129  	if err != nil {
   130  		return nil, err
   131  	}
   132  	return &translateToCodePage{toCodePageInfo: m.(toCodePageInfo)}, nil
   133  }