github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/third_party/code.google.com/p/go-charset/charset/codepage.go (about) 1 package charset 2 3 import ( 4 "fmt" 5 "unicode/utf8" 6 ) 7 8 func init() { 9 registerClass("cp", fromCodePage, toCodePage) 10 } 11 12 type translateFromCodePage struct { 13 byte2rune *[256]rune 14 scratch []byte 15 } 16 17 type cpKeyFrom string 18 type cpKeyTo string 19 20 func (p *translateFromCodePage) Translate(data []byte, eof bool) (int, []byte, error) { 21 p.scratch = ensureCap(p.scratch, len(data)*utf8.UTFMax)[:0] 22 buf := p.scratch 23 for _, x := range data { 24 r := p.byte2rune[x] 25 if r < utf8.RuneSelf { 26 buf = append(buf, byte(r)) 27 continue 28 } 29 size := utf8.EncodeRune(buf[len(buf):cap(buf)], r) 30 buf = buf[0 : len(buf)+size] 31 } 32 return len(data), buf, nil 33 } 34 35 type toCodePageInfo struct { 36 rune2byte map[rune]byte 37 // same gives the number of runes at start of code page that map exactly to 38 // unicode. 39 same rune 40 } 41 42 type translateToCodePage struct { 43 toCodePageInfo 44 scratch []byte 45 } 46 47 func (p *translateToCodePage) Translate(data []byte, eof bool) (int, []byte, error) { 48 p.scratch = ensureCap(p.scratch, len(data)) 49 buf := p.scratch[:0] 50 51 for i := 0; i < len(data); { 52 r := rune(data[i]) 53 size := 1 54 if r >= utf8.RuneSelf { 55 r, size = utf8.DecodeRune(data[i:]) 56 if size == 1 && !eof && !utf8.FullRune(data[i:]) { 57 return i, buf, nil 58 } 59 } 60 61 var b byte 62 if r < p.same { 63 b = byte(r) 64 } else { 65 var ok bool 66 b, ok = p.rune2byte[r] 67 if !ok { 68 b = '?' 69 } 70 } 71 buf = append(buf, b) 72 i += size 73 } 74 return len(data), buf, nil 75 } 76 77 func fromCodePage(arg string) (Translator, error) { 78 runes, err := cache(cpKeyFrom(arg), func() (interface{}, error) { 79 data, err := readFile(arg) 80 if err != nil { 81 return nil, err 82 } 83 runes := []rune(string(data)) 84 if len(runes) != 256 { 85 return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, len(runes)) 86 } 87 r := new([256]rune) 88 copy(r[:], runes) 89 return r, nil 90 }) 91 if err != nil { 92 return nil, err 93 } 94 return &translateFromCodePage{byte2rune: runes.(*[256]rune)}, nil 95 } 96 97 func toCodePage(arg string) (Translator, error) { 98 m, err := cache(cpKeyTo(arg), func() (interface{}, error) { 99 data, err := readFile(arg) 100 if err != nil { 101 return nil, err 102 } 103 104 info := toCodePageInfo{ 105 rune2byte: make(map[rune]byte), 106 same: 256, 107 } 108 atStart := true 109 i := rune(0) 110 for _, r := range string(data) { 111 if atStart { 112 if r == i { 113 i++ 114 continue 115 } 116 info.same = i 117 atStart = false 118 } 119 info.rune2byte[r] = byte(i) 120 i++ 121 } 122 // TODO fix tables 123 // fmt.Printf("%s, same = %d\n", arg, info.same) 124 if i != 256 { 125 return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, i) 126 } 127 return info, nil 128 }) 129 if err != nil { 130 return nil, err 131 } 132 return &translateToCodePage{toCodePageInfo: m.(toCodePageInfo)}, nil 133 }