github.com/go-xe2/third@v1.0.3/golang.org/x/text/encoding/encoding_test.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package encoding_test 6 7 import ( 8 "io/ioutil" 9 "strings" 10 "testing" 11 12 "github.com/go-xe2/third/golang.org/x/text/encoding" 13 "github.com/go-xe2/third/golang.org/x/text/encoding/charmap" 14 "github.com/go-xe2/third/golang.org/x/text/transform" 15 ) 16 17 func TestEncodeInvalidUTF8(t *testing.T) { 18 inputs := []string{ 19 "hello.", 20 "wo\ufffdld.", 21 "ABC\xff\x80\x80", // Invalid UTF-8. 22 "\x80\x80\x80\x80\x80", 23 "\x80\x80D\x80\x80", // Valid rune at "D". 24 "E\xed\xa0\x80\xed\xbf\xbfF", // Two invalid UTF-8 runes (surrogates). 25 "G", 26 "H\xe2\x82", // U+20AC in UTF-8 is "\xe2\x82\xac", which we split over two 27 "\xacI\xe2\x82", // input lines. It maps to 0x80 in the Windows-1252 encoding. 28 } 29 // Each invalid source byte becomes '\x1a'. 30 want := strings.Replace("hello.wo?ld.ABC??????????D??E??????FGH\x80I??", "?", "\x1a", -1) 31 32 transformer := encoding.ReplaceUnsupported(charmap.Windows1252.NewEncoder()) 33 gotBuf := make([]byte, 0, 1024) 34 src := make([]byte, 0, 1024) 35 for i, input := range inputs { 36 dst := make([]byte, 1024) 37 src = append(src, input...) 38 atEOF := i == len(inputs)-1 39 nDst, nSrc, err := transformer.Transform(dst, src, atEOF) 40 gotBuf = append(gotBuf, dst[:nDst]...) 41 src = src[nSrc:] 42 if err != nil && err != transform.ErrShortSrc { 43 t.Fatalf("i=%d: %v", i, err) 44 } 45 if atEOF && err != nil { 46 t.Fatalf("i=%d: atEOF: %v", i, err) 47 } 48 } 49 if got := string(gotBuf); got != want { 50 t.Fatalf("\ngot %+q\nwant %+q", got, want) 51 } 52 } 53 54 func TestReplacement(t *testing.T) { 55 for _, direction := range []string{"Decode", "Encode"} { 56 enc, want := (transform.Transformer)(nil), "" 57 if direction == "Decode" { 58 enc = encoding.Replacement.NewDecoder() 59 want = "\ufffd" 60 } else { 61 enc = encoding.Replacement.NewEncoder() 62 want = "AB\x00CD\ufffdYZ" 63 } 64 sr := strings.NewReader("AB\x00CD\x80YZ") 65 g, err := ioutil.ReadAll(transform.NewReader(sr, enc)) 66 if err != nil { 67 t.Errorf("%s: ReadAll: %v", direction, err) 68 continue 69 } 70 if got := string(g); got != want { 71 t.Errorf("%s:\ngot %q\nwant %q", direction, got, want) 72 continue 73 } 74 } 75 } 76 77 func TestUTF8Validator(t *testing.T) { 78 testCases := []struct { 79 desc string 80 dstSize int 81 src string 82 atEOF bool 83 want string 84 wantErr error 85 }{ 86 { 87 "empty input", 88 100, 89 "", 90 false, 91 "", 92 nil, 93 }, 94 { 95 "valid 1-byte 1-rune input", 96 100, 97 "a", 98 false, 99 "a", 100 nil, 101 }, 102 { 103 "valid 3-byte 1-rune input", 104 100, 105 "\u1234", 106 false, 107 "\u1234", 108 nil, 109 }, 110 { 111 "valid 5-byte 3-rune input", 112 100, 113 "a\u0100\u0101", 114 false, 115 "a\u0100\u0101", 116 nil, 117 }, 118 { 119 "perfectly sized dst (non-ASCII)", 120 5, 121 "a\u0100\u0101", 122 false, 123 "a\u0100\u0101", 124 nil, 125 }, 126 { 127 "short dst (non-ASCII)", 128 4, 129 "a\u0100\u0101", 130 false, 131 "a\u0100", 132 transform.ErrShortDst, 133 }, 134 { 135 "perfectly sized dst (ASCII)", 136 5, 137 "abcde", 138 false, 139 "abcde", 140 nil, 141 }, 142 { 143 "short dst (ASCII)", 144 4, 145 "abcde", 146 false, 147 "abcd", 148 transform.ErrShortDst, 149 }, 150 { 151 "partial input (!EOF)", 152 100, 153 "a\u0100\xf1", 154 false, 155 "a\u0100", 156 transform.ErrShortSrc, 157 }, 158 { 159 "invalid input (EOF)", 160 100, 161 "a\u0100\xf1", 162 true, 163 "a\u0100", 164 encoding.ErrInvalidUTF8, 165 }, 166 { 167 "invalid input (!EOF)", 168 100, 169 "a\u0100\x80", 170 false, 171 "a\u0100", 172 encoding.ErrInvalidUTF8, 173 }, 174 { 175 "invalid input (above U+10FFFF)", 176 100, 177 "a\u0100\xf7\xbf\xbf\xbf", 178 false, 179 "a\u0100", 180 encoding.ErrInvalidUTF8, 181 }, 182 { 183 "invalid input (surrogate half)", 184 100, 185 "a\u0100\xed\xa0\x80", 186 false, 187 "a\u0100", 188 encoding.ErrInvalidUTF8, 189 }, 190 } 191 for _, tc := range testCases { 192 dst := make([]byte, tc.dstSize) 193 nDst, nSrc, err := encoding.UTF8Validator.Transform(dst, []byte(tc.src), tc.atEOF) 194 if nDst < 0 || len(dst) < nDst { 195 t.Errorf("%s: nDst=%d out of range", tc.desc, nDst) 196 continue 197 } 198 got := string(dst[:nDst]) 199 if got != tc.want || nSrc != len(tc.want) || err != tc.wantErr { 200 t.Errorf("%s:\ngot %+q, %d, %v\nwant %+q, %d, %v", 201 tc.desc, got, nSrc, err, tc.want, len(tc.want), tc.wantErr) 202 continue 203 } 204 } 205 } 206 207 func TestErrorHandler(t *testing.T) { 208 testCases := []struct { 209 desc string 210 handler func(*encoding.Encoder) *encoding.Encoder 211 sizeDst int 212 src, want string 213 nSrc int 214 err error 215 }{ 216 { 217 desc: "one rune replacement", 218 handler: encoding.ReplaceUnsupported, 219 sizeDst: 100, 220 src: "\uAC00", 221 want: "\x1a", 222 nSrc: 3, 223 }, 224 { 225 desc: "mid-stream rune replacement", 226 handler: encoding.ReplaceUnsupported, 227 sizeDst: 100, 228 src: "a\uAC00bcd\u00e9", 229 want: "a\x1abcd\xe9", 230 nSrc: 9, 231 }, 232 { 233 desc: "at end rune replacement", 234 handler: encoding.ReplaceUnsupported, 235 sizeDst: 10, 236 src: "\u00e9\uAC00", 237 want: "\xe9\x1a", 238 nSrc: 5, 239 }, 240 { 241 desc: "short buffer replacement", 242 handler: encoding.ReplaceUnsupported, 243 sizeDst: 1, 244 src: "\u00e9\uAC00", 245 want: "\xe9", 246 nSrc: 2, 247 err: transform.ErrShortDst, 248 }, 249 { 250 desc: "one rune html escape", 251 handler: encoding.HTMLEscapeUnsupported, 252 sizeDst: 100, 253 src: "\uAC00", 254 want: "가", 255 nSrc: 3, 256 }, 257 { 258 desc: "mid-stream html escape", 259 handler: encoding.HTMLEscapeUnsupported, 260 sizeDst: 100, 261 src: "\u00e9\uAC00dcba", 262 want: "\xe9가dcba", 263 nSrc: 9, 264 }, 265 { 266 desc: "short buffer html escape", 267 handler: encoding.HTMLEscapeUnsupported, 268 sizeDst: 9, 269 src: "ab\uAC01", 270 want: "ab", 271 nSrc: 2, 272 err: transform.ErrShortDst, 273 }, 274 } 275 for i, tc := range testCases { 276 tr := tc.handler(charmap.Windows1250.NewEncoder()) 277 b := make([]byte, tc.sizeDst) 278 nDst, nSrc, err := tr.Transform(b, []byte(tc.src), true) 279 if err != tc.err { 280 t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err) 281 } 282 if got := string(b[:nDst]); got != tc.want { 283 t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want) 284 } 285 if nSrc != tc.nSrc { 286 t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc) 287 } 288 289 } 290 }