github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/encoding/encoding_test.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package encoding_test
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"io/ioutil"
    12  	"strings"
    13  	"testing"
    14  
    15  	"golang.org/x/text/encoding"
    16  	"golang.org/x/text/encoding/charmap"
    17  	"golang.org/x/text/encoding/japanese"
    18  	"golang.org/x/text/encoding/korean"
    19  	"golang.org/x/text/encoding/simplifiedchinese"
    20  	"golang.org/x/text/encoding/traditionalchinese"
    21  	"golang.org/x/text/encoding/unicode"
    22  	"golang.org/x/text/transform"
    23  )
    24  
    25  func trim(s string) string {
    26  	if len(s) < 120 {
    27  		return s
    28  	}
    29  	return s[:50] + "..." + s[len(s)-50:]
    30  }
    31  
    32  var basicTestCases = []struct {
    33  	e         encoding.Encoding
    34  	encPrefix string
    35  	encSuffix string
    36  	encoded   string
    37  	utf8      string
    38  }{
    39  	// The encoded forms can be verified by the iconv program:
    40  	// $ echo 月日は百代 | iconv -f UTF-8 -t SHIFT-JIS | xxd
    41  
    42  	// Charmap tests.
    43  	{
    44  		e:       charmap.CodePage437,
    45  		encoded: "H\x82ll\x93 \x9d\xa7\xf4\x9c\xbe",
    46  		utf8:    "Héllô ¥º⌠£╛",
    47  	},
    48  	{
    49  		e:       charmap.CodePage866,
    50  		encoded: "H\xf3\xd3o \x98\xfd\x9f\xdd\xa1",
    51  		utf8:    "Hє╙o Ш¤Я▌б",
    52  	},
    53  	{
    54  		e:       charmap.ISO8859_2,
    55  		encoded: "Hel\xe5\xf5",
    56  		utf8:    "Helĺő",
    57  	},
    58  	{
    59  		e:       charmap.ISO8859_3,
    60  		encoded: "He\xbd\xd4",
    61  		utf8:    "He½Ô",
    62  	},
    63  	{
    64  		e:       charmap.ISO8859_4,
    65  		encoded: "Hel\xb6\xf8",
    66  		utf8:    "Helļø",
    67  	},
    68  	{
    69  		e:       charmap.ISO8859_5,
    70  		encoded: "H\xd7\xc6o",
    71  		utf8:    "HзЦo",
    72  	},
    73  	{
    74  		e:       charmap.ISO8859_6,
    75  		encoded: "Hel\xc2\xc9",
    76  		utf8:    "Helآة",
    77  	},
    78  	{
    79  		e:       charmap.ISO8859_7,
    80  		encoded: "H\xeel\xebo",
    81  		utf8:    "Hξlλo",
    82  	},
    83  	{
    84  		e:       charmap.ISO8859_8,
    85  		encoded: "Hel\xf5\xed",
    86  		utf8:    "Helץם",
    87  	},
    88  	{
    89  		e:       charmap.ISO8859_10,
    90  		encoded: "H\xea\xbfo",
    91  		utf8:    "Hęŋo",
    92  	},
    93  	{
    94  		e:       charmap.ISO8859_13,
    95  		encoded: "H\xe6l\xf9o",
    96  		utf8:    "Hęlło",
    97  	},
    98  	{
    99  		e:       charmap.ISO8859_14,
   100  		encoded: "He\xfe\xd0o",
   101  		utf8:    "HeŷŴo",
   102  	},
   103  	{
   104  		e:       charmap.ISO8859_15,
   105  		encoded: "H\xa4ll\xd8",
   106  		utf8:    "H€llØ",
   107  	},
   108  	{
   109  		e:       charmap.ISO8859_16,
   110  		encoded: "H\xe6ll\xbd",
   111  		utf8:    "Hællœ",
   112  	},
   113  	{
   114  		e:       charmap.KOI8R,
   115  		encoded: "He\x93\xad\x9c",
   116  		utf8:    "He⌠╜°",
   117  	},
   118  	{
   119  		e:       charmap.KOI8U,
   120  		encoded: "He\x93\xad\x9c",
   121  		utf8:    "He⌠ґ°",
   122  	},
   123  	{
   124  		e:       charmap.Macintosh,
   125  		encoded: "He\xdf\xd7",
   126  		utf8:    "Hefl◊",
   127  	},
   128  	{
   129  		e:       charmap.MacintoshCyrillic,
   130  		encoded: "He\xbe\x94",
   131  		utf8:    "HeЊФ",
   132  	},
   133  	{
   134  		e:       charmap.Windows874,
   135  		encoded: "He\xb7\xf0",
   136  		utf8:    "Heท๐",
   137  	},
   138  	{
   139  		e:       charmap.Windows1250,
   140  		encoded: "He\xe5\xe5o",
   141  		utf8:    "Heĺĺo",
   142  	},
   143  	{
   144  		e:       charmap.Windows1251,
   145  		encoded: "H\xball\xfe",
   146  		utf8:    "Hєllю",
   147  	},
   148  	{
   149  		e:       charmap.Windows1252,
   150  		encoded: "H\xe9ll\xf4 \xa5\xbA\xae\xa3\xd0",
   151  		utf8:    "Héllô ¥º®£Ð",
   152  	},
   153  	{
   154  		e:       charmap.Windows1253,
   155  		encoded: "H\xe5ll\xd6",
   156  		utf8:    "HεllΦ",
   157  	},
   158  	{
   159  		e:       charmap.Windows1254,
   160  		encoded: "\xd0ello",
   161  		utf8:    "Ğello",
   162  	},
   163  	{
   164  		e:       charmap.Windows1255,
   165  		encoded: "He\xd4o",
   166  		utf8:    "Heװo",
   167  	},
   168  	{
   169  		e:       charmap.Windows1256,
   170  		encoded: "H\xdbllo",
   171  		utf8:    "Hغllo",
   172  	},
   173  	{
   174  		e:       charmap.Windows1257,
   175  		encoded: "He\xeflo",
   176  		utf8:    "Heļlo",
   177  	},
   178  	{
   179  		e:       charmap.Windows1258,
   180  		encoded: "Hell\xf5",
   181  		utf8:    "Hellơ",
   182  	},
   183  	{
   184  		e:       charmap.XUserDefined,
   185  		encoded: "\x00\x40\x7f\x80\xab\xff",
   186  		utf8:    "\u0000\u0040\u007f\uf780\uf7ab\uf7ff",
   187  	},
   188  
   189  	// UTF-16 tests.
   190  	{
   191  		e:       unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
   192  		encoded: "\x00\x57\x00\xe4\xd8\x35\xdd\x65",
   193  		utf8:    "\x57\u00e4\U0001d565",
   194  	},
   195  	{
   196  		e:         utf16BEEB,
   197  		encPrefix: "\xfe\xff",
   198  		encoded:   "\x00\x57\x00\xe4\xd8\x35\xdd\x65",
   199  		utf8:      "\x57\u00e4\U0001d565",
   200  	},
   201  	{
   202  		e:       unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
   203  		encoded: "\x57\x00\xe4\x00\x35\xd8\x65\xdd",
   204  		utf8:    "\x57\u00e4\U0001d565",
   205  	},
   206  	{
   207  		e:         utf16LEEB,
   208  		encPrefix: "\xff\xfe",
   209  		encoded:   "\x57\x00\xe4\x00\x35\xd8\x65\xdd",
   210  		utf8:      "\x57\u00e4\U0001d565",
   211  	},
   212  
   213  	// Chinese tests.
   214  	//
   215  	// "\u0081\u00de\u00df\u00e0\u00e1\u00e2\u00e3\uffff\U00010000" is a
   216  	// nonsense string that contains GB18030 encodable codepoints of which
   217  	// only U+00E0 and U+00E1 are GBK encodable.
   218  	//
   219  	// "A\u3000\u554a\u4e02\u4e90\u72dc\u7349\u02ca\u2588Z€" is a nonsense
   220  	// string that contains ASCII and GBK encodable codepoints from Levels
   221  	// 1-5 as well as the Euro sign.
   222  	//
   223  	// "A\u43f0\u4c32\U00027267\u3000\U0002910d\u79d4Z€" is a nonsense string
   224  	// that contains ASCII and Big5 encodable codepoints from the Basic
   225  	// Multilingual Plane and the Supplementary Ideographic Plane as well as
   226  	// the Euro sign.
   227  	//
   228  	// "花间一壶酒,独酌无相亲。" (simplified) and
   229  	// "花間一壺酒,獨酌無相親。" (traditional)
   230  	// are from the 8th century poem "Yuè Xià Dú Zhuó".
   231  	{
   232  		e: simplifiedchinese.GB18030,
   233  		encoded: "\x81\x30\x81\x31\x81\x30\x89\x37\x81\x30\x89\x38\xa8\xa4\xa8\xa2" +
   234  			"\x81\x30\x89\x39\x81\x30\x8a\x30\x84\x31\xa4\x39\x90\x30\x81\x30",
   235  		utf8: "\u0081\u00de\u00df\u00e0\u00e1\u00e2\u00e3\uffff\U00010000",
   236  	},
   237  	{
   238  		e: simplifiedchinese.GB18030,
   239  		encoded: "\xbb\xa8\xbc\xe4\xd2\xbb\xba\xf8\xbe\xc6\xa3\xac\xb6\xc0\xd7\xc3" +
   240  			"\xce\xde\xcf\xe0\xc7\xd7\xa1\xa3",
   241  		utf8: "花间一壶酒,独酌无相亲。",
   242  	},
   243  	{
   244  		e:       simplifiedchinese.GBK,
   245  		encoded: "A\xa1\xa1\xb0\xa1\x81\x40\x81\x80\xaa\x40\xaa\x80\xa8\x40\xa8\x80Z\x80",
   246  		utf8:    "A\u3000\u554a\u4e02\u4e90\u72dc\u7349\u02ca\u2588Z€",
   247  	},
   248  	{
   249  		e: simplifiedchinese.GBK,
   250  		encoded: "\xbb\xa8\xbc\xe4\xd2\xbb\xba\xf8\xbe\xc6\xa3\xac\xb6\xc0\xd7\xc3" +
   251  			"\xce\xde\xcf\xe0\xc7\xd7\xa1\xa3",
   252  		utf8: "花间一壶酒,独酌无相亲。",
   253  	},
   254  	{
   255  		e:       simplifiedchinese.HZGB2312,
   256  		encoded: "A~{\x21\x21~~\x30\x21~}Z~~",
   257  		utf8:    "A\u3000~\u554aZ~",
   258  	},
   259  	{
   260  		e:         simplifiedchinese.HZGB2312,
   261  		encPrefix: "~{",
   262  		encoded:   ";(<dR;:x>F#,6@WCN^O`GW!#",
   263  		utf8:      "花间一壶酒,独酌无相亲。",
   264  	},
   265  	{
   266  		e:       traditionalchinese.Big5,
   267  		encoded: "A\x87\x40\x87\x41\x87\x45\xa1\x40\xfe\xfd\xfe\xfeZ\xa3\xe1",
   268  		utf8:    "A\u43f0\u4c32\U00027267\u3000\U0002910d\u79d4Z€",
   269  	},
   270  	{
   271  		e: traditionalchinese.Big5,
   272  		encoded: "\xaa\xe1\xb6\xa1\xa4\x40\xb3\xfd\xb0\x73\xa1\x41\xbf\x57\xb0\x75" +
   273  			"\xb5\x4c\xac\xdb\xbf\xcb\xa1\x43",
   274  		utf8: "花間一壺酒,獨酌無相親。",
   275  	},
   276  
   277  	// Japanese tests.
   278  	//
   279  	// "A。カ゚ 0208: etc 0212: etc" is a nonsense string that contains ASCII, half-width
   280  	// kana, JIS X 0208 (including two near the kink in the Shift JIS second byte
   281  	// encoding) and JIS X 0212 encodable codepoints.
   282  	//
   283  	// "月日は百代の過客にして、行かふ年も又旅人也。" is from the 17th century poem
   284  	// "Oku no Hosomichi" and contains both hiragana and kanji.
   285  	{
   286  		e: japanese.EUCJP,
   287  		encoded: "A\x8e\xa1\x8e\xb6\x8e\xdf " +
   288  			"0208: \xa1\xa1\xa1\xa2\xa1\xdf\xa1\xe0\xa1\xfd\xa1\xfe\xa2\xa1\xa2\xa2\xf4\xa6 " +
   289  			"0212: \x8f\xa2\xaf\x8f\xed\xe3",
   290  		utf8: "A。カ゚ " +
   291  			"0208: \u3000\u3001\u00d7\u00f7\u25ce\u25c7\u25c6\u25a1\u7199 " +
   292  			"0212: \u02d8\u9fa5",
   293  	},
   294  	{
   295  		e: japanese.EUCJP,
   296  		encoded: "\xb7\xee\xc6\xfc\xa4\xcf\xc9\xb4\xc2\xe5\xa4\xce\xb2\xe1\xb5\xd2" +
   297  			"\xa4\xcb\xa4\xb7\xa4\xc6\xa1\xa2\xb9\xd4\xa4\xab\xa4\xd5\xc7\xaf" +
   298  			"\xa4\xe2\xcb\xf4\xce\xb9\xbf\xcd\xcc\xe9\xa1\xa3",
   299  		utf8: "月日は百代の過客にして、行かふ年も又旅人也。",
   300  	},
   301  	{
   302  		e:         japanese.ISO2022JP,
   303  		encSuffix: "\x1b\x28\x42",
   304  		encoded: "\x1b\x28\x49\x21\x36\x5f\x1b\x28\x42 " +
   305  			"0208: \x1b\x24\x42\x21\x21\x21\x22\x21\x5f\x21\x60\x21\x7d\x21\x7e\x22\x21\x22\x22\x74\x26",
   306  		utf8: "。カ゚ " +
   307  			"0208: \u3000\u3001\u00d7\u00f7\u25ce\u25c7\u25c6\u25a1\u7199",
   308  	},
   309  	{
   310  		e:         japanese.ISO2022JP,
   311  		encPrefix: "\x1b\x24\x42",
   312  		encSuffix: "\x1b\x28\x42",
   313  		encoded: "\x37\x6e\x46\x7c\x24\x4f\x49\x34\x42\x65\x24\x4e\x32\x61\x35\x52" +
   314  			"\x24\x4b\x24\x37\x24\x46\x21\x22\x39\x54\x24\x2b\x24\x55\x47\x2f" +
   315  			"\x24\x62\x4b\x74\x4e\x39\x3f\x4d\x4c\x69\x21\x23",
   316  		utf8: "月日は百代の過客にして、行かふ年も又旅人也。",
   317  	},
   318  	{
   319  		e: japanese.ShiftJIS,
   320  		encoded: "A\xa1\xb6\xdf " +
   321  			"0208: \x81\x40\x81\x41\x81\x7e\x81\x80\x81\x9d\x81\x9e\x81\x9f\x81\xa0\xea\xa4",
   322  		utf8: "A。カ゚ " +
   323  			"0208: \u3000\u3001\u00d7\u00f7\u25ce\u25c7\u25c6\u25a1\u7199",
   324  	},
   325  	{
   326  		e: japanese.ShiftJIS,
   327  		encoded: "\x8c\x8e\x93\xfa\x82\xcd\x95\x53\x91\xe3\x82\xcc\x89\xdf\x8b\x71" +
   328  			"\x82\xc9\x82\xb5\x82\xc4\x81\x41\x8d\x73\x82\xa9\x82\xd3\x94\x4e" +
   329  			"\x82\xe0\x96\x94\x97\xb7\x90\x6c\x96\xe7\x81\x42",
   330  		utf8: "月日は百代の過客にして、行かふ年も又旅人也。",
   331  	},
   332  
   333  	// Korean tests.
   334  	//
   335  	// "A\uac02\uac35\uac56\ud401B\ud408\ud620\ud624C\u4f3d\u8a70D" is a
   336  	// nonsense string that contains ASCII, Hangul and CJK ideographs.
   337  	//
   338  	// "세계야, 안녕" translates as "Hello, world".
   339  	{
   340  		e:       korean.EUCKR,
   341  		encoded: "A\x81\x41\x81\x61\x81\x81\xc6\xfeB\xc7\xa1\xc7\xfe\xc8\xa1C\xca\xa1\xfd\xfeD",
   342  		utf8:    "A\uac02\uac35\uac56\ud401B\ud408\ud620\ud624C\u4f3d\u8a70D",
   343  	},
   344  	{
   345  		e:       korean.EUCKR,
   346  		encoded: "\xbc\xbc\xb0\xe8\xbe\xdf\x2c\x20\xbe\xc8\xb3\xe7",
   347  		utf8:    "세계야, 안녕",
   348  	},
   349  }
   350  
   351  func TestBasics(t *testing.T) {
   352  	for _, tc := range basicTestCases {
   353  		for _, direction := range []string{"Decode", "Encode"} {
   354  			var coder Transcoder
   355  			var want, src, wPrefix, sPrefix, wSuffix, sSuffix string
   356  			if direction == "Decode" {
   357  				coder, want, src = tc.e.NewDecoder(), tc.utf8, tc.encoded
   358  				wPrefix, sPrefix, wSuffix, sSuffix = "", tc.encPrefix, "", tc.encSuffix
   359  			} else {
   360  				coder, want, src = tc.e.NewEncoder(), tc.encoded, tc.utf8
   361  				wPrefix, sPrefix, wSuffix, sSuffix = tc.encPrefix, "", tc.encSuffix, ""
   362  			}
   363  
   364  			dst := make([]byte, len(wPrefix)+len(want)+len(wSuffix))
   365  			nDst, nSrc, err := coder.Transform(dst, []byte(sPrefix+src+sSuffix), true)
   366  			if err != nil {
   367  				t.Errorf("%v: %s: %v", tc.e, direction, err)
   368  				continue
   369  			}
   370  			if nDst != len(wPrefix)+len(want)+len(wSuffix) {
   371  				t.Errorf("%v: %s: nDst got %d, want %d",
   372  					tc.e, direction, nDst, len(wPrefix)+len(want)+len(wSuffix))
   373  				continue
   374  			}
   375  			if nSrc != len(sPrefix)+len(src)+len(sSuffix) {
   376  				t.Errorf("%v: %s: nSrc got %d, want %d",
   377  					tc.e, direction, nSrc, len(sPrefix)+len(src)+len(sSuffix))
   378  				continue
   379  			}
   380  			if got := string(dst); got != wPrefix+want+wSuffix {
   381  				t.Errorf("%v: %s:\ngot  %q\nwant %q",
   382  					tc.e, direction, got, wPrefix+want+wSuffix)
   383  				continue
   384  			}
   385  
   386  			for _, n := range []int{0, 1, 2, 10, 123, 4567} {
   387  				input := sPrefix + strings.Repeat(src, n) + sSuffix
   388  				g, err := coder.String(input)
   389  				if err != nil {
   390  					t.Errorf("%v: %s: Bytes: n=%d: %v", tc.e, direction, n, err)
   391  					continue
   392  				}
   393  				if len(g) == 0 && len(input) == 0 {
   394  					// If the input is empty then the output can be empty,
   395  					// regardless of whatever wPrefix is.
   396  					continue
   397  				}
   398  				got1, want1 := string(g), wPrefix+strings.Repeat(want, n)+wSuffix
   399  				if got1 != want1 {
   400  					t.Errorf("%v: %s: ReadAll: n=%d\ngot  %q\nwant %q",
   401  						tc.e, direction, n, trim(got1), trim(want1))
   402  					continue
   403  				}
   404  			}
   405  		}
   406  	}
   407  }
   408  
   409  // TestBig5CircumflexAndMacron tests the special cases listed in
   410  // http://encoding.spec.whatwg.org/#big5
   411  // Note that these special cases aren't preserved by round-tripping through
   412  // decoding and encoding (since
   413  // http://encoding.spec.whatwg.org/index-big5.txt does not have an entry for
   414  // U+0304 or U+030C), so we can't test this in TestBasics.
   415  func TestBig5CircumflexAndMacron(t *testing.T) {
   416  	src := "\x88\x5f\x88\x60\x88\x61\x88\x62\x88\x63\x88\x64\x88\x65\x88\x66 " +
   417  		"\x88\xa2\x88\xa3\x88\xa4\x88\xa5\x88\xa6"
   418  	want := "ÓǑÒ\u00ca\u0304Ế\u00ca\u030cỀÊ " +
   419  		"ü\u00ea\u0304ế\u00ea\u030cề"
   420  	dst, err := ioutil.ReadAll(transform.NewReader(
   421  		strings.NewReader(src), traditionalchinese.Big5.NewDecoder()))
   422  	if err != nil {
   423  		t.Fatal(err)
   424  	}
   425  	if got := string(dst); got != want {
   426  		t.Fatalf("\ngot  %q\nwant %q", got, want)
   427  	}
   428  }
   429  
   430  func TestEncodeInvalidUTF8(t *testing.T) {
   431  	inputs := []string{
   432  		"hello.",
   433  		"wo\ufffdld.",
   434  		"ABC\xff\x80\x80", // Invalid UTF-8.
   435  		"\x80\x80\x80\x80\x80",
   436  		"\x80\x80D\x80\x80",          // Valid rune at "D".
   437  		"E\xed\xa0\x80\xed\xbf\xbfF", // Two invalid UTF-8 runes (surrogates).
   438  		"G",
   439  		"H\xe2\x82",     // U+20AC in UTF-8 is "\xe2\x82\xac", which we split over two
   440  		"\xacI\xe2\x82", // input lines. It maps to 0x80 in the Windows-1252 encoding.
   441  	}
   442  	// Each invalid source byte becomes '\x1a'.
   443  	want := strings.Replace("hello.wo?ld.ABC??????????D??E??????FGH\x80I??", "?", "\x1a", -1)
   444  
   445  	transformer := encoding.ReplaceUnsupported(charmap.Windows1252.NewEncoder())
   446  	gotBuf := make([]byte, 0, 1024)
   447  	src := make([]byte, 0, 1024)
   448  	for i, input := range inputs {
   449  		dst := make([]byte, 1024)
   450  		src = append(src, input...)
   451  		atEOF := i == len(inputs)-1
   452  		nDst, nSrc, err := transformer.Transform(dst, src, atEOF)
   453  		gotBuf = append(gotBuf, dst[:nDst]...)
   454  		src = src[nSrc:]
   455  		if err != nil && err != transform.ErrShortSrc {
   456  			t.Fatalf("i=%d: %v", i, err)
   457  		}
   458  		if atEOF && err != nil {
   459  			t.Fatalf("i=%d: atEOF: %v", i, err)
   460  		}
   461  	}
   462  	if got := string(gotBuf); got != want {
   463  		t.Fatalf("\ngot  %+q\nwant %+q", got, want)
   464  	}
   465  }
   466  
   467  func TestReplacement(t *testing.T) {
   468  	for _, direction := range []string{"Decode", "Encode"} {
   469  		enc, want := (transform.Transformer)(nil), ""
   470  		if direction == "Decode" {
   471  			enc = encoding.Replacement.NewDecoder()
   472  			want = "\ufffd"
   473  		} else {
   474  			enc = encoding.Replacement.NewEncoder()
   475  			want = "AB\x00CD\ufffdYZ"
   476  		}
   477  		sr := strings.NewReader("AB\x00CD\x80YZ")
   478  		g, err := ioutil.ReadAll(transform.NewReader(sr, enc))
   479  		if err != nil {
   480  			t.Errorf("%s: ReadAll: %v", direction, err)
   481  			continue
   482  		}
   483  		if got := string(g); got != want {
   484  			t.Errorf("%s:\ngot  %q\nwant %q", direction, got, want)
   485  			continue
   486  		}
   487  	}
   488  }
   489  
   490  func TestUTF8Validator(t *testing.T) {
   491  	testCases := []struct {
   492  		desc    string
   493  		dstSize int
   494  		src     string
   495  		atEOF   bool
   496  		want    string
   497  		wantErr error
   498  	}{
   499  		{
   500  			"empty input",
   501  			100,
   502  			"",
   503  			false,
   504  			"",
   505  			nil,
   506  		},
   507  		{
   508  			"valid 1-byte 1-rune input",
   509  			100,
   510  			"a",
   511  			false,
   512  			"a",
   513  			nil,
   514  		},
   515  		{
   516  			"valid 3-byte 1-rune input",
   517  			100,
   518  			"\u1234",
   519  			false,
   520  			"\u1234",
   521  			nil,
   522  		},
   523  		{
   524  			"valid 5-byte 3-rune input",
   525  			100,
   526  			"a\u0100\u0101",
   527  			false,
   528  			"a\u0100\u0101",
   529  			nil,
   530  		},
   531  		{
   532  			"perfectly sized dst (non-ASCII)",
   533  			5,
   534  			"a\u0100\u0101",
   535  			false,
   536  			"a\u0100\u0101",
   537  			nil,
   538  		},
   539  		{
   540  			"short dst (non-ASCII)",
   541  			4,
   542  			"a\u0100\u0101",
   543  			false,
   544  			"a\u0100",
   545  			transform.ErrShortDst,
   546  		},
   547  		{
   548  			"perfectly sized dst (ASCII)",
   549  			5,
   550  			"abcde",
   551  			false,
   552  			"abcde",
   553  			nil,
   554  		},
   555  		{
   556  			"short dst (ASCII)",
   557  			4,
   558  			"abcde",
   559  			false,
   560  			"abcd",
   561  			transform.ErrShortDst,
   562  		},
   563  		{
   564  			"partial input (!EOF)",
   565  			100,
   566  			"a\u0100\xf1",
   567  			false,
   568  			"a\u0100",
   569  			transform.ErrShortSrc,
   570  		},
   571  		{
   572  			"invalid input (EOF)",
   573  			100,
   574  			"a\u0100\xf1",
   575  			true,
   576  			"a\u0100",
   577  			encoding.ErrInvalidUTF8,
   578  		},
   579  		{
   580  			"invalid input (!EOF)",
   581  			100,
   582  			"a\u0100\x80",
   583  			false,
   584  			"a\u0100",
   585  			encoding.ErrInvalidUTF8,
   586  		},
   587  		{
   588  			"invalid input (above U+10FFFF)",
   589  			100,
   590  			"a\u0100\xf7\xbf\xbf\xbf",
   591  			false,
   592  			"a\u0100",
   593  			encoding.ErrInvalidUTF8,
   594  		},
   595  		{
   596  			"invalid input (surrogate half)",
   597  			100,
   598  			"a\u0100\xed\xa0\x80",
   599  			false,
   600  			"a\u0100",
   601  			encoding.ErrInvalidUTF8,
   602  		},
   603  	}
   604  	for _, tc := range testCases {
   605  		dst := make([]byte, tc.dstSize)
   606  		nDst, nSrc, err := encoding.UTF8Validator.Transform(dst, []byte(tc.src), tc.atEOF)
   607  		if nDst < 0 || len(dst) < nDst {
   608  			t.Errorf("%s: nDst=%d out of range", tc.desc, nDst)
   609  			continue
   610  		}
   611  		got := string(dst[:nDst])
   612  		if got != tc.want || nSrc != len(tc.want) || err != tc.wantErr {
   613  			t.Errorf("%s:\ngot  %+q, %d, %v\nwant %+q, %d, %v",
   614  				tc.desc, got, nSrc, err, tc.want, len(tc.want), tc.wantErr)
   615  			continue
   616  		}
   617  	}
   618  }
   619  
   620  var (
   621  	utf16LEIB = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM) // UTF-16LE (atypical interpretation)
   622  	utf16LEUB = unicode.UTF16(unicode.LittleEndian, unicode.UseBOM)    // UTF-16, LE
   623  	utf16LEEB = unicode.UTF16(unicode.LittleEndian, unicode.ExpectBOM) // UTF-16, LE, Expect
   624  	utf16BEIB = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)    // UTF-16BE (atypical interpretation)
   625  	utf16BEUB = unicode.UTF16(unicode.BigEndian, unicode.UseBOM)       // UTF-16 default
   626  	utf16BEEB = unicode.UTF16(unicode.BigEndian, unicode.ExpectBOM)    // UTF-16 Expect
   627  )
   628  
   629  func TestUTF16(t *testing.T) {
   630  	testCases := []struct {
   631  		desc    string
   632  		src     string
   633  		notEOF  bool // the inverse of atEOF
   634  		sizeDst int
   635  		want    string
   636  		nSrc    int
   637  		err     error
   638  		t       transform.Transformer
   639  	}{{
   640  		desc:    "utf-16 dec: BOM determines encoding BE (RFC 2781:3.3)",
   641  		src:     "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
   642  		sizeDst: 100,
   643  		want:    "\U00012345=Ra",
   644  		nSrc:    12,
   645  		t:       utf16BEUB.NewDecoder(),
   646  	}, {
   647  		desc:    "utf-16 dec: BOM determines encoding LE (RFC 2781:3.3)",
   648  		src:     "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
   649  		sizeDst: 100,
   650  		want:    "\U00012345=Ra",
   651  		nSrc:    12,
   652  		t:       utf16LEUB.NewDecoder(),
   653  	}, {
   654  		desc:    "utf-16 dec: BOM determines encoding LE, change default (RFC 2781:3.3)",
   655  		src:     "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
   656  		sizeDst: 100,
   657  		want:    "\U00012345=Ra",
   658  		nSrc:    12,
   659  		t:       utf16BEUB.NewDecoder(),
   660  	}, {
   661  		desc:    "utf-16 dec: Fail on missing BOM when required",
   662  		src:     "\x08\xD8\x45\xDF\x3D\x00\xFF\xFE\xFE\xFF\x00\x52\x00\x61",
   663  		sizeDst: 100,
   664  		want:    "",
   665  		nSrc:    0,
   666  		err:     unicode.ErrMissingBOM,
   667  		t:       utf16BEEB.NewDecoder(),
   668  	}, {
   669  		desc:    "utf-16 dec: SHOULD interpret text as big-endian when BOM not present (RFC 2781:4.3)",
   670  		src:     "\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
   671  		sizeDst: 100,
   672  		want:    "\U00012345=Ra",
   673  		nSrc:    10,
   674  		t:       utf16BEUB.NewDecoder(),
   675  	}, {
   676  		// This is an error according to RFC 2781. But errors in RFC 2781 are
   677  		// open to interpretations, so I guess this is fine.
   678  		desc:    "utf-16le dec: incorrect BOM is an error (RFC 2781:4.1)",
   679  		src:     "\xFE\xFF\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
   680  		sizeDst: 100,
   681  		want:    "\uFFFE\U00012345=Ra",
   682  		nSrc:    12,
   683  		t:       utf16LEIB.NewDecoder(),
   684  	}, {
   685  		desc:    "utf-16 enc: SHOULD write BOM (RFC 2781:3.3)",
   686  		src:     "\U00012345=Ra",
   687  		sizeDst: 100,
   688  		want:    "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
   689  		nSrc:    7,
   690  		t:       utf16LEUB.NewEncoder(),
   691  	}, {
   692  		desc:    "utf-16 enc: SHOULD write BOM (RFC 2781:3.3)",
   693  		src:     "\U00012345=Ra",
   694  		sizeDst: 100,
   695  		want:    "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
   696  		nSrc:    7,
   697  		t:       utf16BEUB.NewEncoder(),
   698  	}, {
   699  		desc:    "utf-16le enc: MUST NOT write BOM (RFC 2781:3.3)",
   700  		src:     "\U00012345=Ra",
   701  		sizeDst: 100,
   702  		want:    "\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
   703  		nSrc:    7,
   704  		t:       utf16LEIB.NewEncoder(),
   705  	}, {
   706  		desc:    "utf-16be dec: incorrect UTF-16: odd bytes",
   707  		src:     "\x00",
   708  		sizeDst: 100,
   709  		want:    "\uFFFD",
   710  		nSrc:    1,
   711  		t:       utf16BEIB.NewDecoder(),
   712  	}, {
   713  		desc:    "utf-16be dec: unpaired surrogate, odd bytes",
   714  		src:     "\xD8\x45\x00",
   715  		sizeDst: 100,
   716  		want:    "\uFFFD\uFFFD",
   717  		nSrc:    3,
   718  		t:       utf16BEIB.NewDecoder(),
   719  	}, {
   720  		desc:    "utf-16be dec: unpaired low surrogate + valid text",
   721  		src:     "\xD8\x45\x00a",
   722  		sizeDst: 100,
   723  		want:    "\uFFFDa",
   724  		nSrc:    4,
   725  		t:       utf16BEIB.NewDecoder(),
   726  	}, {
   727  		desc:    "utf-16be dec: unpaired low surrogate + valid text + single byte",
   728  		src:     "\xD8\x45\x00ab",
   729  		sizeDst: 100,
   730  		want:    "\uFFFDa\uFFFD",
   731  		nSrc:    5,
   732  		t:       utf16BEIB.NewDecoder(),
   733  	}, {
   734  		desc:    "utf-16le dec: unpaired high surrogate",
   735  		src:     "\x00\x00\x00\xDC\x12\xD8",
   736  		sizeDst: 100,
   737  		want:    "\x00\uFFFD\uFFFD",
   738  		nSrc:    6,
   739  		t:       utf16LEIB.NewDecoder(),
   740  	}, {
   741  		desc:    "utf-16be dec: two unpaired low surrogates",
   742  		src:     "\xD8\x45\xD8\x12",
   743  		sizeDst: 100,
   744  		want:    "\uFFFD\uFFFD",
   745  		nSrc:    4,
   746  		t:       utf16BEIB.NewDecoder(),
   747  	}, {
   748  		desc:    "utf-16be dec: short dst",
   749  		src:     "\x00a",
   750  		sizeDst: 0,
   751  		want:    "",
   752  		nSrc:    0,
   753  		t:       utf16BEIB.NewDecoder(),
   754  		err:     transform.ErrShortDst,
   755  	}, {
   756  		desc:    "utf-16be dec: short dst surrogate",
   757  		src:     "\xD8\xF5\xDC\x12",
   758  		sizeDst: 3,
   759  		want:    "",
   760  		nSrc:    0,
   761  		t:       utf16BEIB.NewDecoder(),
   762  		err:     transform.ErrShortDst,
   763  	}, {
   764  		desc:    "utf-16be dec: short dst trailing byte",
   765  		src:     "\x00",
   766  		sizeDst: 2,
   767  		want:    "",
   768  		nSrc:    0,
   769  		t:       utf16BEIB.NewDecoder(),
   770  		err:     transform.ErrShortDst,
   771  	}, {
   772  		desc:    "utf-16be dec: short src",
   773  		src:     "\x00",
   774  		notEOF:  true,
   775  		sizeDst: 3,
   776  		want:    "",
   777  		nSrc:    0,
   778  		t:       utf16BEIB.NewDecoder(),
   779  		err:     transform.ErrShortSrc,
   780  	}, {
   781  		desc:    "utf-16 enc",
   782  		src:     "\U00012345=Ra",
   783  		sizeDst: 100,
   784  		want:    "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
   785  		nSrc:    7,
   786  		t:       utf16BEUB.NewEncoder(),
   787  	}, {
   788  		desc:    "utf-16 enc: short dst normal",
   789  		src:     "\U00012345=Ra",
   790  		sizeDst: 9,
   791  		want:    "\xD8\x08\xDF\x45\x00\x3D\x00\x52",
   792  		nSrc:    6,
   793  		t:       utf16BEIB.NewEncoder(),
   794  		err:     transform.ErrShortDst,
   795  	}, {
   796  		desc:    "utf-16 enc: short dst surrogate",
   797  		src:     "\U00012345=Ra",
   798  		sizeDst: 3,
   799  		want:    "",
   800  		nSrc:    0,
   801  		t:       utf16BEIB.NewEncoder(),
   802  		err:     transform.ErrShortDst,
   803  	}, {
   804  		desc:    "utf-16 enc: short src",
   805  		src:     "\U00012345=Ra\xC2",
   806  		notEOF:  true,
   807  		sizeDst: 100,
   808  		want:    "\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
   809  		nSrc:    7,
   810  		t:       utf16BEIB.NewEncoder(),
   811  		err:     transform.ErrShortSrc,
   812  	}, {
   813  		desc:    "utf-16be dec: don't change byte order mid-stream",
   814  		src:     "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\xFF\xFE\x00\x52\x00\x61",
   815  		sizeDst: 100,
   816  		want:    "\U00012345=\ufffeRa",
   817  		nSrc:    14,
   818  		t:       utf16BEUB.NewDecoder(),
   819  	}, {
   820  		desc:    "utf-16le dec: don't change byte order mid-stream",
   821  		src:     "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\xFF\xFE\xFE\xFF\x52\x00\x61\x00",
   822  		sizeDst: 100,
   823  		want:    "\U00012345=\ufeff\ufffeRa",
   824  		nSrc:    16,
   825  		t:       utf16LEUB.NewDecoder(),
   826  	}}
   827  	for i, tc := range testCases {
   828  		b := make([]byte, tc.sizeDst)
   829  		nDst, nSrc, err := tc.t.Transform(b, []byte(tc.src), !tc.notEOF)
   830  		if err != tc.err {
   831  			t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
   832  		}
   833  		if got := string(b[:nDst]); got != tc.want {
   834  			t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
   835  		}
   836  		if nSrc != tc.nSrc {
   837  			t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
   838  		}
   839  	}
   840  }
   841  
   842  func TestErrorHandler(t *testing.T) {
   843  	testCases := []struct {
   844  		desc      string
   845  		handler   func(*encoding.Encoder) *encoding.Encoder
   846  		sizeDst   int
   847  		src, want string
   848  		nSrc      int
   849  		err       error
   850  	}{
   851  		{
   852  			desc:    "one rune replacement",
   853  			handler: encoding.ReplaceUnsupported,
   854  			sizeDst: 100,
   855  			src:     "\uAC00",
   856  			want:    "\x1a",
   857  			nSrc:    3,
   858  		},
   859  		{
   860  			desc:    "mid-stream rune replacement",
   861  			handler: encoding.ReplaceUnsupported,
   862  			sizeDst: 100,
   863  			src:     "a\uAC00bcd\u00e9",
   864  			want:    "a\x1abcd\xe9",
   865  			nSrc:    9,
   866  		},
   867  		{
   868  			desc:    "at end rune replacement",
   869  			handler: encoding.ReplaceUnsupported,
   870  			sizeDst: 10,
   871  			src:     "\u00e9\uAC00",
   872  			want:    "\xe9\x1a",
   873  			nSrc:    5,
   874  		},
   875  		{
   876  			desc:    "short buffer replacement",
   877  			handler: encoding.ReplaceUnsupported,
   878  			sizeDst: 1,
   879  			src:     "\u00e9\uAC00",
   880  			want:    "\xe9",
   881  			nSrc:    2,
   882  			err:     transform.ErrShortDst,
   883  		},
   884  		{
   885  			desc:    "one rune html escape",
   886  			handler: encoding.HTMLEscapeUnsupported,
   887  			sizeDst: 100,
   888  			src:     "\uAC00",
   889  			want:    "&#44032;",
   890  			nSrc:    3,
   891  		},
   892  		{
   893  			desc:    "mid-stream html escape",
   894  			handler: encoding.HTMLEscapeUnsupported,
   895  			sizeDst: 100,
   896  			src:     "\u00e9\uAC00dcba",
   897  			want:    "\xe9&#44032;dcba",
   898  			nSrc:    9,
   899  		},
   900  		{
   901  			desc:    "short buffer html escape",
   902  			handler: encoding.HTMLEscapeUnsupported,
   903  			sizeDst: 9,
   904  			src:     "ab\uAC01",
   905  			want:    "ab",
   906  			nSrc:    2,
   907  			err:     transform.ErrShortDst,
   908  		},
   909  	}
   910  	for i, tc := range testCases {
   911  		tr := tc.handler(charmap.Windows1250.NewEncoder())
   912  		b := make([]byte, tc.sizeDst)
   913  		nDst, nSrc, err := tr.Transform(b, []byte(tc.src), true)
   914  		if err != tc.err {
   915  			t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
   916  		}
   917  		if got := string(b[:nDst]); got != tc.want {
   918  			t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
   919  		}
   920  		if nSrc != tc.nSrc {
   921  			t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
   922  		}
   923  
   924  	}
   925  }
   926  func TestBOMOverride(t *testing.T) {
   927  	dec := unicode.BOMOverride(charmap.CodePage437.NewDecoder())
   928  	dst := make([]byte, 100)
   929  	for i, tc := range []struct {
   930  		src   string
   931  		atEOF bool
   932  		dst   string
   933  		nSrc  int
   934  		err   error
   935  	}{
   936  		0:  {"H\x82ll\x93", true, "Héllô", 5, nil},
   937  		1:  {"\uFEFFHéllö", true, "Héllö", 10, nil},
   938  		2:  {"\xFE\xFF\x00H\x00e\x00l\x00l\x00o", true, "Hello", 12, nil},
   939  		3:  {"\xFF\xFEH\x00e\x00l\x00l\x00o\x00", true, "Hello", 12, nil},
   940  		4:  {"\uFEFF", true, "", 3, nil},
   941  		5:  {"\xFE\xFF", true, "", 2, nil},
   942  		6:  {"\xFF\xFE", true, "", 2, nil},
   943  		7:  {"\xEF\xBB", true, "\u2229\u2557", 2, nil},
   944  		8:  {"\xEF", true, "\u2229", 1, nil},
   945  		9:  {"", true, "", 0, nil},
   946  		10: {"\xFE", true, "\u25a0", 1, nil},
   947  		11: {"\xFF", true, "\u00a0", 1, nil},
   948  		12: {"\xEF\xBB", false, "", 0, transform.ErrShortSrc},
   949  		13: {"\xEF", false, "", 0, transform.ErrShortSrc},
   950  		14: {"", false, "", 0, transform.ErrShortSrc},
   951  		15: {"\xFE", false, "", 0, transform.ErrShortSrc},
   952  		16: {"\xFF", false, "", 0, transform.ErrShortSrc},
   953  		17: {"\xFF\xFE", false, "", 0, transform.ErrShortSrc},
   954  	} {
   955  		dec.Reset()
   956  		nDst, nSrc, err := dec.Transform(dst, []byte(tc.src), tc.atEOF)
   957  		got := string(dst[:nDst])
   958  		if nSrc != tc.nSrc {
   959  			t.Errorf("%d: nSrc: got %d; want %d", i, nSrc, tc.nSrc)
   960  		}
   961  		if got != tc.dst {
   962  			t.Errorf("%d: got %+q; want %+q", i, got, tc.dst)
   963  		}
   964  		if err != tc.err {
   965  			t.Errorf("%d: error: got %v; want %v", i, err, tc.err)
   966  		}
   967  	}
   968  }
   969  
   970  // testdataFiles are files in testdata/*.txt.
   971  var testdataFiles = []struct {
   972  	enc           encoding.Encoding
   973  	basename, ext string
   974  }{
   975  	{charmap.Windows1252, "candide", "windows-1252"},
   976  	{japanese.EUCJP, "rashomon", "euc-jp"},
   977  	{japanese.ISO2022JP, "rashomon", "iso-2022-jp"},
   978  	{japanese.ShiftJIS, "rashomon", "shift-jis"},
   979  	{korean.EUCKR, "unsu-joh-eun-nal", "euc-kr"},
   980  	{simplifiedchinese.GBK, "sunzi-bingfa-simplified", "gbk"},
   981  	{simplifiedchinese.HZGB2312, "sunzi-bingfa-gb-levels-1-and-2", "hz-gb2312"},
   982  	{traditionalchinese.Big5, "sunzi-bingfa-traditional", "big5"},
   983  	{utf16LEIB, "candide", "utf-16le"},
   984  	{unicode.UTF8, "candide", "utf-8"},
   985  
   986  	// GB18030 is a superset of GBK and is nominally a Simplified Chinese
   987  	// encoding, but it can also represent the entire Basic Multilingual
   988  	// Plane, including codepoints like 'â' that aren't encodable by GBK.
   989  	// GB18030 on Simplified Chinese should perform similarly to GBK on
   990  	// Simplified Chinese. GB18030 on "candide" is more interesting.
   991  	{simplifiedchinese.GB18030, "candide", "gb18030"},
   992  }
   993  
   994  // Encoder or Decoder
   995  type Transcoder interface {
   996  	transform.Transformer
   997  	Bytes([]byte) ([]byte, error)
   998  	String(string) (string, error)
   999  }
  1000  
  1001  func load(direction string, enc encoding.Encoding) ([]byte, []byte, Transcoder, error) {
  1002  	basename, ext, count := "", "", 0
  1003  	for _, tf := range testdataFiles {
  1004  		if tf.enc == enc {
  1005  			basename, ext = tf.basename, tf.ext
  1006  			count++
  1007  		}
  1008  	}
  1009  	if count != 1 {
  1010  		if count == 0 {
  1011  			return nil, nil, nil, fmt.Errorf("no testdataFiles for %s", enc)
  1012  		}
  1013  		return nil, nil, nil, fmt.Errorf("too many testdataFiles for %s", enc)
  1014  	}
  1015  	dstFile := fmt.Sprintf("testdata/%s-%s.txt", basename, ext)
  1016  	srcFile := fmt.Sprintf("testdata/%s-utf-8.txt", basename)
  1017  	var coder Transcoder = encoding.ReplaceUnsupported(enc.NewEncoder())
  1018  	if direction == "Decode" {
  1019  		dstFile, srcFile = srcFile, dstFile
  1020  		coder = enc.NewDecoder()
  1021  	}
  1022  	dst, err := ioutil.ReadFile(dstFile)
  1023  	if err != nil {
  1024  		return nil, nil, nil, err
  1025  	}
  1026  	src, err := ioutil.ReadFile(srcFile)
  1027  	if err != nil {
  1028  		return nil, nil, nil, err
  1029  	}
  1030  	return dst, src, coder, nil
  1031  }
  1032  
  1033  func TestFiles(t *testing.T) {
  1034  	for _, dir := range []string{"Decode", "Encode"} {
  1035  		for _, tf := range testdataFiles {
  1036  			dst, src, transformer, err := load(dir, tf.enc)
  1037  			if err != nil {
  1038  				t.Errorf("%s, %s: load: %v", dir, tf.enc, err)
  1039  				continue
  1040  			}
  1041  			buf, err := transformer.Bytes(src)
  1042  			if err != nil {
  1043  				t.Errorf("%s, %s: transform: %v", dir, tf.enc, err)
  1044  				continue
  1045  			}
  1046  			if !bytes.Equal(buf, dst) {
  1047  				t.Errorf("%s, %s: transformed bytes did not match golden file", dir, tf.enc)
  1048  				continue
  1049  			}
  1050  		}
  1051  	}
  1052  }
  1053  
  1054  func benchmark(b *testing.B, direction string, enc encoding.Encoding) {
  1055  	_, src, transformer, err := load(direction, enc)
  1056  	if err != nil {
  1057  		b.Fatal(err)
  1058  	}
  1059  	b.SetBytes(int64(len(src)))
  1060  	b.ResetTimer()
  1061  	for i := 0; i < b.N; i++ {
  1062  		r := transform.NewReader(bytes.NewReader(src), transformer)
  1063  		io.Copy(ioutil.Discard, r)
  1064  	}
  1065  }
  1066  
  1067  func BenchmarkBig5Decoder(b *testing.B)      { benchmark(b, "Decode", traditionalchinese.Big5) }
  1068  func BenchmarkBig5Encoder(b *testing.B)      { benchmark(b, "Encode", traditionalchinese.Big5) }
  1069  func BenchmarkCharmapDecoder(b *testing.B)   { benchmark(b, "Decode", charmap.Windows1252) }
  1070  func BenchmarkCharmapEncoder(b *testing.B)   { benchmark(b, "Encode", charmap.Windows1252) }
  1071  func BenchmarkEUCJPDecoder(b *testing.B)     { benchmark(b, "Decode", japanese.EUCJP) }
  1072  func BenchmarkEUCJPEncoder(b *testing.B)     { benchmark(b, "Encode", japanese.EUCJP) }
  1073  func BenchmarkEUCKRDecoder(b *testing.B)     { benchmark(b, "Decode", korean.EUCKR) }
  1074  func BenchmarkEUCKREncoder(b *testing.B)     { benchmark(b, "Encode", korean.EUCKR) }
  1075  func BenchmarkGB18030Decoder(b *testing.B)   { benchmark(b, "Decode", simplifiedchinese.GB18030) }
  1076  func BenchmarkGB18030Encoder(b *testing.B)   { benchmark(b, "Encode", simplifiedchinese.GB18030) }
  1077  func BenchmarkGBKDecoder(b *testing.B)       { benchmark(b, "Decode", simplifiedchinese.GBK) }
  1078  func BenchmarkGBKEncoder(b *testing.B)       { benchmark(b, "Encode", simplifiedchinese.GBK) }
  1079  func BenchmarkHZGB2312Decoder(b *testing.B)  { benchmark(b, "Decode", simplifiedchinese.HZGB2312) }
  1080  func BenchmarkHZGB2312Encoder(b *testing.B)  { benchmark(b, "Encode", simplifiedchinese.HZGB2312) }
  1081  func BenchmarkISO2022JPDecoder(b *testing.B) { benchmark(b, "Decode", japanese.ISO2022JP) }
  1082  func BenchmarkISO2022JPEncoder(b *testing.B) { benchmark(b, "Encode", japanese.ISO2022JP) }
  1083  func BenchmarkShiftJISDecoder(b *testing.B)  { benchmark(b, "Decode", japanese.ShiftJIS) }
  1084  func BenchmarkShiftJISEncoder(b *testing.B)  { benchmark(b, "Encode", japanese.ShiftJIS) }
  1085  func BenchmarkUTF8Decoder(b *testing.B)      { benchmark(b, "Decode", unicode.UTF8) }
  1086  func BenchmarkUTF8Encoder(b *testing.B)      { benchmark(b, "Encode", unicode.UTF8) }
  1087  func BenchmarkUTF16Decoder(b *testing.B)     { benchmark(b, "Decode", utf16LEIB) }
  1088  func BenchmarkUTF16Encoder(b *testing.B)     { benchmark(b, "Encode", utf16LEIB) }