github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/encoding/unicode/unicode_test.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package unicode
     6  
     7  import (
     8  	"testing"
     9  
    10  	"golang.org/x/text/transform"
    11  )
    12  
    13  func TestUTF8Decoder(t *testing.T) {
    14  	testCases := []struct {
    15  		desc    string
    16  		src     string
    17  		notEOF  bool // the inverse of atEOF
    18  		sizeDst int
    19  		want    string
    20  		nSrc    int
    21  		err     error
    22  	}{{
    23  		desc: "empty string, empty dest buffer",
    24  	}, {
    25  		desc:    "empty string",
    26  		sizeDst: 8,
    27  	}, {
    28  		desc:    "empty string, streaming",
    29  		notEOF:  true,
    30  		sizeDst: 8,
    31  	}, {
    32  		desc:    "ascii",
    33  		src:     "abcde",
    34  		sizeDst: 8,
    35  		want:    "abcde",
    36  		nSrc:    5,
    37  	}, {
    38  		desc:    "ascii and error",
    39  		src:     "ab\x80de",
    40  		sizeDst: 7,
    41  		want:    "ab\ufffdde",
    42  		nSrc:    5,
    43  	}, {
    44  		desc:    "valid two-byte sequence",
    45  		src:     "a\u0300bc",
    46  		sizeDst: 7,
    47  		want:    "a\u0300bc",
    48  		nSrc:    5,
    49  	}, {
    50  		desc:    "valid three-byte sequence",
    51  		src:     "a\u0300中",
    52  		sizeDst: 7,
    53  		want:    "a\u0300中",
    54  		nSrc:    6,
    55  	}, {
    56  		desc:    "valid four-byte sequence",
    57  		src:     "a中\U00016F50",
    58  		sizeDst: 8,
    59  		want:    "a中\U00016F50",
    60  		nSrc:    8,
    61  	}, {
    62  		desc:    "short source buffer",
    63  		src:     "abc\xf0\x90",
    64  		notEOF:  true,
    65  		sizeDst: 10,
    66  		want:    "abc",
    67  		nSrc:    3,
    68  		err:     transform.ErrShortSrc,
    69  	}, {
    70  		// We don't check for the maximal subpart of an ill-formed subsequence
    71  		// at the end of an open segment.
    72  		desc:    "complete invalid that looks like short at end",
    73  		src:     "abc\xf0\x80",
    74  		notEOF:  true,
    75  		sizeDst: 10,
    76  		want:    "abc", // instead of "abc\ufffd\ufffd",
    77  		nSrc:    3,
    78  		err:     transform.ErrShortSrc,
    79  	}, {
    80  		desc:    "incomplete sequence at end",
    81  		src:     "a\x80bc\xf0\x90",
    82  		sizeDst: 9,
    83  		want:    "a\ufffdbc\ufffd",
    84  		nSrc:    6,
    85  	}, {
    86  		desc:    "invalid second byte",
    87  		src:     "abc\xf0dddd",
    88  		sizeDst: 10,
    89  		want:    "abc\ufffddddd",
    90  		nSrc:    8,
    91  	}, {
    92  		desc:    "invalid second byte at end",
    93  		src:     "abc\xf0d",
    94  		sizeDst: 10,
    95  		want:    "abc\ufffdd",
    96  		nSrc:    5,
    97  	}, {
    98  		desc:    "invalid third byte",
    99  		src:     "a\u0300bc\xf0\x90dddd",
   100  		sizeDst: 12,
   101  		want:    "a\u0300bc\ufffddddd",
   102  		nSrc:    11,
   103  	}, {
   104  		desc:    "invalid third byte at end",
   105  		src:     "a\u0300bc\xf0\x90d",
   106  		sizeDst: 12,
   107  		want:    "a\u0300bc\ufffdd",
   108  		nSrc:    8,
   109  	}, {
   110  		desc:    "invalid fourth byte, tight buffer",
   111  		src:     "a\u0300bc\xf0\x90\x80d",
   112  		sizeDst: 9,
   113  		want:    "a\u0300bc\ufffdd",
   114  		nSrc:    9,
   115  	}, {
   116  		desc:    "invalid fourth byte at end",
   117  		src:     "a\u0300bc\xf0\x90\x80",
   118  		sizeDst: 8,
   119  		want:    "a\u0300bc\ufffd",
   120  		nSrc:    8,
   121  	}, {
   122  		desc:    "invalid fourth byte and short four byte sequence",
   123  		src:     "a\u0300bc\xf0\x90\x80\xf0\x90\x80",
   124  		notEOF:  true,
   125  		sizeDst: 20,
   126  		want:    "a\u0300bc\ufffd",
   127  		nSrc:    8,
   128  		err:     transform.ErrShortSrc,
   129  	}, {
   130  		desc:    "valid four-byte sequence overflowing short buffer",
   131  		src:     "a\u0300bc\xf0\x90\x80\x80",
   132  		notEOF:  true,
   133  		sizeDst: 8,
   134  		want:    "a\u0300bc",
   135  		nSrc:    5,
   136  		err:     transform.ErrShortDst,
   137  	}, {
   138  		desc:    "invalid fourth byte at end short, but short dst",
   139  		src:     "a\u0300bc\xf0\x90\x80\xf0\x90\x80",
   140  		notEOF:  true,
   141  		sizeDst: 8,
   142  		// More bytes would fit in the buffer, but this seems to require a more
   143  		// complicated and slower algorithm.
   144  		want: "a\u0300bc", // instead of "a\u0300bc"
   145  		nSrc: 5,
   146  		err:  transform.ErrShortDst,
   147  	}, {
   148  		desc:    "short dst for error",
   149  		src:     "abc\x80",
   150  		notEOF:  true,
   151  		sizeDst: 5,
   152  		want:    "abc",
   153  		nSrc:    3,
   154  		err:     transform.ErrShortDst,
   155  	}, {
   156  		desc:    "adjusting short dst buffer",
   157  		src:     "abc\x80ef",
   158  		notEOF:  true,
   159  		sizeDst: 6,
   160  		want:    "abc\ufffd",
   161  		nSrc:    4,
   162  		err:     transform.ErrShortDst,
   163  	}}
   164  	tr := UTF8.NewDecoder()
   165  	for i, tc := range testCases {
   166  		b := make([]byte, tc.sizeDst)
   167  		nDst, nSrc, err := tr.Transform(b, []byte(tc.src), !tc.notEOF)
   168  		if err != tc.err {
   169  			t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
   170  		}
   171  		if got := string(b[:nDst]); got != tc.want {
   172  			t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
   173  		}
   174  		if nSrc != tc.nSrc {
   175  			t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
   176  		}
   177  	}
   178  }