golang.org/x/text@v0.14.0/encoding/unicode/utf32/utf32_test.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package utf32 6 7 import ( 8 "testing" 9 10 "golang.org/x/text/encoding" 11 "golang.org/x/text/encoding/internal/enctest" 12 "golang.org/x/text/transform" 13 ) 14 15 var ( 16 utf32LEIB = UTF32(LittleEndian, IgnoreBOM) // UTF-32LE (atypical interpretation) 17 utf32LEUB = UTF32(LittleEndian, UseBOM) // UTF-32, LE 18 // utf32LEEB = UTF32(LittleEndian, ExpectBOM) // UTF-32, LE, Expect - covered in encoding_test.go 19 utf32BEIB = UTF32(BigEndian, IgnoreBOM) // UTF-32BE (atypical interpretation) 20 utf32BEUB = UTF32(BigEndian, UseBOM) // UTF-32 default 21 utf32BEEB = UTF32(BigEndian, ExpectBOM) // UTF-32 Expect 22 ) 23 24 func TestBasics(t *testing.T) { 25 testCases := []struct { 26 e encoding.Encoding 27 encPrefix string 28 encSuffix string 29 encoded string 30 utf8 string 31 }{{ 32 e: utf32BEIB, 33 encoded: "\x00\x00\x00\x57\x00\x00\x00\xe4\x00\x01\xd5\x65", 34 utf8: "\x57\u00e4\U0001d565", 35 }, { 36 e: UTF32(BigEndian, ExpectBOM), 37 encPrefix: "\x00\x00\xfe\xff", 38 encoded: "\x00\x00\x00\x57\x00\x00\x00\xe4\x00\x01\xd5\x65", 39 utf8: "\x57\u00e4\U0001d565", 40 }, { 41 e: UTF32(LittleEndian, IgnoreBOM), 42 encoded: "\x57\x00\x00\x00\xe4\x00\x00\x00\x65\xd5\x01\x00", 43 utf8: "\x57\u00e4\U0001d565", 44 }, { 45 e: UTF32(LittleEndian, ExpectBOM), 46 encPrefix: "\xff\xfe\x00\x00", 47 encoded: "\x57\x00\x00\x00\xe4\x00\x00\x00\x65\xd5\x01\x00", 48 utf8: "\x57\u00e4\U0001d565", 49 }} 50 51 for _, tc := range testCases { 52 enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, tc.encPrefix, tc.encSuffix) 53 } 54 } 55 56 func TestFiles(t *testing.T) { enctest.TestFile(t, utf32BEIB) } 57 58 func BenchmarkEncoding(b *testing.B) { enctest.Benchmark(b, utf32BEIB) } 59 60 func TestUTF32(t *testing.T) { 61 testCases := []struct { 62 desc string 63 src string 64 notEOF bool // the inverse of atEOF 65 sizeDst int 66 want string 67 nSrc int 68 err error 69 t transform.Transformer 70 }{{ 71 desc: "utf-32 IgnoreBOM dec: empty string", 72 t: utf32BEIB.NewDecoder(), 73 }, { 74 desc: "utf-32 UseBOM dec: empty string", 75 t: utf32BEUB.NewDecoder(), 76 }, { 77 desc: "utf-32 ExpectBOM dec: empty string", 78 err: ErrMissingBOM, 79 t: utf32BEEB.NewDecoder(), 80 }, { 81 desc: "utf-32be dec: Doesn't interpret U+FEFF as BOM", 82 src: "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61", 83 sizeDst: 100, 84 want: "\uFEFF\U00012345=Ra", 85 nSrc: 20, 86 t: utf32BEIB.NewDecoder(), 87 }, { 88 desc: "utf-32be dec: Interprets little endian U+FEFF as invalid", 89 src: "\xFF\xFE\x00\x00\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61", 90 sizeDst: 100, 91 want: "\uFFFD\U00012345=Ra", 92 nSrc: 20, 93 t: utf32BEIB.NewDecoder(), 94 }, { 95 desc: "utf-32le dec: Doesn't interpret U+FEFF as BOM", 96 src: "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00", 97 sizeDst: 100, 98 want: "\uFEFF\U00012345=Ra", 99 nSrc: 20, 100 t: utf32LEIB.NewDecoder(), 101 }, { 102 desc: "utf-32le dec: Interprets big endian U+FEFF as invalid", 103 src: "\x00\x00\xFE\xFF\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00", 104 sizeDst: 100, 105 want: "\uFFFD\U00012345=Ra", 106 nSrc: 20, 107 t: utf32LEIB.NewDecoder(), 108 }, { 109 desc: "utf-32 enc: Writes big-endian BOM", 110 src: "\U00012345=Ra", 111 sizeDst: 100, 112 want: "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61", 113 nSrc: 7, 114 t: utf32BEUB.NewEncoder(), 115 }, { 116 desc: "utf-32 enc: Writes little-endian BOM", 117 src: "\U00012345=Ra", 118 sizeDst: 100, 119 want: "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00", 120 nSrc: 7, 121 t: utf32LEUB.NewEncoder(), 122 }, { 123 desc: "utf-32 dec: Interprets text using big-endian default when BOM not present", 124 src: "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61", 125 sizeDst: 100, 126 want: "\U00012345=Ra", 127 nSrc: 16, 128 t: utf32BEUB.NewDecoder(), 129 }, { 130 desc: "utf-32 dec: Interprets text using little-endian default when BOM not present", 131 src: "\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00", 132 sizeDst: 100, 133 want: "\U00012345=Ra", 134 nSrc: 16, 135 t: utf32LEUB.NewDecoder(), 136 }, { 137 desc: "utf-32 dec: BOM determines encoding BE", 138 src: "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61", 139 sizeDst: 100, 140 want: "\U00012345=Ra", 141 nSrc: 20, 142 t: utf32BEUB.NewDecoder(), 143 }, { 144 desc: "utf-32 dec: BOM determines encoding LE", 145 src: "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00", 146 sizeDst: 100, 147 want: "\U00012345=Ra", 148 nSrc: 20, 149 t: utf32LEUB.NewDecoder(), 150 }, { 151 desc: "utf-32 dec: BOM determines encoding LE, change default", 152 src: "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00", 153 sizeDst: 100, 154 want: "\U00012345=Ra", 155 nSrc: 20, 156 t: utf32BEUB.NewDecoder(), 157 }, { 158 desc: "utf-32 dec: BOM determines encoding BE, change default", 159 src: "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61", 160 sizeDst: 100, 161 want: "\U00012345=Ra", 162 nSrc: 20, 163 t: utf32LEUB.NewDecoder(), 164 }, { 165 desc: "utf-32 dec: Don't change big-endian byte order mid-stream", 166 src: "\x00\x01\x23\x45\x00\x00\x00\x3D\xFF\xFE\x00\x00\x00\x00\xFE\xFF\x00\x00\x00\x52\x00\x00\x00\x61", 167 sizeDst: 100, 168 want: "\U00012345=\uFFFD\uFEFFRa", 169 nSrc: 24, 170 t: utf32BEUB.NewDecoder(), 171 }, { 172 desc: "utf-32 dec: Don't change little-endian byte order mid-stream", 173 src: "\x45\x23\x01\x00\x3D\x00\x00\x00\x00\x00\xFE\xFF\xFF\xFE\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00", 174 sizeDst: 100, 175 want: "\U00012345=\uFFFD\uFEFFRa", 176 nSrc: 24, 177 t: utf32LEUB.NewDecoder(), 178 }, { 179 desc: "utf-32 dec: Fail on missing BOM when required", 180 src: "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61", 181 sizeDst: 100, 182 want: "", 183 nSrc: 0, 184 err: ErrMissingBOM, 185 t: utf32BEEB.NewDecoder(), 186 }, { 187 desc: "utf-32 enc: Short dst", 188 src: "\U00012345=Ra", 189 sizeDst: 15, 190 want: "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52", 191 nSrc: 6, 192 err: transform.ErrShortDst, 193 t: utf32BEIB.NewEncoder(), 194 }, { 195 desc: "utf-32 enc: Short src", 196 src: "\U00012345=Ra\xC2", 197 notEOF: true, 198 sizeDst: 100, 199 want: "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61", 200 nSrc: 7, 201 err: transform.ErrShortSrc, 202 t: utf32BEIB.NewEncoder(), 203 }, { 204 desc: "utf-32 enc: Invalid input", 205 src: "\x80\xC1\xC2\x7F\xC2", 206 sizeDst: 100, 207 want: "\x00\x00\xFF\xFD\x00\x00\xFF\xFD\x00\x00\xFF\xFD\x00\x00\x00\x7F\x00\x00\xFF\xFD", 208 nSrc: 5, 209 t: utf32BEIB.NewEncoder(), 210 }, { 211 desc: "utf-32 dec: Short dst", 212 src: "\x00\x00\x00\x41", 213 sizeDst: 0, 214 want: "", 215 nSrc: 0, 216 err: transform.ErrShortDst, 217 t: utf32BEIB.NewDecoder(), 218 }, { 219 desc: "utf-32 dec: Short src", 220 src: "\x00\x00\x00", 221 notEOF: true, 222 sizeDst: 4, 223 want: "", 224 nSrc: 0, 225 err: transform.ErrShortSrc, 226 t: utf32BEIB.NewDecoder(), 227 }, { 228 desc: "utf-32 dec: Invalid input", 229 src: "\x00\x00\xD8\x00\x00\x00\xDF\xFF\x00\x11\x00\x00\x00\x00\x00", 230 sizeDst: 100, 231 want: "\uFFFD\uFFFD\uFFFD\uFFFD", 232 nSrc: 15, 233 t: utf32BEIB.NewDecoder(), 234 }} 235 for i, tc := range testCases { 236 b := make([]byte, tc.sizeDst) 237 nDst, nSrc, err := tc.t.Transform(b, []byte(tc.src), !tc.notEOF) 238 if err != tc.err { 239 t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err) 240 } 241 if got := string(b[:nDst]); got != tc.want { 242 t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want) 243 } 244 if nSrc != tc.nSrc { 245 t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc) 246 } 247 } 248 }