github.com/gnolang/gno@v0.0.0-20240520182011-228e9d0192ce/gnovm/stdlibs/unicode/utf16/utf16_test.gno (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package utf16
     6  
     7  import (
     8  	"testing"
     9  	"unicode"
    10  	"unicode/utf16"
    11  )
    12  
// encodeTest is one table entry for the encoding tests: a sequence of runes
// and the UTF-16 code units that encoding it is expected to produce.
type encodeTest struct {
	in  []rune   // runes handed to the encoder
	out []uint16 // expected UTF-16 code units
}
    17  
    18  var encodeTests = []encodeTest{
    19  	{[]rune{1, 2, 3, 4}, []uint16{1, 2, 3, 4}},
    20  	{
    21  		[]rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff},
    22  		[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff},
    23  	},
    24  	{
    25  		[]rune{'a', 'b', 0xd7ff, 0xd800, 0xdfff, 0xe000, 0x110000, -1},
    26  		[]uint16{'a', 'b', 0xd7ff, 0xfffd, 0xfffd, 0xe000, 0xfffd, 0xfffd},
    27  	},
    28  }
    29  
    30  func slicesEqual(a, b []uint16) bool {
    31  	if len(a) != len(b) {
    32  		return false
    33  	}
    34  	for i, v := range a {
    35  		if v != b[i] {
    36  			return false
    37  		}
    38  	}
    39  	return true
    40  }
    41  
    42  func TestEncode(t *testing.T) {
    43  	for _, tt := range encodeTests {
    44  		out := Encode(tt.in)
    45  		if !slicesEqual(out, tt.out) {
    46  			t.Errorf("Encode(%x) = %x; want %x", tt.in, out, tt.out)
    47  		}
    48  	}
    49  }
    50  
    51  func TestEncodeRune(t *testing.T) {
    52  	for i, tt := range encodeTests {
    53  		j := 0
    54  		for _, r := range tt.in {
    55  			r1, r2 := EncodeRune(r)
    56  			if r < 0x10000 || r > unicode.MaxRune {
    57  				if j >= len(tt.out) {
    58  					t.Errorf("#%d: ran out of tt.out", i)
    59  					break
    60  				}
    61  				if r1 != unicode.ReplacementChar || r2 != unicode.ReplacementChar {
    62  					t.Errorf("EncodeRune(%#x) = %#x, %#x; want 0xfffd, 0xfffd", r, r1, r2)
    63  				}
    64  				j++
    65  			} else {
    66  				if j+1 >= len(tt.out) {
    67  					t.Errorf("#%d: ran out of tt.out", i)
    68  					break
    69  				}
    70  				if r1 != rune(tt.out[j]) || r2 != rune(tt.out[j+1]) {
    71  					t.Errorf("EncodeRune(%#x) = %#x, %#x; want %#x, %#x", r, r1, r2, tt.out[j], tt.out[j+1])
    72  				}
    73  				j += 2
    74  				dec := DecodeRune(r1, r2)
    75  				if dec != r {
    76  					t.Errorf("DecodeRune(%#x, %#x) = %#x; want %#x", r1, r2, dec, r)
    77  				}
    78  			}
    79  		}
    80  		if j != len(tt.out) {
    81  			t.Errorf("#%d: EncodeRune didn't generate enough output", i)
    82  		}
    83  	}
    84  }
    85  
// decodeTest is one table entry for the decoding tests: a sequence of UTF-16
// code units and the runes that decoding it is expected to produce.
type decodeTest struct {
	in  []uint16 // UTF-16 code units handed to the decoder
	out []rune   // expected runes
}
    90  
    91  var decodeTests = []decodeTest{
    92  	{[]uint16{1, 2, 3, 4}, []rune{1, 2, 3, 4}},
    93  	{
    94  		[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff},
    95  		[]rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff},
    96  	},
    97  	{[]uint16{0xd800, 'a'}, []rune{0xfffd, 'a'}},
    98  	{[]uint16{0xdfff}, []rune{0xfffd}},
    99  }
   100  
   101  func TestDecode(t *testing.T) {
   102  	for _, tt := range decodeTests {
   103  		out := Decode(tt.in)
   104  		if !runesEqual(out, tt.out) {
   105  			t.Errorf("Decode(%x) = %x; want %x", tt.in, out, tt.out)
   106  		}
   107  	}
   108  }
   109  
   110  func runesEqual(a, b []rune) bool {
   111  	if len(a) != len(b) {
   112  		return false
   113  	}
   114  	for i, v := range a {
   115  		if v != b[i] {
   116  			return false
   117  		}
   118  	}
   119  	return true
   120  }
   121  
   122  var decodeRuneTests = []struct {
   123  	r1, r2 rune
   124  	want   rune
   125  }{
   126  	{0xd800, 0xdc00, 0x10000},
   127  	{0xd800, 0xdc01, 0x10001},
   128  	{0xd808, 0xdf45, 0x12345},
   129  	{0xdbff, 0xdfff, 0x10ffff},
   130  	{0xd800, 'a', 0xfffd}, // illegal, replacement rune substituted
   131  }
   132  
   133  func TestDecodeRune(t *testing.T) {
   134  	for i, tt := range decodeRuneTests {
   135  		got := DecodeRune(tt.r1, tt.r2)
   136  		if got != tt.want {
   137  			t.Errorf("%d: DecodeRune(%q, %q) = %v; want %v", i, tt.r1, tt.r2, got, tt.want)
   138  		}
   139  	}
   140  }
   141  
   142  var surrogateTests = []struct {
   143  	r    rune
   144  	want bool
   145  }{
   146  	// from https://en.wikipedia.org/wiki/UTF-16
   147  	{'\u007A', false},     // LATIN SMALL LETTER Z
   148  	{'\u6C34', false},     // CJK UNIFIED IDEOGRAPH-6C34 (water)
   149  	{'\uFEFF', false},     // Byte Order Mark
   150  	{'\U00010000', false}, // LINEAR B SYLLABLE B008 A (first non-BMP code point)
   151  	{'\U0001D11E', false}, // MUSICAL SYMBOL G CLEF
   152  	{'\U0010FFFD', false}, // PRIVATE USE CHARACTER-10FFFD (last Unicode code point)
   153  
   154  	{rune(0xd7ff), false}, // surr1-1
   155  	{rune(0xd800), true},  // surr1
   156  	{rune(0xdc00), true},  // surr2
   157  	{rune(0xe000), false}, // surr3
   158  	{rune(0xdfff), true},  // surr3-1
   159  }
   160  
   161  func TestIsSurrogate(t *testing.T) {
   162  	for i, tt := range surrogateTests {
   163  		got := IsSurrogate(tt.r)
   164  		if got != tt.want {
   165  			t.Errorf("%d: IsSurrogate(%q) = %v; want %v", i, tt.r, got, tt.want)
   166  		}
   167  	}
   168  }
   169  
   170  func BenchmarkDecodeValidASCII(b *testing.B) {
   171  	// "hello world"
   172  	data := []uint16{104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100}
   173  	for i := 0; i < b.N; i++ {
   174  		Decode(data)
   175  	}
   176  }
   177  
   178  func BenchmarkDecodeValidJapaneseChars(b *testing.B) {
   179  	// "日本語日本語日本語"
   180  	data := []uint16{26085, 26412, 35486, 26085, 26412, 35486, 26085, 26412, 35486}
   181  	for i := 0; i < b.N; i++ {
   182  		Decode(data)
   183  	}
   184  }
   185  
   186  func BenchmarkDecodeRune(b *testing.B) {
   187  	rs := make([]rune, 10)
   188  	// U+1D4D0 to U+1D4D4: MATHEMATICAL BOLD SCRIPT CAPITAL LETTERS
   189  	for i, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} {
   190  		rs[2*i], rs[2*i+1] = EncodeRune(u)
   191  	}
   192  
   193  	b.ResetTimer()
   194  	for i := 0; i < b.N; i++ {
   195  		for j := 0; j < 5; j++ {
   196  			DecodeRune(rs[2*j], rs[2*j+1])
   197  		}
   198  	}
   199  }
   200  
   201  func BenchmarkEncodeValidASCII(b *testing.B) {
   202  	data := []rune{'h', 'e', 'l', 'l', 'o'}
   203  	for i := 0; i < b.N; i++ {
   204  		Encode(data)
   205  	}
   206  }
   207  
   208  func BenchmarkEncodeValidJapaneseChars(b *testing.B) {
   209  	data := []rune{'日', '本', '語'}
   210  	for i := 0; i < b.N; i++ {
   211  		Encode(data)
   212  	}
   213  }
   214  
   215  func BenchmarkEncodeRune(b *testing.B) {
   216  	for i := 0; i < b.N; i++ {
   217  		for _, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} {
   218  			EncodeRune(u)
   219  		}
   220  	}
   221  }