golang.org/x/text@v0.14.0/unicode/norm/iter_test.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package norm 6 7 import ( 8 "strings" 9 "testing" 10 ) 11 12 func doIterNormString(f Form, s string) []byte { 13 acc := []byte{} 14 i := Iter{} 15 i.InitString(f, s) 16 for !i.Done() { 17 acc = append(acc, i.Next()...) 18 } 19 return acc 20 } 21 22 func doIterNorm(f Form, s string) []byte { 23 acc := []byte{} 24 i := Iter{} 25 i.Init(f, []byte(s)) 26 for !i.Done() { 27 acc = append(acc, i.Next()...) 28 } 29 return acc 30 } 31 32 func TestIterNext(t *testing.T) { 33 runNormTests(t, "IterNext", func(f Form, out []byte, s string) []byte { 34 return doIterNormString(f, string(append(out, s...))) 35 }) 36 runNormTests(t, "IterNext", func(f Form, out []byte, s string) []byte { 37 return doIterNorm(f, string(append(out, s...))) 38 }) 39 } 40 41 type SegmentTest struct { 42 in string 43 out []string 44 } 45 46 var segmentTests = []SegmentTest{ 47 {"\u1E0A\u0323a", []string{"\x44\u0323\u0307", "a", ""}}, 48 {rep('a', segSize), append(strings.Split(rep('a', segSize), ""), "")}, 49 {rep('a', segSize+2), append(strings.Split(rep('a', segSize+2), ""), "")}, 50 {rep('a', segSize) + "\u0300aa", 51 append(strings.Split(rep('a', segSize-1), ""), "a\u0300", "a", "a", "")}, 52 53 // U+0f73 is NOT treated as a starter as it is a modifier 54 {"a" + grave(29) + "\u0f73", []string{"a" + grave(29), cgj + "\u0f73"}}, 55 {"a\u0f73", []string{"a\u0f73"}}, 56 57 // U+ff9e is treated as a non-starter. 58 // TODO: should we? Note that this will only affect iteration, as whether 59 // or not we do so does not affect the normalization output and will either 60 // way result in consistent iteration output. 61 {"a" + grave(30) + "\uff9e", []string{"a" + grave(30), cgj + "\uff9e"}}, 62 {"a\uff9e", []string{"a\uff9e"}}, 63 } 64 65 var segmentTestsK = []SegmentTest{ 66 {"\u3332", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u3099", ""}}, 67 // last segment of multi-segment decomposition needs normalization 68 {"\u3332\u093C", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u093C\u3099", ""}}, 69 {"\u320E", []string{"\x28", "\uAC00", "\x29"}}, 70 71 // last segment should be copied to start of buffer. 72 {"\ufdfa", []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645", ""}}, 73 {"\ufdfa" + grave(30), []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645" + grave(30), ""}}, 74 {"\uFDFA" + grave(64), []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645" + grave(30), cgj + grave(30), cgj + grave(4), ""}}, 75 76 // Hangul and Jamo are grouped together. 77 {"\uAC00", []string{"\u1100\u1161", ""}}, 78 {"\uAC01", []string{"\u1100\u1161\u11A8", ""}}, 79 {"\u1100\u1161", []string{"\u1100\u1161", ""}}, 80 } 81 82 // Note that, by design, segmentation is equal for composing and decomposing forms. 83 func TestIterSegmentation(t *testing.T) { 84 segmentTest(t, "SegmentTestD", NFD, segmentTests) 85 segmentTest(t, "SegmentTestC", NFC, segmentTests) 86 segmentTest(t, "SegmentTestKD", NFKD, segmentTestsK) 87 segmentTest(t, "SegmentTestKC", NFKC, segmentTestsK) 88 } 89 90 func segmentTest(t *testing.T, name string, f Form, tests []SegmentTest) { 91 iter := Iter{} 92 for i, tt := range tests { 93 iter.InitString(f, tt.in) 94 for j, seg := range tt.out { 95 if seg == "" { 96 if !iter.Done() { 97 res := string(iter.Next()) 98 t.Errorf(`%s:%d:%d: expected Done()==true, found segment %+q`, name, i, j, res) 99 } 100 continue 101 } 102 if iter.Done() { 103 t.Errorf("%s:%d:%d: Done()==true, want false", name, i, j) 104 } 105 seg = f.String(seg) 106 if res := string(iter.Next()); res != seg { 107 t.Errorf(`%s:%d:%d" segment was %+q (%d); want %+q (%d)`, name, i, j, pc(res), len(res), pc(seg), len(seg)) 108 } 109 } 110 } 111 }