golang.org/x/text@v0.14.0/collate/table_test.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package collate 6 7 import ( 8 "testing" 9 10 "golang.org/x/text/collate/build" 11 "golang.org/x/text/internal/colltab" 12 "golang.org/x/text/unicode/norm" 13 ) 14 15 type ColElems []Weights 16 17 type input struct { 18 str string 19 ces [][]int 20 } 21 22 type check struct { 23 in string 24 n int 25 out ColElems 26 } 27 28 type tableTest struct { 29 in []input 30 chk []check 31 } 32 33 func w(ce ...int) Weights { 34 return W(ce...) 35 } 36 37 var defaults = w(0) 38 39 func pt(p, t int) []int { 40 return []int{p, defaults.Secondary, t} 41 } 42 43 func makeTable(in []input) (*Collator, error) { 44 b := build.NewBuilder() 45 for _, r := range in { 46 if e := b.Add([]rune(r.str), r.ces, nil); e != nil { 47 panic(e) 48 } 49 } 50 t, err := b.Build() 51 if err != nil { 52 return nil, err 53 } 54 return NewFromTable(t), nil 55 } 56 57 // modSeq holds a seqeunce of modifiers in increasing order of CCC long enough 58 // to cause a segment overflow if not handled correctly. The last rune in this 59 // list has a CCC of 214. 60 var modSeq = []rune{ 61 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BB, 62 0x05BC, 0x05BD, 0x05BF, 0x05C1, 0x05C2, 0xFB1E, 0x064B, 0x064C, 0x064D, 0x064E, 63 0x064F, 0x0650, 0x0651, 0x0652, 0x0670, 0x0711, 0x0C55, 0x0C56, 0x0E38, 0x0E48, 64 0x0EB8, 0x0EC8, 0x0F71, 0x0F72, 0x0F74, 0x0321, 0x1DCE, 65 } 66 67 var mods []input 68 var modW = func() ColElems { 69 ws := ColElems{} 70 for _, r := range modSeq { 71 rune := norm.NFC.PropertiesString(string(r)) 72 ws = append(ws, w(0, int(rune.CCC()))) 73 mods = append(mods, input{string(r), [][]int{{0, int(rune.CCC())}}}) 74 } 75 return ws 76 }() 77 78 var appendNextTests = []tableTest{ 79 { // test getWeights 80 []input{ 81 {"a", [][]int{{100}}}, 82 {"b", [][]int{{105}}}, 83 {"c", [][]int{{110}}}, 84 {"ß", [][]int{{120}}}, 85 }, 86 []check{ 87 {"a", 1, ColElems{w(100)}}, 88 {"b", 1, ColElems{w(105)}}, 89 {"c", 1, ColElems{w(110)}}, 90 {"d", 1, ColElems{w(0x50064)}}, 91 {"ab", 1, ColElems{w(100)}}, 92 {"bc", 1, ColElems{w(105)}}, 93 {"dd", 1, ColElems{w(0x50064)}}, 94 {"ß", 2, ColElems{w(120)}}, 95 }, 96 }, 97 { // test expansion 98 []input{ 99 {"u", [][]int{{100}}}, 100 {"U", [][]int{{100}, {0, 25}}}, 101 {"w", [][]int{{100}, {100}}}, 102 {"W", [][]int{{100}, {0, 25}, {100}, {0, 25}}}, 103 }, 104 []check{ 105 {"u", 1, ColElems{w(100)}}, 106 {"U", 1, ColElems{w(100), w(0, 25)}}, 107 {"w", 1, ColElems{w(100), w(100)}}, 108 {"W", 1, ColElems{w(100), w(0, 25), w(100), w(0, 25)}}, 109 }, 110 }, 111 { // test decompose 112 []input{ 113 {"D", [][]int{pt(104, 8)}}, 114 {"z", [][]int{pt(130, 8)}}, 115 {"\u030C", [][]int{{0, 40}}}, // Caron 116 {"\u01C5", [][]int{pt(104, 9), pt(130, 4), {0, 40, 0x1F}}}, // Dž = D+z+caron 117 }, 118 []check{ 119 {"\u01C5", 2, ColElems{w(pt(104, 9)...), w(pt(130, 4)...), w(0, 40, 0x1F)}}, 120 }, 121 }, 122 { // test basic contraction 123 []input{ 124 {"a", [][]int{{100}}}, 125 {"ab", [][]int{{101}}}, 126 {"aab", [][]int{{101}, {101}}}, 127 {"abc", [][]int{{102}}}, 128 {"b", [][]int{{200}}}, 129 {"c", [][]int{{300}}}, 130 {"d", [][]int{{400}}}, 131 }, 132 []check{ 133 {"a", 1, ColElems{w(100)}}, 134 {"aa", 1, ColElems{w(100)}}, 135 {"aac", 1, ColElems{w(100)}}, 136 {"d", 1, ColElems{w(400)}}, 137 {"ab", 2, ColElems{w(101)}}, 138 {"abb", 2, ColElems{w(101)}}, 139 {"aab", 3, ColElems{w(101), w(101)}}, 140 {"aaba", 3, ColElems{w(101), w(101)}}, 141 {"abc", 3, ColElems{w(102)}}, 142 {"abcd", 3, ColElems{w(102)}}, 143 }, 144 }, 145 { // test discontinuous contraction 146 append(mods, []input{ 147 // modifiers; secondary weight equals ccc 148 {"\u0316", [][]int{{0, 220}}}, 149 {"\u0317", [][]int{{0, 220}, {0, 220}}}, 150 {"\u302D", [][]int{{0, 222}}}, 151 {"\u302E", [][]int{{0, 225}}}, // used as starter 152 {"\u302F", [][]int{{0, 224}}}, // used as starter 153 {"\u18A9", [][]int{{0, 228}}}, 154 {"\u0300", [][]int{{0, 230}}}, 155 {"\u0301", [][]int{{0, 230}}}, 156 {"\u0315", [][]int{{0, 232}}}, 157 {"\u031A", [][]int{{0, 232}}}, 158 {"\u035C", [][]int{{0, 233}}}, 159 {"\u035F", [][]int{{0, 233}}}, 160 {"\u035D", [][]int{{0, 234}}}, 161 {"\u035E", [][]int{{0, 234}}}, 162 {"\u0345", [][]int{{0, 240}}}, 163 164 // starters 165 {"a", [][]int{{100}}}, 166 {"b", [][]int{{200}}}, 167 {"c", [][]int{{300}}}, 168 {"\u03B1", [][]int{{900}}}, 169 {"\x01", [][]int{{0, 0, 0, 0}}}, 170 171 // contractions 172 {"a\u0300", [][]int{{101}}}, 173 {"a\u0301", [][]int{{102}}}, 174 {"a\u035E", [][]int{{110}}}, 175 {"a\u035Eb\u035E", [][]int{{115}}}, 176 {"ac\u035Eaca\u035E", [][]int{{116}}}, 177 {"a\u035Db\u035D", [][]int{{117}}}, 178 {"a\u0301\u035Db", [][]int{{120}}}, 179 {"a\u0301\u035F", [][]int{{121}}}, 180 {"a\u0301\u035Fb", [][]int{{119}}}, 181 {"\u03B1\u0345", [][]int{{901}, {902}}}, 182 {"\u302E\u302F", [][]int{{0, 131}, {0, 131}}}, 183 {"\u302F\u18A9", [][]int{{0, 130}}}, 184 }...), 185 []check{ 186 {"a\x01\u0300", 1, ColElems{w(100)}}, 187 {"ab", 1, ColElems{w(100)}}, // closing segment 188 {"a\u0316\u0300b", 5, ColElems{w(101), w(0, 220)}}, // closing segment 189 {"a\u0316\u0300", 5, ColElems{w(101), w(0, 220)}}, // no closing segment 190 {"a\u0316\u0300\u035Cb", 5, ColElems{w(101), w(0, 220)}}, // completes before segment end 191 {"a\u0316\u0300\u035C", 5, ColElems{w(101), w(0, 220)}}, // completes before segment end 192 193 {"a\u0316\u0301b", 5, ColElems{w(102), w(0, 220)}}, // closing segment 194 {"a\u0316\u0301", 5, ColElems{w(102), w(0, 220)}}, // no closing segment 195 {"a\u0316\u0301\u035Cb", 5, ColElems{w(102), w(0, 220)}}, // completes before segment end 196 {"a\u0316\u0301\u035C", 5, ColElems{w(102), w(0, 220)}}, // completes before segment end 197 198 // match blocked by modifier with same ccc 199 {"a\u0301\u0315\u031A\u035Fb", 3, ColElems{w(102)}}, 200 201 // multiple gaps 202 {"a\u0301\u035Db", 6, ColElems{w(120)}}, 203 {"a\u0301\u035F", 5, ColElems{w(121)}}, 204 {"a\u0301\u035Fb", 6, ColElems{w(119)}}, 205 {"a\u0316\u0301\u035F", 7, ColElems{w(121), w(0, 220)}}, 206 {"a\u0301\u0315\u035Fb", 7, ColElems{w(121), w(0, 232)}}, 207 {"a\u0316\u0301\u0315\u035Db", 5, ColElems{w(102), w(0, 220)}}, 208 {"a\u0316\u0301\u0315\u035F", 9, ColElems{w(121), w(0, 220), w(0, 232)}}, 209 {"a\u0316\u0301\u0315\u035Fb", 9, ColElems{w(121), w(0, 220), w(0, 232)}}, 210 {"a\u0316\u0301\u0315\u035F\u035D", 9, ColElems{w(121), w(0, 220), w(0, 232)}}, 211 {"a\u0316\u0301\u0315\u035F\u035Db", 9, ColElems{w(121), w(0, 220), w(0, 232)}}, 212 213 // handling of segment overflow 214 { // just fits within segment 215 "a" + string(modSeq[:30]) + "\u0301", 216 3 + len(string(modSeq[:30])), 217 append(ColElems{w(102)}, modW[:30]...), 218 }, 219 {"a" + string(modSeq[:31]) + "\u0301", 1, ColElems{w(100)}}, // overflow 220 {"a" + string(modSeq) + "\u0301", 1, ColElems{w(100)}}, 221 { // just fits within segment with two interstitial runes 222 "a" + string(modSeq[:28]) + "\u0301\u0315\u035F", 223 7 + len(string(modSeq[:28])), 224 append(append(ColElems{w(121)}, modW[:28]...), w(0, 232)), 225 }, 226 { // second half does not fit within segment 227 "a" + string(modSeq[:29]) + "\u0301\u0315\u035F", 228 3 + len(string(modSeq[:29])), 229 append(ColElems{w(102)}, modW[:29]...), 230 }, 231 232 // discontinuity can only occur in last normalization segment 233 {"a\u035Eb\u035E", 6, ColElems{w(115)}}, 234 {"a\u0316\u035Eb\u035E", 5, ColElems{w(110), w(0, 220)}}, 235 {"a\u035Db\u035D", 6, ColElems{w(117)}}, 236 {"a\u0316\u035Db\u035D", 1, ColElems{w(100)}}, 237 {"a\u035Eb\u0316\u035E", 8, ColElems{w(115), w(0, 220)}}, 238 {"a\u035Db\u0316\u035D", 8, ColElems{w(117), w(0, 220)}}, 239 {"ac\u035Eaca\u035E", 9, ColElems{w(116)}}, 240 {"a\u0316c\u035Eaca\u035E", 1, ColElems{w(100)}}, 241 {"ac\u035Eac\u0316a\u035E", 1, ColElems{w(100)}}, 242 243 // expanding contraction 244 {"\u03B1\u0345", 4, ColElems{w(901), w(902)}}, 245 246 // Theoretical possibilities 247 // contraction within a gap 248 {"a\u302F\u18A9\u0301", 9, ColElems{w(102), w(0, 130)}}, 249 // expansion within a gap 250 {"a\u0317\u0301", 5, ColElems{w(102), w(0, 220), w(0, 220)}}, 251 // repeating CCC blocks last modifier 252 {"a\u302E\u302F\u0301", 1, ColElems{w(100)}}, 253 // The trailing combining characters (with lower CCC) should block the first one. 254 // TODO: make the following pass. 255 // {"a\u035E\u0316\u0316", 1, ColElems{w(100)}}, 256 {"a\u035F\u035Eb", 5, ColElems{w(110), w(0, 233)}}, 257 // Last combiner should match after normalization. 258 // TODO: make the following pass. 259 // {"a\u035D\u0301", 3, ColElems{w(102), w(0, 234)}}, 260 // The first combiner is blocking the second one as they have the same CCC. 261 {"a\u035D\u035Eb", 1, ColElems{w(100)}}, 262 }, 263 }, 264 } 265 266 func TestAppendNext(t *testing.T) { 267 for i, tt := range appendNextTests { 268 c, err := makeTable(tt.in) 269 if err != nil { 270 t.Errorf("%d: error creating table: %v", i, err) 271 continue 272 } 273 for j, chk := range tt.chk { 274 ws, n := c.t.AppendNext(nil, []byte(chk.in)) 275 if n != chk.n { 276 t.Errorf("%d:%d: bytes consumed was %d; want %d", i, j, n, chk.n) 277 } 278 out := convertFromWeights(chk.out) 279 if len(ws) != len(out) { 280 t.Errorf("%d:%d: len(ws) was %d; want %d (%X vs %X)\n%X", i, j, len(ws), len(out), ws, out, chk.in) 281 continue 282 } 283 for k, w := range ws { 284 w, _ = colltab.MakeElem(w.Primary(), w.Secondary(), int(w.Tertiary()), 0) 285 if w != out[k] { 286 t.Errorf("%d:%d: Weights %d was %X; want %X", i, j, k, w, out[k]) 287 } 288 } 289 } 290 } 291 }