golang.org/x/text@v0.14.0/collate/build/builder_test.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package build 6 7 import "testing" 8 9 // cjk returns an implicit collation element for a CJK rune. 10 func cjk(r rune) []rawCE { 11 // A CJK character C is represented in the DUCET as 12 // [.AAAA.0020.0002.C][.BBBB.0000.0000.C] 13 // Where AAAA is the most significant 15 bits plus a base value. 14 // Any base value will work for the test, so we pick the common value of FB40. 15 const base = 0xFB40 16 return []rawCE{ 17 {w: []int{base + int(r>>15), defaultSecondary, defaultTertiary, int(r)}}, 18 {w: []int{int(r&0x7FFF) | 0x8000, 0, 0, int(r)}}, 19 } 20 } 21 22 func pCE(p int) []rawCE { 23 return mkCE([]int{p, defaultSecondary, defaultTertiary, 0}, 0) 24 } 25 26 func pqCE(p, q int) []rawCE { 27 return mkCE([]int{p, defaultSecondary, defaultTertiary, q}, 0) 28 } 29 30 func ptCE(p, t int) []rawCE { 31 return mkCE([]int{p, defaultSecondary, t, 0}, 0) 32 } 33 34 func ptcCE(p, t int, ccc uint8) []rawCE { 35 return mkCE([]int{p, defaultSecondary, t, 0}, ccc) 36 } 37 38 func sCE(s int) []rawCE { 39 return mkCE([]int{0, s, defaultTertiary, 0}, 0) 40 } 41 42 func stCE(s, t int) []rawCE { 43 return mkCE([]int{0, s, t, 0}, 0) 44 } 45 46 func scCE(s int, ccc uint8) []rawCE { 47 return mkCE([]int{0, s, defaultTertiary, 0}, ccc) 48 } 49 50 func mkCE(w []int, ccc uint8) []rawCE { 51 return []rawCE{rawCE{w, ccc}} 52 } 53 54 // ducetElem is used to define test data that is used to generate a table. 55 type ducetElem struct { 56 str string 57 ces []rawCE 58 } 59 60 func newBuilder(t *testing.T, ducet []ducetElem) *Builder { 61 b := NewBuilder() 62 for _, e := range ducet { 63 ces := [][]int{} 64 for _, ce := range e.ces { 65 ces = append(ces, ce.w) 66 } 67 if err := b.Add([]rune(e.str), ces, nil); err != nil { 68 t.Errorf(err.Error()) 69 } 70 } 71 b.t = &table{} 72 b.root.sort() 73 return b 74 } 75 76 type convertTest struct { 77 in, out []rawCE 78 err bool 79 } 80 81 var convLargeTests = []convertTest{ 82 {pCE(0xFB39), pCE(0xFB39), false}, 83 {cjk(0x2F9B2), pqCE(0x3F9B2, 0x2F9B2), false}, 84 {pCE(0xFB40), pCE(0), true}, 85 {append(pCE(0xFB40), pCE(0)[0]), pCE(0), true}, 86 {pCE(0xFFFE), pCE(illegalOffset), false}, 87 {pCE(0xFFFF), pCE(illegalOffset + 1), false}, 88 } 89 90 func TestConvertLarge(t *testing.T) { 91 for i, tt := range convLargeTests { 92 e := new(entry) 93 for _, ce := range tt.in { 94 e.elems = append(e.elems, makeRawCE(ce.w, ce.ccc)) 95 } 96 elems, err := convertLargeWeights(e.elems) 97 if tt.err { 98 if err == nil { 99 t.Errorf("%d: expected error; none found", i) 100 } 101 continue 102 } else if err != nil { 103 t.Errorf("%d: unexpected error: %v", i, err) 104 } 105 if !equalCEArrays(elems, tt.out) { 106 t.Errorf("%d: conversion was %x; want %x", i, elems, tt.out) 107 } 108 } 109 } 110 111 // Collation element table for simplify tests. 112 var simplifyTest = []ducetElem{ 113 {"\u0300", sCE(30)}, // grave 114 {"\u030C", sCE(40)}, // caron 115 {"A", ptCE(100, 8)}, 116 {"D", ptCE(104, 8)}, 117 {"E", ptCE(105, 8)}, 118 {"I", ptCE(110, 8)}, 119 {"z", ptCE(130, 8)}, 120 {"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0])}, 121 {"\u05B7", sCE(80)}, 122 {"\u00C0", append(ptCE(100, 8), sCE(30)...)}, // A with grave, can be removed 123 {"\u00C8", append(ptCE(105, 8), sCE(30)...)}, // E with grave 124 {"\uFB1F", append(ptCE(200, 4), ptCE(200, 4)[0], sCE(80)[0])}, // eliminated by NFD 125 {"\u00C8\u0302", ptCE(106, 8)}, // block previous from simplifying 126 {"\u01C5", append(ptCE(104, 9), ptCE(130, 4)[0], stCE(40, maxTertiary)[0])}, // eliminated by NFKD 127 // no removal: tertiary value of third element is not maxTertiary 128 {"\u2162", append(ptCE(110, 9), ptCE(110, 4)[0], ptCE(110, 8)[0])}, 129 } 130 131 var genColTests = []ducetElem{ 132 {"\uFA70", pqCE(0x1FA70, 0xFA70)}, 133 {"A\u0300", append(ptCE(100, 8), sCE(30)...)}, 134 {"A\u0300\uFA70", append(ptCE(100, 8), sCE(30)[0], pqCE(0x1FA70, 0xFA70)[0])}, 135 {"A\u0300A\u0300", append(ptCE(100, 8), sCE(30)[0], ptCE(100, 8)[0], sCE(30)[0])}, 136 } 137 138 func TestGenColElems(t *testing.T) { 139 b := newBuilder(t, simplifyTest[:5]) 140 141 for i, tt := range genColTests { 142 res := b.root.genColElems(tt.str) 143 if !equalCEArrays(tt.ces, res) { 144 t.Errorf("%d: result %X; want %X", i, res, tt.ces) 145 } 146 } 147 } 148 149 type strArray []string 150 151 func (sa strArray) contains(s string) bool { 152 for _, e := range sa { 153 if e == s { 154 return true 155 } 156 } 157 return false 158 } 159 160 var simplifyRemoved = strArray{"\u00C0", "\uFB1F"} 161 var simplifyMarked = strArray{"\u01C5"} 162 163 func TestSimplify(t *testing.T) { 164 b := newBuilder(t, simplifyTest) 165 o := &b.root 166 simplify(o) 167 168 for i, tt := range simplifyTest { 169 if simplifyRemoved.contains(tt.str) { 170 continue 171 } 172 e := o.find(tt.str) 173 if e.str != tt.str || !equalCEArrays(e.elems, tt.ces) { 174 t.Errorf("%d: found element %s -> %X; want %s -> %X", i, e.str, e.elems, tt.str, tt.ces) 175 break 176 } 177 } 178 var i, k int 179 for e := o.front(); e != nil; e, _ = e.nextIndexed() { 180 gold := simplifyMarked.contains(e.str) 181 if gold { 182 k++ 183 } 184 if gold != e.decompose { 185 t.Errorf("%d: %s has decompose %v; want %v", i, e.str, e.decompose, gold) 186 } 187 i++ 188 } 189 if k != len(simplifyMarked) { 190 t.Errorf(" an entry that should be marked as decompose was deleted") 191 } 192 } 193 194 var expandTest = []ducetElem{ 195 {"\u0300", append(scCE(29, 230), scCE(30, 230)...)}, 196 {"\u00C0", append(ptCE(100, 8), scCE(30, 230)...)}, 197 {"\u00C8", append(ptCE(105, 8), scCE(30, 230)...)}, 198 {"\u00C9", append(ptCE(105, 8), scCE(30, 230)...)}, // identical expansion 199 {"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0], ptCE(200, 4)[0])}, 200 {"\u01FF", append(ptCE(200, 4), ptcCE(201, 4, 0)[0], scCE(30, 230)[0])}, 201 } 202 203 func TestExpand(t *testing.T) { 204 const ( 205 totalExpansions = 5 206 totalElements = 2 + 2 + 2 + 3 + 3 + totalExpansions 207 ) 208 b := newBuilder(t, expandTest) 209 o := &b.root 210 b.processExpansions(o) 211 212 e := o.front() 213 for _, tt := range expandTest { 214 exp := b.t.ExpandElem[e.expansionIndex:] 215 if int(exp[0]) != len(tt.ces) { 216 t.Errorf("%U: len(expansion)==%d; want %d", []rune(tt.str)[0], exp[0], len(tt.ces)) 217 } 218 exp = exp[1:] 219 for j, w := range tt.ces { 220 if ce, _ := makeCE(w); exp[j] != ce { 221 t.Errorf("%U: element %d is %X; want %X", []rune(tt.str)[0], j, exp[j], ce) 222 } 223 } 224 e, _ = e.nextIndexed() 225 } 226 // Verify uniquing. 227 if len(b.t.ExpandElem) != totalElements { 228 t.Errorf("len(expandElem)==%d; want %d", len(b.t.ExpandElem), totalElements) 229 } 230 } 231 232 var contractTest = []ducetElem{ 233 {"abc", pCE(102)}, 234 {"abd", pCE(103)}, 235 {"a", pCE(100)}, 236 {"ab", pCE(101)}, 237 {"ac", pCE(104)}, 238 {"bcd", pCE(202)}, 239 {"b", pCE(200)}, 240 {"bc", pCE(201)}, 241 {"bd", pCE(203)}, 242 // shares suffixes with a* 243 {"Ab", pCE(301)}, 244 {"A", pCE(300)}, 245 {"Ac", pCE(304)}, 246 {"Abc", pCE(302)}, 247 {"Abd", pCE(303)}, 248 // starter to be ignored 249 {"z", pCE(1000)}, 250 } 251 252 func TestContract(t *testing.T) { 253 const ( 254 totalElements = 5 + 5 + 4 255 ) 256 b := newBuilder(t, contractTest) 257 o := &b.root 258 b.processContractions(o) 259 260 indexMap := make(map[int]bool) 261 handleMap := make(map[rune]*entry) 262 for e := o.front(); e != nil; e, _ = e.nextIndexed() { 263 if e.contractionHandle.n > 0 { 264 handleMap[e.runes[0]] = e 265 indexMap[e.contractionHandle.index] = true 266 } 267 } 268 // Verify uniquing. 269 if len(indexMap) != 2 { 270 t.Errorf("number of tries is %d; want %d", len(indexMap), 2) 271 } 272 for _, tt := range contractTest { 273 e, ok := handleMap[[]rune(tt.str)[0]] 274 if !ok { 275 continue 276 } 277 str := tt.str[1:] 278 offset, n := lookup(&b.t.ContractTries, e.contractionHandle, []byte(str)) 279 if len(str) != n { 280 t.Errorf("%s: bytes consumed==%d; want %d", tt.str, n, len(str)) 281 } 282 ce := b.t.ContractElem[offset+e.contractionIndex] 283 if want, _ := makeCE(tt.ces[0]); want != ce { 284 t.Errorf("%s: element %X; want %X", tt.str, ce, want) 285 } 286 } 287 if len(b.t.ContractElem) != totalElements { 288 t.Errorf("len(expandElem)==%d; want %d", len(b.t.ContractElem), totalElements) 289 } 290 }