golang.org/x/text@v0.14.0/cases/context_test.go (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package cases 6 7 import ( 8 "strings" 9 "testing" 10 "unicode" 11 12 "golang.org/x/text/internal/testtext" 13 "golang.org/x/text/language" 14 "golang.org/x/text/transform" 15 "golang.org/x/text/unicode/norm" 16 "golang.org/x/text/unicode/rangetable" 17 ) 18 19 // The following definitions are taken directly from Chapter 3 of The Unicode 20 // Standard. 21 22 func propCased(r rune) bool { 23 return propLower(r) || propUpper(r) || unicode.IsTitle(r) 24 } 25 26 func propLower(r rune) bool { 27 return unicode.IsLower(r) || unicode.Is(unicode.Other_Lowercase, r) 28 } 29 30 func propUpper(r rune) bool { 31 return unicode.IsUpper(r) || unicode.Is(unicode.Other_Uppercase, r) 32 } 33 34 func propIgnore(r rune) bool { 35 if unicode.In(r, unicode.Mn, unicode.Me, unicode.Cf, unicode.Lm, unicode.Sk) { 36 return true 37 } 38 return caseIgnorable[r] 39 } 40 41 func hasBreakProp(r rune) bool { 42 // binary search over ranges 43 lo := 0 44 hi := len(breakProp) 45 for lo < hi { 46 m := lo + (hi-lo)/2 47 bp := &breakProp[m] 48 if bp.lo <= r && r <= bp.hi { 49 return true 50 } 51 if r < bp.lo { 52 hi = m 53 } else { 54 lo = m + 1 55 } 56 } 57 return false 58 } 59 60 func contextFromRune(r rune) *context { 61 c := context{dst: make([]byte, 128), src: []byte(string(r)), atEOF: true} 62 c.next() 63 return &c 64 } 65 66 func TestCaseProperties(t *testing.T) { 67 if unicode.Version != UnicodeVersion { 68 // Properties of existing code points may change by Unicode version, so 69 // we need to skip. 70 t.Skipf("Skipping as core Unicode version %s different than %s", unicode.Version, UnicodeVersion) 71 } 72 assigned := rangetable.Assigned(UnicodeVersion) 73 coreVersion := rangetable.Assigned(unicode.Version) 74 for r := rune(0); r <= lastRuneForTesting; r++ { 75 if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) { 76 continue 77 } 78 c := contextFromRune(r) 79 if got, want := c.info.isCaseIgnorable(), propIgnore(r); got != want { 80 t.Errorf("caseIgnorable(%U): got %v; want %v (%x)", r, got, want, c.info) 81 } 82 // New letters may change case types, but existing case pairings should 83 // not change. See Case Pair Stability in 84 // https://unicode.org/policies/stability_policy.html. 85 if rf := unicode.SimpleFold(r); rf != r && unicode.In(rf, assigned) { 86 if got, want := c.info.isCased(), propCased(r); got != want { 87 t.Errorf("cased(%U): got %v; want %v (%x)", r, got, want, c.info) 88 } 89 if got, want := c.caseType() == cUpper, propUpper(r); got != want { 90 t.Errorf("upper(%U): got %v; want %v (%x)", r, got, want, c.info) 91 } 92 if got, want := c.caseType() == cLower, propLower(r); got != want { 93 t.Errorf("lower(%U): got %v; want %v (%x)", r, got, want, c.info) 94 } 95 } 96 if got, want := c.info.isBreak(), hasBreakProp(r); got != want { 97 t.Errorf("isBreak(%U): got %v; want %v (%x)", r, got, want, c.info) 98 } 99 } 100 // TODO: get title case from unicode file. 101 } 102 103 func TestMapping(t *testing.T) { 104 assigned := rangetable.Assigned(UnicodeVersion) 105 coreVersion := rangetable.Assigned(unicode.Version) 106 if coreVersion == nil { 107 coreVersion = assigned 108 } 109 apply := func(r rune, f func(c *context) bool) string { 110 c := contextFromRune(r) 111 f(c) 112 return string(c.dst[:c.pDst]) 113 } 114 115 for r, tt := range special { 116 if got, want := apply(r, lower), tt.toLower; got != want { 117 t.Errorf("lowerSpecial:(%U): got %+q; want %+q", r, got, want) 118 } 119 if got, want := apply(r, title), tt.toTitle; got != want { 120 t.Errorf("titleSpecial:(%U): got %+q; want %+q", r, got, want) 121 } 122 if got, want := apply(r, upper), tt.toUpper; got != want { 123 t.Errorf("upperSpecial:(%U): got %+q; want %+q", r, got, want) 124 } 125 } 126 127 for r := rune(0); r <= lastRuneForTesting; r++ { 128 if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) { 129 continue 130 } 131 if rf := unicode.SimpleFold(r); rf == r || !unicode.In(rf, assigned) { 132 continue 133 } 134 if _, ok := special[r]; ok { 135 continue 136 } 137 want := string(unicode.ToLower(r)) 138 if got := apply(r, lower); got != want { 139 t.Errorf("lower:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want)) 140 } 141 142 want = string(unicode.ToUpper(r)) 143 if got := apply(r, upper); got != want { 144 t.Errorf("upper:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want)) 145 } 146 147 want = string(unicode.ToTitle(r)) 148 if got := apply(r, title); got != want { 149 t.Errorf("title:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want)) 150 } 151 } 152 } 153 154 func runeFoldData(r rune) (x struct{ simple, full, special string }) { 155 x = foldMap[r] 156 if x.simple == "" { 157 x.simple = string(unicode.ToLower(r)) 158 } 159 if x.full == "" { 160 x.full = string(unicode.ToLower(r)) 161 } 162 if x.special == "" { 163 x.special = x.full 164 } 165 return 166 } 167 168 func TestFoldData(t *testing.T) { 169 assigned := rangetable.Assigned(UnicodeVersion) 170 coreVersion := rangetable.Assigned(unicode.Version) 171 if coreVersion == nil { 172 coreVersion = assigned 173 } 174 apply := func(r rune, f func(c *context) bool) (string, info) { 175 c := contextFromRune(r) 176 f(c) 177 return string(c.dst[:c.pDst]), c.info.cccType() 178 } 179 for r := rune(0); r <= lastRuneForTesting; r++ { 180 if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) { 181 continue 182 } 183 x := runeFoldData(r) 184 if got, info := apply(r, foldFull); got != x.full { 185 t.Errorf("full:%q (%U): got %q %U; want %q %U (ccc=%x)", r, r, got, []rune(got), x.full, []rune(x.full), info) 186 } 187 // TODO: special and simple. 188 } 189 } 190 191 func TestCCC(t *testing.T) { 192 assigned := rangetable.Assigned(UnicodeVersion) 193 normVersion := rangetable.Assigned(norm.Version) 194 for r := rune(0); r <= lastRuneForTesting; r++ { 195 if !unicode.In(r, assigned) || !unicode.In(r, normVersion) { 196 continue 197 } 198 c := contextFromRune(r) 199 200 p := norm.NFC.PropertiesString(string(r)) 201 want := cccOther 202 switch p.CCC() { 203 case 0: 204 want = cccZero 205 case above: 206 want = cccAbove 207 } 208 if got := c.info.cccType(); got != want { 209 t.Errorf("%U: got %x; want %x", r, got, want) 210 } 211 } 212 } 213 214 func TestWordBreaks(t *testing.T) { 215 for _, tt := range breakTest { 216 testtext.Run(t, tt, func(t *testing.T) { 217 parts := strings.Split(tt, "|") 218 want := "" 219 for _, s := range parts { 220 found := false 221 // This algorithm implements title casing given word breaks 222 // as defined in the Unicode standard 3.13 R3. 223 for _, r := range s { 224 title := unicode.ToTitle(r) 225 lower := unicode.ToLower(r) 226 if !found && title != lower { 227 found = true 228 want += string(title) 229 } else { 230 want += string(lower) 231 } 232 } 233 } 234 src := strings.Join(parts, "") 235 got := Title(language.Und).String(src) 236 if got != want { 237 t.Errorf("got %q; want %q", got, want) 238 } 239 }) 240 } 241 } 242 243 func TestContext(t *testing.T) { 244 tests := []struct { 245 desc string 246 dstSize int 247 atEOF bool 248 src string 249 out string 250 nSrc int 251 err error 252 ops string 253 prefixArg string 254 prefixWant bool 255 }{{ 256 desc: "next: past end, atEOF, no checkpoint", 257 dstSize: 10, 258 atEOF: true, 259 src: "12", 260 out: "", 261 nSrc: 2, 262 ops: "next;next;next", 263 // Test that calling prefix with a non-empty argument when the buffer 264 // is depleted returns false. 265 prefixArg: "x", 266 prefixWant: false, 267 }, { 268 desc: "next: not at end, atEOF, no checkpoint", 269 dstSize: 10, 270 atEOF: false, 271 src: "12", 272 out: "", 273 nSrc: 0, 274 err: transform.ErrShortSrc, 275 ops: "next;next", 276 prefixArg: "", 277 prefixWant: true, 278 }, { 279 desc: "next: past end, !atEOF, no checkpoint", 280 dstSize: 10, 281 atEOF: false, 282 src: "12", 283 out: "", 284 nSrc: 0, 285 err: transform.ErrShortSrc, 286 ops: "next;next;next", 287 prefixArg: "", 288 prefixWant: true, 289 }, { 290 desc: "next: past end, !atEOF, checkpoint", 291 dstSize: 10, 292 atEOF: false, 293 src: "12", 294 out: "", 295 nSrc: 2, 296 ops: "next;next;checkpoint;next", 297 prefixArg: "", 298 prefixWant: true, 299 }, { 300 desc: "copy: exact count, atEOF, no checkpoint", 301 dstSize: 2, 302 atEOF: true, 303 src: "12", 304 out: "12", 305 nSrc: 2, 306 ops: "next;copy;next;copy;next", 307 prefixArg: "", 308 prefixWant: true, 309 }, { 310 desc: "copy: past end, !atEOF, no checkpoint", 311 dstSize: 2, 312 atEOF: false, 313 src: "12", 314 out: "", 315 nSrc: 0, 316 err: transform.ErrShortSrc, 317 ops: "next;copy;next;copy;next", 318 prefixArg: "", 319 prefixWant: true, 320 }, { 321 desc: "copy: past end, !atEOF, checkpoint", 322 dstSize: 2, 323 atEOF: false, 324 src: "12", 325 out: "12", 326 nSrc: 2, 327 ops: "next;copy;next;copy;checkpoint;next", 328 prefixArg: "", 329 prefixWant: true, 330 }, { 331 desc: "copy: short dst", 332 dstSize: 1, 333 atEOF: false, 334 src: "12", 335 out: "", 336 nSrc: 0, 337 err: transform.ErrShortDst, 338 ops: "next;copy;next;copy;checkpoint;next", 339 prefixArg: "12", 340 prefixWant: false, 341 }, { 342 desc: "copy: short dst, checkpointed", 343 dstSize: 1, 344 atEOF: false, 345 src: "12", 346 out: "1", 347 nSrc: 1, 348 err: transform.ErrShortDst, 349 ops: "next;copy;checkpoint;next;copy;next", 350 prefixArg: "", 351 prefixWant: true, 352 }, { 353 desc: "writeString: simple", 354 dstSize: 3, 355 atEOF: true, 356 src: "1", 357 out: "1ab", 358 nSrc: 1, 359 ops: "next;copy;writeab;next", 360 prefixArg: "", 361 prefixWant: true, 362 }, { 363 desc: "writeString: short dst", 364 dstSize: 2, 365 atEOF: true, 366 src: "12", 367 out: "", 368 nSrc: 0, 369 err: transform.ErrShortDst, 370 ops: "next;copy;writeab;next", 371 prefixArg: "2", 372 prefixWant: true, 373 }, { 374 desc: "writeString: simple", 375 dstSize: 3, 376 atEOF: true, 377 src: "12", 378 out: "1ab", 379 nSrc: 2, 380 ops: "next;copy;next;writeab;next", 381 prefixArg: "", 382 prefixWant: true, 383 }, { 384 desc: "writeString: short dst", 385 dstSize: 2, 386 atEOF: true, 387 src: "12", 388 out: "", 389 nSrc: 0, 390 err: transform.ErrShortDst, 391 ops: "next;copy;next;writeab;next", 392 prefixArg: "1", 393 prefixWant: false, 394 }, { 395 desc: "prefix", 396 dstSize: 2, 397 atEOF: true, 398 src: "12", 399 out: "", 400 nSrc: 0, 401 // Context will assign an ErrShortSrc if the input wasn't exhausted. 402 err: transform.ErrShortSrc, 403 prefixArg: "12", 404 prefixWant: true, 405 }} 406 for _, tt := range tests { 407 c := context{dst: make([]byte, tt.dstSize), src: []byte(tt.src), atEOF: tt.atEOF} 408 409 for _, op := range strings.Split(tt.ops, ";") { 410 switch op { 411 case "next": 412 c.next() 413 case "checkpoint": 414 c.checkpoint() 415 case "writeab": 416 c.writeString("ab") 417 case "copy": 418 c.copy() 419 case "": 420 default: 421 t.Fatalf("unknown op %q", op) 422 } 423 } 424 if got := c.hasPrefix(tt.prefixArg); got != tt.prefixWant { 425 t.Errorf("%s:\nprefix was %v; want %v", tt.desc, got, tt.prefixWant) 426 } 427 nDst, nSrc, err := c.ret() 428 if err != tt.err { 429 t.Errorf("%s:\nerror was %v; want %v", tt.desc, err, tt.err) 430 } 431 if out := string(c.dst[:nDst]); out != tt.out { 432 t.Errorf("%s:\nout was %q; want %q", tt.desc, out, tt.out) 433 } 434 if nSrc != tt.nSrc { 435 t.Errorf("%s:\nnSrc was %d; want %d", tt.desc, nSrc, tt.nSrc) 436 } 437 } 438 }