github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/unicode/norm/normalize_test.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package norm

import (
	"bytes"
	"flag"
	"fmt"
	"io"
	"log"
	"strings"
	"testing"
	"unicode/utf8"
)

var (
	testn = flag.Int("testn", -1, "specific test number to run or -1 for all")
)

// pc replaces any rune r that is repeated n times, for n > 1, with r{n}.
func pc(s string) []byte {
	b := bytes.NewBuffer(make([]byte, 0, len(s)))
	for i := 0; i < len(s); {
		r, sz := utf8.DecodeRuneInString(s[i:])
		n := 0
		if sz == 1 {
			// Special-case one-byte case to handle repetition for invalid UTF-8.
			for c := s[i]; i+n < len(s) && s[i+n] == c; n++ {
			}
		} else {
			for _, r2 := range s[i:] {
				if r2 != r {
					break
				}
				n++
			}
		}
		b.WriteString(s[i : i+sz])
		if n > 1 {
			fmt.Fprintf(b, "{%d}", n)
		}
		i += sz * n
	}
	return b.Bytes()
}

// pidx finds the index from which two strings start to differ, plus context.
// It returns the index and ellipsis if the index is greater than 0.
func pidx(a, b string) (i int, prefix string) {
	for ; i < len(a) && i < len(b) && a[i] == b[i]; i++ {
	}
	if i < 8 {
		return 0, ""
	}
	i -= 3 // ensure taking at least one full rune before the difference.
	for k := i - 7; i > k && !utf8.RuneStart(a[i]); i-- {
	}
	return i, "..."
}

type PositionTest struct {
	input  string
	pos    int
	buffer string // expected contents of reorderBuffer, if applicable
}

type positionFunc func(rb *reorderBuffer, s string) (int, []byte)

func runPosTests(t *testing.T, name string, f Form, fn positionFunc, tests []PositionTest) {
	rb := reorderBuffer{}
	rb.init(f, nil)
	for i, test := range tests {
		rb.reset()
		rb.src = inputString(test.input)
		rb.nsrc = len(test.input)
		pos, out := fn(&rb, test.input)
		if pos != test.pos {
			t.Errorf("%s:%d: position is %d; want %d", name, i, pos, test.pos)
		}
		if outs := string(out); outs != test.buffer {
			k, pfx := pidx(outs, test.buffer)
			t.Errorf("%s:%d: buffer \nwas %s%+q; \nwant %s%+q", name, i, pfx, pc(outs[k:]), pfx, pc(test.buffer[k:]))
		}
	}
}

func grave(n int) string {
	return rep(0x0300, n)
}

func rep(r rune, n int) string {
	return strings.Repeat(string(r), n)
}

const segSize = maxByteBufferSize

var cgj = GraphemeJoiner

var decomposeSegmentTests = []PositionTest{
	// illegal runes
	{"\xC2", 0, ""},
	{"\xC0", 1, "\xC0"},
	{"\u00E0\x80", 2, "\u0061\u0300"},
	// starter
	{"a", 1, "a"},
	{"ab", 1, "a"},
	// starter + composing
	{"a\u0300", 3, "a\u0300"},
	{"a\u0300b", 3, "a\u0300"},
	// with decomposition
	{"\u00C0", 2, "A\u0300"},
	{"\u00C0b", 2, "A\u0300"},
	// long
	{grave(31), 60, grave(30) + cgj},
	{"a" + grave(31), 61, "a" + grave(30) + cgj},

	// Stability tests: see http://www.unicode.org/review/pr-29.html.
	// U+0300 COMBINING GRAVE ACCENT;Mn;230;NSM;;;;;N;NON-SPACING GRAVE;;;;
	// U+0B47 ORIYA VOWEL SIGN E;Mc;0;L;;;;;N;;;;;
	// U+0B3E ORIYA VOWEL SIGN AA;Mc;0;L;;;;;N;;;;;
	// U+1100 HANGUL CHOSEONG KIYEOK;Lo;0;L;;;;;N;;;;;
	// U+1161 HANGUL JUNGSEONG A;Lo;0;L;;;;;N;;;;;
	{"\u0B47\u0300\u0B3E", 8, "\u0B47\u0300\u0B3E"},
	{"\u1100\u0300\u1161", 8, "\u1100\u0300\u1161"},
	{"\u0B47\u0B3E", 6, "\u0B47\u0B3E"},
	{"\u1100\u1161", 6, "\u1100\u1161"},

	// U+0D4A MALAYALAM VOWEL SIGN O;Mc;0;L;0D46 0D3E;;;;N;;;;;
	// Sequence of decomposing characters that are starters and modifiers.
	{"\u0d4a" + strings.Repeat("\u0d3e", 31), 90, "\u0d46" + strings.Repeat("\u0d3e", 30) + cgj},

	{grave(30), 60, grave(30)},
	// U+FF9E is a starter, but decomposes to U+3099, which is not.
	{grave(30) + "\uff9e", 60, grave(30) + cgj},
	// ends with incomplete UTF-8 encoding
	{"\xCC", 0, ""},
	{"\u0300\xCC", 2, "\u0300"},
}

func decomposeSegmentF(rb *reorderBuffer, s string) (int, []byte) {
	rb.initString(NFD, s)
	rb.setFlusher(nil, appendFlush)
	p := decomposeSegment(rb, 0, true)
	return p, rb.out
}

func TestDecomposeSegment(t *testing.T) {
	runPosTests(t, "TestDecomposeSegment", NFC, decomposeSegmentF, decomposeSegmentTests)
}

var firstBoundaryTests = []PositionTest{
	// no boundary
	{"", -1, ""},
	{"\u0300", -1, ""},
	{"\x80\x80", -1, ""},
	// illegal runes
	{"\xff", 0, ""},
	{"\u0300\xff", 2, ""},
	{"\u0300\xc0\x80\x80", 2, ""},
	// boundaries
	{"a", 0, ""},
	{"\u0300a", 2, ""},
	// Hangul
	{"\u1103\u1161", 0, ""},
	{"\u110B\u1173\u11B7", 0, ""},
	{"\u1161\u110B\u1173\u11B7", 3, ""},
	{"\u1173\u11B7\u1103\u1161", 6, ""},
	// too many combining characters.
	{grave(maxNonStarters - 1), -1, ""},
	{grave(maxNonStarters), 60, ""},
	{grave(maxNonStarters + 1), 60, ""},
}

func firstBoundaryF(rb *reorderBuffer, s string) (int, []byte) {
	return rb.f.form.FirstBoundary([]byte(s)), nil
}

func firstBoundaryStringF(rb *reorderBuffer, s string) (int, []byte) {
	return rb.f.form.FirstBoundaryInString(s), nil
}

func TestFirstBoundary(t *testing.T) {
	runPosTests(t, "TestFirstBoundary", NFC, firstBoundaryF, firstBoundaryTests)
	runPosTests(t, "TestFirstBoundaryInString", NFC, firstBoundaryStringF, firstBoundaryTests)
}

var decomposeToLastTests = []PositionTest{
	// ends with inert character
	{"Hello!", 6, ""},
	{"\u0632", 2, ""},
	{"a\u0301\u0635", 5, ""},
	// ends with non-inert starter
	{"a", 0, "a"},
	{"a\u0301a", 3, "a"},
	{"a\u0301\u03B9", 3, "\u03B9"},
	{"a\u0327", 0, "a\u0327"},
	// illegal runes
	{"\xFF", 1, ""},
	{"aa\xFF", 3, ""},
	{"\xC0\x80\x80", 3, ""},
	{"\xCC\x80\x80", 3, ""},
	// ends with incomplete UTF-8 encoding
	{"a\xCC", 2, ""},
	// ends with combining characters
	{"\u0300\u0301", 0, "\u0300\u0301"},
	{"a\u0300\u0301", 0, "a\u0300\u0301"},
	{"a\u0301\u0308", 0, "a\u0301\u0308"},
	{"a\u0308\u0301", 0, "a\u0308\u0301"},
	{"aaaa\u0300\u0301", 3, "a\u0300\u0301"},
	{"\u0300a\u0300\u0301", 2, "a\u0300\u0301"},
	{"\u00C0", 0, "A\u0300"},
	{"a\u00C0", 1, "A\u0300"},
	// decomposing
	{"a\u0300\u00E0", 3, "a\u0300"},
	// multisegment decompositions (flushes leading segments)
	{"a\u0300\uFDC0", 7, "\u064A"},
	{"\uFDC0" + grave(29), 4, "\u064A" + grave(29)},
	{"\uFDC0" + grave(30), 4, "\u064A" + grave(30)},
	{"\uFDC0" + grave(31), 5, grave(30)},
	{"\uFDFA" + grave(14), 31, "\u0645" + grave(14)},
	// Overflow
	{"\u00E0" + grave(29), 0, "a" + grave(30)},
	{"\u00E0" + grave(30), 2, grave(30)},
	// Hangul
	{"a\u1103", 1, "\u1103"},
	{"a\u110B", 1, "\u110B"},
	{"a\u110B\u1173", 1, "\u110B\u1173"},
	// See comment in composition.go:compBoundaryAfter.
	{"a\u110B\u1173\u11B7", 1, "\u110B\u1173\u11B7"},
	{"a\uC73C", 1, "\u110B\u1173"},
	{"다음", 3, "\u110B\u1173\u11B7"},
	{"다", 0, "\u1103\u1161"},
	{"\u1103\u1161\u110B\u1173\u11B7", 6, "\u110B\u1173\u11B7"},
	{"\u110B\u1173\u11B7\u1103\u1161", 9, "\u1103\u1161"},
	{"다음음", 6, "\u110B\u1173\u11B7"},
	{"음다다", 6, "\u1103\u1161"},
	// maximized buffer
	{"a" + grave(30), 0, "a" + grave(30)},
	// Buffer overflow
	{"a" + grave(31), 3, grave(30)},
	// weird UTF-8
	{"a\u0300\u11B7", 0, "a\u0300\u11B7"},
}

func decomposeToLast(rb *reorderBuffer, s string) (int, []byte) {
	rb.setFlusher([]byte(s), appendFlush)
	decomposeToLastBoundary(rb)
	buf := rb.flush(nil)
	return len(rb.out), buf
}

func TestDecomposeToLastBoundary(t *testing.T) {
	runPosTests(t, "TestDecomposeToLastBoundary", NFKC, decomposeToLast, decomposeToLastTests)
}

var lastBoundaryTests = []PositionTest{
	// ends with inert character
	{"Hello!", 6, ""},
	{"\u0632", 2, ""},
	// ends with non-inert starter
	{"a", 0, ""},
	// illegal runes
	{"\xff", 1, ""},
	{"aa\xff", 3, ""},
	{"a\xff\u0300", 1, ""}, // TODO: should probably be 2.
	{"\xc0\x80\x80", 3, ""},
	{"\xc0\x80\x80\u0300", 3, ""},
	// ends with incomplete UTF-8 encoding
	{"\xCC", -1, ""},
	{"\xE0\x80", -1, ""},
	{"\xF0\x80\x80", -1, ""},
	{"a\xCC", 0, ""},
	{"\x80\xCC", 1, ""},
	{"\xCC\xCC", 1, ""},
	// ends with combining characters
	{"a\u0300\u0301", 0, ""},
	{"aaaa\u0300\u0301", 3, ""},
	{"\u0300a\u0300\u0301", 2, ""},
	{"\u00C2", 0, ""},
	{"a\u00C2", 1, ""},
	// decomposition may recombine
	{"\u0226", 0, ""},
	// no boundary
	{"", -1, ""},
	{"\u0300\u0301", -1, ""},
	{"\u0300", -1, ""},
	{"\x80\x80", -1, ""},
	{"\x80\x80\u0301", -1, ""},
	// Hangul
	{"다음", 3, ""},
	{"다", 0, ""},
	{"\u1103\u1161\u110B\u1173\u11B7", 6, ""},
	{"\u110B\u1173\u11B7\u1103\u1161", 9, ""},
	// too many combining characters.
	{grave(maxNonStarters - 1), -1, ""},
	// May still be preceded with a non-starter.
	{grave(maxNonStarters), -1, ""},
	// May still need to insert a cgj after the last combiner.
	{grave(maxNonStarters + 1), 2, ""},
	{grave(maxNonStarters + 2), 4, ""},

	{"a" + grave(maxNonStarters-1), 0, ""},
	{"a" + grave(maxNonStarters), 0, ""},
	// May still need to insert a cgj after the last combiner.
	{"a" + grave(maxNonStarters+1), 3, ""},
	{"a" + grave(maxNonStarters+2), 5, ""},
}

func lastBoundaryF(rb *reorderBuffer, s string) (int, []byte) {
	return rb.f.form.LastBoundary([]byte(s)), nil
}

func TestLastBoundary(t *testing.T) {
	runPosTests(t, "TestLastBoundary", NFC, lastBoundaryF, lastBoundaryTests)
}

var quickSpanTests = []PositionTest{
	{"", 0, ""},
	// starters
	{"a", 1, ""},
	{"abc", 3, ""},
	{"\u043Eb", 3, ""},
	// incomplete last rune.
	{"\xCC", 1, ""},
	{"a\xCC", 2, ""},
	// incorrectly ordered combining characters
	{"\u0300\u0316", 0, ""},
	{"\u0300\u0316cd", 0, ""},
	// have a maximum number of combining characters.
	{rep(0x035D, 30) + "\u035B", 0, ""},
	{"a" + rep(0x035D, 30) + "\u035B", 0, ""},
	{"Ɵ" + rep(0x035D, 30) + "\u035B", 0, ""},
	{"aa" + rep(0x035D, 30) + "\u035B", 1, ""},
	{rep(0x035D, 30) + cgj + "\u035B", 64, ""},
	{"a" + rep(0x035D, 30) + cgj + "\u035B", 65, ""},
	{"Ɵ" + rep(0x035D, 30) + cgj + "\u035B", 66, ""},
	{"aa" + rep(0x035D, 30) + cgj + "\u035B", 66, ""},
}

var quickSpanNFDTests = []PositionTest{
	// needs decomposing
	{"\u00C0", 0, ""},
	{"abc\u00C0", 3, ""},
	// correctly ordered combining characters
	{"\u0300", 2, ""},
	{"ab\u0300", 4, ""},
	{"ab\u0300cd", 6, ""},
	{"\u0300cd", 4, ""},
	{"\u0316\u0300", 4, ""},
	{"ab\u0316\u0300", 6, ""},
	{"ab\u0316\u0300cd", 8, ""},
	{"ab\u0316\u0300\u00C0", 6, ""},
	{"\u0316\u0300cd", 6, ""},
	{"\u043E\u0308b", 5, ""},
	// incorrectly ordered combining characters
	{"ab\u0300\u0316", 1, ""}, // TODO: we could skip 'b' as well.
	{"ab\u0300\u0316cd", 1, ""},
	// Hangul
	{"같은", 0, ""},
}

var quickSpanNFCTests = []PositionTest{
	// okay composed
	{"\u00C0", 2, ""},
	{"abc\u00C0", 5, ""},
	// correctly ordered combining characters
	{"ab\u0300", 1, ""},
	{"ab\u0300cd", 1, ""},
	{"ab\u0316\u0300", 1, ""},
	{"ab\u0316\u0300cd", 1, ""},
	{"\u00C0\u035D", 4, ""},
	// we do not special case leading combining characters
	{"\u0300cd", 0, ""},
	{"\u0300", 0, ""},
	{"\u0316\u0300", 0, ""},
	{"\u0316\u0300cd", 0, ""},
	// incorrectly ordered combining characters
	{"ab\u0300\u0316", 1, ""},
	{"ab\u0300\u0316cd", 1, ""},
	// Hangul
	{"같은", 6, ""},
	// We return the start of the violating segment in case of overflow.
	{grave(30) + "\uff9e", 0, ""},
	{grave(30), 0, ""},
}

func doQuickSpan(rb *reorderBuffer, s string) (int, []byte) {
	return rb.f.form.QuickSpan([]byte(s)), nil
}

func doQuickSpanString(rb *reorderBuffer, s string) (int, []byte) {
	return rb.f.form.QuickSpanString(s), nil
}

func TestQuickSpan(t *testing.T) {
	runPosTests(t, "TestQuickSpanNFD1", NFD, doQuickSpan, quickSpanTests)
	runPosTests(t, "TestQuickSpanNFD2", NFD, doQuickSpan, quickSpanNFDTests)
	runPosTests(t, "TestQuickSpanNFC1", NFC, doQuickSpan, quickSpanTests)
	runPosTests(t, "TestQuickSpanNFC2", NFC, doQuickSpan, quickSpanNFCTests)

	runPosTests(t, "TestQuickSpanStringNFD1", NFD, doQuickSpanString, quickSpanTests)
	runPosTests(t, "TestQuickSpanStringNFD2", NFD, doQuickSpanString, quickSpanNFDTests)
	runPosTests(t, "TestQuickSpanStringNFC1", NFC, doQuickSpanString, quickSpanTests)
	runPosTests(t, "TestQuickSpanStringNFC2", NFC, doQuickSpanString, quickSpanNFCTests)
}

var isNormalTests = []PositionTest{
	{"", 1, ""},
	// illegal runes
	{"\xff", 1, ""},
	// starters
	{"a", 1, ""},
	{"abc", 1, ""},
	{"\u043Eb", 1, ""},
	// incorrectly ordered combining characters
	{"\u0300\u0316", 0, ""},
	{"ab\u0300\u0316", 0, ""},
	{"ab\u0300\u0316cd", 0, ""},
	{"\u0300\u0316cd", 0, ""},
}
var isNormalNFDTests = []PositionTest{
	// needs decomposing
	{"\u00C0", 0, ""},
	{"abc\u00C0", 0, ""},
	// correctly ordered combining characters
	{"\u0300", 1, ""},
	{"ab\u0300", 1, ""},
	{"ab\u0300cd", 1, ""},
	{"\u0300cd", 1, ""},
	{"\u0316\u0300", 1, ""},
	{"ab\u0316\u0300", 1, ""},
	{"ab\u0316\u0300cd", 1, ""},
	{"\u0316\u0300cd", 1, ""},
	{"\u043E\u0308b", 1, ""},
	// Hangul
	{"같은", 0, ""},
}
var isNormalNFCTests = []PositionTest{
	// okay composed
	{"\u00C0", 1, ""},
	{"abc\u00C0", 1, ""},
	// need reordering
	{"a\u0300", 0, ""},
	{"a\u0300cd", 0, ""},
	{"a\u0316\u0300", 0, ""},
	{"a\u0316\u0300cd", 0, ""},
	// correctly ordered combining characters
	{"ab\u0300", 1, ""},
	{"ab\u0300cd", 1, ""},
	{"ab\u0316\u0300", 1, ""},
	{"ab\u0316\u0300cd", 1, ""},
	{"\u00C0\u035D", 1, ""},
	{"\u0300", 1, ""},
	{"\u0316\u0300cd", 1, ""},
	// Hangul
	{"같은", 1, ""},
}

var isNormalNFKXTests = []PositionTest{
	// Special case.
	{"\u00BC", 0, ""},
}

func isNormalF(rb *reorderBuffer, s string) (int, []byte) {
	if rb.f.form.IsNormal([]byte(s)) {
		return 1, nil
	}
	return 0, nil
}

func isNormalStringF(rb *reorderBuffer, s string) (int, []byte) {
	if rb.f.form.IsNormalString(s) {
		return 1, nil
	}
	return 0, nil
}

func TestIsNormal(t *testing.T) {
	runPosTests(t, "TestIsNormalNFD1", NFD, isNormalF, isNormalTests)
	runPosTests(t, "TestIsNormalNFD2", NFD, isNormalF, isNormalNFDTests)
	runPosTests(t, "TestIsNormalNFC1", NFC, isNormalF, isNormalTests)
	runPosTests(t, "TestIsNormalNFC2", NFC, isNormalF, isNormalNFCTests)
	runPosTests(t, "TestIsNormalNFKD1", NFKD, isNormalF, isNormalTests)
	runPosTests(t, "TestIsNormalNFKD2", NFKD, isNormalF, isNormalNFDTests)
	runPosTests(t, "TestIsNormalNFKD3", NFKD, isNormalF, isNormalNFKXTests)
	runPosTests(t, "TestIsNormalNFKC1", NFKC, isNormalF, isNormalTests)
	runPosTests(t, "TestIsNormalNFKC2", NFKC, isNormalF, isNormalNFCTests)
	runPosTests(t, "TestIsNormalNFKC3", NFKC, isNormalF, isNormalNFKXTests)
}

func TestIsNormalString(t *testing.T) {
	runPosTests(t, "TestIsNormalNFD1", NFD, isNormalStringF, isNormalTests)
	runPosTests(t, "TestIsNormalNFD2", NFD, isNormalStringF, isNormalNFDTests)
	runPosTests(t, "TestIsNormalNFC1", NFC, isNormalStringF, isNormalTests)
	runPosTests(t, "TestIsNormalNFC2", NFC, isNormalStringF, isNormalNFCTests)
}

type AppendTest struct {
	left  string
	right string
	out   string
}

type appendFunc func(f Form, out []byte, s string) []byte

var fstr = []string{"NFC", "NFD", "NFKC", "NFKD"}

func runNormTests(t *testing.T, name string, fn appendFunc) {
	for f := NFC; f <= NFKD; f++ {
		runAppendTests(t, name, f, fn, normTests[f])
	}
}

func runAppendTests(t *testing.T, name string, f Form, fn appendFunc, tests []AppendTest) {
	for i, test := range tests {
		if *testn >= 0 && i != *testn {
			continue
		}
		out := []byte(test.left)
		have := string(fn(f, out, test.right))
		if len(have) != len(test.out) {
			t.Errorf("%s.%s:%d: length is %d; want %d (%+q vs %+q)", fstr[f], name, i, len(have), len(test.out), pc(have), pc(test.out))
		}
		if have != test.out {
			k, pf := pidx(have, test.out)
			t.Errorf("%s.%s:%d: \nwas %s%+q; \nwant %s%+q", fstr[f], name, i, pf, pc(have[k:]), pf, pc(test.out[k:]))
		}

		// Bootstrap by normalizing input. Ensures that the various variants
		// behave the same.
		for g := NFC; g <= NFKD; g++ {
			if f == g {
				continue
			}
			want := g.String(test.left + test.right)
			have := string(fn(g, g.AppendString(nil, test.left), test.right))
			if len(have) != len(want) {
				t.Errorf("%s(%s.%s):%d: length is %d; want %d (%+q vs %+q)", fstr[g], fstr[f], name, i, len(have), len(want), pc(have), pc(want))
			}
			if have != want {
				k, pf := pidx(have, want)
				t.Errorf("%s(%s.%s):%d: \nwas %s%+q; \nwant %s%+q", fstr[g], fstr[f], name, i, pf, pc(have[k:]), pf, pc(want[k:]))
			}
		}
	}
}

var normTests = [][]AppendTest{
	appendTestsNFC,
	appendTestsNFD,
	appendTestsNFKC,
	appendTestsNFKD,
}

var appendTestsNFC = []AppendTest{
	{"", ascii, ascii},
	{"", txt_all, txt_all},
	{"\uff9e", grave(30), "\uff9e" + grave(29) + cgj + grave(1)},
	{grave(30), "\uff9e", grave(30) + cgj + "\uff9e"},

	// Tests designed for Iter.
	{ // ordering of non-composing combining characters
		"",
		"\u0305\u0316",
		"\u0316\u0305",
	},
	{ // segment overflow
		"",
		"a" + rep(0x0305, maxNonStarters+4) + "\u0316",
		"a" + rep(0x0305, maxNonStarters) + cgj + "\u0316" + rep(0x305, 4),
	},

	{ // Combine across non-blocking non-starters.
		// U+0327 COMBINING CEDILLA;Mn;202;NSM;;;;;N;NON-SPACING CEDILLA;;;;
		// U+0325 COMBINING RING BELOW;Mn;220;NSM;;;;;N;NON-SPACING RING BELOW;;;;
		"", "a\u0327\u0325", "\u1e01\u0327",
	},

	{ // Jamo V+T does not combine.
		"",
		"\u1161\u11a8",
		"\u1161\u11a8",
	},

	// Stability tests: see http://www.unicode.org/review/pr-29.html.
	{"", "\u0b47\u0300\u0b3e", "\u0b47\u0300\u0b3e"},
	{"", "\u1100\u0300\u1161", "\u1100\u0300\u1161"},
	{"", "\u0b47\u0b3e", "\u0b4b"},
	{"", "\u1100\u1161", "\uac00"},

	// U+0D4A MALAYALAM VOWEL SIGN O;Mc;0;L;0D46 0D3E;;;;N;;;;;
	{ // 0d4a starts a new segment.
		"",
		"\u0d4a" + strings.Repeat("\u0d3e", 15) + "\u0d4a" + strings.Repeat("\u0d3e", 15),
		"\u0d4a" + strings.Repeat("\u0d3e", 15) + "\u0d4a" + strings.Repeat("\u0d3e", 15),
	},

	{ // Split combining characters.
		// TODO: don't insert CGJ before starters.
		"",
		"\u0d46" + strings.Repeat("\u0d3e", 31),
		"\u0d4a" + strings.Repeat("\u0d3e", 29) + cgj + "\u0d3e",
	},

	{ // Split combining characters.
		"",
		"\u0d4a" + strings.Repeat("\u0d3e", 30),
		"\u0d4a" + strings.Repeat("\u0d3e", 29) + cgj + "\u0d3e",
	},
}

var appendTestsNFD = []AppendTest{
	// TODO: Move some of the tests here.
}

var appendTestsNFKC = []AppendTest{
	// empty buffers
	{"", "", ""},
	{"a", "", "a"},
	{"", "a", "a"},
	{"", "\u0041\u0307\u0304", "\u01E0"},
	// segment split across buffers
	{"", "a\u0300b", "\u00E0b"},
	{"a", "\u0300b", "\u00E0b"},
	{"a", "\u0300\u0316", "\u00E0\u0316"},
	{"a", "\u0316\u0300", "\u00E0\u0316"},
	{"a", "\u0300a\u0300", "\u00E0\u00E0"},
	{"a", "\u0300a\u0300a\u0300", "\u00E0\u00E0\u00E0"},
	{"a", "\u0300aaa\u0300aaa\u0300", "\u00E0aa\u00E0aa\u00E0"},
	{"a\u0300", "\u0327", "\u00E0\u0327"},
	{"a\u0327", "\u0300", "\u00E0\u0327"},
	{"a\u0316", "\u0300", "\u00E0\u0316"},
	{"\u0041\u0307", "\u0304", "\u01E0"},
	// Hangul
	{"", "\u110B\u1173", "\uC73C"},
	{"", "\u1103\u1161", "\uB2E4"},
	{"", "\u110B\u1173\u11B7", "\uC74C"},
	{"", "\u320E", "\x28\uAC00\x29"},
	{"", "\x28\u1100\u1161\x29", "\x28\uAC00\x29"},
	{"\u1103", "\u1161", "\uB2E4"},
	{"\u110B", "\u1173\u11B7", "\uC74C"},
	{"\u110B\u1173", "\u11B7", "\uC74C"},
	{"\uC73C", "\u11B7", "\uC74C"},
	// UTF-8 encoding split across buffers
	{"a\xCC", "\x80", "\u00E0"},
	{"a\xCC", "\x80b", "\u00E0b"},
	{"a\xCC", "\x80a\u0300", "\u00E0\u00E0"},
	{"a\xCC", "\x80\x80", "\u00E0\x80"},
	{"a\xCC", "\x80\xCC", "\u00E0\xCC"},
	{"a\u0316\xCC", "\x80a\u0316\u0300", "\u00E0\u0316\u00E0\u0316"},
	// ending in incomplete UTF-8 encoding
	{"", "\xCC", "\xCC"},
	{"a", "\xCC", "a\xCC"},
	{"a", "b\xCC", "ab\xCC"},
	{"\u0226", "\xCC", "\u0226\xCC"},
	// illegal runes
	{"", "\x80", "\x80"},
	{"", "\x80\x80\x80", "\x80\x80\x80"},
	{"", "\xCC\x80\x80\x80", "\xCC\x80\x80\x80"},
	{"", "a\x80", "a\x80"},
	{"", "a\x80\x80\x80", "a\x80\x80\x80"},
	{"", "a\x80\x80\x80\x80\x80\x80", "a\x80\x80\x80\x80\x80\x80"},
	{"a", "\x80\x80\x80", "a\x80\x80\x80"},
	// overflow
	{"", strings.Repeat("\x80", 33), strings.Repeat("\x80", 33)},
	{strings.Repeat("\x80", 33), "", strings.Repeat("\x80", 33)},
	{strings.Repeat("\x80", 33), strings.Repeat("\x80", 33), strings.Repeat("\x80", 66)},
	// overflow of combining characters
	{"", grave(34), grave(30) + cgj + grave(4)},
	{"", grave(36), grave(30) + cgj + grave(6)},
	{grave(29), grave(5), grave(30) + cgj + grave(4)},
	{grave(30), grave(4), grave(30) + cgj + grave(4)},
	{grave(30), grave(3), grave(30) + cgj + grave(3)},
	{grave(30) + "\xCC", "\x80", grave(30) + cgj + grave(1)},
	{"", "\uFDFA" + grave(14), "\u0635\u0644\u0649 \u0627\u0644\u0644\u0647 \u0639\u0644\u064a\u0647 \u0648\u0633\u0644\u0645" + grave(14)},
	{"", "\uFDFA" + grave(28) + "\u0316", "\u0635\u0644\u0649 \u0627\u0644\u0644\u0647 \u0639\u0644\u064a\u0647 \u0648\u0633\u0644\u0645\u0316" + grave(28)},
	// - First rune has a trailing non-starter.
	{"\u00d5", grave(30), "\u00d5" + grave(29) + cgj + grave(1)},
	// - U+FF9E decomposes into a non-starter in compatibility mode. A CGJ must be
	//   inserted even when FF9E starts a new segment.
	{"\uff9e", grave(30), "\u3099" + grave(29) + cgj + grave(1)},
	{grave(30), "\uff9e", grave(30) + cgj + "\u3099"},
	// - Many non-starter decompositions in a row causing overflow.
	{"", rep(0x340, 31), rep(0x300, 30) + cgj + "\u0300"},
	{"", rep(0xFF9E, 31), rep(0x3099, 30) + cgj + "\u3099"},
	// weird UTF-8
	{"\u00E0\xE1", "\x86", "\u00E0\xE1\x86"},
	{"a\u0300\u11B7", "\u0300", "\u00E0\u11B7\u0300"},
	{"a\u0300\u11B7\u0300", "\u0300", "\u00E0\u11B7\u0300\u0300"},
	{"\u0300", "\xF8\x80\x80\x80\x80\u0300", "\u0300\xF8\x80\x80\x80\x80\u0300"},
	{"\u0300", "\xFC\x80\x80\x80\x80\x80\u0300", "\u0300\xFC\x80\x80\x80\x80\x80\u0300"},
	{"\xF8\x80\x80\x80\x80\u0300", "\u0300", "\xF8\x80\x80\x80\x80\u0300\u0300"},
	{"\xFC\x80\x80\x80\x80\x80\u0300", "\u0300", "\xFC\x80\x80\x80\x80\x80\u0300\u0300"},
	{"\xF8\x80\x80\x80", "\x80\u0300\u0300", "\xF8\x80\x80\x80\x80\u0300\u0300"},

	{"", strings.Repeat("a\u0316\u0300", 6), strings.Repeat("\u00E0\u0316", 6)},
	// large input.
	{"", strings.Repeat("a\u0300\u0316", 4000), strings.Repeat("\u00E0\u0316", 4000)},
	{"", strings.Repeat("\x80\x80", 4000), strings.Repeat("\x80\x80", 4000)},
	{"", "\u0041\u0307\u0304", "\u01E0"},
}

var appendTestsNFKD = []AppendTest{
	{"", "a" + grave(64), "a" + grave(30) + cgj + grave(30) + cgj + grave(4)},

	{ // segment overflow on unchanged character
		"",
		"a" + grave(64) + "\u0316",
		"a" + grave(30) + cgj + grave(30) + cgj + "\u0316" + grave(4),
	},
	{ // segment overflow on unchanged character + start value
		"",
		"a" + grave(98) + "\u0316",
		"a" + grave(30) + cgj + grave(30) + cgj + grave(30) + cgj + "\u0316" + grave(8),
	},
	{ // segment overflow on decomposition. (U+0340 decomposes to U+0300.)
		"",
		"a" + grave(59) + "\u0340",
		"a" + grave(30) + cgj + grave(30),
	},
	{ // segment overflow on non-starter decomposition
		"",
		"a" + grave(33) + "\u0340" + grave(30) + "\u0320",
		"a" + grave(30) + cgj + grave(30) + cgj + "\u0320" + grave(4),
	},
	{ // start value after ASCII overflow
		"",
		rep('a', segSize) + grave(32) + "\u0320",
		rep('a', segSize) + grave(30) + cgj + "\u0320" + grave(2),
	},
	{ // Jamo overflow
		"",
		"\u1100\u1161" + grave(30) + "\u0320" + grave(2),
		"\u1100\u1161" + grave(29) + cgj + "\u0320" + grave(3),
	},
	{ // Hangul
		"",
		"\uac00",
		"\u1100\u1161",
	},
	{ // Hangul overflow
		"",
		"\uac00" + grave(32) + "\u0320",
		"\u1100\u1161" + grave(29) + cgj + "\u0320" + grave(3),
	},
	{ // Hangul overflow in Hangul mode.
		"",
		"\uac00\uac00" + grave(32) + "\u0320",
		"\u1100\u1161\u1100\u1161" + grave(29) + cgj + "\u0320" + grave(3),
	},
	{ // Hangul overflow in Hangul mode.
		"",
		strings.Repeat("\uac00", 3) + grave(32) + "\u0320",
		strings.Repeat("\u1100\u1161", 3) + grave(29) + cgj + "\u0320" + grave(3),
	},
	{ // start value after cc=0
		"",
		"您您" + grave(34) + "\u0320",
		"您您" + grave(30) + cgj + "\u0320" + grave(4),
	},
	{ // start value after normalization
		"",
		"\u0300\u0320a" + grave(34) + "\u0320",
		"\u0320\u0300a" + grave(30) + cgj + "\u0320" + grave(4),
	},
}

func TestAppend(t *testing.T) {
	runNormTests(t, "Append", func(f Form, out []byte, s string) []byte {
		return f.Append(out, []byte(s)...)
	})
}

func TestAppendString(t *testing.T) {
	runNormTests(t, "AppendString", func(f Form, out []byte, s string) []byte {
		return f.AppendString(out, s)
	})
}

func TestBytes(t *testing.T) {
	runNormTests(t, "Bytes", func(f Form, out []byte, s string) []byte {
		buf := []byte{}
		buf = append(buf, out...)
		buf = append(buf, s...)
		return f.Bytes(buf)
	})
}

func TestString(t *testing.T) {
	runNormTests(t, "String", func(f Form, out []byte, s string) []byte {
		outs := string(out) + s
		return []byte(f.String(outs))
	})
}

func appendBench(f Form, in []byte) func() {
	buf := make([]byte, 0, 4*len(in))
	return func() {
		f.Append(buf, in...)
	}
}

func bytesBench(f Form, in []byte) func() {
	return func() {
		f.Bytes(in)
	}
}

func iterBench(f Form, in []byte) func() {
	iter := Iter{}
	return func() {
		iter.Init(f, in)
		for !iter.Done() {
			iter.Next()
		}
	}
}

func transformBench(f Form, in []byte) func() {
	buf := make([]byte, 4*len(in))
	return func() {
		if _, n, err := f.Transform(buf, in, true); err != nil || len(in) != n {
			log.Panic(n, len(in), err)
		}
	}
}

func readerBench(f Form, in []byte) func() {
	buf := make([]byte, 4*len(in))
	return func() {
		r := f.Reader(bytes.NewReader(in))
		var err error
		for err == nil {
			_, err = r.Read(buf)
		}
		if err != io.EOF {
			panic("")
		}
	}
}

func writerBench(f Form, in []byte) func() {
	buf := make([]byte, 0, 4*len(in))
	return func() {
		r := f.Writer(bytes.NewBuffer(buf))
		if _, err := r.Write(in); err != nil {
			panic("")
		}
	}
}

func appendBenchmarks(bm []func(), f Form, in []byte) []func() {
	bm = append(bm, appendBench(f, in))
	bm = append(bm, iterBench(f, in))
	bm = append(bm, transformBench(f, in))
	bm = append(bm, readerBench(f, in))
	bm = append(bm, writerBench(f, in))
	return bm
}

func doFormBenchmark(b *testing.B, inf, f Form, s string) {
	b.StopTimer()
	in := inf.Bytes([]byte(s))
	bm := appendBenchmarks(nil, f, in)
	b.SetBytes(int64(len(in) * len(bm)))
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		for _, fn := range bm {
			fn()
		}
	}
}

func doSingle(b *testing.B, f func(Form, []byte) func(), s []byte) {
	b.StopTimer()
	fn := f(NFC, s)
	b.SetBytes(int64(len(s)))
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		fn()
	}
}

var (
	smallNoChange = []byte("nörmalization")
	smallChange   = []byte("No\u0308rmalization")
	ascii         = strings.Repeat("There is nothing to change here! ", 500)
)

func lowerBench(f Form, in []byte) func() {
	// Use package strings instead of bytes as it doesn't allocate memory
	// if there aren't any changes.
	s := string(in)
	return func() {
		strings.ToLower(s)
	}
}

func BenchmarkLowerCaseNoChange(b *testing.B) {
	doSingle(b, lowerBench, smallNoChange)
}
func BenchmarkLowerCaseChange(b *testing.B) {
	doSingle(b, lowerBench, smallChange)
}

func quickSpanBench(f Form, in []byte) func() {
	return func() {
		f.QuickSpan(in)
	}
}

func BenchmarkQuickSpanChangeNFC(b *testing.B) {
	doSingle(b, quickSpanBench, smallNoChange)
}

func BenchmarkBytesNoChangeNFC(b *testing.B) {
	doSingle(b, bytesBench, smallNoChange)
}
func BenchmarkBytesChangeNFC(b *testing.B) {
	doSingle(b, bytesBench, smallChange)
}

func BenchmarkAppendNoChangeNFC(b *testing.B) {
	doSingle(b, appendBench, smallNoChange)
}
func BenchmarkAppendChangeNFC(b *testing.B) {
	doSingle(b, appendBench, smallChange)
}
func BenchmarkAppendLargeNFC(b *testing.B) {
	doSingle(b, appendBench, txt_all_bytes)
}

func BenchmarkIterNoChangeNFC(b *testing.B) {
	doSingle(b, iterBench, smallNoChange)
}
func BenchmarkIterChangeNFC(b *testing.B) {
	doSingle(b, iterBench, smallChange)
}
func BenchmarkIterLargeNFC(b *testing.B) {
	doSingle(b, iterBench, txt_all_bytes)
}

func BenchmarkTransformNoChangeNFC(b *testing.B) {
	doSingle(b, transformBench, smallNoChange)
}
func BenchmarkTransformChangeNFC(b *testing.B) {
	doSingle(b, transformBench, smallChange)
}
func BenchmarkTransformLargeNFC(b *testing.B) {
	doSingle(b, transformBench, txt_all_bytes)
}

func BenchmarkNormalizeAsciiNFC(b *testing.B) {
	doFormBenchmark(b, NFC, NFC, ascii)
}
func BenchmarkNormalizeAsciiNFD(b *testing.B) {
	doFormBenchmark(b, NFC, NFD, ascii)
}
func BenchmarkNormalizeAsciiNFKC(b *testing.B) {
	doFormBenchmark(b, NFC, NFKC, ascii)
}
func BenchmarkNormalizeAsciiNFKD(b *testing.B) {
	doFormBenchmark(b, NFC, NFKD, ascii)
}

func BenchmarkNormalizeNFC2NFC(b *testing.B) {
	doFormBenchmark(b, NFC, NFC, txt_all)
}
func BenchmarkNormalizeNFC2NFD(b *testing.B) {
	doFormBenchmark(b, NFC, NFD, txt_all)
}
func BenchmarkNormalizeNFD2NFC(b *testing.B) {
	doFormBenchmark(b, NFD, NFC, txt_all)
}
func BenchmarkNormalizeNFD2NFD(b *testing.B) {
	doFormBenchmark(b, NFD, NFD, txt_all)
}

// Hangul is often special-cased, so we test it separately.
func BenchmarkNormalizeHangulNFC2NFC(b *testing.B) {
	doFormBenchmark(b, NFC, NFC, txt_kr)
}
func BenchmarkNormalizeHangulNFC2NFD(b *testing.B) {
	doFormBenchmark(b, NFC, NFD, txt_kr)
}
func BenchmarkNormalizeHangulNFD2NFC(b *testing.B) {
	doFormBenchmark(b, NFD, NFC, txt_kr)
}
func BenchmarkNormalizeHangulNFD2NFD(b *testing.B) {
	doFormBenchmark(b, NFD, NFD, txt_kr)
}

var forms = []Form{NFC, NFD, NFKC, NFKD}

func doTextBenchmark(b *testing.B, s string) {
	b.StopTimer()
	in := []byte(s)
	bm := []func(){}
	for _, f := range forms {
		bm = appendBenchmarks(bm, f, in)
	}
	b.SetBytes(int64(len(s) * len(bm)))
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		for _, f := range bm {
			f()
		}
	}
}

func BenchmarkCanonicalOrdering(b *testing.B) {
	doTextBenchmark(b, txt_canon)
}
func BenchmarkExtendedLatin(b *testing.B) {
	doTextBenchmark(b, txt_vn)
}
func BenchmarkMiscTwoByteUtf8(b *testing.B) {
	doTextBenchmark(b, twoByteUtf8)
}
func BenchmarkMiscThreeByteUtf8(b *testing.B) {
	doTextBenchmark(b, threeByteUtf8)
}
func BenchmarkHangul(b *testing.B) {
	doTextBenchmark(b, txt_kr)
}
func BenchmarkJapanese(b *testing.B) {
	doTextBenchmark(b, txt_jp)
}
func BenchmarkChinese(b *testing.B) {
	doTextBenchmark(b, txt_cn)
}
func BenchmarkOverflow(b *testing.B) {
	doTextBenchmark(b, overflow)
}

var overflow = string(bytes.Repeat([]byte("\u035D"), 4096)) + "\u035B"

// Tests sampled from the Canonical ordering tests (Part 2) of
// http://unicode.org/Public/UNIDATA/NormalizationTest.txt
const txt_canon = `\u0061\u0315\u0300\u05AE\u0300\u0062 \u0061\u0300\u0315\u0300\u05AE\u0062
\u0061\u0302\u0315\u0300\u05AE\u0062 \u0061\u0307\u0315\u0300\u05AE\u0062
\u0061\u0315\u0300\u05AE\u030A\u0062 \u0061\u059A\u0316\u302A\u031C\u0062
\u0061\u032E\u059A\u0316\u302A\u0062 \u0061\u0338\u093C\u0334\u0062
\u0061\u059A\u0316\u302A\u0339 \u0061\u0341\u0315\u0300\u05AE\u0062
\u0061\u0348\u059A\u0316\u302A\u0062 \u0061\u0361\u0345\u035D\u035C\u0062
\u0061\u0366\u0315\u0300\u05AE\u0062 \u0061\u0315\u0300\u05AE\u0486\u0062
\u0061\u05A4\u059A\u0316\u302A\u0062 \u0061\u0315\u0300\u05AE\u0613\u0062
\u0061\u0315\u0300\u05AE\u0615\u0062 \u0061\u0617\u0315\u0300\u05AE\u0062
\u0061\u0619\u0618\u064D\u064E\u0062 \u0061\u0315\u0300\u05AE\u0654\u0062
\u0061\u0315\u0300\u05AE\u06DC\u0062 \u0061\u0733\u0315\u0300\u05AE\u0062
\u0061\u0744\u059A\u0316\u302A\u0062 \u0061\u0315\u0300\u05AE\u0745\u0062
\u0061\u09CD\u05B0\u094D\u3099\u0062 \u0061\u0E38\u0E48\u0E38\u0C56\u0062
\u0061\u0EB8\u0E48\u0E38\u0E49\u0062 \u0061\u0F72\u0F71\u0EC8\u0F71\u0062
\u0061\u1039\u05B0\u094D\u3099\u0062 \u0061\u05B0\u094D\u3099\u1A60\u0062
\u0061\u3099\u093C\u0334\u1BE6\u0062 \u0061\u3099\u093C\u0334\u1C37\u0062
\u0061\u1CD9\u059A\u0316\u302A\u0062 \u0061\u2DED\u0315\u0300\u05AE\u0062
\u0061\u2DEF\u0315\u0300\u05AE\u0062 \u0061\u302D\u302E\u059A\u0316\u0062`

// Taken from http://creativecommons.org/licenses/by-sa/3.0/vn/
const txt_vn = `Với các điều kiện sau: Ghi nhận công của tác giả.
Nếu bạn sử dụng, chuyển đổi, hoặc xây dựng dự án từ
nội dung được chia sẻ này, bạn phải áp dụng giấy phép này hoặc
một giấy phép khác có các điều khoản tương tự như giấy phép này
cho dự án của bạn. Hiểu rằng: Miễn — Bất kỳ các điều kiện nào
trên đây cũng có thể được miễn bỏ nếu bạn được sự cho phép của
người sở hữu bản quyền. Phạm vi công chúng — Khi tác phẩm hoặc
bất kỳ chương nào của tác phẩm đã trong vùng dành cho công
chúng theo quy định của pháp luật thì tình trạng của nó không
bị ảnh hưởng bởi giấy phép trong bất kỳ trường hợp nào.`

// Taken from http://creativecommons.org/licenses/by-sa/1.0/deed.ru
const txt_ru = `При обязательном соблюдении следующих условий:
Attribution — Вы должны атрибутировать произведение (указывать
автора и источник) в порядке, предусмотренном автором или
лицензиаром (но только так, чтобы никоим образом не подразумевалось,
что они поддерживают вас или использование вами данного произведения).
Υπό τις ακόλουθες προϋποθέσεις:`

// Taken from http://creativecommons.org/licenses/by-sa/3.0/gr/
const txt_gr = `Αναφορά Δημιουργού — Θα πρέπει να κάνετε την αναφορά στο έργο με τον
τρόπο που έχει οριστεί από το δημιουργό ή το χορηγούντο την άδεια
(χωρίς όμως να εννοείται με οποιονδήποτε τρόπο ότι εγκρίνουν εσάς ή
τη χρήση του έργου από εσάς). Παρόμοια Διανομή — Εάν αλλοιώσετε,
τροποποιήσετε ή δημιουργήσετε περαιτέρω βασισμένοι στο έργο θα
μπορείτε να διανέμετε το έργο που θα προκύψει μόνο με την ίδια ή
παρόμοια άδεια.`

// Taken from http://creativecommons.org/licenses/by-sa/3.0/deed.ar
const txt_ar = `بموجب الشروط التالية نسب المصنف — يجب عليك أن
تنسب العمل بالطريقة التي تحددها المؤلف أو المرخص (ولكن ليس بأي حال من
الأحوال أن توحي وتقترح بتحول أو استخدامك للعمل).
المشاركة على قدم المساواة — إذا كنت يعدل ، والتغيير ، أو الاستفادة
من هذا العمل ، قد ينتج عن توزيع العمل إلا في ظل تشابه او تطابق فى واحد
لهذا الترخيص.`

// Taken from http://creativecommons.org/licenses/by-sa/1.0/il/
const txt_il = `בכפוף לתנאים הבאים: ייחוס — עליך לייחס את היצירה (לתת קרדיט) באופן
המצויין על-ידי היוצר או מעניק הרישיון (אך לא בשום אופן המרמז על כך
שהם תומכים בך או בשימוש שלך ביצירה). שיתוף זהה — אם תחליט/י לשנות,
לעבד או ליצור יצירה נגזרת בהסתמך על יצירה זו, תוכל/י להפיץ את יצירתך
החדשה רק תחת אותו הרישיון או רישיון דומה לרישיון זה.`

const twoByteUtf8 = txt_ru + txt_gr + txt_ar + txt_il

// Taken from http://creativecommons.org/licenses/by-sa/2.0/kr/
const txt_kr = `다음과 같은 조건을 따라야 합니다: 저작자표시
(Attribution) — 저작자나 이용허락자가 정한 방법으로 저작물의
원저작자를 표시하여야 합니다(그러나 원저작자가 이용자나 이용자의
이용을 보증하거나 추천한다는 의미로 표시해서는 안됩니다).
동일조건변경허락 — 이 저작물을 이용하여 만든 이차적 저작물에는 본
라이선스와 동일한 라이선스를 적용해야 합니다.`

// Taken from http://creativecommons.org/licenses/by-sa/3.0/th/
const txt_th = `ภายใต้เงื่อนไข ดังต่อไปนี้ : แสดงที่มา — คุณต้องแสดงที่
มาของงานดังกล่าว ตามรูปแบบที่ผู้สร้างสรรค์หรือผู้อนุญาตกำหนด (แต่
ไม่ใช่ในลักษณะที่ว่า พวกเขาสนับสนุนคุณหรือสนับสนุนการที่
คุณนำงานไปใช้) อนุญาตแบบเดียวกัน — หากคุณดัดแปลง เปลี่ยนรูป หรื
อต่อเติมงานนี้ คุณต้องใช้สัญญาอนุญาตแบบเดียวกันหรือแบบที่เหมื
อนกับสัญญาอนุญาตที่ใช้กับงานนี้เท่านั้น`

const threeByteUtf8 = txt_th

// Taken from http://creativecommons.org/licenses/by-sa/2.0/jp/
const txt_jp = `あなたの従うべき条件は以下の通りです。
表示 — あなたは原著作者のクレジットを表示しなければなりません。
継承 — もしあなたがこの作品を改変、変形または加工した場合、
あなたはその結果生じた作品をこの作品と同一の許諾条件の下でのみ
頒布することができます。`

// http://creativecommons.org/licenses/by-sa/2.5/cn/
const txt_cn = `您可以自由: 复制、发行、展览、表演、放映、
广播或通过信息网络传播本作品 创作演绎作品
对本作品进行商业性使用 惟须遵守下列条件:
署名 — 您必须按照作者或者许可人指定的方式对作品进行署名。
相同方式共享 — 如果您改变、转换本作品或者以本作品为基础进行创作,
您只能采用与本协议相同的许可协议发布基于本作品的演绎作品。`

const txt_cjk = txt_cn + txt_jp + txt_kr
const txt_all = txt_vn + twoByteUtf8 + threeByteUtf8 + txt_cjk

var txt_all_bytes = []byte(txt_all)
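
// TestComposeIllustration is a minimal, self-contained sketch of the
// composition and decomposition behavior that the tables above exercise in
// many variations; it assumes only the standard Unicode mappings between
// "a" + U+0300 COMBINING GRAVE ACCENT and the precomposed U+00E0, and it is
// intended as an illustration rather than as additional coverage.
func TestComposeIllustration(t *testing.T) {
	// NFC composes the decomposed pair into a single precomposed rune.
	if got := NFC.String("a\u0300"); got != "\u00E0" {
		t.Errorf(`NFC.String("a\u0300") = %+q; want %+q`, got, "\u00E0")
	}
	// NFD decomposes the precomposed rune back into base + combining mark.
	if got := NFD.String("\u00E0"); got != "a\u0300" {
		t.Errorf(`NFD.String("\u00E0") = %+q; want %+q`, got, "a\u0300")
	}
}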