// github.com/go-xe2/third@v1.0.3/golang.org/x/text/unicode/bidi/core.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package bidi

import "log"

// This implementation is a port based on the reference implementation found at:
// http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
//
// described in Unicode Bidirectional Algorithm (UAX #9).
//
// Input:
// There are two levels of input to the algorithm, since clients may prefer to
// supply some information from out-of-band sources rather than relying on the
// default behavior.
//
// - Bidi class array
// - Bidi class array, with externally supplied base line direction
//
// Output:
// Output is separated into several stages:
//
// - levels array over entire paragraph
// - reordering array over entire paragraph
// - levels array over line
// - reordering array over line
//
// Note that for conformance to the Unicode Bidirectional Algorithm,
// implementations are only required to generate correct reordering and
// character directionality (odd or even levels) over a line. Generating
// identical level arrays over a line is not required. Bidi explicit format
// codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and
// positions as long as the rest of the input is properly reordered.
//
// As the algorithm is defined to operate on a single paragraph at a time, this
// implementation is written to handle single paragraphs. Thus rule P1 is
// presumed by this implementation-- the data provided to the implementation is
// assumed to be a single paragraph, and either contains no 'B' codes, or a
// single 'B' code at the end of the input. 'B' is allowed as input to
// illustrate how the algorithm assigns it a level.
43 // 44 // Also note that rules L3 and L4 depend on the rendering engine that uses the 45 // result of the bidi algorithm. This implementation assumes that the rendering 46 // engine expects combining marks in visual order (e.g. to the left of their 47 // base character in RTL runs) and that it adjusts the glyphs used to render 48 // mirrored characters that are in RTL runs so that they render appropriately. 49 50 // level is the embedding level of a character. Even embedding levels indicate 51 // left-to-right order and odd levels indicate right-to-left order. The special 52 // level of -1 is reserved for undefined order. 53 type level int8 54 55 const implicitLevel level = -1 56 57 // in returns if x is equal to any of the values in set. 58 func (c Class) in(set ...Class) bool { 59 for _, s := range set { 60 if c == s { 61 return true 62 } 63 } 64 return false 65 } 66 67 // A paragraph contains the state of a paragraph. 68 type paragraph struct { 69 initialTypes []Class 70 71 // Arrays of properties needed for paired bracket evaluation in N0 72 pairTypes []bracketType // paired Bracket types for paragraph 73 pairValues []rune // rune for opening bracket or pbOpen and pbClose; 0 for pbNone 74 75 embeddingLevel level // default: = implicitLevel; 76 77 // at the paragraph levels 78 resultTypes []Class 79 resultLevels []level 80 81 // Index of matching PDI for isolate initiator characters. For other 82 // characters, the value of matchingPDI will be set to -1. For isolate 83 // initiators with no matching PDI, matchingPDI will be set to the length of 84 // the input string. 85 matchingPDI []int 86 87 // Index of matching isolate initiator for PDI characters. For other 88 // characters, and for PDIs with no matching isolate initiator, the value of 89 // matchingIsolateInitiator will be set to -1. 90 matchingIsolateInitiator []int 91 } 92 93 // newParagraph initializes a paragraph. 
The user needs to supply a few arrays 94 // corresponding to the preprocessed text input. The types correspond to the 95 // Unicode BiDi classes for each rune. pairTypes indicates the bracket type for 96 // each rune. pairValues provides a unique bracket class identifier for each 97 // rune (suggested is the rune of the open bracket for opening and matching 98 // close brackets, after normalization). The embedding levels are optional, but 99 // may be supplied to encode embedding levels of styled text. 100 // 101 // TODO: return an error. 102 func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph { 103 validateTypes(types) 104 validatePbTypes(pairTypes) 105 validatePbValues(pairValues, pairTypes) 106 validateParagraphEmbeddingLevel(levels) 107 108 p := ¶graph{ 109 initialTypes: append([]Class(nil), types...), 110 embeddingLevel: levels, 111 112 pairTypes: pairTypes, 113 pairValues: pairValues, 114 115 resultTypes: append([]Class(nil), types...), 116 } 117 p.run() 118 return p 119 } 120 121 func (p *paragraph) Len() int { return len(p.initialTypes) } 122 123 // The algorithm. Does not include line-based processing (Rules L1, L2). 124 // These are applied later in the line-based phase of the algorithm. 125 func (p *paragraph) run() { 126 p.determineMatchingIsolates() 127 128 // 1) determining the paragraph level 129 // Rule P1 is the requirement for entering this algorithm. 130 // Rules P2, P3. 131 // If no externally supplied paragraph embedding level, use default. 132 if p.embeddingLevel == implicitLevel { 133 p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len()) 134 } 135 136 // Initialize result levels to paragraph embedding level. 137 p.resultLevels = make([]level, p.Len()) 138 setLevels(p.resultLevels, p.embeddingLevel) 139 140 // 2) Explicit levels and directions 141 // Rules X1-X8. 142 p.determineExplicitEmbeddingLevels() 143 144 // Rule X9. 
145 // We do not remove the embeddings, the overrides, the PDFs, and the BNs 146 // from the string explicitly. But they are not copied into isolating run 147 // sequences when they are created, so they are removed for all 148 // practical purposes. 149 150 // Rule X10. 151 // Run remainder of algorithm one isolating run sequence at a time 152 for _, seq := range p.determineIsolatingRunSequences() { 153 // 3) resolving weak types 154 // Rules W1-W7. 155 seq.resolveWeakTypes() 156 157 // 4a) resolving paired brackets 158 // Rule N0 159 resolvePairedBrackets(seq) 160 161 // 4b) resolving neutral types 162 // Rules N1-N3. 163 seq.resolveNeutralTypes() 164 165 // 5) resolving implicit embedding levels 166 // Rules I1, I2. 167 seq.resolveImplicitLevels() 168 169 // Apply the computed levels and types 170 seq.applyLevelsAndTypes() 171 } 172 173 // Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and 174 // BNs. This is for convenience, so the resulting level array will have 175 // a value for every character. 176 p.assignLevelsToCharactersRemovedByX9() 177 } 178 179 // determineMatchingIsolates determines the matching PDI for each isolate 180 // initiator and vice versa. 181 // 182 // Definition BD9. 183 // 184 // At the end of this function: 185 // 186 // - The member variable matchingPDI is set to point to the index of the 187 // matching PDI character for each isolate initiator character. If there is 188 // no matching PDI, it is set to the length of the input text. For other 189 // characters, it is set to -1. 190 // - The member variable matchingIsolateInitiator is set to point to the 191 // index of the matching isolate initiator character for each PDI character. 192 // If there is no matching isolate initiator, or the character is not a PDI, 193 // it is set to -1. 
194 func (p *paragraph) determineMatchingIsolates() { 195 p.matchingPDI = make([]int, p.Len()) 196 p.matchingIsolateInitiator = make([]int, p.Len()) 197 198 for i := range p.matchingIsolateInitiator { 199 p.matchingIsolateInitiator[i] = -1 200 } 201 202 for i := range p.matchingPDI { 203 p.matchingPDI[i] = -1 204 205 if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) { 206 depthCounter := 1 207 for j := i + 1; j < p.Len(); j++ { 208 if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) { 209 depthCounter++ 210 } else if u == PDI { 211 if depthCounter--; depthCounter == 0 { 212 p.matchingPDI[i] = j 213 p.matchingIsolateInitiator[j] = i 214 break 215 } 216 } 217 } 218 if p.matchingPDI[i] == -1 { 219 p.matchingPDI[i] = p.Len() 220 } 221 } 222 } 223 } 224 225 // determineParagraphEmbeddingLevel reports the resolved paragraph direction of 226 // the substring limited by the given range [start, end). 227 // 228 // Determines the paragraph level based on rules P2, P3. This is also used 229 // in rule X5c to find if an FSI should resolve to LRI or RLI. 230 func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level { 231 var strongType Class = unknownClass 232 233 // Rule P2. 234 for i := start; i < end; i++ { 235 if t := p.resultTypes[i]; t.in(L, AL, R) { 236 strongType = t 237 break 238 } else if t.in(FSI, LRI, RLI) { 239 i = p.matchingPDI[i] // skip over to the matching PDI 240 if i > end { 241 log.Panic("assert (i <= end)") 242 } 243 } 244 } 245 // Rule P3. 246 switch strongType { 247 case unknownClass: // none found 248 // default embedding level when no strong types found is 0. 
249 return 0 250 case L: 251 return 0 252 default: // AL, R 253 return 1 254 } 255 } 256 257 const maxDepth = 125 258 259 // This stack will store the embedding levels and override and isolated 260 // statuses 261 type directionalStatusStack struct { 262 stackCounter int 263 embeddingLevelStack [maxDepth + 1]level 264 overrideStatusStack [maxDepth + 1]Class 265 isolateStatusStack [maxDepth + 1]bool 266 } 267 268 func (s *directionalStatusStack) empty() { s.stackCounter = 0 } 269 func (s *directionalStatusStack) pop() { s.stackCounter-- } 270 func (s *directionalStatusStack) depth() int { return s.stackCounter } 271 272 func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) { 273 s.embeddingLevelStack[s.stackCounter] = level 274 s.overrideStatusStack[s.stackCounter] = overrideStatus 275 s.isolateStatusStack[s.stackCounter] = isolateStatus 276 s.stackCounter++ 277 } 278 279 func (s *directionalStatusStack) lastEmbeddingLevel() level { 280 return s.embeddingLevelStack[s.stackCounter-1] 281 } 282 283 func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class { 284 return s.overrideStatusStack[s.stackCounter-1] 285 } 286 287 func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool { 288 return s.isolateStatusStack[s.stackCounter-1] 289 } 290 291 // Determine explicit levels using rules X1 - X8 292 func (p *paragraph) determineExplicitEmbeddingLevels() { 293 var stack directionalStatusStack 294 var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int 295 296 // Rule X1. 
297 stack.push(p.embeddingLevel, ON, false) 298 299 for i, t := range p.resultTypes { 300 // Rules X2, X3, X4, X5, X5a, X5b, X5c 301 switch t { 302 case RLE, LRE, RLO, LRO, RLI, LRI, FSI: 303 isIsolate := t.in(RLI, LRI, FSI) 304 isRTL := t.in(RLE, RLO, RLI) 305 306 // override if this is an FSI that resolves to RLI 307 if t == FSI { 308 isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1) 309 } 310 if isIsolate { 311 p.resultLevels[i] = stack.lastEmbeddingLevel() 312 if stack.lastDirectionalOverrideStatus() != ON { 313 p.resultTypes[i] = stack.lastDirectionalOverrideStatus() 314 } 315 } 316 317 var newLevel level 318 if isRTL { 319 // least greater odd 320 newLevel = (stack.lastEmbeddingLevel() + 1) | 1 321 } else { 322 // least greater even 323 newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1 324 } 325 326 if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 { 327 if isIsolate { 328 validIsolateCount++ 329 } 330 // Push new embedding level, override status, and isolated 331 // status. 332 // No check for valid stack counter, since the level check 333 // suffices. 334 switch t { 335 case LRO: 336 stack.push(newLevel, L, isIsolate) 337 case RLO: 338 stack.push(newLevel, R, isIsolate) 339 default: 340 stack.push(newLevel, ON, isIsolate) 341 } 342 // Not really part of the spec 343 if !isIsolate { 344 p.resultLevels[i] = newLevel 345 } 346 } else { 347 // This is an invalid explicit formatting character, 348 // so apply the "Otherwise" part of rules X2-X5b. 
349 if isIsolate { 350 overflowIsolateCount++ 351 } else { // !isIsolate 352 if overflowIsolateCount == 0 { 353 overflowEmbeddingCount++ 354 } 355 } 356 } 357 358 // Rule X6a 359 case PDI: 360 if overflowIsolateCount > 0 { 361 overflowIsolateCount-- 362 } else if validIsolateCount == 0 { 363 // do nothing 364 } else { 365 overflowEmbeddingCount = 0 366 for !stack.lastDirectionalIsolateStatus() { 367 stack.pop() 368 } 369 stack.pop() 370 validIsolateCount-- 371 } 372 p.resultLevels[i] = stack.lastEmbeddingLevel() 373 374 // Rule X7 375 case PDF: 376 // Not really part of the spec 377 p.resultLevels[i] = stack.lastEmbeddingLevel() 378 379 if overflowIsolateCount > 0 { 380 // do nothing 381 } else if overflowEmbeddingCount > 0 { 382 overflowEmbeddingCount-- 383 } else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 { 384 stack.pop() 385 } 386 387 case B: // paragraph separator. 388 // Rule X8. 389 390 // These values are reset for clarity, in this implementation B 391 // can only occur as the last code in the array. 
392 stack.empty() 393 overflowIsolateCount = 0 394 overflowEmbeddingCount = 0 395 validIsolateCount = 0 396 p.resultLevels[i] = p.embeddingLevel 397 398 default: 399 p.resultLevels[i] = stack.lastEmbeddingLevel() 400 if stack.lastDirectionalOverrideStatus() != ON { 401 p.resultTypes[i] = stack.lastDirectionalOverrideStatus() 402 } 403 } 404 } 405 } 406 407 type isolatingRunSequence struct { 408 p *paragraph 409 410 indexes []int // indexes to the original string 411 412 types []Class // type of each character using the index 413 resolvedLevels []level // resolved levels after application of rules 414 level level 415 sos, eos Class 416 } 417 418 func (i *isolatingRunSequence) Len() int { return len(i.indexes) } 419 420 func maxLevel(a, b level) level { 421 if a > b { 422 return a 423 } 424 return b 425 } 426 427 // Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types, 428 // either L or R, for each isolating run sequence. 429 func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence { 430 length := len(indexes) 431 types := make([]Class, length) 432 for i, x := range indexes { 433 types[i] = p.resultTypes[x] 434 } 435 436 // assign level, sos and eos 437 prevChar := indexes[0] - 1 438 for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) { 439 prevChar-- 440 } 441 prevLevel := p.embeddingLevel 442 if prevChar >= 0 { 443 prevLevel = p.resultLevels[prevChar] 444 } 445 446 var succLevel level 447 lastType := types[length-1] 448 if lastType.in(LRI, RLI, FSI) { 449 succLevel = p.embeddingLevel 450 } else { 451 // the first character after the end of run sequence 452 limit := indexes[length-1] + 1 453 for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ { 454 455 } 456 succLevel = p.embeddingLevel 457 if limit < p.Len() { 458 succLevel = p.resultLevels[limit] 459 } 460 } 461 level := p.resultLevels[indexes[0]] 462 return &isolatingRunSequence{ 463 p: p, 464 indexes: indexes, 465 types: 
types, 466 level: level, 467 sos: typeForLevel(maxLevel(prevLevel, level)), 468 eos: typeForLevel(maxLevel(succLevel, level)), 469 } 470 } 471 472 // Resolving weak types Rules W1-W7. 473 // 474 // Note that some weak types (EN, AN) remain after this processing is 475 // complete. 476 func (s *isolatingRunSequence) resolveWeakTypes() { 477 478 // on entry, only these types remain 479 s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI) 480 481 // Rule W1. 482 // Changes all NSMs. 483 preceedingCharacterType := s.sos 484 for i, t := range s.types { 485 if t == NSM { 486 s.types[i] = preceedingCharacterType 487 } else { 488 if t.in(LRI, RLI, FSI, PDI) { 489 preceedingCharacterType = ON 490 } 491 preceedingCharacterType = t 492 } 493 } 494 495 // Rule W2. 496 // EN does not change at the start of the run, because sos != AL. 497 for i, t := range s.types { 498 if t == EN { 499 for j := i - 1; j >= 0; j-- { 500 if t := s.types[j]; t.in(L, R, AL) { 501 if t == AL { 502 s.types[i] = AN 503 } 504 break 505 } 506 } 507 } 508 } 509 510 // Rule W3. 511 for i, t := range s.types { 512 if t == AL { 513 s.types[i] = R 514 } 515 } 516 517 // Rule W4. 518 // Since there must be values on both sides for this rule to have an 519 // effect, the scan skips the first and last value. 520 // 521 // Although the scan proceeds left to right, and changes the type 522 // values in a way that would appear to affect the computations 523 // later in the scan, there is actually no problem. A change in the 524 // current value can only affect the value to its immediate right, 525 // and only affect it if it is ES or CS. But the current value can 526 // only change if the value to its right is not ES or CS. Thus 527 // either the current value will not change, or its change will have 528 // no effect on the remainder of the analysis. 
529 530 for i := 1; i < s.Len()-1; i++ { 531 t := s.types[i] 532 if t == ES || t == CS { 533 prevSepType := s.types[i-1] 534 succSepType := s.types[i+1] 535 if prevSepType == EN && succSepType == EN { 536 s.types[i] = EN 537 } else if s.types[i] == CS && prevSepType == AN && succSepType == AN { 538 s.types[i] = AN 539 } 540 } 541 } 542 543 // Rule W5. 544 for i, t := range s.types { 545 if t == ET { 546 // locate end of sequence 547 runStart := i 548 runEnd := s.findRunLimit(runStart, ET) 549 550 // check values at ends of sequence 551 t := s.sos 552 if runStart > 0 { 553 t = s.types[runStart-1] 554 } 555 if t != EN { 556 t = s.eos 557 if runEnd < len(s.types) { 558 t = s.types[runEnd] 559 } 560 } 561 if t == EN { 562 setTypes(s.types[runStart:runEnd], EN) 563 } 564 // continue at end of sequence 565 i = runEnd 566 } 567 } 568 569 // Rule W6. 570 for i, t := range s.types { 571 if t.in(ES, ET, CS) { 572 s.types[i] = ON 573 } 574 } 575 576 // Rule W7. 577 for i, t := range s.types { 578 if t == EN { 579 // set default if we reach start of run 580 prevStrongType := s.sos 581 for j := i - 1; j >= 0; j-- { 582 t = s.types[j] 583 if t == L || t == R { // AL's have been changed to R 584 prevStrongType = t 585 break 586 } 587 } 588 if prevStrongType == L { 589 s.types[i] = L 590 } 591 } 592 } 593 } 594 595 // 6) resolving neutral types Rules N1-N2. 596 func (s *isolatingRunSequence) resolveNeutralTypes() { 597 598 // on entry, only these types can be in resultTypes 599 s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI) 600 601 for i, t := range s.types { 602 switch t { 603 case WS, ON, B, S, RLI, LRI, FSI, PDI: 604 // find bounds of run of neutrals 605 runStart := i 606 runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI) 607 608 // determine effective types at ends of run 609 var leadType, trailType Class 610 611 // Note that the character found can only be L, R, AN, or 612 // EN. 
613 if runStart == 0 { 614 leadType = s.sos 615 } else { 616 leadType = s.types[runStart-1] 617 if leadType.in(AN, EN) { 618 leadType = R 619 } 620 } 621 if runEnd == len(s.types) { 622 trailType = s.eos 623 } else { 624 trailType = s.types[runEnd] 625 if trailType.in(AN, EN) { 626 trailType = R 627 } 628 } 629 630 var resolvedType Class 631 if leadType == trailType { 632 // Rule N1. 633 resolvedType = leadType 634 } else { 635 // Rule N2. 636 // Notice the embedding level of the run is used, not 637 // the paragraph embedding level. 638 resolvedType = typeForLevel(s.level) 639 } 640 641 setTypes(s.types[runStart:runEnd], resolvedType) 642 643 // skip over run of (former) neutrals 644 i = runEnd 645 } 646 } 647 } 648 649 func setLevels(levels []level, newLevel level) { 650 for i := range levels { 651 levels[i] = newLevel 652 } 653 } 654 655 func setTypes(types []Class, newType Class) { 656 for i := range types { 657 types[i] = newType 658 } 659 } 660 661 // 7) resolving implicit embedding levels Rules I1, I2. 662 func (s *isolatingRunSequence) resolveImplicitLevels() { 663 664 // on entry, only these types can be in resultTypes 665 s.assertOnly(L, R, EN, AN) 666 667 s.resolvedLevels = make([]level, len(s.types)) 668 setLevels(s.resolvedLevels, s.level) 669 670 if (s.level & 1) == 0 { // even level 671 for i, t := range s.types { 672 // Rule I1. 673 if t == L { 674 // no change 675 } else if t == R { 676 s.resolvedLevels[i] += 1 677 } else { // t == AN || t == EN 678 s.resolvedLevels[i] += 2 679 } 680 } 681 } else { // odd level 682 for i, t := range s.types { 683 // Rule I2. 684 if t == R { 685 // no change 686 } else { // t == L || t == AN || t == EN 687 s.resolvedLevels[i] += 1 688 } 689 } 690 } 691 } 692 693 // Applies the levels and types resolved in rules W1-I2 to the 694 // resultLevels array. 
695 func (s *isolatingRunSequence) applyLevelsAndTypes() { 696 for i, x := range s.indexes { 697 s.p.resultTypes[x] = s.types[i] 698 s.p.resultLevels[x] = s.resolvedLevels[i] 699 } 700 } 701 702 // Return the limit of the run consisting only of the types in validSet 703 // starting at index. This checks the value at index, and will return 704 // index if that value is not in validSet. 705 func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int { 706 loop: 707 for ; index < len(s.types); index++ { 708 t := s.types[index] 709 for _, valid := range validSet { 710 if t == valid { 711 continue loop 712 } 713 } 714 return index // didn't find a match in validSet 715 } 716 return len(s.types) 717 } 718 719 // Algorithm validation. Assert that all values in types are in the 720 // provided set. 721 func (s *isolatingRunSequence) assertOnly(codes ...Class) { 722 loop: 723 for i, t := range s.types { 724 for _, c := range codes { 725 if t == c { 726 continue loop 727 } 728 } 729 log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i]) 730 } 731 } 732 733 // determineLevelRuns returns an array of level runs. Each level run is 734 // described as an array of indexes into the input string. 735 // 736 // Determines the level runs. Rule X9 will be applied in determining the 737 // runs, in the way that makes sure the characters that are supposed to be 738 // removed are not included in the runs. 
739 func (p *paragraph) determineLevelRuns() [][]int { 740 run := []int{} 741 allRuns := [][]int{} 742 currentLevel := implicitLevel 743 744 for i := range p.initialTypes { 745 if !isRemovedByX9(p.initialTypes[i]) { 746 if p.resultLevels[i] != currentLevel { 747 // we just encountered a new run; wrap up last run 748 if currentLevel >= 0 { // only wrap it up if there was a run 749 allRuns = append(allRuns, run) 750 run = nil 751 } 752 // Start new run 753 currentLevel = p.resultLevels[i] 754 } 755 run = append(run, i) 756 } 757 } 758 // Wrap up the final run, if any 759 if len(run) > 0 { 760 allRuns = append(allRuns, run) 761 } 762 return allRuns 763 } 764 765 // Definition BD13. Determine isolating run sequences. 766 func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence { 767 levelRuns := p.determineLevelRuns() 768 769 // Compute the run that each character belongs to 770 runForCharacter := make([]int, p.Len()) 771 for i, run := range levelRuns { 772 for _, index := range run { 773 runForCharacter[index] = i 774 } 775 } 776 777 sequences := []*isolatingRunSequence{} 778 779 var currentRunSequence []int 780 781 for _, run := range levelRuns { 782 first := run[0] 783 if p.initialTypes[first] != PDI || p.matchingIsolateInitiator[first] == -1 { 784 currentRunSequence = nil 785 // int run = i; 786 for { 787 // Copy this level run into currentRunSequence 788 currentRunSequence = append(currentRunSequence, run...) 789 790 last := currentRunSequence[len(currentRunSequence)-1] 791 lastT := p.initialTypes[last] 792 if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() { 793 run = levelRuns[runForCharacter[p.matchingPDI[last]]] 794 } else { 795 break 796 } 797 } 798 sequences = append(sequences, p.isolatingRunSequence(currentRunSequence)) 799 } 800 } 801 return sequences 802 } 803 804 // Assign level information to characters removed by rule X9. This is for 805 // ease of relating the level information to the original input data. 
Note 806 // that the levels assigned to these codes are arbitrary, they're chosen so 807 // as to avoid breaking level runs. 808 func (p *paragraph) assignLevelsToCharactersRemovedByX9() { 809 for i, t := range p.initialTypes { 810 if t.in(LRE, RLE, LRO, RLO, PDF, BN) { 811 p.resultTypes[i] = t 812 p.resultLevels[i] = -1 813 } 814 } 815 // now propagate forward the levels information (could have 816 // propagated backward, the main thing is not to introduce a level 817 // break where one doesn't already exist). 818 819 if p.resultLevels[0] == -1 { 820 p.resultLevels[0] = p.embeddingLevel 821 } 822 for i := 1; i < len(p.initialTypes); i++ { 823 if p.resultLevels[i] == -1 { 824 p.resultLevels[i] = p.resultLevels[i-1] 825 } 826 } 827 // Embedding information is for informational purposes only so need not be 828 // adjusted. 829 } 830 831 // 832 // Output 833 // 834 835 // getLevels computes levels array breaking lines at offsets in linebreaks. 836 // Rule L1. 837 // 838 // The linebreaks array must include at least one value. The values must be 839 // in strictly increasing order (no duplicates) between 1 and the length of 840 // the text, inclusive. The last value must be the length of the text. 841 func (p *paragraph) getLevels(linebreaks []int) []level { 842 // Note that since the previous processing has removed all 843 // P, S, and WS values from resultTypes, the values referred to 844 // in these rules are the initial types, before any processing 845 // has been applied (including processing of overrides). 846 // 847 // This example implementation has reinserted explicit format codes 848 // and BN, in order that the levels array correspond to the 849 // initial text. Their final placement is not normative. 850 // These codes are treated like WS in this implementation, 851 // so they don't interrupt sequences of WS. 852 853 validateLineBreaks(linebreaks, p.Len()) 854 855 result := append([]level(nil), p.resultLevels...) 
856 857 // don't worry about linebreaks since if there is a break within 858 // a series of WS values preceding S, the linebreak itself 859 // causes the reset. 860 for i, t := range p.initialTypes { 861 if t.in(B, S) { 862 // Rule L1, clauses one and two. 863 result[i] = p.embeddingLevel 864 865 // Rule L1, clause three. 866 for j := i - 1; j >= 0; j-- { 867 if isWhitespace(p.initialTypes[j]) { // including format codes 868 result[j] = p.embeddingLevel 869 } else { 870 break 871 } 872 } 873 } 874 } 875 876 // Rule L1, clause four. 877 start := 0 878 for _, limit := range linebreaks { 879 for j := limit - 1; j >= start; j-- { 880 if isWhitespace(p.initialTypes[j]) { // including format codes 881 result[j] = p.embeddingLevel 882 } else { 883 break 884 } 885 } 886 start = limit 887 } 888 889 return result 890 } 891 892 // getReordering returns the reordering of lines from a visual index to a 893 // logical index for line breaks at the given offsets. 894 // 895 // Lines are concatenated from left to right. So for example, the fifth 896 // character from the left on the third line is 897 // 898 // getReordering(linebreaks)[linebreaks[1] + 4] 899 // 900 // (linebreaks[1] is the position after the last character of the second 901 // line, which is also the index of the first character on the third line, 902 // and adding four gets the fifth character from the left). 903 // 904 // The linebreaks array must include at least one value. The values must be 905 // in strictly increasing order (no duplicates) between 1 and the length of 906 // the text, inclusive. The last value must be the length of the text. 907 func (p *paragraph) getReordering(linebreaks []int) []int { 908 validateLineBreaks(linebreaks, p.Len()) 909 910 return computeMultilineReordering(p.getLevels(linebreaks), linebreaks) 911 } 912 913 // Return multiline reordering array for a given level array. Reordering 914 // does not occur across a line break. 
915 func computeMultilineReordering(levels []level, linebreaks []int) []int { 916 result := make([]int, len(levels)) 917 918 start := 0 919 for _, limit := range linebreaks { 920 tempLevels := make([]level, limit-start) 921 copy(tempLevels, levels[start:]) 922 923 for j, order := range computeReordering(tempLevels) { 924 result[start+j] = order + start 925 } 926 start = limit 927 } 928 return result 929 } 930 931 // Return reordering array for a given level array. This reorders a single 932 // line. The reordering is a visual to logical map. For example, the 933 // leftmost char is string.charAt(order[0]). Rule L2. 934 func computeReordering(levels []level) []int { 935 result := make([]int, len(levels)) 936 // initialize order 937 for i := range result { 938 result[i] = i 939 } 940 941 // locate highest level found on line. 942 // Note the rules say text, but no reordering across line bounds is 943 // performed, so this is sufficient. 944 highestLevel := level(0) 945 lowestOddLevel := level(maxDepth + 2) 946 for _, level := range levels { 947 if level > highestLevel { 948 highestLevel = level 949 } 950 if level&1 != 0 && level < lowestOddLevel { 951 lowestOddLevel = level 952 } 953 } 954 955 for level := highestLevel; level >= lowestOddLevel; level-- { 956 for i := 0; i < len(levels); i++ { 957 if levels[i] >= level { 958 // find range of text at or above this level 959 start := i 960 limit := i + 1 961 for limit < len(levels) && levels[limit] >= level { 962 limit++ 963 } 964 965 for j, k := start, limit-1; j < k; j, k = j+1, k-1 { 966 result[j], result[k] = result[k], result[j] 967 } 968 // skip to end of level run 969 i = limit 970 } 971 } 972 } 973 974 return result 975 } 976 977 // isWhitespace reports whether the type is considered a whitespace type for the 978 // line break rules. 
979 func isWhitespace(c Class) bool { 980 switch c { 981 case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS: 982 return true 983 } 984 return false 985 } 986 987 // isRemovedByX9 reports whether the type is one of the types removed in X9. 988 func isRemovedByX9(c Class) bool { 989 switch c { 990 case LRE, RLE, LRO, RLO, PDF, BN: 991 return true 992 } 993 return false 994 } 995 996 // typeForLevel reports the strong type (L or R) corresponding to the level. 997 func typeForLevel(level level) Class { 998 if (level & 0x1) == 0 { 999 return L 1000 } 1001 return R 1002 } 1003 1004 // TODO: change validation to not panic 1005 1006 func validateTypes(types []Class) { 1007 if len(types) == 0 { 1008 log.Panic("types is null") 1009 } 1010 for i, t := range types[:len(types)-1] { 1011 if t == B { 1012 log.Panicf("B type before end of paragraph at index: %d", i) 1013 } 1014 } 1015 } 1016 1017 func validateParagraphEmbeddingLevel(embeddingLevel level) { 1018 if embeddingLevel != implicitLevel && 1019 embeddingLevel != 0 && 1020 embeddingLevel != 1 { 1021 log.Panicf("illegal paragraph embedding level: %d", embeddingLevel) 1022 } 1023 } 1024 1025 func validateLineBreaks(linebreaks []int, textLength int) { 1026 prev := 0 1027 for i, next := range linebreaks { 1028 if next <= prev { 1029 log.Panicf("bad linebreak: %d at index: %d", next, i) 1030 } 1031 prev = next 1032 } 1033 if prev != textLength { 1034 log.Panicf("last linebreak was %d, want %d", prev, textLength) 1035 } 1036 } 1037 1038 func validatePbTypes(pairTypes []bracketType) { 1039 if len(pairTypes) == 0 { 1040 log.Panic("pairTypes is null") 1041 } 1042 for i, pt := range pairTypes { 1043 switch pt { 1044 case bpNone, bpOpen, bpClose: 1045 default: 1046 log.Panicf("illegal pairType value at %d: %v", i, pairTypes[i]) 1047 } 1048 } 1049 } 1050 1051 func validatePbValues(pairValues []rune, pairTypes []bracketType) { 1052 if pairValues == nil { 1053 log.Panic("pairValues is null") 1054 } 1055 if len(pairTypes) != 
len(pairValues) { 1056 log.Panic("pairTypes is different length from pairValues") 1057 } 1058 }