github.com/icodeface/tls@v0.0.0-20230910023335-34df9250cd12/internal/x/text/unicode/bidi/core.go (about) 1 // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. 2 3 // Copyright 2015 The Go Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 package bidi 8 9 import "log" 10 11 // This implementation is a port based on the reference implementation found at: 12 // http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/ 13 // 14 // described in Unicode Bidirectional Algorithm (UAX #9). 15 // 16 // Input: 17 // There are two levels of input to the algorithm, since clients may prefer to 18 // supply some information from out-of-band sources rather than relying on the 19 // default behavior. 20 // 21 // - Bidi class array 22 // - Bidi class array, with externally supplied base line direction 23 // 24 // Output: 25 // Output is separated into several stages: 26 // 27 // - levels array over entire paragraph 28 // - reordering array over entire paragraph 29 // - levels array over line 30 // - reordering array over line 31 // 32 // Note that for conformance to the Unicode Bidirectional Algorithm, 33 // implementations are only required to generate correct reordering and 34 // character directionality (odd or even levels) over a line. Generating 35 // identical level arrays over a line is not required. Bidi explicit format 36 // codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and 37 // positions as long as the rest of the input is properly reordered. 38 // 39 // As the algorithm is defined to operate on a single paragraph at a time, this 40 // implementation is written to handle single paragraphs. Thus rule P1 is 41 // presumed by this implementation-- the data provided to the implementation is 42 // assumed to be a single paragraph, and either contains no 'B' codes, or a 43 // single 'B' code at the end of the input. 'B' is allowed as input to 44 // illustrate how the algorithm assigns it a level. 45 // 46 // Also note that rules L3 and L4 depend on the rendering engine that uses the 47 // result of the bidi algorithm. This implementation assumes that the rendering 48 // engine expects combining marks in visual order (e.g. to the left of their 49 // base character in RTL runs) and that it adjusts the glyphs used to render 50 // mirrored characters that are in RTL runs so that they render appropriately. 51 52 // level is the embedding level of a character. Even embedding levels indicate 53 // left-to-right order and odd levels indicate right-to-left order. The special 54 // level of -1 is reserved for undefined order. 55 type level int8 56 57 const implicitLevel level = -1 58 59 // in returns if x is equal to any of the values in set. 60 func (c Class) in(set ...Class) bool { 61 for _, s := range set { 62 if c == s { 63 return true 64 } 65 } 66 return false 67 } 68 69 // A paragraph contains the state of a paragraph. 70 type paragraph struct { 71 initialTypes []Class 72 73 // Arrays of properties needed for paired bracket evaluation in N0 74 pairTypes []bracketType // paired Bracket types for paragraph 75 pairValues []rune // rune for opening bracket or pbOpen and pbClose; 0 for pbNone 76 77 embeddingLevel level // default: = implicitLevel; 78 79 // at the paragraph levels 80 resultTypes []Class 81 resultLevels []level 82 83 // Index of matching PDI for isolate initiator characters. For other 84 // characters, the value of matchingPDI will be set to -1. For isolate 85 // initiators with no matching PDI, matchingPDI will be set to the length of 86 // the input string. 87 matchingPDI []int 88 89 // Index of matching isolate initiator for PDI characters. For other 90 // characters, and for PDIs with no matching isolate initiator, the value of 91 // matchingIsolateInitiator will be set to -1. 92 matchingIsolateInitiator []int 93 } 94 95 // newParagraph initializes a paragraph. The user needs to supply a few arrays 96 // corresponding to the preprocessed text input. The types correspond to the 97 // Unicode BiDi classes for each rune. pairTypes indicates the bracket type for 98 // each rune. pairValues provides a unique bracket class identifier for each 99 // rune (suggested is the rune of the open bracket for opening and matching 100 // close brackets, after normalization). The embedding levels are optional, but 101 // may be supplied to encode embedding levels of styled text. 102 // 103 // TODO: return an error. 104 func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph { 105 validateTypes(types) 106 validatePbTypes(pairTypes) 107 validatePbValues(pairValues, pairTypes) 108 validateParagraphEmbeddingLevel(levels) 109 110 p := ¶graph{ 111 initialTypes: append([]Class(nil), types...), 112 embeddingLevel: levels, 113 114 pairTypes: pairTypes, 115 pairValues: pairValues, 116 117 resultTypes: append([]Class(nil), types...), 118 } 119 p.run() 120 return p 121 } 122 123 func (p *paragraph) Len() int { return len(p.initialTypes) } 124 125 // The algorithm. Does not include line-based processing (Rules L1, L2). 126 // These are applied later in the line-based phase of the algorithm. 127 func (p *paragraph) run() { 128 p.determineMatchingIsolates() 129 130 // 1) determining the paragraph level 131 // Rule P1 is the requirement for entering this algorithm. 132 // Rules P2, P3. 133 // If no externally supplied paragraph embedding level, use default. 134 if p.embeddingLevel == implicitLevel { 135 p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len()) 136 } 137 138 // Initialize result levels to paragraph embedding level. 139 p.resultLevels = make([]level, p.Len()) 140 setLevels(p.resultLevels, p.embeddingLevel) 141 142 // 2) Explicit levels and directions 143 // Rules X1-X8. 144 p.determineExplicitEmbeddingLevels() 145 146 // Rule X9. 147 // We do not remove the embeddings, the overrides, the PDFs, and the BNs 148 // from the string explicitly. But they are not copied into isolating run 149 // sequences when they are created, so they are removed for all 150 // practical purposes. 151 152 // Rule X10. 153 // Run remainder of algorithm one isolating run sequence at a time 154 for _, seq := range p.determineIsolatingRunSequences() { 155 // 3) resolving weak types 156 // Rules W1-W7. 157 seq.resolveWeakTypes() 158 159 // 4a) resolving paired brackets 160 // Rule N0 161 resolvePairedBrackets(seq) 162 163 // 4b) resolving neutral types 164 // Rules N1-N3. 165 seq.resolveNeutralTypes() 166 167 // 5) resolving implicit embedding levels 168 // Rules I1, I2. 169 seq.resolveImplicitLevels() 170 171 // Apply the computed levels and types 172 seq.applyLevelsAndTypes() 173 } 174 175 // Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and 176 // BNs. This is for convenience, so the resulting level array will have 177 // a value for every character. 178 p.assignLevelsToCharactersRemovedByX9() 179 } 180 181 // determineMatchingIsolates determines the matching PDI for each isolate 182 // initiator and vice versa. 183 // 184 // Definition BD9. 185 // 186 // At the end of this function: 187 // 188 // - The member variable matchingPDI is set to point to the index of the 189 // matching PDI character for each isolate initiator character. If there is 190 // no matching PDI, it is set to the length of the input text. For other 191 // characters, it is set to -1. 192 // - The member variable matchingIsolateInitiator is set to point to the 193 // index of the matching isolate initiator character for each PDI character. 194 // If there is no matching isolate initiator, or the character is not a PDI, 195 // it is set to -1. 196 func (p *paragraph) determineMatchingIsolates() { 197 p.matchingPDI = make([]int, p.Len()) 198 p.matchingIsolateInitiator = make([]int, p.Len()) 199 200 for i := range p.matchingIsolateInitiator { 201 p.matchingIsolateInitiator[i] = -1 202 } 203 204 for i := range p.matchingPDI { 205 p.matchingPDI[i] = -1 206 207 if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) { 208 depthCounter := 1 209 for j := i + 1; j < p.Len(); j++ { 210 if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) { 211 depthCounter++ 212 } else if u == PDI { 213 if depthCounter--; depthCounter == 0 { 214 p.matchingPDI[i] = j 215 p.matchingIsolateInitiator[j] = i 216 break 217 } 218 } 219 } 220 if p.matchingPDI[i] == -1 { 221 p.matchingPDI[i] = p.Len() 222 } 223 } 224 } 225 } 226 227 // determineParagraphEmbeddingLevel reports the resolved paragraph direction of 228 // the substring limited by the given range [start, end). 229 // 230 // Determines the paragraph level based on rules P2, P3. This is also used 231 // in rule X5c to find if an FSI should resolve to LRI or RLI. 232 func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level { 233 var strongType Class = unknownClass 234 235 // Rule P2. 236 for i := start; i < end; i++ { 237 if t := p.resultTypes[i]; t.in(L, AL, R) { 238 strongType = t 239 break 240 } else if t.in(FSI, LRI, RLI) { 241 i = p.matchingPDI[i] // skip over to the matching PDI 242 if i > end { 243 log.Panic("assert (i <= end)") 244 } 245 } 246 } 247 // Rule P3. 248 switch strongType { 249 case unknownClass: // none found 250 // default embedding level when no strong types found is 0. 251 return 0 252 case L: 253 return 0 254 default: // AL, R 255 return 1 256 } 257 } 258 259 const maxDepth = 125 260 261 // This stack will store the embedding levels and override and isolated 262 // statuses 263 type directionalStatusStack struct { 264 stackCounter int 265 embeddingLevelStack [maxDepth + 1]level 266 overrideStatusStack [maxDepth + 1]Class 267 isolateStatusStack [maxDepth + 1]bool 268 } 269 270 func (s *directionalStatusStack) empty() { s.stackCounter = 0 } 271 func (s *directionalStatusStack) pop() { s.stackCounter-- } 272 func (s *directionalStatusStack) depth() int { return s.stackCounter } 273 274 func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) { 275 s.embeddingLevelStack[s.stackCounter] = level 276 s.overrideStatusStack[s.stackCounter] = overrideStatus 277 s.isolateStatusStack[s.stackCounter] = isolateStatus 278 s.stackCounter++ 279 } 280 281 func (s *directionalStatusStack) lastEmbeddingLevel() level { 282 return s.embeddingLevelStack[s.stackCounter-1] 283 } 284 285 func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class { 286 return s.overrideStatusStack[s.stackCounter-1] 287 } 288 289 func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool { 290 return s.isolateStatusStack[s.stackCounter-1] 291 } 292 293 // Determine explicit levels using rules X1 - X8 294 func (p *paragraph) determineExplicitEmbeddingLevels() { 295 var stack directionalStatusStack 296 var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int 297 298 // Rule X1. 299 stack.push(p.embeddingLevel, ON, false) 300 301 for i, t := range p.resultTypes { 302 // Rules X2, X3, X4, X5, X5a, X5b, X5c 303 switch t { 304 case RLE, LRE, RLO, LRO, RLI, LRI, FSI: 305 isIsolate := t.in(RLI, LRI, FSI) 306 isRTL := t.in(RLE, RLO, RLI) 307 308 // override if this is an FSI that resolves to RLI 309 if t == FSI { 310 isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1) 311 } 312 if isIsolate { 313 p.resultLevels[i] = stack.lastEmbeddingLevel() 314 if stack.lastDirectionalOverrideStatus() != ON { 315 p.resultTypes[i] = stack.lastDirectionalOverrideStatus() 316 } 317 } 318 319 var newLevel level 320 if isRTL { 321 // least greater odd 322 newLevel = (stack.lastEmbeddingLevel() + 1) | 1 323 } else { 324 // least greater even 325 newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1 326 } 327 328 if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 { 329 if isIsolate { 330 validIsolateCount++ 331 } 332 // Push new embedding level, override status, and isolated 333 // status. 334 // No check for valid stack counter, since the level check 335 // suffices. 336 switch t { 337 case LRO: 338 stack.push(newLevel, L, isIsolate) 339 case RLO: 340 stack.push(newLevel, R, isIsolate) 341 default: 342 stack.push(newLevel, ON, isIsolate) 343 } 344 // Not really part of the spec 345 if !isIsolate { 346 p.resultLevels[i] = newLevel 347 } 348 } else { 349 // This is an invalid explicit formatting character, 350 // so apply the "Otherwise" part of rules X2-X5b. 351 if isIsolate { 352 overflowIsolateCount++ 353 } else { // !isIsolate 354 if overflowIsolateCount == 0 { 355 overflowEmbeddingCount++ 356 } 357 } 358 } 359 360 // Rule X6a 361 case PDI: 362 if overflowIsolateCount > 0 { 363 overflowIsolateCount-- 364 } else if validIsolateCount == 0 { 365 // do nothing 366 } else { 367 overflowEmbeddingCount = 0 368 for !stack.lastDirectionalIsolateStatus() { 369 stack.pop() 370 } 371 stack.pop() 372 validIsolateCount-- 373 } 374 p.resultLevels[i] = stack.lastEmbeddingLevel() 375 376 // Rule X7 377 case PDF: 378 // Not really part of the spec 379 p.resultLevels[i] = stack.lastEmbeddingLevel() 380 381 if overflowIsolateCount > 0 { 382 // do nothing 383 } else if overflowEmbeddingCount > 0 { 384 overflowEmbeddingCount-- 385 } else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 { 386 stack.pop() 387 } 388 389 case B: // paragraph separator. 390 // Rule X8. 391 392 // These values are reset for clarity, in this implementation B 393 // can only occur as the last code in the array. 394 stack.empty() 395 overflowIsolateCount = 0 396 overflowEmbeddingCount = 0 397 validIsolateCount = 0 398 p.resultLevels[i] = p.embeddingLevel 399 400 default: 401 p.resultLevels[i] = stack.lastEmbeddingLevel() 402 if stack.lastDirectionalOverrideStatus() != ON { 403 p.resultTypes[i] = stack.lastDirectionalOverrideStatus() 404 } 405 } 406 } 407 } 408 409 type isolatingRunSequence struct { 410 p *paragraph 411 412 indexes []int // indexes to the original string 413 414 types []Class // type of each character using the index 415 resolvedLevels []level // resolved levels after application of rules 416 level level 417 sos, eos Class 418 } 419 420 func (i *isolatingRunSequence) Len() int { return len(i.indexes) } 421 422 func maxLevel(a, b level) level { 423 if a > b { 424 return a 425 } 426 return b 427 } 428 429 // Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types, 430 // either L or R, for each isolating run sequence. 431 func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence { 432 length := len(indexes) 433 types := make([]Class, length) 434 for i, x := range indexes { 435 types[i] = p.resultTypes[x] 436 } 437 438 // assign level, sos and eos 439 prevChar := indexes[0] - 1 440 for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) { 441 prevChar-- 442 } 443 prevLevel := p.embeddingLevel 444 if prevChar >= 0 { 445 prevLevel = p.resultLevels[prevChar] 446 } 447 448 var succLevel level 449 lastType := types[length-1] 450 if lastType.in(LRI, RLI, FSI) { 451 succLevel = p.embeddingLevel 452 } else { 453 // the first character after the end of run sequence 454 limit := indexes[length-1] + 1 455 for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ { 456 457 } 458 succLevel = p.embeddingLevel 459 if limit < p.Len() { 460 succLevel = p.resultLevels[limit] 461 } 462 } 463 level := p.resultLevels[indexes[0]] 464 return &isolatingRunSequence{ 465 p: p, 466 indexes: indexes, 467 types: types, 468 level: level, 469 sos: typeForLevel(maxLevel(prevLevel, level)), 470 eos: typeForLevel(maxLevel(succLevel, level)), 471 } 472 } 473 474 // Resolving weak types Rules W1-W7. 475 // 476 // Note that some weak types (EN, AN) remain after this processing is 477 // complete. 478 func (s *isolatingRunSequence) resolveWeakTypes() { 479 480 // on entry, only these types remain 481 s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI) 482 483 // Rule W1. 484 // Changes all NSMs. 485 preceedingCharacterType := s.sos 486 for i, t := range s.types { 487 if t == NSM { 488 s.types[i] = preceedingCharacterType 489 } else { 490 if t.in(LRI, RLI, FSI, PDI) { 491 preceedingCharacterType = ON 492 } 493 preceedingCharacterType = t 494 } 495 } 496 497 // Rule W2. 498 // EN does not change at the start of the run, because sos != AL. 499 for i, t := range s.types { 500 if t == EN { 501 for j := i - 1; j >= 0; j-- { 502 if t := s.types[j]; t.in(L, R, AL) { 503 if t == AL { 504 s.types[i] = AN 505 } 506 break 507 } 508 } 509 } 510 } 511 512 // Rule W3. 513 for i, t := range s.types { 514 if t == AL { 515 s.types[i] = R 516 } 517 } 518 519 // Rule W4. 520 // Since there must be values on both sides for this rule to have an 521 // effect, the scan skips the first and last value. 522 // 523 // Although the scan proceeds left to right, and changes the type 524 // values in a way that would appear to affect the computations 525 // later in the scan, there is actually no problem. A change in the 526 // current value can only affect the value to its immediate right, 527 // and only affect it if it is ES or CS. But the current value can 528 // only change if the value to its right is not ES or CS. Thus 529 // either the current value will not change, or its change will have 530 // no effect on the remainder of the analysis. 531 532 for i := 1; i < s.Len()-1; i++ { 533 t := s.types[i] 534 if t == ES || t == CS { 535 prevSepType := s.types[i-1] 536 succSepType := s.types[i+1] 537 if prevSepType == EN && succSepType == EN { 538 s.types[i] = EN 539 } else if s.types[i] == CS && prevSepType == AN && succSepType == AN { 540 s.types[i] = AN 541 } 542 } 543 } 544 545 // Rule W5. 546 for i, t := range s.types { 547 if t == ET { 548 // locate end of sequence 549 runStart := i 550 runEnd := s.findRunLimit(runStart, ET) 551 552 // check values at ends of sequence 553 t := s.sos 554 if runStart > 0 { 555 t = s.types[runStart-1] 556 } 557 if t != EN { 558 t = s.eos 559 if runEnd < len(s.types) { 560 t = s.types[runEnd] 561 } 562 } 563 if t == EN { 564 setTypes(s.types[runStart:runEnd], EN) 565 } 566 // continue at end of sequence 567 i = runEnd 568 } 569 } 570 571 // Rule W6. 572 for i, t := range s.types { 573 if t.in(ES, ET, CS) { 574 s.types[i] = ON 575 } 576 } 577 578 // Rule W7. 579 for i, t := range s.types { 580 if t == EN { 581 // set default if we reach start of run 582 prevStrongType := s.sos 583 for j := i - 1; j >= 0; j-- { 584 t = s.types[j] 585 if t == L || t == R { // AL's have been changed to R 586 prevStrongType = t 587 break 588 } 589 } 590 if prevStrongType == L { 591 s.types[i] = L 592 } 593 } 594 } 595 } 596 597 // 6) resolving neutral types Rules N1-N2. 598 func (s *isolatingRunSequence) resolveNeutralTypes() { 599 600 // on entry, only these types can be in resultTypes 601 s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI) 602 603 for i, t := range s.types { 604 switch t { 605 case WS, ON, B, S, RLI, LRI, FSI, PDI: 606 // find bounds of run of neutrals 607 runStart := i 608 runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI) 609 610 // determine effective types at ends of run 611 var leadType, trailType Class 612 613 // Note that the character found can only be L, R, AN, or 614 // EN. 615 if runStart == 0 { 616 leadType = s.sos 617 } else { 618 leadType = s.types[runStart-1] 619 if leadType.in(AN, EN) { 620 leadType = R 621 } 622 } 623 if runEnd == len(s.types) { 624 trailType = s.eos 625 } else { 626 trailType = s.types[runEnd] 627 if trailType.in(AN, EN) { 628 trailType = R 629 } 630 } 631 632 var resolvedType Class 633 if leadType == trailType { 634 // Rule N1. 635 resolvedType = leadType 636 } else { 637 // Rule N2. 638 // Notice the embedding level of the run is used, not 639 // the paragraph embedding level. 640 resolvedType = typeForLevel(s.level) 641 } 642 643 setTypes(s.types[runStart:runEnd], resolvedType) 644 645 // skip over run of (former) neutrals 646 i = runEnd 647 } 648 } 649 } 650 651 func setLevels(levels []level, newLevel level) { 652 for i := range levels { 653 levels[i] = newLevel 654 } 655 } 656 657 func setTypes(types []Class, newType Class) { 658 for i := range types { 659 types[i] = newType 660 } 661 } 662 663 // 7) resolving implicit embedding levels Rules I1, I2. 664 func (s *isolatingRunSequence) resolveImplicitLevels() { 665 666 // on entry, only these types can be in resultTypes 667 s.assertOnly(L, R, EN, AN) 668 669 s.resolvedLevels = make([]level, len(s.types)) 670 setLevels(s.resolvedLevels, s.level) 671 672 if (s.level & 1) == 0 { // even level 673 for i, t := range s.types { 674 // Rule I1. 675 if t == L { 676 // no change 677 } else if t == R { 678 s.resolvedLevels[i] += 1 679 } else { // t == AN || t == EN 680 s.resolvedLevels[i] += 2 681 } 682 } 683 } else { // odd level 684 for i, t := range s.types { 685 // Rule I2. 686 if t == R { 687 // no change 688 } else { // t == L || t == AN || t == EN 689 s.resolvedLevels[i] += 1 690 } 691 } 692 } 693 } 694 695 // Applies the levels and types resolved in rules W1-I2 to the 696 // resultLevels array. 697 func (s *isolatingRunSequence) applyLevelsAndTypes() { 698 for i, x := range s.indexes { 699 s.p.resultTypes[x] = s.types[i] 700 s.p.resultLevels[x] = s.resolvedLevels[i] 701 } 702 } 703 704 // Return the limit of the run consisting only of the types in validSet 705 // starting at index. This checks the value at index, and will return 706 // index if that value is not in validSet. 707 func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int { 708 loop: 709 for ; index < len(s.types); index++ { 710 t := s.types[index] 711 for _, valid := range validSet { 712 if t == valid { 713 continue loop 714 } 715 } 716 return index // didn't find a match in validSet 717 } 718 return len(s.types) 719 } 720 721 // Algorithm validation. Assert that all values in types are in the 722 // provided set. 723 func (s *isolatingRunSequence) assertOnly(codes ...Class) { 724 loop: 725 for i, t := range s.types { 726 for _, c := range codes { 727 if t == c { 728 continue loop 729 } 730 } 731 log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i]) 732 } 733 } 734 735 // determineLevelRuns returns an array of level runs. Each level run is 736 // described as an array of indexes into the input string. 737 // 738 // Determines the level runs. Rule X9 will be applied in determining the 739 // runs, in the way that makes sure the characters that are supposed to be 740 // removed are not included in the runs. 741 func (p *paragraph) determineLevelRuns() [][]int { 742 run := []int{} 743 allRuns := [][]int{} 744 currentLevel := implicitLevel 745 746 for i := range p.initialTypes { 747 if !isRemovedByX9(p.initialTypes[i]) { 748 if p.resultLevels[i] != currentLevel { 749 // we just encountered a new run; wrap up last run 750 if currentLevel >= 0 { // only wrap it up if there was a run 751 allRuns = append(allRuns, run) 752 run = nil 753 } 754 // Start new run 755 currentLevel = p.resultLevels[i] 756 } 757 run = append(run, i) 758 } 759 } 760 // Wrap up the final run, if any 761 if len(run) > 0 { 762 allRuns = append(allRuns, run) 763 } 764 return allRuns 765 } 766 767 // Definition BD13. Determine isolating run sequences. 768 func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence { 769 levelRuns := p.determineLevelRuns() 770 771 // Compute the run that each character belongs to 772 runForCharacter := make([]int, p.Len()) 773 for i, run := range levelRuns { 774 for _, index := range run { 775 runForCharacter[index] = i 776 } 777 } 778 779 sequences := []*isolatingRunSequence{} 780 781 var currentRunSequence []int 782 783 for _, run := range levelRuns { 784 first := run[0] 785 if p.initialTypes[first] != PDI || p.matchingIsolateInitiator[first] == -1 { 786 currentRunSequence = nil 787 // int run = i; 788 for { 789 // Copy this level run into currentRunSequence 790 currentRunSequence = append(currentRunSequence, run...) 791 792 last := currentRunSequence[len(currentRunSequence)-1] 793 lastT := p.initialTypes[last] 794 if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() { 795 run = levelRuns[runForCharacter[p.matchingPDI[last]]] 796 } else { 797 break 798 } 799 } 800 sequences = append(sequences, p.isolatingRunSequence(currentRunSequence)) 801 } 802 } 803 return sequences 804 } 805 806 // Assign level information to characters removed by rule X9. This is for 807 // ease of relating the level information to the original input data. Note 808 // that the levels assigned to these codes are arbitrary, they're chosen so 809 // as to avoid breaking level runs. 810 func (p *paragraph) assignLevelsToCharactersRemovedByX9() { 811 for i, t := range p.initialTypes { 812 if t.in(LRE, RLE, LRO, RLO, PDF, BN) { 813 p.resultTypes[i] = t 814 p.resultLevels[i] = -1 815 } 816 } 817 // now propagate forward the levels information (could have 818 // propagated backward, the main thing is not to introduce a level 819 // break where one doesn't already exist). 820 821 if p.resultLevels[0] == -1 { 822 p.resultLevels[0] = p.embeddingLevel 823 } 824 for i := 1; i < len(p.initialTypes); i++ { 825 if p.resultLevels[i] == -1 { 826 p.resultLevels[i] = p.resultLevels[i-1] 827 } 828 } 829 // Embedding information is for informational purposes only so need not be 830 // adjusted. 831 } 832 833 // 834 // Output 835 // 836 837 // getLevels computes levels array breaking lines at offsets in linebreaks. 838 // Rule L1. 839 // 840 // The linebreaks array must include at least one value. The values must be 841 // in strictly increasing order (no duplicates) between 1 and the length of 842 // the text, inclusive. The last value must be the length of the text. 843 func (p *paragraph) getLevels(linebreaks []int) []level { 844 // Note that since the previous processing has removed all 845 // P, S, and WS values from resultTypes, the values referred to 846 // in these rules are the initial types, before any processing 847 // has been applied (including processing of overrides). 848 // 849 // This example implementation has reinserted explicit format codes 850 // and BN, in order that the levels array correspond to the 851 // initial text. Their final placement is not normative. 852 // These codes are treated like WS in this implementation, 853 // so they don't interrupt sequences of WS. 854 855 validateLineBreaks(linebreaks, p.Len()) 856 857 result := append([]level(nil), p.resultLevels...) 858 859 // don't worry about linebreaks since if there is a break within 860 // a series of WS values preceding S, the linebreak itself 861 // causes the reset. 862 for i, t := range p.initialTypes { 863 if t.in(B, S) { 864 // Rule L1, clauses one and two. 865 result[i] = p.embeddingLevel 866 867 // Rule L1, clause three. 868 for j := i - 1; j >= 0; j-- { 869 if isWhitespace(p.initialTypes[j]) { // including format codes 870 result[j] = p.embeddingLevel 871 } else { 872 break 873 } 874 } 875 } 876 } 877 878 // Rule L1, clause four. 879 start := 0 880 for _, limit := range linebreaks { 881 for j := limit - 1; j >= start; j-- { 882 if isWhitespace(p.initialTypes[j]) { // including format codes 883 result[j] = p.embeddingLevel 884 } else { 885 break 886 } 887 } 888 start = limit 889 } 890 891 return result 892 } 893 894 // getReordering returns the reordering of lines from a visual index to a 895 // logical index for line breaks at the given offsets. 896 // 897 // Lines are concatenated from left to right. So for example, the fifth 898 // character from the left on the third line is 899 // 900 // getReordering(linebreaks)[linebreaks[1] + 4] 901 // 902 // (linebreaks[1] is the position after the last character of the second 903 // line, which is also the index of the first character on the third line, 904 // and adding four gets the fifth character from the left). 905 // 906 // The linebreaks array must include at least one value. The values must be 907 // in strictly increasing order (no duplicates) between 1 and the length of 908 // the text, inclusive. The last value must be the length of the text. 909 func (p *paragraph) getReordering(linebreaks []int) []int { 910 validateLineBreaks(linebreaks, p.Len()) 911 912 return computeMultilineReordering(p.getLevels(linebreaks), linebreaks) 913 } 914 915 // Return multiline reordering array for a given level array. Reordering 916 // does not occur across a line break. 917 func computeMultilineReordering(levels []level, linebreaks []int) []int { 918 result := make([]int, len(levels)) 919 920 start := 0 921 for _, limit := range linebreaks { 922 tempLevels := make([]level, limit-start) 923 copy(tempLevels, levels[start:]) 924 925 for j, order := range computeReordering(tempLevels) { 926 result[start+j] = order + start 927 } 928 start = limit 929 } 930 return result 931 } 932 933 // Return reordering array for a given level array. This reorders a single 934 // line. The reordering is a visual to logical map. For example, the 935 // leftmost char is string.charAt(order[0]). Rule L2. 936 func computeReordering(levels []level) []int { 937 result := make([]int, len(levels)) 938 // initialize order 939 for i := range result { 940 result[i] = i 941 } 942 943 // locate highest level found on line. 944 // Note the rules say text, but no reordering across line bounds is 945 // performed, so this is sufficient. 946 highestLevel := level(0) 947 lowestOddLevel := level(maxDepth + 2) 948 for _, level := range levels { 949 if level > highestLevel { 950 highestLevel = level 951 } 952 if level&1 != 0 && level < lowestOddLevel { 953 lowestOddLevel = level 954 } 955 } 956 957 for level := highestLevel; level >= lowestOddLevel; level-- { 958 for i := 0; i < len(levels); i++ { 959 if levels[i] >= level { 960 // find range of text at or above this level 961 start := i 962 limit := i + 1 963 for limit < len(levels) && levels[limit] >= level { 964 limit++ 965 } 966 967 for j, k := start, limit-1; j < k; j, k = j+1, k-1 { 968 result[j], result[k] = result[k], result[j] 969 } 970 // skip to end of level run 971 i = limit 972 } 973 } 974 } 975 976 return result 977 } 978 979 // isWhitespace reports whether the type is considered a whitespace type for the 980 // line break rules. 981 func isWhitespace(c Class) bool { 982 switch c { 983 case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS: 984 return true 985 } 986 return false 987 } 988 989 // isRemovedByX9 reports whether the type is one of the types removed in X9. 990 func isRemovedByX9(c Class) bool { 991 switch c { 992 case LRE, RLE, LRO, RLO, PDF, BN: 993 return true 994 } 995 return false 996 } 997 998 // typeForLevel reports the strong type (L or R) corresponding to the level. 999 func typeForLevel(level level) Class { 1000 if (level & 0x1) == 0 { 1001 return L 1002 } 1003 return R 1004 } 1005 1006 // TODO: change validation to not panic 1007 1008 func validateTypes(types []Class) { 1009 if len(types) == 0 { 1010 log.Panic("types is null") 1011 } 1012 for i, t := range types[:len(types)-1] { 1013 if t == B { 1014 log.Panicf("B type before end of paragraph at index: %d", i) 1015 } 1016 } 1017 } 1018 1019 func validateParagraphEmbeddingLevel(embeddingLevel level) { 1020 if embeddingLevel != implicitLevel && 1021 embeddingLevel != 0 && 1022 embeddingLevel != 1 { 1023 log.Panicf("illegal paragraph embedding level: %d", embeddingLevel) 1024 } 1025 } 1026 1027 func validateLineBreaks(linebreaks []int, textLength int) { 1028 prev := 0 1029 for i, next := range linebreaks { 1030 if next <= prev { 1031 log.Panicf("bad linebreak: %d at index: %d", next, i) 1032 } 1033 prev = next 1034 } 1035 if prev != textLength { 1036 log.Panicf("last linebreak was %d, want %d", prev, textLength) 1037 } 1038 } 1039 1040 func validatePbTypes(pairTypes []bracketType) { 1041 if len(pairTypes) == 0 { 1042 log.Panic("pairTypes is null") 1043 } 1044 for i, pt := range pairTypes { 1045 switch pt { 1046 case bpNone, bpOpen, bpClose: 1047 default: 1048 log.Panicf("illegal pairType value at %d: %v", i, pairTypes[i]) 1049 } 1050 } 1051 } 1052 1053 func validatePbValues(pairValues []rune, pairTypes []bracketType) { 1054 if pairValues == nil { 1055 log.Panic("pairValues is null") 1056 } 1057 if len(pairTypes) != len(pairValues) { 1058 log.Panic("pairTypes is different length from pairValues") 1059 } 1060 }