// Source: github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/unicode/bidi/core.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package bidi

import "log"

// This implementation is a port based on the reference implementation found at:
// http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
//
// described in Unicode Bidirectional Algorithm (UAX #9).
//
// Input:
// There are two levels of input to the algorithm, since clients may prefer to
// supply some information from out-of-band sources rather than relying on the
// default behavior.
//
// - Bidi class array
// - Bidi class array, with externally supplied base line direction
//
// Output:
// Output is separated into several stages:
//
// - levels array over entire paragraph
// - reordering array over entire paragraph
// - levels array over line
// - reordering array over line
//
// Note that for conformance to the Unicode Bidirectional Algorithm,
// implementations are only required to generate correct reordering and
// character directionality (odd or even levels) over a line. Generating
// identical level arrays over a line is not required. Bidi explicit format
// codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and
// positions as long as the rest of the input is properly reordered.
//
// As the algorithm is defined to operate on a single paragraph at a time, this
// implementation is written to handle single paragraphs. Thus rule P1 is
// presumed by this implementation-- the data provided to the implementation is
// assumed to be a single paragraph, and either contains no 'B' codes, or a
// single 'B' code at the end of the input. 'B' is allowed as input to
// illustrate how the algorithm assigns it a level.
43 // 44 // Also note that rules L3 and L4 depend on the rendering engine that uses the 45 // result of the bidi algorithm. This implementation assumes that the rendering 46 // engine expects combining marks in visual order (e.g. to the left of their 47 // base character in RTL runs) and that it adjusts the glyphs used to render 48 // mirrored characters that are in RTL runs so that they render appropriately. 49 50 // level is the embedding level of a character. Even embedding levels indicate 51 // left-to-right order and odd levels indicate right-to-left order. The special 52 // level of -1 is reserved for undefined order. 53 type level int8 54 55 const implicitLevel level = -1 56 57 // in returns if x is equal to any of the values in set. 58 func (c class) in(set ...class) bool { 59 for _, s := range set { 60 if c == s { 61 return true 62 } 63 } 64 return false 65 } 66 67 // A paragraph contains the state of a paragraph. 68 type paragraph struct { 69 initialTypes []class 70 71 // Arrays of properties needed for paired bracket evaluation in N0 72 pairTypes []bracketType // paired Bracket types for paragraph 73 pairValues []rune // rune for opening bracket or pbOpen and pbClose; 0 for pbNone 74 75 embeddingLevel level // default: = implicitLevel; 76 77 // at the paragraph levels 78 resultTypes []class 79 resultLevels []level 80 81 // Index of matching PDI for isolate initiator characters. For other 82 // characters, the value of matchingPDI will be set to -1. For isolate 83 // initiators with no matching PDI, matchingPDI will be set to the length of 84 // the input string. 85 matchingPDI []int 86 87 // Index of matching isolate initiator for PDI characters. For other 88 // characters, and for PDIs with no matching isolate initiator, the value of 89 // matchingIsolateInitiator will be set to -1. 90 matchingIsolateInitiator []int 91 } 92 93 // newParagraph initializes a paragraph. 
The user needs to supply a few arrays 94 // corresponding to the preprocessed text input. The types correspond to the 95 // Unicode BiDi classes for each rune. pairTypes indicates the bracket type for 96 // each rune. pairValues provides a unique bracket class identifier for each 97 // rune (suggested is the rune of the open bracket for opening and matching 98 // close brackets, after normalization). The embedding levels are optional, but 99 // may be supplied to encode embedding levels of styled text. 100 // 101 // TODO: return an error. 102 func newParagraph(types []class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph { 103 validateTypes(types) 104 validatePbTypes(pairTypes) 105 validatePbValues(pairValues, pairTypes) 106 validateParagraphEmbeddingLevel(levels) 107 108 p := ¶graph{ 109 initialTypes: append([]class(nil), types...), 110 embeddingLevel: levels, 111 112 pairTypes: pairTypes, 113 pairValues: pairValues, 114 115 resultTypes: append([]class(nil), types...), 116 } 117 p.run() 118 return p 119 } 120 121 func (p *paragraph) Len() int { return len(p.initialTypes) } 122 123 // The algorithm. Does not include line-based processing (Rules L1, L2). 124 // These are applied later in the line-based phase of the algorithm. 125 func (p *paragraph) run() { 126 p.determineMatchingIsolates() 127 128 // 1) determining the paragraph level 129 // Rule P1 is the requirement for entering this algorithm. 130 // Rules P2, P3. 131 // If no externally supplied paragraph embedding level, use default. 132 if p.embeddingLevel == implicitLevel { 133 p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len()) 134 } 135 136 // Initialize result levels to paragraph embedding level. 137 p.resultLevels = make([]level, p.Len()) 138 setLevels(p.resultLevels, p.embeddingLevel) 139 140 // 2) Explicit levels and directions 141 // Rules X1-X8. 142 p.determineExplicitEmbeddingLevels() 143 144 // Rule X9. 
145 // We do not remove the embeddings, the overrides, the PDFs, and the BNs 146 // from the string explicitly. But they are not copied into isolating run 147 // sequences when they are created, so they are removed for all 148 // practical purposes. 149 150 // Rule X10. 151 // Run remainder of algorithm one isolating run sequence at a time 152 for _, seq := range p.determineIsolatingRunSequences() { 153 // 3) resolving weak types 154 // Rules W1-W7. 155 seq.resolveWeakTypes() 156 157 // 4a) resolving paired brackets 158 // Rule N0 159 resolvePairedBrackets(seq) 160 161 // 4b) resolving neutral types 162 // Rules N1-N3. 163 seq.resolveNeutralTypes() 164 165 // 5) resolving implicit embedding levels 166 // Rules I1, I2. 167 seq.resolveImplicitLevels() 168 169 // Apply the computed levels and types 170 seq.applyLevelsAndTypes() 171 } 172 173 // Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and 174 // BNs. This is for convenience, so the resulting level array will have 175 // a value for every character. 176 p.assignLevelsToCharactersRemovedByX9() 177 } 178 179 // determineMatchingIsolates determines the matching PDI for each isolate 180 // initiator and vice versa. 181 // 182 // Definition BD9. 183 // 184 // At the end of this function: 185 // 186 // - The member variable matchingPDI is set to point to the index of the 187 // matching PDI character for each isolate initiator character. If there is 188 // no matching PDI, it is set to the length of the input text. For other 189 // characters, it is set to -1. 190 // - The member variable matchingIsolateInitiator is set to point to the 191 // index of the matching isolate initiator character for each PDI character. 192 // If there is no matching isolate initiator, or the character is not a PDI, 193 // it is set to -1. 
194 func (p *paragraph) determineMatchingIsolates() { 195 p.matchingPDI = make([]int, p.Len()) 196 p.matchingIsolateInitiator = make([]int, p.Len()) 197 198 for i := range p.matchingIsolateInitiator { 199 p.matchingIsolateInitiator[i] = -1 200 } 201 202 for i := range p.matchingPDI { 203 p.matchingPDI[i] = -1 204 205 if t := p.resultTypes[i]; t.in(_LRI, _RLI, _FSI) { 206 depthCounter := 1 207 for j := i + 1; j < p.Len(); j++ { 208 if u := p.resultTypes[j]; u.in(_LRI, _RLI, _FSI) { 209 depthCounter++ 210 } else if u == _PDI { 211 if depthCounter--; depthCounter == 0 { 212 p.matchingPDI[i] = j 213 p.matchingIsolateInitiator[j] = i 214 break 215 } 216 } 217 } 218 if p.matchingPDI[i] == -1 { 219 p.matchingPDI[i] = p.Len() 220 } 221 } 222 } 223 } 224 225 // determineParagraphEmbeddingLevel reports the resolved paragraph direction of 226 // the substring limited by the given range [start, end). 227 // 228 // Determines the paragraph level based on rules P2, P3. This is also used 229 // in rule X5c to find if an FSI should resolve to LRI or RLI. 230 func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level { 231 var strongType class = -1 // unknown 232 233 // Rule P2. 234 for i := start; i < end; i++ { 235 if t := p.resultTypes[i]; t.in(_L, _AL, _R) { 236 strongType = t 237 break 238 } else if t.in(_FSI, _LRI, _RLI) { 239 i = p.matchingPDI[i] // skip over to the matching PDI 240 if i > end { 241 log.Panic("assert (i <= end)") 242 } 243 } 244 } 245 // Rule P3. 246 switch strongType { 247 case -1: // none found 248 // default embedding level when no strong types found is 0. 
249 return 0 250 case _L: 251 return 0 252 default: // AL, R 253 return 1 254 } 255 } 256 257 const maxDepth = 125 258 259 // This stack will store the embedding levels and override and isolated 260 // statuses 261 type directionalStatusStack struct { 262 stackCounter int 263 embeddingLevelStack [maxDepth + 1]level 264 overrideStatusStack [maxDepth + 1]class 265 isolateStatusStack [maxDepth + 1]bool 266 } 267 268 func (s *directionalStatusStack) empty() { s.stackCounter = 0 } 269 func (s *directionalStatusStack) pop() { s.stackCounter-- } 270 func (s *directionalStatusStack) depth() int { return s.stackCounter } 271 272 func (s *directionalStatusStack) push(level level, overrideStatus class, isolateStatus bool) { 273 s.embeddingLevelStack[s.stackCounter] = level 274 s.overrideStatusStack[s.stackCounter] = overrideStatus 275 s.isolateStatusStack[s.stackCounter] = isolateStatus 276 s.stackCounter++ 277 } 278 279 func (s *directionalStatusStack) lastEmbeddingLevel() level { 280 return s.embeddingLevelStack[s.stackCounter-1] 281 } 282 283 func (s *directionalStatusStack) lastDirectionalOverrideStatus() class { 284 return s.overrideStatusStack[s.stackCounter-1] 285 } 286 287 func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool { 288 return s.isolateStatusStack[s.stackCounter-1] 289 } 290 291 // Determine explicit levels using rules X1 - X8 292 func (p *paragraph) determineExplicitEmbeddingLevels() { 293 var stack directionalStatusStack 294 var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int 295 296 // Rule X1. 
297 stack.push(p.embeddingLevel, _ON, false) 298 299 for i, t := range p.resultTypes { 300 // Rules X2, X3, X4, X5, X5a, X5b, X5c 301 switch t { 302 case _RLE, _LRE, _RLO, _LRO, _RLI, _LRI, _FSI: 303 isIsolate := t.in(_RLI, _LRI, _FSI) 304 isRTL := t.in(_RLE, _RLO, _RLI) 305 306 // override if this is an FSI that resolves to RLI 307 if t == _FSI { 308 isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1) 309 } 310 if isIsolate { 311 p.resultLevels[i] = stack.lastEmbeddingLevel() 312 } 313 314 var newLevel level 315 if isRTL { 316 // least greater odd 317 newLevel = (stack.lastEmbeddingLevel() + 1) | 1 318 } else { 319 // least greater even 320 newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1 321 } 322 323 if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 { 324 if isIsolate { 325 validIsolateCount++ 326 } 327 // Push new embedding level, override status, and isolated 328 // status. 329 // No check for valid stack counter, since the level check 330 // suffices. 331 switch t { 332 case _LRO: 333 stack.push(newLevel, _L, isIsolate) 334 case _RLO: 335 stack.push(newLevel, _R, isIsolate) 336 default: 337 stack.push(newLevel, _ON, isIsolate) 338 } 339 // Not really part of the spec 340 if !isIsolate { 341 p.resultLevels[i] = newLevel 342 } 343 } else { 344 // This is an invalid explicit formatting character, 345 // so apply the "Otherwise" part of rules X2-X5b. 
346 if isIsolate { 347 overflowIsolateCount++ 348 } else { // !isIsolate 349 if overflowIsolateCount == 0 { 350 overflowEmbeddingCount++ 351 } 352 } 353 } 354 355 // Rule X6a 356 case _PDI: 357 if overflowIsolateCount > 0 { 358 overflowIsolateCount-- 359 } else if validIsolateCount == 0 { 360 // do nothing 361 } else { 362 overflowEmbeddingCount = 0 363 for !stack.lastDirectionalIsolateStatus() { 364 stack.pop() 365 } 366 stack.pop() 367 validIsolateCount-- 368 } 369 p.resultLevels[i] = stack.lastEmbeddingLevel() 370 371 // Rule X7 372 case _PDF: 373 // Not really part of the spec 374 p.resultLevels[i] = stack.lastEmbeddingLevel() 375 376 if overflowIsolateCount > 0 { 377 // do nothing 378 } else if overflowEmbeddingCount > 0 { 379 overflowEmbeddingCount-- 380 } else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 { 381 stack.pop() 382 } 383 384 case _B: // paragraph separator. 385 // Rule X8. 386 387 // These values are reset for clarity, in this implementation B 388 // can only occur as the last code in the array. 
389 stack.empty() 390 overflowIsolateCount = 0 391 overflowEmbeddingCount = 0 392 validIsolateCount = 0 393 p.resultLevels[i] = p.embeddingLevel 394 395 default: 396 p.resultLevels[i] = stack.lastEmbeddingLevel() 397 if stack.lastDirectionalOverrideStatus() != _ON { 398 p.resultTypes[i] = stack.lastDirectionalOverrideStatus() 399 } 400 } 401 } 402 } 403 404 type isolatingRunSequence struct { 405 p *paragraph 406 407 indexes []int // indexes to the original string 408 409 types []class // type of each character using the index 410 resolvedLevels []level // resolved levels after application of rules 411 level level 412 sos, eos class 413 } 414 415 func (i *isolatingRunSequence) Len() int { return len(i.indexes) } 416 417 func maxLevel(a, b level) level { 418 if a > b { 419 return a 420 } 421 return b 422 } 423 424 // Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types, 425 // either L or R, for each isolating run sequence. 426 func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence { 427 length := len(indexes) 428 types := make([]class, length) 429 for i, x := range indexes { 430 types[i] = p.resultTypes[x] 431 } 432 433 // assign level, sos and eos 434 prevChar := indexes[0] - 1 435 for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) { 436 prevChar-- 437 } 438 prevLevel := p.embeddingLevel 439 if prevChar >= 0 { 440 prevLevel = p.resultLevels[prevChar] 441 } 442 443 var succLevel level 444 lastType := types[length-1] 445 if lastType.in(_LRI, _RLI, _FSI) { 446 succLevel = p.embeddingLevel 447 } else { 448 // the first character after the end of run sequence 449 limit := indexes[length-1] + 1 450 for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ { 451 452 } 453 succLevel = p.embeddingLevel 454 if limit < p.Len() { 455 succLevel = p.resultLevels[limit] 456 } 457 } 458 level := p.resultLevels[indexes[0]] 459 return &isolatingRunSequence{ 460 p: p, 461 indexes: indexes, 462 
types: types, 463 level: level, 464 sos: typeForLevel(maxLevel(prevLevel, level)), 465 eos: typeForLevel(maxLevel(succLevel, level)), 466 } 467 } 468 469 // Resolving weak types Rules W1-W7. 470 // 471 // Note that some weak types (EN, AN) remain after this processing is 472 // complete. 473 func (s *isolatingRunSequence) resolveWeakTypes() { 474 475 // on entry, only these types remain 476 s.assertOnly(_L, _R, _AL, _EN, _ES, _ET, _AN, _CS, _B, _S, _WS, _ON, _NSM, _LRI, _RLI, _FSI, _PDI) 477 478 // Rule W1. 479 // Changes all NSMs. 480 preceedingCharacterType := s.sos 481 for i, t := range s.types { 482 if t == _NSM { 483 s.types[i] = preceedingCharacterType 484 } else { 485 if t.in(_LRI, _RLI, _FSI, _PDI) { 486 preceedingCharacterType = _ON 487 } 488 preceedingCharacterType = t 489 } 490 } 491 492 // Rule W2. 493 // EN does not change at the start of the run, because sos != AL. 494 for i, t := range s.types { 495 if t == _EN { 496 for j := i - 1; j >= 0; j-- { 497 if t := s.types[j]; t.in(_L, _R, _AL) { 498 if t == _AL { 499 s.types[i] = _AN 500 } 501 break 502 } 503 } 504 } 505 } 506 507 // Rule W3. 508 for i, t := range s.types { 509 if t == _AL { 510 s.types[i] = _R 511 } 512 } 513 514 // Rule W4. 515 // Since there must be values on both sides for this rule to have an 516 // effect, the scan skips the first and last value. 517 // 518 // Although the scan proceeds left to right, and changes the type 519 // values in a way that would appear to affect the computations 520 // later in the scan, there is actually no problem. A change in the 521 // current value can only affect the value to its immediate right, 522 // and only affect it if it is ES or CS. But the current value can 523 // only change if the value to its right is not ES or CS. Thus 524 // either the current value will not change, or its change will have 525 // no effect on the remainder of the analysis. 
526 527 for i := 1; i < s.Len()-1; i++ { 528 t := s.types[i] 529 if t == _ES || t == _CS { 530 prevSepType := s.types[i-1] 531 succSepType := s.types[i+1] 532 if prevSepType == _EN && succSepType == _EN { 533 s.types[i] = _EN 534 } else if s.types[i] == _CS && prevSepType == _AN && succSepType == _AN { 535 s.types[i] = _AN 536 } 537 } 538 } 539 540 // Rule W5. 541 for i, t := range s.types { 542 if t == _ET { 543 // locate end of sequence 544 runStart := i 545 runEnd := s.findRunLimit(runStart, _ET) 546 547 // check values at ends of sequence 548 t := s.sos 549 if runStart > 0 { 550 t = s.types[runStart-1] 551 } 552 if t != _EN { 553 t = s.eos 554 if runEnd < len(s.types) { 555 t = s.types[runEnd] 556 } 557 } 558 if t == _EN { 559 setTypes(s.types[runStart:runEnd], _EN) 560 } 561 // continue at end of sequence 562 i = runEnd 563 } 564 } 565 566 // Rule W6. 567 for i, t := range s.types { 568 if t.in(_ES, _ET, _CS) { 569 s.types[i] = _ON 570 } 571 } 572 573 // Rule W7. 574 for i, t := range s.types { 575 if t == _EN { 576 // set default if we reach start of run 577 prevStrongType := s.sos 578 for j := i - 1; j >= 0; j-- { 579 t = s.types[j] 580 if t == _L || t == _R { // AL's have been changed to R 581 prevStrongType = t 582 break 583 } 584 } 585 if prevStrongType == _L { 586 s.types[i] = _L 587 } 588 } 589 } 590 } 591 592 // 6) resolving neutral types Rules N1-N2. 593 func (s *isolatingRunSequence) resolveNeutralTypes() { 594 595 // on entry, only these types can be in resultTypes 596 s.assertOnly(_L, _R, _EN, _AN, _B, _S, _WS, _ON, _RLI, _LRI, _FSI, _PDI) 597 598 for i, t := range s.types { 599 switch t { 600 case _WS, _ON, _B, _S, _RLI, _LRI, _FSI, _PDI: 601 // find bounds of run of neutrals 602 runStart := i 603 runEnd := s.findRunLimit(runStart, _B, _S, _WS, _ON, _RLI, _LRI, _FSI, _PDI) 604 605 // determine effective types at ends of run 606 var leadType, trailType class 607 608 // Note that the character found can only be L, R, AN, or 609 // EN. 
610 if runStart == 0 { 611 leadType = s.sos 612 } else { 613 leadType = s.types[runStart-1] 614 if leadType.in(_AN, _EN) { 615 leadType = _R 616 } 617 } 618 if runEnd == len(s.types) { 619 trailType = s.eos 620 } else { 621 trailType = s.types[runEnd] 622 if trailType.in(_AN, _EN) { 623 trailType = _R 624 } 625 } 626 627 var resolvedType class 628 if leadType == trailType { 629 // Rule N1. 630 resolvedType = leadType 631 } else { 632 // Rule N2. 633 // Notice the embedding level of the run is used, not 634 // the paragraph embedding level. 635 resolvedType = typeForLevel(s.level) 636 } 637 638 setTypes(s.types[runStart:runEnd], resolvedType) 639 640 // skip over run of (former) neutrals 641 i = runEnd 642 } 643 } 644 } 645 646 func setLevels(levels []level, newLevel level) { 647 for i := range levels { 648 levels[i] = newLevel 649 } 650 } 651 652 func setTypes(types []class, newType class) { 653 for i := range types { 654 types[i] = newType 655 } 656 } 657 658 // 7) resolving implicit embedding levels Rules I1, I2. 659 func (s *isolatingRunSequence) resolveImplicitLevels() { 660 661 // on entry, only these types can be in resultTypes 662 s.assertOnly(_L, _R, _EN, _AN) 663 664 s.resolvedLevels = make([]level, len(s.types)) 665 setLevels(s.resolvedLevels, s.level) 666 667 if (s.level & 1) == 0 { // even level 668 for i, t := range s.types { 669 // Rule I1. 670 if t == _L { 671 // no change 672 } else if t == _R { 673 s.resolvedLevels[i] += 1 674 } else { // t == _AN || t == _EN 675 s.resolvedLevels[i] += 2 676 } 677 } 678 } else { // odd level 679 for i, t := range s.types { 680 // Rule I2. 681 if t == _R { 682 // no change 683 } else { // t == _L || t == _AN || t == _EN 684 s.resolvedLevels[i] += 1 685 } 686 } 687 } 688 } 689 690 // Applies the levels and types resolved in rules W1-I2 to the 691 // resultLevels array. 
692 func (s *isolatingRunSequence) applyLevelsAndTypes() { 693 for i, x := range s.indexes { 694 s.p.resultTypes[x] = s.types[i] 695 s.p.resultLevels[x] = s.resolvedLevels[i] 696 } 697 } 698 699 // Return the limit of the run consisting only of the types in validSet 700 // starting at index. This checks the value at index, and will return 701 // index if that value is not in validSet. 702 func (s *isolatingRunSequence) findRunLimit(index int, validSet ...class) int { 703 loop: 704 for ; index < len(s.types); index++ { 705 t := s.types[index] 706 for _, valid := range validSet { 707 if t == valid { 708 continue loop 709 } 710 } 711 return index // didn't find a match in validSet 712 } 713 return len(s.types) 714 } 715 716 // Algorithm validation. Assert that all values in types are in the 717 // provided set. 718 func (s *isolatingRunSequence) assertOnly(codes ...class) { 719 loop: 720 for i, t := range s.types { 721 for _, c := range codes { 722 if t == c { 723 continue loop 724 } 725 } 726 log.Panicf("invalid bidi code %s present in assertOnly at position %d", t, s.indexes[i]) 727 } 728 } 729 730 // determineLevelRuns returns an array of level runs. Each level run is 731 // described as an array of indexes into the input string. 732 // 733 // Determines the level runs. Rule X9 will be applied in determining the 734 // runs, in the way that makes sure the characters that are supposed to be 735 // removed are not included in the runs. 
736 func (p *paragraph) determineLevelRuns() [][]int { 737 run := []int{} 738 allRuns := [][]int{} 739 currentLevel := implicitLevel 740 741 for i := range p.initialTypes { 742 if !isRemovedByX9(p.initialTypes[i]) { 743 if p.resultLevels[i] != currentLevel { 744 // we just encountered a new run; wrap up last run 745 if currentLevel >= 0 { // only wrap it up if there was a run 746 allRuns = append(allRuns, run) 747 run = nil 748 } 749 // Start new run 750 currentLevel = p.resultLevels[i] 751 } 752 run = append(run, i) 753 } 754 } 755 // Wrap up the final run, if any 756 if len(run) > 0 { 757 allRuns = append(allRuns, run) 758 } 759 return allRuns 760 } 761 762 // Definition BD13. Determine isolating run sequences. 763 func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence { 764 levelRuns := p.determineLevelRuns() 765 766 // Compute the run that each character belongs to 767 runForCharacter := make([]int, p.Len()) 768 for i, run := range levelRuns { 769 for _, index := range run { 770 runForCharacter[index] = i 771 } 772 } 773 774 sequences := []*isolatingRunSequence{} 775 776 var currentRunSequence []int 777 778 for _, run := range levelRuns { 779 first := run[0] 780 if p.initialTypes[first] != _PDI || p.matchingIsolateInitiator[first] == -1 { 781 currentRunSequence = nil 782 // int run = i; 783 for { 784 // Copy this level run into currentRunSequence 785 currentRunSequence = append(currentRunSequence, run...) 786 787 last := currentRunSequence[len(currentRunSequence)-1] 788 lastT := p.initialTypes[last] 789 if lastT.in(_LRI, _RLI, _FSI) && p.matchingPDI[last] != p.Len() { 790 run = levelRuns[runForCharacter[p.matchingPDI[last]]] 791 } else { 792 break 793 } 794 } 795 sequences = append(sequences, p.isolatingRunSequence(currentRunSequence)) 796 } 797 } 798 return sequences 799 } 800 801 // Assign level information to characters removed by rule X9. This is for 802 // ease of relating the level information to the original input data. 
Note 803 // that the levels assigned to these codes are arbitrary, they're chosen so 804 // as to avoid breaking level runs. 805 func (p *paragraph) assignLevelsToCharactersRemovedByX9() { 806 for i, t := range p.initialTypes { 807 if t.in(_LRE, _RLE, _LRO, _RLO, _PDF, _BN) { 808 p.resultTypes[i] = t 809 p.resultLevels[i] = -1 810 } 811 } 812 // now propagate forward the levels information (could have 813 // propagated backward, the main thing is not to introduce a level 814 // break where one doesn't already exist). 815 816 if p.resultLevels[0] == -1 { 817 p.resultLevels[0] = p.embeddingLevel 818 } 819 for i := 1; i < len(p.initialTypes); i++ { 820 if p.resultLevels[i] == -1 { 821 p.resultLevels[i] = p.resultLevels[i-1] 822 } 823 } 824 // Embedding information is for informational purposes only so need not be 825 // adjusted. 826 } 827 828 // 829 // Output 830 // 831 832 // getLevels computes levels array breaking lines at offsets in linebreaks. 833 // Rule L1. 834 // 835 // The linebreaks array must include at least one value. The values must be 836 // in strictly increasing order (no duplicates) between 1 and the length of 837 // the text, inclusive. The last value must be the length of the text. 838 func (p *paragraph) getLevels(linebreaks []int) []level { 839 // Note that since the previous processing has removed all 840 // P, S, and WS values from resultTypes, the values referred to 841 // in these rules are the initial types, before any processing 842 // has been applied (including processing of overrides). 843 // 844 // This example implementation has reinserted explicit format codes 845 // and BN, in order that the levels array correspond to the 846 // initial text. Their final placement is not normative. 847 // These codes are treated like WS in this implementation, 848 // so they don't interrupt sequences of WS. 849 850 validateLineBreaks(linebreaks, p.Len()) 851 852 result := append([]level(nil), p.resultLevels...) 
853 854 // don't worry about linebreaks since if there is a break within 855 // a series of WS values preceding S, the linebreak itself 856 // causes the reset. 857 for i, t := range p.initialTypes { 858 if t.in(_B, _S) { 859 // Rule L1, clauses one and two. 860 result[i] = p.embeddingLevel 861 862 // Rule L1, clause three. 863 for j := i - 1; j >= 0; j-- { 864 if isWhitespace(p.initialTypes[j]) { // including format codes 865 result[j] = p.embeddingLevel 866 } else { 867 break 868 } 869 } 870 } 871 } 872 873 // Rule L1, clause four. 874 start := 0 875 for _, limit := range linebreaks { 876 for j := limit - 1; j >= start; j-- { 877 if isWhitespace(p.initialTypes[j]) { // including format codes 878 result[j] = p.embeddingLevel 879 } else { 880 break 881 } 882 } 883 start = limit 884 } 885 886 return result 887 } 888 889 // getReordering returns the reordering of lines from a visual index to a 890 // logical index for line breaks at the given offsets. 891 // 892 // Lines are concatenated from left to right. So for example, the fifth 893 // character from the left on the third line is 894 // 895 // getReordering(linebreaks)[linebreaks[1] + 4] 896 // 897 // (linebreaks[1] is the position after the last character of the second 898 // line, which is also the index of the first character on the third line, 899 // and adding four gets the fifth character from the left). 900 // 901 // The linebreaks array must include at least one value. The values must be 902 // in strictly increasing order (no duplicates) between 1 and the length of 903 // the text, inclusive. The last value must be the length of the text. 904 func (p *paragraph) getReordering(linebreaks []int) []int { 905 validateLineBreaks(linebreaks, p.Len()) 906 907 return computeMultilineReordering(p.getLevels(linebreaks), linebreaks) 908 } 909 910 // Return multiline reordering array for a given level array. Reordering 911 // does not occur across a line break. 
912 func computeMultilineReordering(levels []level, linebreaks []int) []int { 913 result := make([]int, len(levels)) 914 915 start := 0 916 for _, limit := range linebreaks { 917 tempLevels := make([]level, limit-start) 918 copy(tempLevels, levels[start:]) 919 920 for j, order := range computeReordering(tempLevels) { 921 result[start+j] = order + start 922 } 923 start = limit 924 } 925 return result 926 } 927 928 // Return reordering array for a given level array. This reorders a single 929 // line. The reordering is a visual to logical map. For example, the 930 // leftmost char is string.charAt(order[0]). Rule L2. 931 func computeReordering(levels []level) []int { 932 result := make([]int, len(levels)) 933 // initialize order 934 for i := range result { 935 result[i] = i 936 } 937 938 // locate highest level found on line. 939 // Note the rules say text, but no reordering across line bounds is 940 // performed, so this is sufficient. 941 highestLevel := level(0) 942 lowestOddLevel := level(maxDepth + 2) 943 for _, level := range levels { 944 if level > highestLevel { 945 highestLevel = level 946 } 947 if level&1 != 0 && level < lowestOddLevel { 948 lowestOddLevel = level 949 } 950 } 951 952 for level := highestLevel; level >= lowestOddLevel; level-- { 953 for i := 0; i < len(levels); i++ { 954 if levels[i] >= level { 955 // find range of text at or above this level 956 start := i 957 limit := i + 1 958 for limit < len(levels) && levels[limit] >= level { 959 limit++ 960 } 961 962 for j, k := start, limit-1; j < k; j, k = j+1, k-1 { 963 result[j], result[k] = result[k], result[j] 964 } 965 // skip to end of level run 966 i = limit 967 } 968 } 969 } 970 971 return result 972 } 973 974 // isWhitespace reports whether the type is considered a whitespace type for the 975 // line break rules. 
976 func isWhitespace(c class) bool { 977 switch c { 978 case _LRE, _RLE, _LRO, _RLO, _PDF, _LRI, _RLI, _FSI, _PDI, _BN, _WS: 979 return true 980 } 981 return false 982 } 983 984 // isRemovedByX9 reports whether the type is one of the types removed in X9. 985 func isRemovedByX9(c class) bool { 986 switch c { 987 case _LRE, _RLE, _LRO, _RLO, _PDF, _BN: 988 return true 989 } 990 return false 991 } 992 993 // typeForLevel reports the strong type (L or R) corresponding to the level. 994 func typeForLevel(level level) class { 995 if (level & 0x1) == 0 { 996 return _L 997 } 998 return _R 999 } 1000 1001 // TODO: change validation to not panic 1002 1003 func validateTypes(types []class) { 1004 if len(types) == 0 { 1005 log.Panic("types is null") 1006 } 1007 for i, t := range types[:len(types)-1] { 1008 if t == _B { 1009 log.Panicf("B type before end of paragraph at index: %d", i) 1010 } 1011 } 1012 } 1013 1014 func validateParagraphEmbeddingLevel(embeddingLevel level) { 1015 if embeddingLevel != implicitLevel && 1016 embeddingLevel != 0 && 1017 embeddingLevel != 1 { 1018 log.Panicf("illegal paragraph embedding level: %d", embeddingLevel) 1019 } 1020 } 1021 1022 func validateLineBreaks(linebreaks []int, textLength int) { 1023 prev := 0 1024 for i, next := range linebreaks { 1025 if next <= prev { 1026 log.Panicf("bad linebreak: %d at index: %d", next, i) 1027 } 1028 prev = next 1029 } 1030 if prev != textLength { 1031 log.Panicf("last linebreak was %d, want %d", prev, textLength) 1032 } 1033 } 1034 1035 func validatePbTypes(pairTypes []bracketType) { 1036 if len(pairTypes) == 0 { 1037 log.Panic("pairTypes is null") 1038 } 1039 for i, pt := range pairTypes { 1040 switch pt { 1041 case bpNone, bpOpen, bpClose: 1042 default: 1043 log.Panicf("illegal pairType value at %d: %v", i, pairTypes[i]) 1044 } 1045 } 1046 } 1047 1048 func validatePbValues(pairValues []rune, pairTypes []bracketType) { 1049 if pairValues == nil { 1050 log.Panic("pairValues is null") 1051 } 1052 if 
len(pairTypes) != len(pairValues) { 1053 log.Panic("pairTypes is different length from pairValues") 1054 } 1055 }