github.com/go-xe2/third@v1.0.3/golang.org/x/text/unicode/bidi/core.go

github.com/go-xe2/third@v1.0.3/golang.org/x/text/unicode/bidi/core.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package bidi
     6  
     7  import "log"
     8  
     9  // This implementation is a port based on the reference implementation found at:
    10  // http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
    11  //
    12  // described in Unicode Bidirectional Algorithm (UAX #9).
    13  //
    14  // Input:
    15  // There are two levels of input to the algorithm, since clients may prefer to
    16  // supply some information from out-of-band sources rather than relying on the
    17  // default behavior.
    18  //
    19  // - Bidi class array
    20  // - Bidi class array, with externally supplied base line direction
    21  //
    22  // Output:
    23  // Output is separated into several stages:
    24  //
    25  //  - levels array over entire paragraph
    26  //  - reordering array over entire paragraph
    27  //  - levels array over line
    28  //  - reordering array over line
    29  //
    30  // Note that for conformance to the Unicode Bidirectional Algorithm,
    31  // implementations are only required to generate correct reordering and
    32  // character directionality (odd or even levels) over a line. Generating
    33  // identical level arrays over a line is not required. Bidi explicit format
    34  // codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and
    35  // positions as long as the rest of the input is properly reordered.
    36  //
    37  // As the algorithm is defined to operate on a single paragraph at a time, this
    38  // implementation is written to handle single paragraphs. Thus rule P1 is
    39  // presumed by this implementation-- the data provided to the implementation is
    40  // assumed to be a single paragraph, and either contains no 'B' codes, or a
    41  // single 'B' code at the end of the input. 'B' is allowed as input to
    42  // illustrate how the algorithm assigns it a level.
    43  //
    44  // Also note that rules L3 and L4 depend on the rendering engine that uses the
    45  // result of the bidi algorithm. This implementation assumes that the rendering
    46  // engine expects combining marks in visual order (e.g. to the left of their
    47  // base character in RTL runs) and that it adjusts the glyphs used to render
    48  // mirrored characters that are in RTL runs so that they render appropriately.
    49  
// level is the embedding level of a character. Even embedding levels indicate
// left-to-right order and odd levels indicate right-to-left order. The special
// level of -1 is reserved for undefined order.
type level int8

// implicitLevel is the "undefined" level (-1). A paragraph whose
// embeddingLevel equals implicitLevel has its level derived from the text
// when the algorithm runs instead of using a caller-supplied value.
const implicitLevel level = -1
    56  
    57  // in returns if x is equal to any of the values in set.
    58  func (c Class) in(set ...Class) bool {
    59  	for _, s := range set {
    60  		if c == s {
    61  			return true
    62  		}
    63  	}
    64  	return false
    65  }
    66  
// A paragraph contains the state of a paragraph.
type paragraph struct {
	// initialTypes holds the bidi class of every character as supplied by
	// the caller. newParagraph stores a private copy, and its length is what
	// Len reports.
	initialTypes []Class

	// Arrays of properties needed for paired bracket evaluation in N0
	pairTypes  []bracketType // paired Bracket types for paragraph
	pairValues []rune        // rune for opening bracket or pbOpen and pbClose; 0 for pbNone

	embeddingLevel level // default: = implicitLevel;

	// at the paragraph levels
	//
	// resultTypes starts out as a second copy of the caller's types and is
	// rewritten while the rules are applied; resultLevels is allocated by run
	// with one entry per character.
	resultTypes  []Class
	resultLevels []level

	// Index of matching PDI for isolate initiator characters. For other
	// characters, the value of matchingPDI will be set to -1. For isolate
	// initiators with no matching PDI, matchingPDI will be set to the length of
	// the input string.
	matchingPDI []int

	// Index of matching isolate initiator for PDI characters. For other
	// characters, and for PDIs with no matching isolate initiator, the value of
	// matchingIsolateInitiator will be set to -1.
	matchingIsolateInitiator []int
}
    92  
    93  // newParagraph initializes a paragraph. The user needs to supply a few arrays
    94  // corresponding to the preprocessed text input. The types correspond to the
    95  // Unicode BiDi classes for each rune. pairTypes indicates the bracket type for
    96  // each rune. pairValues provides a unique bracket class identifier for each
    97  // rune (suggested is the rune of the open bracket for opening and matching
    98  // close brackets, after normalization). The embedding levels are optional, but
    99  // may be supplied to encode embedding levels of styled text.
   100  //
   101  // TODO: return an error.
   102  func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph {
   103  	validateTypes(types)
   104  	validatePbTypes(pairTypes)
   105  	validatePbValues(pairValues, pairTypes)
   106  	validateParagraphEmbeddingLevel(levels)
   107  
   108  	p := &paragraph{
   109  		initialTypes:   append([]Class(nil), types...),
   110  		embeddingLevel: levels,
   111  
   112  		pairTypes:  pairTypes,
   113  		pairValues: pairValues,
   114  
   115  		resultTypes: append([]Class(nil), types...),
   116  	}
   117  	p.run()
   118  	return p
   119  }
   120  
   121  func (p *paragraph) Len() int { return len(p.initialTypes) }
   122  
   123  // The algorithm. Does not include line-based processing (Rules L1, L2).
   124  // These are applied later in the line-based phase of the algorithm.
   125  func (p *paragraph) run() {
   126  	p.determineMatchingIsolates()
   127  
   128  	// 1) determining the paragraph level
   129  	// Rule P1 is the requirement for entering this algorithm.
   130  	// Rules P2, P3.
   131  	// If no externally supplied paragraph embedding level, use default.
   132  	if p.embeddingLevel == implicitLevel {
   133  		p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len())
   134  	}
   135  
   136  	// Initialize result levels to paragraph embedding level.
   137  	p.resultLevels = make([]level, p.Len())
   138  	setLevels(p.resultLevels, p.embeddingLevel)
   139  
   140  	// 2) Explicit levels and directions
   141  	// Rules X1-X8.
   142  	p.determineExplicitEmbeddingLevels()
   143  
   144  	// Rule X9.
   145  	// We do not remove the embeddings, the overrides, the PDFs, and the BNs
   146  	// from the string explicitly. But they are not copied into isolating run
   147  	// sequences when they are created, so they are removed for all
   148  	// practical purposes.
   149  
   150  	// Rule X10.
   151  	// Run remainder of algorithm one isolating run sequence at a time
   152  	for _, seq := range p.determineIsolatingRunSequences() {
   153  		// 3) resolving weak types
   154  		// Rules W1-W7.
   155  		seq.resolveWeakTypes()
   156  
   157  		// 4a) resolving paired brackets
   158  		// Rule N0
   159  		resolvePairedBrackets(seq)
   160  
   161  		// 4b) resolving neutral types
   162  		// Rules N1-N3.
   163  		seq.resolveNeutralTypes()
   164  
   165  		// 5) resolving implicit embedding levels
   166  		// Rules I1, I2.
   167  		seq.resolveImplicitLevels()
   168  
   169  		// Apply the computed levels and types
   170  		seq.applyLevelsAndTypes()
   171  	}
   172  
   173  	// Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and
   174  	// BNs. This is for convenience, so the resulting level array will have
   175  	// a value for every character.
   176  	p.assignLevelsToCharactersRemovedByX9()
   177  }
   178  
   179  // determineMatchingIsolates determines the matching PDI for each isolate
   180  // initiator and vice versa.
   181  //
   182  // Definition BD9.
   183  //
   184  // At the end of this function:
   185  //
   186  //  - The member variable matchingPDI is set to point to the index of the
   187  //    matching PDI character for each isolate initiator character. If there is
   188  //    no matching PDI, it is set to the length of the input text. For other
   189  //    characters, it is set to -1.
   190  //  - The member variable matchingIsolateInitiator is set to point to the
   191  //    index of the matching isolate initiator character for each PDI character.
   192  //    If there is no matching isolate initiator, or the character is not a PDI,
   193  //    it is set to -1.
   194  func (p *paragraph) determineMatchingIsolates() {
   195  	p.matchingPDI = make([]int, p.Len())
   196  	p.matchingIsolateInitiator = make([]int, p.Len())
   197  
   198  	for i := range p.matchingIsolateInitiator {
   199  		p.matchingIsolateInitiator[i] = -1
   200  	}
   201  
   202  	for i := range p.matchingPDI {
   203  		p.matchingPDI[i] = -1
   204  
   205  		if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) {
   206  			depthCounter := 1
   207  			for j := i + 1; j < p.Len(); j++ {
   208  				if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) {
   209  					depthCounter++
   210  				} else if u == PDI {
   211  					if depthCounter--; depthCounter == 0 {
   212  						p.matchingPDI[i] = j
   213  						p.matchingIsolateInitiator[j] = i
   214  						break
   215  					}
   216  				}
   217  			}
   218  			if p.matchingPDI[i] == -1 {
   219  				p.matchingPDI[i] = p.Len()
   220  			}
   221  		}
   222  	}
   223  }
   224  
   225  // determineParagraphEmbeddingLevel reports the resolved paragraph direction of
   226  // the substring limited by the given range [start, end).
   227  //
   228  // Determines the paragraph level based on rules P2, P3. This is also used
   229  // in rule X5c to find if an FSI should resolve to LRI or RLI.
   230  func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level {
   231  	var strongType Class = unknownClass
   232  
   233  	// Rule P2.
   234  	for i := start; i < end; i++ {
   235  		if t := p.resultTypes[i]; t.in(L, AL, R) {
   236  			strongType = t
   237  			break
   238  		} else if t.in(FSI, LRI, RLI) {
   239  			i = p.matchingPDI[i] // skip over to the matching PDI
   240  			if i > end {
   241  				log.Panic("assert (i <= end)")
   242  			}
   243  		}
   244  	}
   245  	// Rule P3.
   246  	switch strongType {
   247  	case unknownClass: // none found
   248  		// default embedding level when no strong types found is 0.
   249  		return 0
   250  	case L:
   251  		return 0
   252  	default: // AL, R
   253  		return 1
   254  	}
   255  }
   256  
   257  const maxDepth = 125
   258  
   259  // This stack will store the embedding levels and override and isolated
   260  // statuses
   261  type directionalStatusStack struct {
   262  	stackCounter        int
   263  	embeddingLevelStack [maxDepth + 1]level
   264  	overrideStatusStack [maxDepth + 1]Class
   265  	isolateStatusStack  [maxDepth + 1]bool
   266  }
   267  
   268  func (s *directionalStatusStack) empty()     { s.stackCounter = 0 }
   269  func (s *directionalStatusStack) pop()       { s.stackCounter-- }
   270  func (s *directionalStatusStack) depth() int { return s.stackCounter }
   271  
   272  func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) {
   273  	s.embeddingLevelStack[s.stackCounter] = level
   274  	s.overrideStatusStack[s.stackCounter] = overrideStatus
   275  	s.isolateStatusStack[s.stackCounter] = isolateStatus
   276  	s.stackCounter++
   277  }
   278  
   279  func (s *directionalStatusStack) lastEmbeddingLevel() level {
   280  	return s.embeddingLevelStack[s.stackCounter-1]
   281  }
   282  
   283  func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class {
   284  	return s.overrideStatusStack[s.stackCounter-1]
   285  }
   286  
   287  func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool {
   288  	return s.isolateStatusStack[s.stackCounter-1]
   289  }
   290  
// Determine explicit levels using rules X1 - X8
//
// The function walks p.resultTypes once, keeping a directional status stack
// plus three counters (overflow isolates, overflow embeddings, valid
// isolates). For every character it records a level in p.resultLevels; while
// a directional override is in force (top-of-stack override status other than
// ON) it also overwrites the character's entry in p.resultTypes with the
// override direction. ON is used as the "no override" marker on the stack.
func (p *paragraph) determineExplicitEmbeddingLevels() {
	var stack directionalStatusStack
	var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int

	// Rule X1.
	stack.push(p.embeddingLevel, ON, false)

	for i, t := range p.resultTypes {
		// Rules X2, X3, X4, X5, X5a, X5b, X5c
		switch t {
		case RLE, LRE, RLO, LRO, RLI, LRI, FSI:
			isIsolate := t.in(RLI, LRI, FSI)
			isRTL := t.in(RLE, RLO, RLI)

			// override if this is an FSI that resolves to RLI
			if t == FSI {
				isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1)
			}
			// An isolate initiator itself stays at the level that was in
			// force before it, and is subject to any active override.
			if isIsolate {
				p.resultLevels[i] = stack.lastEmbeddingLevel()
				if stack.lastDirectionalOverrideStatus() != ON {
					p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
				}
			}

			var newLevel level
			if isRTL {
				// least greater odd
				newLevel = (stack.lastEmbeddingLevel() + 1) | 1
			} else {
				// least greater even
				newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1
			}

			if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 {
				if isIsolate {
					validIsolateCount++
				}
				// Push new embedding level, override status, and isolated
				// status.
				// No check for valid stack counter, since the level check
				// suffices.
				switch t {
				case LRO:
					stack.push(newLevel, L, isIsolate)
				case RLO:
					stack.push(newLevel, R, isIsolate)
				default:
					stack.push(newLevel, ON, isIsolate)
				}
				// Not really part of the spec
				if !isIsolate {
					p.resultLevels[i] = newLevel
				}
			} else {
				// This is an invalid explicit formatting character,
				// so apply the "Otherwise" part of rules X2-X5b.
				if isIsolate {
					overflowIsolateCount++
				} else { // !isIsolate
					if overflowIsolateCount == 0 {
						overflowEmbeddingCount++
					}
				}
			}

		// Rule X6a
		case PDI:
			if overflowIsolateCount > 0 {
				overflowIsolateCount--
			} else if validIsolateCount == 0 {
				// do nothing
			} else {
				overflowEmbeddingCount = 0
				// Drop any embeddings/overrides opened inside the isolate,
				// then the isolate entry itself.
				for !stack.lastDirectionalIsolateStatus() {
					stack.pop()
				}
				stack.pop()
				validIsolateCount--
			}
			p.resultLevels[i] = stack.lastEmbeddingLevel()

		// Rule X7
		case PDF:
			// Not really part of the spec
			p.resultLevels[i] = stack.lastEmbeddingLevel()

			if overflowIsolateCount > 0 {
				// do nothing
			} else if overflowEmbeddingCount > 0 {
				overflowEmbeddingCount--
			} else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 {
				// depth() >= 2 keeps the entry pushed for rule X1 in place.
				stack.pop()
			}

		case B: // paragraph separator.
			// Rule X8.

			// These values are reset for clarity, in this implementation B
			// can only occur as the last code in the array.
			//
			// NOTE(review): after stack.empty() the stack has no entries, so
			// any further iteration that reads the top of the stack would
			// index out of range; this relies on B really being last.
			stack.empty()
			overflowIsolateCount = 0
			overflowEmbeddingCount = 0
			validIsolateCount = 0
			p.resultLevels[i] = p.embeddingLevel

		default:
			p.resultLevels[i] = stack.lastEmbeddingLevel()
			if stack.lastDirectionalOverrideStatus() != ON {
				p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
			}
		}
	}
}
   406  
   407  type isolatingRunSequence struct {
   408  	p *paragraph
   409  
   410  	indexes []int // indexes to the original string
   411  
   412  	types          []Class // type of each character using the index
   413  	resolvedLevels []level // resolved levels after application of rules
   414  	level          level
   415  	sos, eos       Class
   416  }
   417  
   418  func (i *isolatingRunSequence) Len() int { return len(i.indexes) }
   419  
   420  func maxLevel(a, b level) level {
   421  	if a > b {
   422  		return a
   423  	}
   424  	return b
   425  }
   426  
   427  // Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types,
   428  // 			 either L or R, for each isolating run sequence.
   429  func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
   430  	length := len(indexes)
   431  	types := make([]Class, length)
   432  	for i, x := range indexes {
   433  		types[i] = p.resultTypes[x]
   434  	}
   435  
   436  	// assign level, sos and eos
   437  	prevChar := indexes[0] - 1
   438  	for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) {
   439  		prevChar--
   440  	}
   441  	prevLevel := p.embeddingLevel
   442  	if prevChar >= 0 {
   443  		prevLevel = p.resultLevels[prevChar]
   444  	}
   445  
   446  	var succLevel level
   447  	lastType := types[length-1]
   448  	if lastType.in(LRI, RLI, FSI) {
   449  		succLevel = p.embeddingLevel
   450  	} else {
   451  		// the first character after the end of run sequence
   452  		limit := indexes[length-1] + 1
   453  		for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ {
   454  
   455  		}
   456  		succLevel = p.embeddingLevel
   457  		if limit < p.Len() {
   458  			succLevel = p.resultLevels[limit]
   459  		}
   460  	}
   461  	level := p.resultLevels[indexes[0]]
   462  	return &isolatingRunSequence{
   463  		p:       p,
   464  		indexes: indexes,
   465  		types:   types,
   466  		level:   level,
   467  		sos:     typeForLevel(maxLevel(prevLevel, level)),
   468  		eos:     typeForLevel(maxLevel(succLevel, level)),
   469  	}
   470  }
   471  
   472  // Resolving weak types Rules W1-W7.
   473  //
   474  // Note that some weak types (EN, AN) remain after this processing is
   475  // complete.
   476  func (s *isolatingRunSequence) resolveWeakTypes() {
   477  
   478  	// on entry, only these types remain
   479  	s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI)
   480  
   481  	// Rule W1.
   482  	// Changes all NSMs.
   483  	preceedingCharacterType := s.sos
   484  	for i, t := range s.types {
   485  		if t == NSM {
   486  			s.types[i] = preceedingCharacterType
   487  		} else {
   488  			if t.in(LRI, RLI, FSI, PDI) {
   489  				preceedingCharacterType = ON
   490  			}
   491  			preceedingCharacterType = t
   492  		}
   493  	}
   494  
   495  	// Rule W2.
   496  	// EN does not change at the start of the run, because sos != AL.
   497  	for i, t := range s.types {
   498  		if t == EN {
   499  			for j := i - 1; j >= 0; j-- {
   500  				if t := s.types[j]; t.in(L, R, AL) {
   501  					if t == AL {
   502  						s.types[i] = AN
   503  					}
   504  					break
   505  				}
   506  			}
   507  		}
   508  	}
   509  
   510  	// Rule W3.
   511  	for i, t := range s.types {
   512  		if t == AL {
   513  			s.types[i] = R
   514  		}
   515  	}
   516  
   517  	// Rule W4.
   518  	// Since there must be values on both sides for this rule to have an
   519  	// effect, the scan skips the first and last value.
   520  	//
   521  	// Although the scan proceeds left to right, and changes the type
   522  	// values in a way that would appear to affect the computations
   523  	// later in the scan, there is actually no problem. A change in the
   524  	// current value can only affect the value to its immediate right,
   525  	// and only affect it if it is ES or CS. But the current value can
   526  	// only change if the value to its right is not ES or CS. Thus
   527  	// either the current value will not change, or its change will have
   528  	// no effect on the remainder of the analysis.
   529  
   530  	for i := 1; i < s.Len()-1; i++ {
   531  		t := s.types[i]
   532  		if t == ES || t == CS {
   533  			prevSepType := s.types[i-1]
   534  			succSepType := s.types[i+1]
   535  			if prevSepType == EN && succSepType == EN {
   536  				s.types[i] = EN
   537  			} else if s.types[i] == CS && prevSepType == AN && succSepType == AN {
   538  				s.types[i] = AN
   539  			}
   540  		}
   541  	}
   542  
   543  	// Rule W5.
   544  	for i, t := range s.types {
   545  		if t == ET {
   546  			// locate end of sequence
   547  			runStart := i
   548  			runEnd := s.findRunLimit(runStart, ET)
   549  
   550  			// check values at ends of sequence
   551  			t := s.sos
   552  			if runStart > 0 {
   553  				t = s.types[runStart-1]
   554  			}
   555  			if t != EN {
   556  				t = s.eos
   557  				if runEnd < len(s.types) {
   558  					t = s.types[runEnd]
   559  				}
   560  			}
   561  			if t == EN {
   562  				setTypes(s.types[runStart:runEnd], EN)
   563  			}
   564  			// continue at end of sequence
   565  			i = runEnd
   566  		}
   567  	}
   568  
   569  	// Rule W6.
   570  	for i, t := range s.types {
   571  		if t.in(ES, ET, CS) {
   572  			s.types[i] = ON
   573  		}
   574  	}
   575  
   576  	// Rule W7.
   577  	for i, t := range s.types {
   578  		if t == EN {
   579  			// set default if we reach start of run
   580  			prevStrongType := s.sos
   581  			for j := i - 1; j >= 0; j-- {
   582  				t = s.types[j]
   583  				if t == L || t == R { // AL's have been changed to R
   584  					prevStrongType = t
   585  					break
   586  				}
   587  			}
   588  			if prevStrongType == L {
   589  				s.types[i] = L
   590  			}
   591  		}
   592  	}
   593  }
   594  
   595  // 6) resolving neutral types Rules N1-N2.
   596  func (s *isolatingRunSequence) resolveNeutralTypes() {
   597  
   598  	// on entry, only these types can be in resultTypes
   599  	s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI)
   600  
   601  	for i, t := range s.types {
   602  		switch t {
   603  		case WS, ON, B, S, RLI, LRI, FSI, PDI:
   604  			// find bounds of run of neutrals
   605  			runStart := i
   606  			runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI)
   607  
   608  			// determine effective types at ends of run
   609  			var leadType, trailType Class
   610  
   611  			// Note that the character found can only be L, R, AN, or
   612  			// EN.
   613  			if runStart == 0 {
   614  				leadType = s.sos
   615  			} else {
   616  				leadType = s.types[runStart-1]
   617  				if leadType.in(AN, EN) {
   618  					leadType = R
   619  				}
   620  			}
   621  			if runEnd == len(s.types) {
   622  				trailType = s.eos
   623  			} else {
   624  				trailType = s.types[runEnd]
   625  				if trailType.in(AN, EN) {
   626  					trailType = R
   627  				}
   628  			}
   629  
   630  			var resolvedType Class
   631  			if leadType == trailType {
   632  				// Rule N1.
   633  				resolvedType = leadType
   634  			} else {
   635  				// Rule N2.
   636  				// Notice the embedding level of the run is used, not
   637  				// the paragraph embedding level.
   638  				resolvedType = typeForLevel(s.level)
   639  			}
   640  
   641  			setTypes(s.types[runStart:runEnd], resolvedType)
   642  
   643  			// skip over run of (former) neutrals
   644  			i = runEnd
   645  		}
   646  	}
   647  }
   648  
   649  func setLevels(levels []level, newLevel level) {
   650  	for i := range levels {
   651  		levels[i] = newLevel
   652  	}
   653  }
   654  
   655  func setTypes(types []Class, newType Class) {
   656  	for i := range types {
   657  		types[i] = newType
   658  	}
   659  }
   660  
   661  // 7) resolving implicit embedding levels Rules I1, I2.
   662  func (s *isolatingRunSequence) resolveImplicitLevels() {
   663  
   664  	// on entry, only these types can be in resultTypes
   665  	s.assertOnly(L, R, EN, AN)
   666  
   667  	s.resolvedLevels = make([]level, len(s.types))
   668  	setLevels(s.resolvedLevels, s.level)
   669  
   670  	if (s.level & 1) == 0 { // even level
   671  		for i, t := range s.types {
   672  			// Rule I1.
   673  			if t == L {
   674  				// no change
   675  			} else if t == R {
   676  				s.resolvedLevels[i] += 1
   677  			} else { // t == AN || t == EN
   678  				s.resolvedLevels[i] += 2
   679  			}
   680  		}
   681  	} else { // odd level
   682  		for i, t := range s.types {
   683  			// Rule I2.
   684  			if t == R {
   685  				// no change
   686  			} else { // t == L || t == AN || t == EN
   687  				s.resolvedLevels[i] += 1
   688  			}
   689  		}
   690  	}
   691  }
   692  
   693  // Applies the levels and types resolved in rules W1-I2 to the
   694  // resultLevels array.
   695  func (s *isolatingRunSequence) applyLevelsAndTypes() {
   696  	for i, x := range s.indexes {
   697  		s.p.resultTypes[x] = s.types[i]
   698  		s.p.resultLevels[x] = s.resolvedLevels[i]
   699  	}
   700  }
   701  
   702  // Return the limit of the run consisting only of the types in validSet
   703  // starting at index. This checks the value at index, and will return
   704  // index if that value is not in validSet.
   705  func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int {
   706  loop:
   707  	for ; index < len(s.types); index++ {
   708  		t := s.types[index]
   709  		for _, valid := range validSet {
   710  			if t == valid {
   711  				continue loop
   712  			}
   713  		}
   714  		return index // didn't find a match in validSet
   715  	}
   716  	return len(s.types)
   717  }
   718  
   719  // Algorithm validation. Assert that all values in types are in the
   720  // provided set.
   721  func (s *isolatingRunSequence) assertOnly(codes ...Class) {
   722  loop:
   723  	for i, t := range s.types {
   724  		for _, c := range codes {
   725  			if t == c {
   726  				continue loop
   727  			}
   728  		}
   729  		log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i])
   730  	}
   731  }
   732  
   733  // determineLevelRuns returns an array of level runs. Each level run is
   734  // described as an array of indexes into the input string.
   735  //
   736  // Determines the level runs. Rule X9 will be applied in determining the
   737  // runs, in the way that makes sure the characters that are supposed to be
   738  // removed are not included in the runs.
   739  func (p *paragraph) determineLevelRuns() [][]int {
   740  	run := []int{}
   741  	allRuns := [][]int{}
   742  	currentLevel := implicitLevel
   743  
   744  	for i := range p.initialTypes {
   745  		if !isRemovedByX9(p.initialTypes[i]) {
   746  			if p.resultLevels[i] != currentLevel {
   747  				// we just encountered a new run; wrap up last run
   748  				if currentLevel >= 0 { // only wrap it up if there was a run
   749  					allRuns = append(allRuns, run)
   750  					run = nil
   751  				}
   752  				// Start new run
   753  				currentLevel = p.resultLevels[i]
   754  			}
   755  			run = append(run, i)
   756  		}
   757  	}
   758  	// Wrap up the final run, if any
   759  	if len(run) > 0 {
   760  		allRuns = append(allRuns, run)
   761  	}
   762  	return allRuns
   763  }
   764  
   765  // Definition BD13. Determine isolating run sequences.
   766  func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence {
   767  	levelRuns := p.determineLevelRuns()
   768  
   769  	// Compute the run that each character belongs to
   770  	runForCharacter := make([]int, p.Len())
   771  	for i, run := range levelRuns {
   772  		for _, index := range run {
   773  			runForCharacter[index] = i
   774  		}
   775  	}
   776  
   777  	sequences := []*isolatingRunSequence{}
   778  
   779  	var currentRunSequence []int
   780  
   781  	for _, run := range levelRuns {
   782  		first := run[0]
   783  		if p.initialTypes[first] != PDI || p.matchingIsolateInitiator[first] == -1 {
   784  			currentRunSequence = nil
   785  			// int run = i;
   786  			for {
   787  				// Copy this level run into currentRunSequence
   788  				currentRunSequence = append(currentRunSequence, run...)
   789  
   790  				last := currentRunSequence[len(currentRunSequence)-1]
   791  				lastT := p.initialTypes[last]
   792  				if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() {
   793  					run = levelRuns[runForCharacter[p.matchingPDI[last]]]
   794  				} else {
   795  					break
   796  				}
   797  			}
   798  			sequences = append(sequences, p.isolatingRunSequence(currentRunSequence))
   799  		}
   800  	}
   801  	return sequences
   802  }
   803  
   804  // Assign level information to characters removed by rule X9. This is for
   805  // ease of relating the level information to the original input data. Note
   806  // that the levels assigned to these codes are arbitrary, they're chosen so
   807  // as to avoid breaking level runs.
   808  func (p *paragraph) assignLevelsToCharactersRemovedByX9() {
   809  	for i, t := range p.initialTypes {
   810  		if t.in(LRE, RLE, LRO, RLO, PDF, BN) {
   811  			p.resultTypes[i] = t
   812  			p.resultLevels[i] = -1
   813  		}
   814  	}
   815  	// now propagate forward the levels information (could have
   816  	// propagated backward, the main thing is not to introduce a level
   817  	// break where one doesn't already exist).
   818  
   819  	if p.resultLevels[0] == -1 {
   820  		p.resultLevels[0] = p.embeddingLevel
   821  	}
   822  	for i := 1; i < len(p.initialTypes); i++ {
   823  		if p.resultLevels[i] == -1 {
   824  			p.resultLevels[i] = p.resultLevels[i-1]
   825  		}
   826  	}
   827  	// Embedding information is for informational purposes only so need not be
   828  	// adjusted.
   829  }
   830  
   831  //
   832  // Output
   833  //
   834  
   835  // getLevels computes levels array breaking lines at offsets in linebreaks.
   836  // Rule L1.
   837  //
   838  // The linebreaks array must include at least one value. The values must be
   839  // in strictly increasing order (no duplicates) between 1 and the length of
   840  // the text, inclusive. The last value must be the length of the text.
   841  func (p *paragraph) getLevels(linebreaks []int) []level {
   842  	// Note that since the previous processing has removed all
   843  	// P, S, and WS values from resultTypes, the values referred to
   844  	// in these rules are the initial types, before any processing
   845  	// has been applied (including processing of overrides).
   846  	//
   847  	// This example implementation has reinserted explicit format codes
   848  	// and BN, in order that the levels array correspond to the
   849  	// initial text. Their final placement is not normative.
   850  	// These codes are treated like WS in this implementation,
   851  	// so they don't interrupt sequences of WS.
   852  
   853  	validateLineBreaks(linebreaks, p.Len())
   854  
   855  	result := append([]level(nil), p.resultLevels...)
   856  
   857  	// don't worry about linebreaks since if there is a break within
   858  	// a series of WS values preceding S, the linebreak itself
   859  	// causes the reset.
   860  	for i, t := range p.initialTypes {
   861  		if t.in(B, S) {
   862  			// Rule L1, clauses one and two.
   863  			result[i] = p.embeddingLevel
   864  
   865  			// Rule L1, clause three.
   866  			for j := i - 1; j >= 0; j-- {
   867  				if isWhitespace(p.initialTypes[j]) { // including format codes
   868  					result[j] = p.embeddingLevel
   869  				} else {
   870  					break
   871  				}
   872  			}
   873  		}
   874  	}
   875  
   876  	// Rule L1, clause four.
   877  	start := 0
   878  	for _, limit := range linebreaks {
   879  		for j := limit - 1; j >= start; j-- {
   880  			if isWhitespace(p.initialTypes[j]) { // including format codes
   881  				result[j] = p.embeddingLevel
   882  			} else {
   883  				break
   884  			}
   885  		}
   886  		start = limit
   887  	}
   888  
   889  	return result
   890  }
   891  
   892  // getReordering returns the reordering of lines from a visual index to a
   893  // logical index for line breaks at the given offsets.
   894  //
   895  // Lines are concatenated from left to right. So for example, the fifth
   896  // character from the left on the third line is
   897  //
   898  // 		getReordering(linebreaks)[linebreaks[1] + 4]
   899  //
   900  // (linebreaks[1] is the position after the last character of the second
   901  // line, which is also the index of the first character on the third line,
   902  // and adding four gets the fifth character from the left).
   903  //
   904  // The linebreaks array must include at least one value. The values must be
   905  // in strictly increasing order (no duplicates) between 1 and the length of
   906  // the text, inclusive. The last value must be the length of the text.
   907  func (p *paragraph) getReordering(linebreaks []int) []int {
   908  	validateLineBreaks(linebreaks, p.Len())
   909  
   910  	return computeMultilineReordering(p.getLevels(linebreaks), linebreaks)
   911  }
   912  
   913  // Return multiline reordering array for a given level array. Reordering
   914  // does not occur across a line break.
   915  func computeMultilineReordering(levels []level, linebreaks []int) []int {
   916  	result := make([]int, len(levels))
   917  
   918  	start := 0
   919  	for _, limit := range linebreaks {
   920  		tempLevels := make([]level, limit-start)
   921  		copy(tempLevels, levels[start:])
   922  
   923  		for j, order := range computeReordering(tempLevels) {
   924  			result[start+j] = order + start
   925  		}
   926  		start = limit
   927  	}
   928  	return result
   929  }
   930  
   931  // Return reordering array for a given level array. This reorders a single
   932  // line. The reordering is a visual to logical map. For example, the
   933  // leftmost char is string.charAt(order[0]). Rule L2.
   934  func computeReordering(levels []level) []int {
   935  	result := make([]int, len(levels))
   936  	// initialize order
   937  	for i := range result {
   938  		result[i] = i
   939  	}
   940  
   941  	// locate highest level found on line.
   942  	// Note the rules say text, but no reordering across line bounds is
   943  	// performed, so this is sufficient.
   944  	highestLevel := level(0)
   945  	lowestOddLevel := level(maxDepth + 2)
   946  	for _, level := range levels {
   947  		if level > highestLevel {
   948  			highestLevel = level
   949  		}
   950  		if level&1 != 0 && level < lowestOddLevel {
   951  			lowestOddLevel = level
   952  		}
   953  	}
   954  
   955  	for level := highestLevel; level >= lowestOddLevel; level-- {
   956  		for i := 0; i < len(levels); i++ {
   957  			if levels[i] >= level {
   958  				// find range of text at or above this level
   959  				start := i
   960  				limit := i + 1
   961  				for limit < len(levels) && levels[limit] >= level {
   962  					limit++
   963  				}
   964  
   965  				for j, k := start, limit-1; j < k; j, k = j+1, k-1 {
   966  					result[j], result[k] = result[k], result[j]
   967  				}
   968  				// skip to end of level run
   969  				i = limit
   970  			}
   971  		}
   972  	}
   973  
   974  	return result
   975  }
   976  
   977  // isWhitespace reports whether the type is considered a whitespace type for the
   978  // line break rules.
   979  func isWhitespace(c Class) bool {
   980  	switch c {
   981  	case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS:
   982  		return true
   983  	}
   984  	return false
   985  }
   986  
   987  // isRemovedByX9 reports whether the type is one of the types removed in X9.
   988  func isRemovedByX9(c Class) bool {
   989  	switch c {
   990  	case LRE, RLE, LRO, RLO, PDF, BN:
   991  		return true
   992  	}
   993  	return false
   994  }
   995  
   996  // typeForLevel reports the strong type (L or R) corresponding to the level.
   997  func typeForLevel(level level) Class {
   998  	if (level & 0x1) == 0 {
   999  		return L
  1000  	}
  1001  	return R
  1002  }
  1003  
  1004  // TODO: change validation to not panic
  1005  
  1006  func validateTypes(types []Class) {
  1007  	if len(types) == 0 {
  1008  		log.Panic("types is null")
  1009  	}
  1010  	for i, t := range types[:len(types)-1] {
  1011  		if t == B {
  1012  			log.Panicf("B type before end of paragraph at index: %d", i)
  1013  		}
  1014  	}
  1015  }
  1016  
  1017  func validateParagraphEmbeddingLevel(embeddingLevel level) {
  1018  	if embeddingLevel != implicitLevel &&
  1019  		embeddingLevel != 0 &&
  1020  		embeddingLevel != 1 {
  1021  		log.Panicf("illegal paragraph embedding level: %d", embeddingLevel)
  1022  	}
  1023  }
  1024  
  1025  func validateLineBreaks(linebreaks []int, textLength int) {
  1026  	prev := 0
  1027  	for i, next := range linebreaks {
  1028  		if next <= prev {
  1029  			log.Panicf("bad linebreak: %d at index: %d", next, i)
  1030  		}
  1031  		prev = next
  1032  	}
  1033  	if prev != textLength {
  1034  		log.Panicf("last linebreak was %d, want %d", prev, textLength)
  1035  	}
  1036  }
  1037  
  1038  func validatePbTypes(pairTypes []bracketType) {
  1039  	if len(pairTypes) == 0 {
  1040  		log.Panic("pairTypes is null")
  1041  	}
  1042  	for i, pt := range pairTypes {
  1043  		switch pt {
  1044  		case bpNone, bpOpen, bpClose:
  1045  		default:
  1046  			log.Panicf("illegal pairType value at %d: %v", i, pairTypes[i])
  1047  		}
  1048  	}
  1049  }
  1050  
  1051  func validatePbValues(pairValues []rune, pairTypes []bracketType) {
  1052  	if pairValues == nil {
  1053  		log.Panic("pairValues is null")
  1054  	}
  1055  	if len(pairTypes) != len(pairValues) {
  1056  		log.Panic("pairTypes is different length from pairValues")
  1057  	}
  1058  }