github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/unicode/bidi/core.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package bidi
     6  
     7  import "log"
     8  
     9  // This implementation is a port based on the reference implementation found at:
    10  // http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
    11  //
    12  // described in Unicode Bidirectional Algorithm (UAX #9).
    13  //
    14  // Input:
    15  // There are two levels of input to the algorithm, since clients may prefer to
    16  // supply some information from out-of-band sources rather than relying on the
    17  // default behavior.
    18  //
    19  // - Bidi class array
    20  // - Bidi class array, with externally supplied base line direction
    21  //
    22  // Output:
    23  // Output is separated into several stages:
    24  //
    25  //  - levels array over entire paragraph
    26  //  - reordering array over entire paragraph
    27  //  - levels array over line
    28  //  - reordering array over line
    29  //
    30  // Note that for conformance to the Unicode Bidirectional Algorithm,
    31  // implementations are only required to generate correct reordering and
    32  // character directionality (odd or even levels) over a line. Generating
    33  // identical level arrays over a line is not required. Bidi explicit format
    34  // codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and
    35  // positions as long as the rest of the input is properly reordered.
    36  //
    37  // As the algorithm is defined to operate on a single paragraph at a time, this
    38  // implementation is written to handle single paragraphs. Thus rule P1 is
    39  // presumed by this implementation-- the data provided to the implementation is
    40  // assumed to be a single paragraph, and either contains no 'B' codes, or a
    41  // single 'B' code at the end of the input. 'B' is allowed as input to
    42  // illustrate how the algorithm assigns it a level.
    43  //
    44  // Also note that rules L3 and L4 depend on the rendering engine that uses the
    45  // result of the bidi algorithm. This implementation assumes that the rendering
    46  // engine expects combining marks in visual order (e.g. to the left of their
    47  // base character in RTL runs) and that it adjusts the glyphs used to render
    48  // mirrored characters that are in RTL runs so that they render appropriately.
    49  
    50  // level is the embedding level of a character. Even embedding levels indicate
    51  // left-to-right order and odd levels indicate right-to-left order. The special
    52  // level of -1 is reserved for undefined order.
    53  type level int8
    54  
    55  const implicitLevel level = -1
    56  
    57  // in returns if x is equal to any of the values in set.
    58  func (c class) in(set ...class) bool {
    59  	for _, s := range set {
    60  		if c == s {
    61  			return true
    62  		}
    63  	}
    64  	return false
    65  }
    66  
    67  // A paragraph contains the state of a paragraph.
    68  type paragraph struct {
    69  	initialTypes []class
    70  
    71  	// Arrays of properties needed for paired bracket evaluation in N0
    72  	pairTypes  []bracketType // paired Bracket types for paragraph
    73  	pairValues []rune        // rune for opening bracket or pbOpen and pbClose; 0 for pbNone
    74  
    75  	embeddingLevel level // default: = implicitLevel;
    76  
    77  	// at the paragraph levels
    78  	resultTypes  []class
    79  	resultLevels []level
    80  
    81  	// Index of matching PDI for isolate initiator characters. For other
    82  	// characters, the value of matchingPDI will be set to -1. For isolate
    83  	// initiators with no matching PDI, matchingPDI will be set to the length of
    84  	// the input string.
    85  	matchingPDI []int
    86  
    87  	// Index of matching isolate initiator for PDI characters. For other
    88  	// characters, and for PDIs with no matching isolate initiator, the value of
    89  	// matchingIsolateInitiator will be set to -1.
    90  	matchingIsolateInitiator []int
    91  }
    92  
    93  // newParagraph initializes a paragraph. The user needs to supply a few arrays
    94  // corresponding to the preprocessed text input. The types correspond to the
    95  // Unicode BiDi classes for each rune. pairTypes indicates the bracket type for
    96  // each rune. pairValues provides a unique bracket class identifier for each
    97  // rune (suggested is the rune of the open bracket for opening and matching
    98  // close brackets, after normalization). The embedding levels are optional, but
    99  // may be supplied to encode embedding levels of styled text.
   100  //
   101  // TODO: return an error.
   102  func newParagraph(types []class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph {
   103  	validateTypes(types)
   104  	validatePbTypes(pairTypes)
   105  	validatePbValues(pairValues, pairTypes)
   106  	validateParagraphEmbeddingLevel(levels)
   107  
   108  	p := &paragraph{
   109  		initialTypes:   append([]class(nil), types...),
   110  		embeddingLevel: levels,
   111  
   112  		pairTypes:  pairTypes,
   113  		pairValues: pairValues,
   114  
   115  		resultTypes: append([]class(nil), types...),
   116  	}
   117  	p.run()
   118  	return p
   119  }
   120  
   121  func (p *paragraph) Len() int { return len(p.initialTypes) }
   122  
   123  // The algorithm. Does not include line-based processing (Rules L1, L2).
   124  // These are applied later in the line-based phase of the algorithm.
   125  func (p *paragraph) run() {
   126  	p.determineMatchingIsolates()
   127  
   128  	// 1) determining the paragraph level
   129  	// Rule P1 is the requirement for entering this algorithm.
   130  	// Rules P2, P3.
   131  	// If no externally supplied paragraph embedding level, use default.
   132  	if p.embeddingLevel == implicitLevel {
   133  		p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len())
   134  	}
   135  
   136  	// Initialize result levels to paragraph embedding level.
   137  	p.resultLevels = make([]level, p.Len())
   138  	setLevels(p.resultLevels, p.embeddingLevel)
   139  
   140  	// 2) Explicit levels and directions
   141  	// Rules X1-X8.
   142  	p.determineExplicitEmbeddingLevels()
   143  
   144  	// Rule X9.
   145  	// We do not remove the embeddings, the overrides, the PDFs, and the BNs
   146  	// from the string explicitly. But they are not copied into isolating run
   147  	// sequences when they are created, so they are removed for all
   148  	// practical purposes.
   149  
   150  	// Rule X10.
   151  	// Run remainder of algorithm one isolating run sequence at a time
   152  	for _, seq := range p.determineIsolatingRunSequences() {
   153  		// 3) resolving weak types
   154  		// Rules W1-W7.
   155  		seq.resolveWeakTypes()
   156  
   157  		// 4a) resolving paired brackets
   158  		// Rule N0
   159  		resolvePairedBrackets(seq)
   160  
   161  		// 4b) resolving neutral types
   162  		// Rules N1-N3.
   163  		seq.resolveNeutralTypes()
   164  
   165  		// 5) resolving implicit embedding levels
   166  		// Rules I1, I2.
   167  		seq.resolveImplicitLevels()
   168  
   169  		// Apply the computed levels and types
   170  		seq.applyLevelsAndTypes()
   171  	}
   172  
   173  	// Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and
   174  	// BNs. This is for convenience, so the resulting level array will have
   175  	// a value for every character.
   176  	p.assignLevelsToCharactersRemovedByX9()
   177  }
   178  
   179  // determineMatchingIsolates determines the matching PDI for each isolate
   180  // initiator and vice versa.
   181  //
   182  // Definition BD9.
   183  //
   184  // At the end of this function:
   185  //
   186  //  - The member variable matchingPDI is set to point to the index of the
   187  //    matching PDI character for each isolate initiator character. If there is
   188  //    no matching PDI, it is set to the length of the input text. For other
   189  //    characters, it is set to -1.
   190  //  - The member variable matchingIsolateInitiator is set to point to the
   191  //    index of the matching isolate initiator character for each PDI character.
   192  //    If there is no matching isolate initiator, or the character is not a PDI,
   193  //    it is set to -1.
   194  func (p *paragraph) determineMatchingIsolates() {
   195  	p.matchingPDI = make([]int, p.Len())
   196  	p.matchingIsolateInitiator = make([]int, p.Len())
   197  
   198  	for i := range p.matchingIsolateInitiator {
   199  		p.matchingIsolateInitiator[i] = -1
   200  	}
   201  
   202  	for i := range p.matchingPDI {
   203  		p.matchingPDI[i] = -1
   204  
   205  		if t := p.resultTypes[i]; t.in(_LRI, _RLI, _FSI) {
   206  			depthCounter := 1
   207  			for j := i + 1; j < p.Len(); j++ {
   208  				if u := p.resultTypes[j]; u.in(_LRI, _RLI, _FSI) {
   209  					depthCounter++
   210  				} else if u == _PDI {
   211  					if depthCounter--; depthCounter == 0 {
   212  						p.matchingPDI[i] = j
   213  						p.matchingIsolateInitiator[j] = i
   214  						break
   215  					}
   216  				}
   217  			}
   218  			if p.matchingPDI[i] == -1 {
   219  				p.matchingPDI[i] = p.Len()
   220  			}
   221  		}
   222  	}
   223  }
   224  
   225  // determineParagraphEmbeddingLevel reports the resolved paragraph direction of
   226  // the substring limited by the given range [start, end).
   227  //
   228  // Determines the paragraph level based on rules P2, P3. This is also used
   229  // in rule X5c to find if an FSI should resolve to LRI or RLI.
   230  func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level {
   231  	var strongType class = -1 // unknown
   232  
   233  	// Rule P2.
   234  	for i := start; i < end; i++ {
   235  		if t := p.resultTypes[i]; t.in(_L, _AL, _R) {
   236  			strongType = t
   237  			break
   238  		} else if t.in(_FSI, _LRI, _RLI) {
   239  			i = p.matchingPDI[i] // skip over to the matching PDI
   240  			if i > end {
   241  				log.Panic("assert (i <= end)")
   242  			}
   243  		}
   244  	}
   245  	// Rule P3.
   246  	switch strongType {
   247  	case -1: // none found
   248  		// default embedding level when no strong types found is 0.
   249  		return 0
   250  	case _L:
   251  		return 0
   252  	default: // AL, R
   253  		return 1
   254  	}
   255  }
   256  
   257  const maxDepth = 125
   258  
   259  // This stack will store the embedding levels and override and isolated
   260  // statuses
   261  type directionalStatusStack struct {
   262  	stackCounter        int
   263  	embeddingLevelStack [maxDepth + 1]level
   264  	overrideStatusStack [maxDepth + 1]class
   265  	isolateStatusStack  [maxDepth + 1]bool
   266  }
   267  
   268  func (s *directionalStatusStack) empty()     { s.stackCounter = 0 }
   269  func (s *directionalStatusStack) pop()       { s.stackCounter-- }
   270  func (s *directionalStatusStack) depth() int { return s.stackCounter }
   271  
   272  func (s *directionalStatusStack) push(level level, overrideStatus class, isolateStatus bool) {
   273  	s.embeddingLevelStack[s.stackCounter] = level
   274  	s.overrideStatusStack[s.stackCounter] = overrideStatus
   275  	s.isolateStatusStack[s.stackCounter] = isolateStatus
   276  	s.stackCounter++
   277  }
   278  
   279  func (s *directionalStatusStack) lastEmbeddingLevel() level {
   280  	return s.embeddingLevelStack[s.stackCounter-1]
   281  }
   282  
   283  func (s *directionalStatusStack) lastDirectionalOverrideStatus() class {
   284  	return s.overrideStatusStack[s.stackCounter-1]
   285  }
   286  
   287  func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool {
   288  	return s.isolateStatusStack[s.stackCounter-1]
   289  }
   290  
   291  // Determine explicit levels using rules X1 - X8
   292  func (p *paragraph) determineExplicitEmbeddingLevels() {
   293  	var stack directionalStatusStack
   294  	var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int
   295  
   296  	// Rule X1.
   297  	stack.push(p.embeddingLevel, _ON, false)
   298  
   299  	for i, t := range p.resultTypes {
   300  		// Rules X2, X3, X4, X5, X5a, X5b, X5c
   301  		switch t {
   302  		case _RLE, _LRE, _RLO, _LRO, _RLI, _LRI, _FSI:
   303  			isIsolate := t.in(_RLI, _LRI, _FSI)
   304  			isRTL := t.in(_RLE, _RLO, _RLI)
   305  
   306  			// override if this is an FSI that resolves to RLI
   307  			if t == _FSI {
   308  				isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1)
   309  			}
   310  			if isIsolate {
   311  				p.resultLevels[i] = stack.lastEmbeddingLevel()
   312  			}
   313  
   314  			var newLevel level
   315  			if isRTL {
   316  				// least greater odd
   317  				newLevel = (stack.lastEmbeddingLevel() + 1) | 1
   318  			} else {
   319  				// least greater even
   320  				newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1
   321  			}
   322  
   323  			if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 {
   324  				if isIsolate {
   325  					validIsolateCount++
   326  				}
   327  				// Push new embedding level, override status, and isolated
   328  				// status.
   329  				// No check for valid stack counter, since the level check
   330  				// suffices.
   331  				switch t {
   332  				case _LRO:
   333  					stack.push(newLevel, _L, isIsolate)
   334  				case _RLO:
   335  					stack.push(newLevel, _R, isIsolate)
   336  				default:
   337  					stack.push(newLevel, _ON, isIsolate)
   338  				}
   339  				// Not really part of the spec
   340  				if !isIsolate {
   341  					p.resultLevels[i] = newLevel
   342  				}
   343  			} else {
   344  				// This is an invalid explicit formatting character,
   345  				// so apply the "Otherwise" part of rules X2-X5b.
   346  				if isIsolate {
   347  					overflowIsolateCount++
   348  				} else { // !isIsolate
   349  					if overflowIsolateCount == 0 {
   350  						overflowEmbeddingCount++
   351  					}
   352  				}
   353  			}
   354  
   355  		// Rule X6a
   356  		case _PDI:
   357  			if overflowIsolateCount > 0 {
   358  				overflowIsolateCount--
   359  			} else if validIsolateCount == 0 {
   360  				// do nothing
   361  			} else {
   362  				overflowEmbeddingCount = 0
   363  				for !stack.lastDirectionalIsolateStatus() {
   364  					stack.pop()
   365  				}
   366  				stack.pop()
   367  				validIsolateCount--
   368  			}
   369  			p.resultLevels[i] = stack.lastEmbeddingLevel()
   370  
   371  		// Rule X7
   372  		case _PDF:
   373  			// Not really part of the spec
   374  			p.resultLevels[i] = stack.lastEmbeddingLevel()
   375  
   376  			if overflowIsolateCount > 0 {
   377  				// do nothing
   378  			} else if overflowEmbeddingCount > 0 {
   379  				overflowEmbeddingCount--
   380  			} else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 {
   381  				stack.pop()
   382  			}
   383  
   384  		case _B: // paragraph separator.
   385  			// Rule X8.
   386  
   387  			// These values are reset for clarity, in this implementation B
   388  			// can only occur as the last code in the array.
   389  			stack.empty()
   390  			overflowIsolateCount = 0
   391  			overflowEmbeddingCount = 0
   392  			validIsolateCount = 0
   393  			p.resultLevels[i] = p.embeddingLevel
   394  
   395  		default:
   396  			p.resultLevels[i] = stack.lastEmbeddingLevel()
   397  			if stack.lastDirectionalOverrideStatus() != _ON {
   398  				p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
   399  			}
   400  		}
   401  	}
   402  }
   403  
   404  type isolatingRunSequence struct {
   405  	p *paragraph
   406  
   407  	indexes []int // indexes to the original string
   408  
   409  	types          []class // type of each character using the index
   410  	resolvedLevels []level // resolved levels after application of rules
   411  	level          level
   412  	sos, eos       class
   413  }
   414  
   415  func (i *isolatingRunSequence) Len() int { return len(i.indexes) }
   416  
   417  func maxLevel(a, b level) level {
   418  	if a > b {
   419  		return a
   420  	}
   421  	return b
   422  }
   423  
   424  // Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types,
   425  // 			 either L or R, for each isolating run sequence.
   426  func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
   427  	length := len(indexes)
   428  	types := make([]class, length)
   429  	for i, x := range indexes {
   430  		types[i] = p.resultTypes[x]
   431  	}
   432  
   433  	// assign level, sos and eos
   434  	prevChar := indexes[0] - 1
   435  	for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) {
   436  		prevChar--
   437  	}
   438  	prevLevel := p.embeddingLevel
   439  	if prevChar >= 0 {
   440  		prevLevel = p.resultLevels[prevChar]
   441  	}
   442  
   443  	var succLevel level
   444  	lastType := types[length-1]
   445  	if lastType.in(_LRI, _RLI, _FSI) {
   446  		succLevel = p.embeddingLevel
   447  	} else {
   448  		// the first character after the end of run sequence
   449  		limit := indexes[length-1] + 1
   450  		for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ {
   451  
   452  		}
   453  		succLevel = p.embeddingLevel
   454  		if limit < p.Len() {
   455  			succLevel = p.resultLevels[limit]
   456  		}
   457  	}
   458  	level := p.resultLevels[indexes[0]]
   459  	return &isolatingRunSequence{
   460  		p:       p,
   461  		indexes: indexes,
   462  		types:   types,
   463  		level:   level,
   464  		sos:     typeForLevel(maxLevel(prevLevel, level)),
   465  		eos:     typeForLevel(maxLevel(succLevel, level)),
   466  	}
   467  }
   468  
   469  // Resolving weak types Rules W1-W7.
   470  //
   471  // Note that some weak types (EN, AN) remain after this processing is
   472  // complete.
   473  func (s *isolatingRunSequence) resolveWeakTypes() {
   474  
   475  	// on entry, only these types remain
   476  	s.assertOnly(_L, _R, _AL, _EN, _ES, _ET, _AN, _CS, _B, _S, _WS, _ON, _NSM, _LRI, _RLI, _FSI, _PDI)
   477  
   478  	// Rule W1.
   479  	// Changes all NSMs.
   480  	preceedingCharacterType := s.sos
   481  	for i, t := range s.types {
   482  		if t == _NSM {
   483  			s.types[i] = preceedingCharacterType
   484  		} else {
   485  			if t.in(_LRI, _RLI, _FSI, _PDI) {
   486  				preceedingCharacterType = _ON
   487  			}
   488  			preceedingCharacterType = t
   489  		}
   490  	}
   491  
   492  	// Rule W2.
   493  	// EN does not change at the start of the run, because sos != AL.
   494  	for i, t := range s.types {
   495  		if t == _EN {
   496  			for j := i - 1; j >= 0; j-- {
   497  				if t := s.types[j]; t.in(_L, _R, _AL) {
   498  					if t == _AL {
   499  						s.types[i] = _AN
   500  					}
   501  					break
   502  				}
   503  			}
   504  		}
   505  	}
   506  
   507  	// Rule W3.
   508  	for i, t := range s.types {
   509  		if t == _AL {
   510  			s.types[i] = _R
   511  		}
   512  	}
   513  
   514  	// Rule W4.
   515  	// Since there must be values on both sides for this rule to have an
   516  	// effect, the scan skips the first and last value.
   517  	//
   518  	// Although the scan proceeds left to right, and changes the type
   519  	// values in a way that would appear to affect the computations
   520  	// later in the scan, there is actually no problem. A change in the
   521  	// current value can only affect the value to its immediate right,
   522  	// and only affect it if it is ES or CS. But the current value can
   523  	// only change if the value to its right is not ES or CS. Thus
   524  	// either the current value will not change, or its change will have
   525  	// no effect on the remainder of the analysis.
   526  
   527  	for i := 1; i < s.Len()-1; i++ {
   528  		t := s.types[i]
   529  		if t == _ES || t == _CS {
   530  			prevSepType := s.types[i-1]
   531  			succSepType := s.types[i+1]
   532  			if prevSepType == _EN && succSepType == _EN {
   533  				s.types[i] = _EN
   534  			} else if s.types[i] == _CS && prevSepType == _AN && succSepType == _AN {
   535  				s.types[i] = _AN
   536  			}
   537  		}
   538  	}
   539  
   540  	// Rule W5.
   541  	for i, t := range s.types {
   542  		if t == _ET {
   543  			// locate end of sequence
   544  			runStart := i
   545  			runEnd := s.findRunLimit(runStart, _ET)
   546  
   547  			// check values at ends of sequence
   548  			t := s.sos
   549  			if runStart > 0 {
   550  				t = s.types[runStart-1]
   551  			}
   552  			if t != _EN {
   553  				t = s.eos
   554  				if runEnd < len(s.types) {
   555  					t = s.types[runEnd]
   556  				}
   557  			}
   558  			if t == _EN {
   559  				setTypes(s.types[runStart:runEnd], _EN)
   560  			}
   561  			// continue at end of sequence
   562  			i = runEnd
   563  		}
   564  	}
   565  
   566  	// Rule W6.
   567  	for i, t := range s.types {
   568  		if t.in(_ES, _ET, _CS) {
   569  			s.types[i] = _ON
   570  		}
   571  	}
   572  
   573  	// Rule W7.
   574  	for i, t := range s.types {
   575  		if t == _EN {
   576  			// set default if we reach start of run
   577  			prevStrongType := s.sos
   578  			for j := i - 1; j >= 0; j-- {
   579  				t = s.types[j]
   580  				if t == _L || t == _R { // AL's have been changed to R
   581  					prevStrongType = t
   582  					break
   583  				}
   584  			}
   585  			if prevStrongType == _L {
   586  				s.types[i] = _L
   587  			}
   588  		}
   589  	}
   590  }
   591  
   592  // 6) resolving neutral types Rules N1-N2.
   593  func (s *isolatingRunSequence) resolveNeutralTypes() {
   594  
   595  	// on entry, only these types can be in resultTypes
   596  	s.assertOnly(_L, _R, _EN, _AN, _B, _S, _WS, _ON, _RLI, _LRI, _FSI, _PDI)
   597  
   598  	for i, t := range s.types {
   599  		switch t {
   600  		case _WS, _ON, _B, _S, _RLI, _LRI, _FSI, _PDI:
   601  			// find bounds of run of neutrals
   602  			runStart := i
   603  			runEnd := s.findRunLimit(runStart, _B, _S, _WS, _ON, _RLI, _LRI, _FSI, _PDI)
   604  
   605  			// determine effective types at ends of run
   606  			var leadType, trailType class
   607  
   608  			// Note that the character found can only be L, R, AN, or
   609  			// EN.
   610  			if runStart == 0 {
   611  				leadType = s.sos
   612  			} else {
   613  				leadType = s.types[runStart-1]
   614  				if leadType.in(_AN, _EN) {
   615  					leadType = _R
   616  				}
   617  			}
   618  			if runEnd == len(s.types) {
   619  				trailType = s.eos
   620  			} else {
   621  				trailType = s.types[runEnd]
   622  				if trailType.in(_AN, _EN) {
   623  					trailType = _R
   624  				}
   625  			}
   626  
   627  			var resolvedType class
   628  			if leadType == trailType {
   629  				// Rule N1.
   630  				resolvedType = leadType
   631  			} else {
   632  				// Rule N2.
   633  				// Notice the embedding level of the run is used, not
   634  				// the paragraph embedding level.
   635  				resolvedType = typeForLevel(s.level)
   636  			}
   637  
   638  			setTypes(s.types[runStart:runEnd], resolvedType)
   639  
   640  			// skip over run of (former) neutrals
   641  			i = runEnd
   642  		}
   643  	}
   644  }
   645  
   646  func setLevels(levels []level, newLevel level) {
   647  	for i := range levels {
   648  		levels[i] = newLevel
   649  	}
   650  }
   651  
   652  func setTypes(types []class, newType class) {
   653  	for i := range types {
   654  		types[i] = newType
   655  	}
   656  }
   657  
   658  // 7) resolving implicit embedding levels Rules I1, I2.
   659  func (s *isolatingRunSequence) resolveImplicitLevels() {
   660  
   661  	// on entry, only these types can be in resultTypes
   662  	s.assertOnly(_L, _R, _EN, _AN)
   663  
   664  	s.resolvedLevels = make([]level, len(s.types))
   665  	setLevels(s.resolvedLevels, s.level)
   666  
   667  	if (s.level & 1) == 0 { // even level
   668  		for i, t := range s.types {
   669  			// Rule I1.
   670  			if t == _L {
   671  				// no change
   672  			} else if t == _R {
   673  				s.resolvedLevels[i] += 1
   674  			} else { // t == _AN || t == _EN
   675  				s.resolvedLevels[i] += 2
   676  			}
   677  		}
   678  	} else { // odd level
   679  		for i, t := range s.types {
   680  			// Rule I2.
   681  			if t == _R {
   682  				// no change
   683  			} else { // t == _L || t == _AN || t == _EN
   684  				s.resolvedLevels[i] += 1
   685  			}
   686  		}
   687  	}
   688  }
   689  
   690  // Applies the levels and types resolved in rules W1-I2 to the
   691  // resultLevels array.
   692  func (s *isolatingRunSequence) applyLevelsAndTypes() {
   693  	for i, x := range s.indexes {
   694  		s.p.resultTypes[x] = s.types[i]
   695  		s.p.resultLevels[x] = s.resolvedLevels[i]
   696  	}
   697  }
   698  
   699  // Return the limit of the run consisting only of the types in validSet
   700  // starting at index. This checks the value at index, and will return
   701  // index if that value is not in validSet.
   702  func (s *isolatingRunSequence) findRunLimit(index int, validSet ...class) int {
   703  loop:
   704  	for ; index < len(s.types); index++ {
   705  		t := s.types[index]
   706  		for _, valid := range validSet {
   707  			if t == valid {
   708  				continue loop
   709  			}
   710  		}
   711  		return index // didn't find a match in validSet
   712  	}
   713  	return len(s.types)
   714  }
   715  
   716  // Algorithm validation. Assert that all values in types are in the
   717  // provided set.
   718  func (s *isolatingRunSequence) assertOnly(codes ...class) {
   719  loop:
   720  	for i, t := range s.types {
   721  		for _, c := range codes {
   722  			if t == c {
   723  				continue loop
   724  			}
   725  		}
   726  		log.Panicf("invalid bidi code %s present in assertOnly at position %d", t, s.indexes[i])
   727  	}
   728  }
   729  
   730  // determineLevelRuns returns an array of level runs. Each level run is
   731  // described as an array of indexes into the input string.
   732  //
   733  // Determines the level runs. Rule X9 will be applied in determining the
   734  // runs, in the way that makes sure the characters that are supposed to be
   735  // removed are not included in the runs.
   736  func (p *paragraph) determineLevelRuns() [][]int {
   737  	run := []int{}
   738  	allRuns := [][]int{}
   739  	currentLevel := implicitLevel
   740  
   741  	for i := range p.initialTypes {
   742  		if !isRemovedByX9(p.initialTypes[i]) {
   743  			if p.resultLevels[i] != currentLevel {
   744  				// we just encountered a new run; wrap up last run
   745  				if currentLevel >= 0 { // only wrap it up if there was a run
   746  					allRuns = append(allRuns, run)
   747  					run = nil
   748  				}
   749  				// Start new run
   750  				currentLevel = p.resultLevels[i]
   751  			}
   752  			run = append(run, i)
   753  		}
   754  	}
   755  	// Wrap up the final run, if any
   756  	if len(run) > 0 {
   757  		allRuns = append(allRuns, run)
   758  	}
   759  	return allRuns
   760  }
   761  
   762  // Definition BD13. Determine isolating run sequences.
   763  func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence {
   764  	levelRuns := p.determineLevelRuns()
   765  
   766  	// Compute the run that each character belongs to
   767  	runForCharacter := make([]int, p.Len())
   768  	for i, run := range levelRuns {
   769  		for _, index := range run {
   770  			runForCharacter[index] = i
   771  		}
   772  	}
   773  
   774  	sequences := []*isolatingRunSequence{}
   775  
   776  	var currentRunSequence []int
   777  
   778  	for _, run := range levelRuns {
   779  		first := run[0]
   780  		if p.initialTypes[first] != _PDI || p.matchingIsolateInitiator[first] == -1 {
   781  			currentRunSequence = nil
   782  			// int run = i;
   783  			for {
   784  				// Copy this level run into currentRunSequence
   785  				currentRunSequence = append(currentRunSequence, run...)
   786  
   787  				last := currentRunSequence[len(currentRunSequence)-1]
   788  				lastT := p.initialTypes[last]
   789  				if lastT.in(_LRI, _RLI, _FSI) && p.matchingPDI[last] != p.Len() {
   790  					run = levelRuns[runForCharacter[p.matchingPDI[last]]]
   791  				} else {
   792  					break
   793  				}
   794  			}
   795  			sequences = append(sequences, p.isolatingRunSequence(currentRunSequence))
   796  		}
   797  	}
   798  	return sequences
   799  }
   800  
   801  // Assign level information to characters removed by rule X9. This is for
   802  // ease of relating the level information to the original input data. Note
   803  // that the levels assigned to these codes are arbitrary, they're chosen so
   804  // as to avoid breaking level runs.
   805  func (p *paragraph) assignLevelsToCharactersRemovedByX9() {
   806  	for i, t := range p.initialTypes {
   807  		if t.in(_LRE, _RLE, _LRO, _RLO, _PDF, _BN) {
   808  			p.resultTypes[i] = t
   809  			p.resultLevels[i] = -1
   810  		}
   811  	}
   812  	// now propagate forward the levels information (could have
   813  	// propagated backward, the main thing is not to introduce a level
   814  	// break where one doesn't already exist).
   815  
   816  	if p.resultLevels[0] == -1 {
   817  		p.resultLevels[0] = p.embeddingLevel
   818  	}
   819  	for i := 1; i < len(p.initialTypes); i++ {
   820  		if p.resultLevels[i] == -1 {
   821  			p.resultLevels[i] = p.resultLevels[i-1]
   822  		}
   823  	}
   824  	// Embedding information is for informational purposes only so need not be
   825  	// adjusted.
   826  }
   827  
   828  //
   829  // Output
   830  //
   831  
   832  // getLevels computes levels array breaking lines at offsets in linebreaks.
   833  // Rule L1.
   834  //
   835  // The linebreaks array must include at least one value. The values must be
   836  // in strictly increasing order (no duplicates) between 1 and the length of
   837  // the text, inclusive. The last value must be the length of the text.
   838  func (p *paragraph) getLevels(linebreaks []int) []level {
   839  	// Note that since the previous processing has removed all
   840  	// P, S, and WS values from resultTypes, the values referred to
   841  	// in these rules are the initial types, before any processing
   842  	// has been applied (including processing of overrides).
   843  	//
   844  	// This example implementation has reinserted explicit format codes
   845  	// and BN, in order that the levels array correspond to the
   846  	// initial text. Their final placement is not normative.
   847  	// These codes are treated like WS in this implementation,
   848  	// so they don't interrupt sequences of WS.
   849  
   850  	validateLineBreaks(linebreaks, p.Len())
   851  
   852  	result := append([]level(nil), p.resultLevels...)
   853  
   854  	// don't worry about linebreaks since if there is a break within
   855  	// a series of WS values preceding S, the linebreak itself
   856  	// causes the reset.
   857  	for i, t := range p.initialTypes {
   858  		if t.in(_B, _S) {
   859  			// Rule L1, clauses one and two.
   860  			result[i] = p.embeddingLevel
   861  
   862  			// Rule L1, clause three.
   863  			for j := i - 1; j >= 0; j-- {
   864  				if isWhitespace(p.initialTypes[j]) { // including format codes
   865  					result[j] = p.embeddingLevel
   866  				} else {
   867  					break
   868  				}
   869  			}
   870  		}
   871  	}
   872  
   873  	// Rule L1, clause four.
   874  	start := 0
   875  	for _, limit := range linebreaks {
   876  		for j := limit - 1; j >= start; j-- {
   877  			if isWhitespace(p.initialTypes[j]) { // including format codes
   878  				result[j] = p.embeddingLevel
   879  			} else {
   880  				break
   881  			}
   882  		}
   883  		start = limit
   884  	}
   885  
   886  	return result
   887  }
   888  
   889  // getReordering returns the reordering of lines from a visual index to a
   890  // logical index for line breaks at the given offsets.
   891  //
   892  // Lines are concatenated from left to right. So for example, the fifth
   893  // character from the left on the third line is
   894  //
   895  // 		getReordering(linebreaks)[linebreaks[1] + 4]
   896  //
   897  // (linebreaks[1] is the position after the last character of the second
   898  // line, which is also the index of the first character on the third line,
   899  // and adding four gets the fifth character from the left).
   900  //
   901  // The linebreaks array must include at least one value. The values must be
   902  // in strictly increasing order (no duplicates) between 1 and the length of
   903  // the text, inclusive. The last value must be the length of the text.
   904  func (p *paragraph) getReordering(linebreaks []int) []int {
   905  	validateLineBreaks(linebreaks, p.Len())
   906  
   907  	return computeMultilineReordering(p.getLevels(linebreaks), linebreaks)
   908  }
   909  
   910  // Return multiline reordering array for a given level array. Reordering
   911  // does not occur across a line break.
   912  func computeMultilineReordering(levels []level, linebreaks []int) []int {
   913  	result := make([]int, len(levels))
   914  
   915  	start := 0
   916  	for _, limit := range linebreaks {
   917  		tempLevels := make([]level, limit-start)
   918  		copy(tempLevels, levels[start:])
   919  
   920  		for j, order := range computeReordering(tempLevels) {
   921  			result[start+j] = order + start
   922  		}
   923  		start = limit
   924  	}
   925  	return result
   926  }
   927  
   928  // Return reordering array for a given level array. This reorders a single
   929  // line. The reordering is a visual to logical map. For example, the
   930  // leftmost char is string.charAt(order[0]). Rule L2.
   931  func computeReordering(levels []level) []int {
   932  	result := make([]int, len(levels))
   933  	// initialize order
   934  	for i := range result {
   935  		result[i] = i
   936  	}
   937  
   938  	// locate highest level found on line.
   939  	// Note the rules say text, but no reordering across line bounds is
   940  	// performed, so this is sufficient.
   941  	highestLevel := level(0)
   942  	lowestOddLevel := level(maxDepth + 2)
   943  	for _, level := range levels {
   944  		if level > highestLevel {
   945  			highestLevel = level
   946  		}
   947  		if level&1 != 0 && level < lowestOddLevel {
   948  			lowestOddLevel = level
   949  		}
   950  	}
   951  
   952  	for level := highestLevel; level >= lowestOddLevel; level-- {
   953  		for i := 0; i < len(levels); i++ {
   954  			if levels[i] >= level {
   955  				// find range of text at or above this level
   956  				start := i
   957  				limit := i + 1
   958  				for limit < len(levels) && levels[limit] >= level {
   959  					limit++
   960  				}
   961  
   962  				for j, k := start, limit-1; j < k; j, k = j+1, k-1 {
   963  					result[j], result[k] = result[k], result[j]
   964  				}
   965  				// skip to end of level run
   966  				i = limit
   967  			}
   968  		}
   969  	}
   970  
   971  	return result
   972  }
   973  
   974  // isWhitespace reports whether the type is considered a whitespace type for the
   975  // line break rules.
   976  func isWhitespace(c class) bool {
   977  	switch c {
   978  	case _LRE, _RLE, _LRO, _RLO, _PDF, _LRI, _RLI, _FSI, _PDI, _BN, _WS:
   979  		return true
   980  	}
   981  	return false
   982  }
   983  
   984  // isRemovedByX9 reports whether the type is one of the types removed in X9.
   985  func isRemovedByX9(c class) bool {
   986  	switch c {
   987  	case _LRE, _RLE, _LRO, _RLO, _PDF, _BN:
   988  		return true
   989  	}
   990  	return false
   991  }
   992  
   993  // typeForLevel reports the strong type (L or R) corresponding to the level.
   994  func typeForLevel(level level) class {
   995  	if (level & 0x1) == 0 {
   996  		return _L
   997  	}
   998  	return _R
   999  }
  1000  
  1001  // TODO: change validation to not panic
  1002  
  1003  func validateTypes(types []class) {
  1004  	if len(types) == 0 {
  1005  		log.Panic("types is null")
  1006  	}
  1007  	for i, t := range types[:len(types)-1] {
  1008  		if t == _B {
  1009  			log.Panicf("B type before end of paragraph at index: %d", i)
  1010  		}
  1011  	}
  1012  }
  1013  
  1014  func validateParagraphEmbeddingLevel(embeddingLevel level) {
  1015  	if embeddingLevel != implicitLevel &&
  1016  		embeddingLevel != 0 &&
  1017  		embeddingLevel != 1 {
  1018  		log.Panicf("illegal paragraph embedding level: %d", embeddingLevel)
  1019  	}
  1020  }
  1021  
  1022  func validateLineBreaks(linebreaks []int, textLength int) {
  1023  	prev := 0
  1024  	for i, next := range linebreaks {
  1025  		if next <= prev {
  1026  			log.Panicf("bad linebreak: %d at index: %d", next, i)
  1027  		}
  1028  		prev = next
  1029  	}
  1030  	if prev != textLength {
  1031  		log.Panicf("last linebreak was %d, want %d", prev, textLength)
  1032  	}
  1033  }
  1034  
  1035  func validatePbTypes(pairTypes []bracketType) {
  1036  	if len(pairTypes) == 0 {
  1037  		log.Panic("pairTypes is null")
  1038  	}
  1039  	for i, pt := range pairTypes {
  1040  		switch pt {
  1041  		case bpNone, bpOpen, bpClose:
  1042  		default:
  1043  			log.Panicf("illegal pairType value at %d: %v", i, pairTypes[i])
  1044  		}
  1045  	}
  1046  }
  1047  
  1048  func validatePbValues(pairValues []rune, pairTypes []bracketType) {
  1049  	if pairValues == nil {
  1050  		log.Panic("pairValues is null")
  1051  	}
  1052  	if len(pairTypes) != len(pairValues) {
  1053  		log.Panic("pairTypes is different length from pairValues")
  1054  	}
  1055  }