github.com/icodeface/tls@v0.0.0-20230910023335-34df9250cd12/internal/x/text/unicode/bidi/core.go (about)

     1  // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
     2  
     3  // Copyright 2015 The Go Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  package bidi
     8  
     9  import "log"
    10  
    11  // This implementation is a port based on the reference implementation found at:
    12  // http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
    13  //
    14  // described in Unicode Bidirectional Algorithm (UAX #9).
    15  //
    16  // Input:
    17  // There are two levels of input to the algorithm, since clients may prefer to
    18  // supply some information from out-of-band sources rather than relying on the
    19  // default behavior.
    20  //
    21  // - Bidi class array
    22  // - Bidi class array, with externally supplied base line direction
    23  //
    24  // Output:
    25  // Output is separated into several stages:
    26  //
    27  //  - levels array over entire paragraph
    28  //  - reordering array over entire paragraph
    29  //  - levels array over line
    30  //  - reordering array over line
    31  //
    32  // Note that for conformance to the Unicode Bidirectional Algorithm,
    33  // implementations are only required to generate correct reordering and
    34  // character directionality (odd or even levels) over a line. Generating
    35  // identical level arrays over a line is not required. Bidi explicit format
    36  // codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and
    37  // positions as long as the rest of the input is properly reordered.
    38  //
    39  // As the algorithm is defined to operate on a single paragraph at a time, this
    40  // implementation is written to handle single paragraphs. Thus rule P1 is
    41  // presumed by this implementation-- the data provided to the implementation is
    42  // assumed to be a single paragraph, and either contains no 'B' codes, or a
    43  // single 'B' code at the end of the input. 'B' is allowed as input to
    44  // illustrate how the algorithm assigns it a level.
    45  //
    46  // Also note that rules L3 and L4 depend on the rendering engine that uses the
    47  // result of the bidi algorithm. This implementation assumes that the rendering
    48  // engine expects combining marks in visual order (e.g. to the left of their
    49  // base character in RTL runs) and that it adjusts the glyphs used to render
    50  // mirrored characters that are in RTL runs so that they render appropriately.
    51  
// level is the embedding level of a character. Even embedding levels indicate
// left-to-right order and odd levels indicate right-to-left order. The special
// level of -1 is reserved for undefined order.
type level int8

// implicitLevel is the sentinel level (-1) meaning "not assigned". When a
// paragraph is created with this embedding level, run derives the paragraph
// level from the text instead (rules P2, P3).
const implicitLevel level = -1
    58  
    59  // in returns if x is equal to any of the values in set.
    60  func (c Class) in(set ...Class) bool {
    61  	for _, s := range set {
    62  		if c == s {
    63  			return true
    64  		}
    65  	}
    66  	return false
    67  }
    68  
// A paragraph contains the state of a paragraph: the input supplied to
// newParagraph and the tables computed by run.
type paragraph struct {
	// initialTypes is a private copy of the Bidi classes passed to
	// newParagraph; it is never modified by the algorithm.
	initialTypes []Class

	// Arrays of properties needed for paired bracket evaluation in N0
	pairTypes  []bracketType // paired Bracket types for paragraph
	pairValues []rune        // rune for opening bracket or pbOpen and pbClose; 0 for pbNone

	// embeddingLevel is the paragraph embedding level. If it is implicitLevel
	// on entry to run, it is replaced by the level derived from the text.
	embeddingLevel level // default: = implicitLevel;

	// at the paragraph levels
	resultTypes  []Class // working copy of the types, rewritten as rules are applied
	resultLevels []level // level of each character, allocated by run

	// Index of matching PDI for isolate initiator characters. For other
	// characters, the value of matchingPDI will be set to -1. For isolate
	// initiators with no matching PDI, matchingPDI will be set to the length of
	// the input string.
	matchingPDI []int

	// Index of matching isolate initiator for PDI characters. For other
	// characters, and for PDIs with no matching isolate initiator, the value of
	// matchingIsolateInitiator will be set to -1.
	matchingIsolateInitiator []int
}
    94  
    95  // newParagraph initializes a paragraph. The user needs to supply a few arrays
    96  // corresponding to the preprocessed text input. The types correspond to the
    97  // Unicode BiDi classes for each rune. pairTypes indicates the bracket type for
    98  // each rune. pairValues provides a unique bracket class identifier for each
    99  // rune (suggested is the rune of the open bracket for opening and matching
   100  // close brackets, after normalization). The embedding levels are optional, but
   101  // may be supplied to encode embedding levels of styled text.
   102  //
   103  // TODO: return an error.
   104  func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph {
   105  	validateTypes(types)
   106  	validatePbTypes(pairTypes)
   107  	validatePbValues(pairValues, pairTypes)
   108  	validateParagraphEmbeddingLevel(levels)
   109  
   110  	p := &paragraph{
   111  		initialTypes:   append([]Class(nil), types...),
   112  		embeddingLevel: levels,
   113  
   114  		pairTypes:  pairTypes,
   115  		pairValues: pairValues,
   116  
   117  		resultTypes: append([]Class(nil), types...),
   118  	}
   119  	p.run()
   120  	return p
   121  }
   122  
// Len returns the number of characters in the paragraph, i.e. the length of
// initialTypes.
func (p *paragraph) Len() int { return len(p.initialTypes) }
   124  
   125  // The algorithm. Does not include line-based processing (Rules L1, L2).
   126  // These are applied later in the line-based phase of the algorithm.
   127  func (p *paragraph) run() {
   128  	p.determineMatchingIsolates()
   129  
   130  	// 1) determining the paragraph level
   131  	// Rule P1 is the requirement for entering this algorithm.
   132  	// Rules P2, P3.
   133  	// If no externally supplied paragraph embedding level, use default.
   134  	if p.embeddingLevel == implicitLevel {
   135  		p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len())
   136  	}
   137  
   138  	// Initialize result levels to paragraph embedding level.
   139  	p.resultLevels = make([]level, p.Len())
   140  	setLevels(p.resultLevels, p.embeddingLevel)
   141  
   142  	// 2) Explicit levels and directions
   143  	// Rules X1-X8.
   144  	p.determineExplicitEmbeddingLevels()
   145  
   146  	// Rule X9.
   147  	// We do not remove the embeddings, the overrides, the PDFs, and the BNs
   148  	// from the string explicitly. But they are not copied into isolating run
   149  	// sequences when they are created, so they are removed for all
   150  	// practical purposes.
   151  
   152  	// Rule X10.
   153  	// Run remainder of algorithm one isolating run sequence at a time
   154  	for _, seq := range p.determineIsolatingRunSequences() {
   155  		// 3) resolving weak types
   156  		// Rules W1-W7.
   157  		seq.resolveWeakTypes()
   158  
   159  		// 4a) resolving paired brackets
   160  		// Rule N0
   161  		resolvePairedBrackets(seq)
   162  
   163  		// 4b) resolving neutral types
   164  		// Rules N1-N3.
   165  		seq.resolveNeutralTypes()
   166  
   167  		// 5) resolving implicit embedding levels
   168  		// Rules I1, I2.
   169  		seq.resolveImplicitLevels()
   170  
   171  		// Apply the computed levels and types
   172  		seq.applyLevelsAndTypes()
   173  	}
   174  
   175  	// Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and
   176  	// BNs. This is for convenience, so the resulting level array will have
   177  	// a value for every character.
   178  	p.assignLevelsToCharactersRemovedByX9()
   179  }
   180  
   181  // determineMatchingIsolates determines the matching PDI for each isolate
   182  // initiator and vice versa.
   183  //
   184  // Definition BD9.
   185  //
   186  // At the end of this function:
   187  //
   188  //  - The member variable matchingPDI is set to point to the index of the
   189  //    matching PDI character for each isolate initiator character. If there is
   190  //    no matching PDI, it is set to the length of the input text. For other
   191  //    characters, it is set to -1.
   192  //  - The member variable matchingIsolateInitiator is set to point to the
   193  //    index of the matching isolate initiator character for each PDI character.
   194  //    If there is no matching isolate initiator, or the character is not a PDI,
   195  //    it is set to -1.
   196  func (p *paragraph) determineMatchingIsolates() {
   197  	p.matchingPDI = make([]int, p.Len())
   198  	p.matchingIsolateInitiator = make([]int, p.Len())
   199  
   200  	for i := range p.matchingIsolateInitiator {
   201  		p.matchingIsolateInitiator[i] = -1
   202  	}
   203  
   204  	for i := range p.matchingPDI {
   205  		p.matchingPDI[i] = -1
   206  
   207  		if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) {
   208  			depthCounter := 1
   209  			for j := i + 1; j < p.Len(); j++ {
   210  				if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) {
   211  					depthCounter++
   212  				} else if u == PDI {
   213  					if depthCounter--; depthCounter == 0 {
   214  						p.matchingPDI[i] = j
   215  						p.matchingIsolateInitiator[j] = i
   216  						break
   217  					}
   218  				}
   219  			}
   220  			if p.matchingPDI[i] == -1 {
   221  				p.matchingPDI[i] = p.Len()
   222  			}
   223  		}
   224  	}
   225  }
   226  
   227  // determineParagraphEmbeddingLevel reports the resolved paragraph direction of
   228  // the substring limited by the given range [start, end).
   229  //
   230  // Determines the paragraph level based on rules P2, P3. This is also used
   231  // in rule X5c to find if an FSI should resolve to LRI or RLI.
   232  func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level {
   233  	var strongType Class = unknownClass
   234  
   235  	// Rule P2.
   236  	for i := start; i < end; i++ {
   237  		if t := p.resultTypes[i]; t.in(L, AL, R) {
   238  			strongType = t
   239  			break
   240  		} else if t.in(FSI, LRI, RLI) {
   241  			i = p.matchingPDI[i] // skip over to the matching PDI
   242  			if i > end {
   243  				log.Panic("assert (i <= end)")
   244  			}
   245  		}
   246  	}
   247  	// Rule P3.
   248  	switch strongType {
   249  	case unknownClass: // none found
   250  		// default embedding level when no strong types found is 0.
   251  		return 0
   252  	case L:
   253  		return 0
   254  	default: // AL, R
   255  		return 1
   256  	}
   257  }
   258  
   259  const maxDepth = 125
   260  
   261  // This stack will store the embedding levels and override and isolated
   262  // statuses
   263  type directionalStatusStack struct {
   264  	stackCounter        int
   265  	embeddingLevelStack [maxDepth + 1]level
   266  	overrideStatusStack [maxDepth + 1]Class
   267  	isolateStatusStack  [maxDepth + 1]bool
   268  }
   269  
   270  func (s *directionalStatusStack) empty()     { s.stackCounter = 0 }
   271  func (s *directionalStatusStack) pop()       { s.stackCounter-- }
   272  func (s *directionalStatusStack) depth() int { return s.stackCounter }
   273  
   274  func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) {
   275  	s.embeddingLevelStack[s.stackCounter] = level
   276  	s.overrideStatusStack[s.stackCounter] = overrideStatus
   277  	s.isolateStatusStack[s.stackCounter] = isolateStatus
   278  	s.stackCounter++
   279  }
   280  
   281  func (s *directionalStatusStack) lastEmbeddingLevel() level {
   282  	return s.embeddingLevelStack[s.stackCounter-1]
   283  }
   284  
   285  func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class {
   286  	return s.overrideStatusStack[s.stackCounter-1]
   287  }
   288  
   289  func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool {
   290  	return s.isolateStatusStack[s.stackCounter-1]
   291  }
   292  
// determineExplicitEmbeddingLevels determines explicit levels using rules
// X1 - X8.
//
// It walks resultTypes once while maintaining the directional status stack
// and the overflow / valid-isolate counters. For every character it stores the
// explicit embedding level in resultLevels; while a directional override is in
// effect it also replaces the character's entry in resultTypes with the
// override direction. resultLevels must already have been allocated and
// matchingPDI must already have been computed.
func (p *paragraph) determineExplicitEmbeddingLevels() {
	var stack directionalStatusStack
	var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int

	// Rule X1.
	// The bottom entry carries the paragraph level, no override (ON) and no
	// isolate status.
	stack.push(p.embeddingLevel, ON, false)

	for i, t := range p.resultTypes {
		// Rules X2, X3, X4, X5, X5a, X5b, X5c
		switch t {
		case RLE, LRE, RLO, LRO, RLI, LRI, FSI:
			isIsolate := t.in(RLI, LRI, FSI)
			isRTL := t.in(RLE, RLO, RLI)

			// override if this is an FSI that resolves to RLI
			if t == FSI {
				isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1)
			}
			// An isolate initiator itself stays at the level of the
			// surrounding text and is subject to the surrounding override.
			if isIsolate {
				p.resultLevels[i] = stack.lastEmbeddingLevel()
				if stack.lastDirectionalOverrideStatus() != ON {
					p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
				}
			}

			var newLevel level
			if isRTL {
				// least greater odd
				newLevel = (stack.lastEmbeddingLevel() + 1) | 1
			} else {
				// least greater even
				newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1
			}

			if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 {
				if isIsolate {
					validIsolateCount++
				}
				// Push new embedding level, override status, and isolated
				// status.
				// No check for valid stack counter, since the level check
				// suffices.
				switch t {
				case LRO:
					stack.push(newLevel, L, isIsolate)
				case RLO:
					stack.push(newLevel, R, isIsolate)
				default:
					stack.push(newLevel, ON, isIsolate)
				}
				// Not really part of the spec
				if !isIsolate {
					p.resultLevels[i] = newLevel
				}
			} else {
				// This is an invalid explicit formatting character,
				// so apply the "Otherwise" part of rules X2-X5b.
				if isIsolate {
					overflowIsolateCount++
				} else { // !isIsolate
					if overflowIsolateCount == 0 {
						overflowEmbeddingCount++
					}
				}
			}

		// Rule X6a
		case PDI:
			if overflowIsolateCount > 0 {
				overflowIsolateCount--
			} else if validIsolateCount == 0 {
				// do nothing
			} else {
				overflowEmbeddingCount = 0
				// Drop the embeddings and overrides opened inside the
				// isolate, then the isolate entry itself.
				for !stack.lastDirectionalIsolateStatus() {
					stack.pop()
				}
				stack.pop()
				validIsolateCount--
			}
			p.resultLevels[i] = stack.lastEmbeddingLevel()

		// Rule X7
		case PDF:
			// Not really part of the spec
			p.resultLevels[i] = stack.lastEmbeddingLevel()

			if overflowIsolateCount > 0 {
				// do nothing
			} else if overflowEmbeddingCount > 0 {
				overflowEmbeddingCount--
			} else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 {
				// Only an embedding/override entry is popped, and never the
				// bottom (paragraph) entry.
				stack.pop()
			}

		case B: // paragraph separator.
			// Rule X8.

			// These values are reset for clarity, in this implementation B
			// can only occur as the last code in the array.
			stack.empty()
			overflowIsolateCount = 0
			overflowEmbeddingCount = 0
			validIsolateCount = 0
			p.resultLevels[i] = p.embeddingLevel

		default:
			// Any other character takes the current embedding level and, if
			// an override is active, the override direction as its type.
			p.resultLevels[i] = stack.lastEmbeddingLevel()
			if stack.lastDirectionalOverrideStatus() != ON {
				p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
			}
		}
	}
}
   408  
// An isolatingRunSequence holds the characters of one isolating run sequence
// of a paragraph together with the state used while the weak, neutral and
// implicit-level rules are applied to it.
type isolatingRunSequence struct {
	p *paragraph // paragraph the sequence was taken from

	indexes []int // indexes to the original string

	types          []Class // type of each character using the index
	resolvedLevels []level // resolved levels after application of rules
	level          level   // level of the first character of the sequence
	sos, eos       Class   // start-of-sequence and end-of-sequence types
}

// Len returns the number of characters in the sequence.
func (i *isolatingRunSequence) Len() int { return len(i.indexes) }
   421  
   422  func maxLevel(a, b level) level {
   423  	if a > b {
   424  		return a
   425  	}
   426  	return b
   427  }
   428  
   429  // Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types,
   430  // 			 either L or R, for each isolating run sequence.
   431  func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
   432  	length := len(indexes)
   433  	types := make([]Class, length)
   434  	for i, x := range indexes {
   435  		types[i] = p.resultTypes[x]
   436  	}
   437  
   438  	// assign level, sos and eos
   439  	prevChar := indexes[0] - 1
   440  	for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) {
   441  		prevChar--
   442  	}
   443  	prevLevel := p.embeddingLevel
   444  	if prevChar >= 0 {
   445  		prevLevel = p.resultLevels[prevChar]
   446  	}
   447  
   448  	var succLevel level
   449  	lastType := types[length-1]
   450  	if lastType.in(LRI, RLI, FSI) {
   451  		succLevel = p.embeddingLevel
   452  	} else {
   453  		// the first character after the end of run sequence
   454  		limit := indexes[length-1] + 1
   455  		for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ {
   456  
   457  		}
   458  		succLevel = p.embeddingLevel
   459  		if limit < p.Len() {
   460  			succLevel = p.resultLevels[limit]
   461  		}
   462  	}
   463  	level := p.resultLevels[indexes[0]]
   464  	return &isolatingRunSequence{
   465  		p:       p,
   466  		indexes: indexes,
   467  		types:   types,
   468  		level:   level,
   469  		sos:     typeForLevel(maxLevel(prevLevel, level)),
   470  		eos:     typeForLevel(maxLevel(succLevel, level)),
   471  	}
   472  }
   473  
   474  // Resolving weak types Rules W1-W7.
   475  //
   476  // Note that some weak types (EN, AN) remain after this processing is
   477  // complete.
   478  func (s *isolatingRunSequence) resolveWeakTypes() {
   479  
   480  	// on entry, only these types remain
   481  	s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI)
   482  
   483  	// Rule W1.
   484  	// Changes all NSMs.
   485  	preceedingCharacterType := s.sos
   486  	for i, t := range s.types {
   487  		if t == NSM {
   488  			s.types[i] = preceedingCharacterType
   489  		} else {
   490  			if t.in(LRI, RLI, FSI, PDI) {
   491  				preceedingCharacterType = ON
   492  			}
   493  			preceedingCharacterType = t
   494  		}
   495  	}
   496  
   497  	// Rule W2.
   498  	// EN does not change at the start of the run, because sos != AL.
   499  	for i, t := range s.types {
   500  		if t == EN {
   501  			for j := i - 1; j >= 0; j-- {
   502  				if t := s.types[j]; t.in(L, R, AL) {
   503  					if t == AL {
   504  						s.types[i] = AN
   505  					}
   506  					break
   507  				}
   508  			}
   509  		}
   510  	}
   511  
   512  	// Rule W3.
   513  	for i, t := range s.types {
   514  		if t == AL {
   515  			s.types[i] = R
   516  		}
   517  	}
   518  
   519  	// Rule W4.
   520  	// Since there must be values on both sides for this rule to have an
   521  	// effect, the scan skips the first and last value.
   522  	//
   523  	// Although the scan proceeds left to right, and changes the type
   524  	// values in a way that would appear to affect the computations
   525  	// later in the scan, there is actually no problem. A change in the
   526  	// current value can only affect the value to its immediate right,
   527  	// and only affect it if it is ES or CS. But the current value can
   528  	// only change if the value to its right is not ES or CS. Thus
   529  	// either the current value will not change, or its change will have
   530  	// no effect on the remainder of the analysis.
   531  
   532  	for i := 1; i < s.Len()-1; i++ {
   533  		t := s.types[i]
   534  		if t == ES || t == CS {
   535  			prevSepType := s.types[i-1]
   536  			succSepType := s.types[i+1]
   537  			if prevSepType == EN && succSepType == EN {
   538  				s.types[i] = EN
   539  			} else if s.types[i] == CS && prevSepType == AN && succSepType == AN {
   540  				s.types[i] = AN
   541  			}
   542  		}
   543  	}
   544  
   545  	// Rule W5.
   546  	for i, t := range s.types {
   547  		if t == ET {
   548  			// locate end of sequence
   549  			runStart := i
   550  			runEnd := s.findRunLimit(runStart, ET)
   551  
   552  			// check values at ends of sequence
   553  			t := s.sos
   554  			if runStart > 0 {
   555  				t = s.types[runStart-1]
   556  			}
   557  			if t != EN {
   558  				t = s.eos
   559  				if runEnd < len(s.types) {
   560  					t = s.types[runEnd]
   561  				}
   562  			}
   563  			if t == EN {
   564  				setTypes(s.types[runStart:runEnd], EN)
   565  			}
   566  			// continue at end of sequence
   567  			i = runEnd
   568  		}
   569  	}
   570  
   571  	// Rule W6.
   572  	for i, t := range s.types {
   573  		if t.in(ES, ET, CS) {
   574  			s.types[i] = ON
   575  		}
   576  	}
   577  
   578  	// Rule W7.
   579  	for i, t := range s.types {
   580  		if t == EN {
   581  			// set default if we reach start of run
   582  			prevStrongType := s.sos
   583  			for j := i - 1; j >= 0; j-- {
   584  				t = s.types[j]
   585  				if t == L || t == R { // AL's have been changed to R
   586  					prevStrongType = t
   587  					break
   588  				}
   589  			}
   590  			if prevStrongType == L {
   591  				s.types[i] = L
   592  			}
   593  		}
   594  	}
   595  }
   596  
   597  // 6) resolving neutral types Rules N1-N2.
   598  func (s *isolatingRunSequence) resolveNeutralTypes() {
   599  
   600  	// on entry, only these types can be in resultTypes
   601  	s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI)
   602  
   603  	for i, t := range s.types {
   604  		switch t {
   605  		case WS, ON, B, S, RLI, LRI, FSI, PDI:
   606  			// find bounds of run of neutrals
   607  			runStart := i
   608  			runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI)
   609  
   610  			// determine effective types at ends of run
   611  			var leadType, trailType Class
   612  
   613  			// Note that the character found can only be L, R, AN, or
   614  			// EN.
   615  			if runStart == 0 {
   616  				leadType = s.sos
   617  			} else {
   618  				leadType = s.types[runStart-1]
   619  				if leadType.in(AN, EN) {
   620  					leadType = R
   621  				}
   622  			}
   623  			if runEnd == len(s.types) {
   624  				trailType = s.eos
   625  			} else {
   626  				trailType = s.types[runEnd]
   627  				if trailType.in(AN, EN) {
   628  					trailType = R
   629  				}
   630  			}
   631  
   632  			var resolvedType Class
   633  			if leadType == trailType {
   634  				// Rule N1.
   635  				resolvedType = leadType
   636  			} else {
   637  				// Rule N2.
   638  				// Notice the embedding level of the run is used, not
   639  				// the paragraph embedding level.
   640  				resolvedType = typeForLevel(s.level)
   641  			}
   642  
   643  			setTypes(s.types[runStart:runEnd], resolvedType)
   644  
   645  			// skip over run of (former) neutrals
   646  			i = runEnd
   647  		}
   648  	}
   649  }
   650  
   651  func setLevels(levels []level, newLevel level) {
   652  	for i := range levels {
   653  		levels[i] = newLevel
   654  	}
   655  }
   656  
   657  func setTypes(types []Class, newType Class) {
   658  	for i := range types {
   659  		types[i] = newType
   660  	}
   661  }
   662  
   663  // 7) resolving implicit embedding levels Rules I1, I2.
   664  func (s *isolatingRunSequence) resolveImplicitLevels() {
   665  
   666  	// on entry, only these types can be in resultTypes
   667  	s.assertOnly(L, R, EN, AN)
   668  
   669  	s.resolvedLevels = make([]level, len(s.types))
   670  	setLevels(s.resolvedLevels, s.level)
   671  
   672  	if (s.level & 1) == 0 { // even level
   673  		for i, t := range s.types {
   674  			// Rule I1.
   675  			if t == L {
   676  				// no change
   677  			} else if t == R {
   678  				s.resolvedLevels[i] += 1
   679  			} else { // t == AN || t == EN
   680  				s.resolvedLevels[i] += 2
   681  			}
   682  		}
   683  	} else { // odd level
   684  		for i, t := range s.types {
   685  			// Rule I2.
   686  			if t == R {
   687  				// no change
   688  			} else { // t == L || t == AN || t == EN
   689  				s.resolvedLevels[i] += 1
   690  			}
   691  		}
   692  	}
   693  }
   694  
   695  // Applies the levels and types resolved in rules W1-I2 to the
   696  // resultLevels array.
   697  func (s *isolatingRunSequence) applyLevelsAndTypes() {
   698  	for i, x := range s.indexes {
   699  		s.p.resultTypes[x] = s.types[i]
   700  		s.p.resultLevels[x] = s.resolvedLevels[i]
   701  	}
   702  }
   703  
   704  // Return the limit of the run consisting only of the types in validSet
   705  // starting at index. This checks the value at index, and will return
   706  // index if that value is not in validSet.
   707  func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int {
   708  loop:
   709  	for ; index < len(s.types); index++ {
   710  		t := s.types[index]
   711  		for _, valid := range validSet {
   712  			if t == valid {
   713  				continue loop
   714  			}
   715  		}
   716  		return index // didn't find a match in validSet
   717  	}
   718  	return len(s.types)
   719  }
   720  
   721  // Algorithm validation. Assert that all values in types are in the
   722  // provided set.
   723  func (s *isolatingRunSequence) assertOnly(codes ...Class) {
   724  loop:
   725  	for i, t := range s.types {
   726  		for _, c := range codes {
   727  			if t == c {
   728  				continue loop
   729  			}
   730  		}
   731  		log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i])
   732  	}
   733  }
   734  
   735  // determineLevelRuns returns an array of level runs. Each level run is
   736  // described as an array of indexes into the input string.
   737  //
   738  // Determines the level runs. Rule X9 will be applied in determining the
   739  // runs, in the way that makes sure the characters that are supposed to be
   740  // removed are not included in the runs.
   741  func (p *paragraph) determineLevelRuns() [][]int {
   742  	run := []int{}
   743  	allRuns := [][]int{}
   744  	currentLevel := implicitLevel
   745  
   746  	for i := range p.initialTypes {
   747  		if !isRemovedByX9(p.initialTypes[i]) {
   748  			if p.resultLevels[i] != currentLevel {
   749  				// we just encountered a new run; wrap up last run
   750  				if currentLevel >= 0 { // only wrap it up if there was a run
   751  					allRuns = append(allRuns, run)
   752  					run = nil
   753  				}
   754  				// Start new run
   755  				currentLevel = p.resultLevels[i]
   756  			}
   757  			run = append(run, i)
   758  		}
   759  	}
   760  	// Wrap up the final run, if any
   761  	if len(run) > 0 {
   762  		allRuns = append(allRuns, run)
   763  	}
   764  	return allRuns
   765  }
   766  
   767  // Definition BD13. Determine isolating run sequences.
   768  func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence {
   769  	levelRuns := p.determineLevelRuns()
   770  
   771  	// Compute the run that each character belongs to
   772  	runForCharacter := make([]int, p.Len())
   773  	for i, run := range levelRuns {
   774  		for _, index := range run {
   775  			runForCharacter[index] = i
   776  		}
   777  	}
   778  
   779  	sequences := []*isolatingRunSequence{}
   780  
   781  	var currentRunSequence []int
   782  
   783  	for _, run := range levelRuns {
   784  		first := run[0]
   785  		if p.initialTypes[first] != PDI || p.matchingIsolateInitiator[first] == -1 {
   786  			currentRunSequence = nil
   787  			// int run = i;
   788  			for {
   789  				// Copy this level run into currentRunSequence
   790  				currentRunSequence = append(currentRunSequence, run...)
   791  
   792  				last := currentRunSequence[len(currentRunSequence)-1]
   793  				lastT := p.initialTypes[last]
   794  				if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() {
   795  					run = levelRuns[runForCharacter[p.matchingPDI[last]]]
   796  				} else {
   797  					break
   798  				}
   799  			}
   800  			sequences = append(sequences, p.isolatingRunSequence(currentRunSequence))
   801  		}
   802  	}
   803  	return sequences
   804  }
   805  
   806  // Assign level information to characters removed by rule X9. This is for
   807  // ease of relating the level information to the original input data. Note
   808  // that the levels assigned to these codes are arbitrary, they're chosen so
   809  // as to avoid breaking level runs.
   810  func (p *paragraph) assignLevelsToCharactersRemovedByX9() {
   811  	for i, t := range p.initialTypes {
   812  		if t.in(LRE, RLE, LRO, RLO, PDF, BN) {
   813  			p.resultTypes[i] = t
   814  			p.resultLevels[i] = -1
   815  		}
   816  	}
   817  	// now propagate forward the levels information (could have
   818  	// propagated backward, the main thing is not to introduce a level
   819  	// break where one doesn't already exist).
   820  
   821  	if p.resultLevels[0] == -1 {
   822  		p.resultLevels[0] = p.embeddingLevel
   823  	}
   824  	for i := 1; i < len(p.initialTypes); i++ {
   825  		if p.resultLevels[i] == -1 {
   826  			p.resultLevels[i] = p.resultLevels[i-1]
   827  		}
   828  	}
   829  	// Embedding information is for informational purposes only so need not be
   830  	// adjusted.
   831  }
   832  
   833  //
   834  // Output
   835  //
   836  
   837  // getLevels computes levels array breaking lines at offsets in linebreaks.
   838  // Rule L1.
   839  //
   840  // The linebreaks array must include at least one value. The values must be
   841  // in strictly increasing order (no duplicates) between 1 and the length of
   842  // the text, inclusive. The last value must be the length of the text.
   843  func (p *paragraph) getLevels(linebreaks []int) []level {
   844  	// Note that since the previous processing has removed all
   845  	// P, S, and WS values from resultTypes, the values referred to
   846  	// in these rules are the initial types, before any processing
   847  	// has been applied (including processing of overrides).
   848  	//
   849  	// This example implementation has reinserted explicit format codes
   850  	// and BN, in order that the levels array correspond to the
   851  	// initial text. Their final placement is not normative.
   852  	// These codes are treated like WS in this implementation,
   853  	// so they don't interrupt sequences of WS.
   854  
   855  	validateLineBreaks(linebreaks, p.Len())
   856  
   857  	result := append([]level(nil), p.resultLevels...)
   858  
   859  	// don't worry about linebreaks since if there is a break within
   860  	// a series of WS values preceding S, the linebreak itself
   861  	// causes the reset.
   862  	for i, t := range p.initialTypes {
   863  		if t.in(B, S) {
   864  			// Rule L1, clauses one and two.
   865  			result[i] = p.embeddingLevel
   866  
   867  			// Rule L1, clause three.
   868  			for j := i - 1; j >= 0; j-- {
   869  				if isWhitespace(p.initialTypes[j]) { // including format codes
   870  					result[j] = p.embeddingLevel
   871  				} else {
   872  					break
   873  				}
   874  			}
   875  		}
   876  	}
   877  
   878  	// Rule L1, clause four.
   879  	start := 0
   880  	for _, limit := range linebreaks {
   881  		for j := limit - 1; j >= start; j-- {
   882  			if isWhitespace(p.initialTypes[j]) { // including format codes
   883  				result[j] = p.embeddingLevel
   884  			} else {
   885  				break
   886  			}
   887  		}
   888  		start = limit
   889  	}
   890  
   891  	return result
   892  }
   893  
   894  // getReordering returns the reordering of lines from a visual index to a
   895  // logical index for line breaks at the given offsets.
   896  //
   897  // Lines are concatenated from left to right. So for example, the fifth
   898  // character from the left on the third line is
   899  //
   900  // 		getReordering(linebreaks)[linebreaks[1] + 4]
   901  //
   902  // (linebreaks[1] is the position after the last character of the second
   903  // line, which is also the index of the first character on the third line,
   904  // and adding four gets the fifth character from the left).
   905  //
   906  // The linebreaks array must include at least one value. The values must be
   907  // in strictly increasing order (no duplicates) between 1 and the length of
   908  // the text, inclusive. The last value must be the length of the text.
   909  func (p *paragraph) getReordering(linebreaks []int) []int {
   910  	validateLineBreaks(linebreaks, p.Len())
   911  
   912  	return computeMultilineReordering(p.getLevels(linebreaks), linebreaks)
   913  }
   914  
   915  // Return multiline reordering array for a given level array. Reordering
   916  // does not occur across a line break.
   917  func computeMultilineReordering(levels []level, linebreaks []int) []int {
   918  	result := make([]int, len(levels))
   919  
   920  	start := 0
   921  	for _, limit := range linebreaks {
   922  		tempLevels := make([]level, limit-start)
   923  		copy(tempLevels, levels[start:])
   924  
   925  		for j, order := range computeReordering(tempLevels) {
   926  			result[start+j] = order + start
   927  		}
   928  		start = limit
   929  	}
   930  	return result
   931  }
   932  
   933  // Return reordering array for a given level array. This reorders a single
   934  // line. The reordering is a visual to logical map. For example, the
   935  // leftmost char is string.charAt(order[0]). Rule L2.
   936  func computeReordering(levels []level) []int {
   937  	result := make([]int, len(levels))
   938  	// initialize order
   939  	for i := range result {
   940  		result[i] = i
   941  	}
   942  
   943  	// locate highest level found on line.
   944  	// Note the rules say text, but no reordering across line bounds is
   945  	// performed, so this is sufficient.
   946  	highestLevel := level(0)
   947  	lowestOddLevel := level(maxDepth + 2)
   948  	for _, level := range levels {
   949  		if level > highestLevel {
   950  			highestLevel = level
   951  		}
   952  		if level&1 != 0 && level < lowestOddLevel {
   953  			lowestOddLevel = level
   954  		}
   955  	}
   956  
   957  	for level := highestLevel; level >= lowestOddLevel; level-- {
   958  		for i := 0; i < len(levels); i++ {
   959  			if levels[i] >= level {
   960  				// find range of text at or above this level
   961  				start := i
   962  				limit := i + 1
   963  				for limit < len(levels) && levels[limit] >= level {
   964  					limit++
   965  				}
   966  
   967  				for j, k := start, limit-1; j < k; j, k = j+1, k-1 {
   968  					result[j], result[k] = result[k], result[j]
   969  				}
   970  				// skip to end of level run
   971  				i = limit
   972  			}
   973  		}
   974  	}
   975  
   976  	return result
   977  }
   978  
   979  // isWhitespace reports whether the type is considered a whitespace type for the
   980  // line break rules.
   981  func isWhitespace(c Class) bool {
   982  	switch c {
   983  	case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS:
   984  		return true
   985  	}
   986  	return false
   987  }
   988  
   989  // isRemovedByX9 reports whether the type is one of the types removed in X9.
   990  func isRemovedByX9(c Class) bool {
   991  	switch c {
   992  	case LRE, RLE, LRO, RLO, PDF, BN:
   993  		return true
   994  	}
   995  	return false
   996  }
   997  
   998  // typeForLevel reports the strong type (L or R) corresponding to the level.
   999  func typeForLevel(level level) Class {
  1000  	if (level & 0x1) == 0 {
  1001  		return L
  1002  	}
  1003  	return R
  1004  }
  1005  
  1006  // TODO: change validation to not panic
  1007  
  1008  func validateTypes(types []Class) {
  1009  	if len(types) == 0 {
  1010  		log.Panic("types is null")
  1011  	}
  1012  	for i, t := range types[:len(types)-1] {
  1013  		if t == B {
  1014  			log.Panicf("B type before end of paragraph at index: %d", i)
  1015  		}
  1016  	}
  1017  }
  1018  
  1019  func validateParagraphEmbeddingLevel(embeddingLevel level) {
  1020  	if embeddingLevel != implicitLevel &&
  1021  		embeddingLevel != 0 &&
  1022  		embeddingLevel != 1 {
  1023  		log.Panicf("illegal paragraph embedding level: %d", embeddingLevel)
  1024  	}
  1025  }
  1026  
  1027  func validateLineBreaks(linebreaks []int, textLength int) {
  1028  	prev := 0
  1029  	for i, next := range linebreaks {
  1030  		if next <= prev {
  1031  			log.Panicf("bad linebreak: %d at index: %d", next, i)
  1032  		}
  1033  		prev = next
  1034  	}
  1035  	if prev != textLength {
  1036  		log.Panicf("last linebreak was %d, want %d", prev, textLength)
  1037  	}
  1038  }
  1039  
  1040  func validatePbTypes(pairTypes []bracketType) {
  1041  	if len(pairTypes) == 0 {
  1042  		log.Panic("pairTypes is null")
  1043  	}
  1044  	for i, pt := range pairTypes {
  1045  		switch pt {
  1046  		case bpNone, bpOpen, bpClose:
  1047  		default:
  1048  			log.Panicf("illegal pairType value at %d: %v", i, pairTypes[i])
  1049  		}
  1050  	}
  1051  }
  1052  
  1053  func validatePbValues(pairValues []rune, pairTypes []bracketType) {
  1054  	if pairValues == nil {
  1055  		log.Panic("pairValues is null")
  1056  	}
  1057  	if len(pairTypes) != len(pairValues) {
  1058  		log.Panic("pairTypes is different length from pairValues")
  1059  	}
  1060  }