github.com/icodeface/tls@v0.0.0-20230910023335-34df9250cd12/internal/x/text/unicode/bidi/bracket.go (about) 1 // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. 2 3 // Copyright 2015 The Go Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 package bidi 8 9 import ( 10 "container/list" 11 "fmt" 12 "sort" 13 ) 14 15 // This file contains a port of the reference implementation of the 16 // Bidi Parentheses Algorithm: 17 // http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java 18 // 19 // The implementation in this file covers definitions BD14-BD16 and rule N0 20 // of UAX#9. 21 // 22 // Some preprocessing is done for each rune before data is passed to this 23 // algorithm: 24 // - opening and closing brackets are identified 25 // - a bracket pair type, like '(' and ')' is assigned a unique identifier that 26 // is identical for the opening and closing bracket. It is left to do these 27 // mappings. 28 // - The BPA algorithm requires that bracket characters that are canonical 29 // equivalents of each other be able to be substituted for each other. 30 // It is the responsibility of the caller to do this canonicalization. 31 // 32 // In implementing BD16, this implementation departs slightly from the "logical" 33 // algorithm defined in UAX#9. In particular, the stack referenced there 34 // supports operations that go beyond a "basic" stack. An equivalent 35 // implementation based on a linked list is used here. 36 37 // Bidi_Paired_Bracket_Type 38 // BD14. An opening paired bracket is a character whose 39 // Bidi_Paired_Bracket_Type property value is Open. 40 // 41 // BD15. A closing paired bracket is a character whose 42 // Bidi_Paired_Bracket_Type property value is Close. 
43 type bracketType byte 44 45 const ( 46 bpNone bracketType = iota 47 bpOpen 48 bpClose 49 ) 50 51 // bracketPair holds a pair of index values for opening and closing bracket 52 // location of a bracket pair. 53 type bracketPair struct { 54 opener int 55 closer int 56 } 57 58 func (b *bracketPair) String() string { 59 return fmt.Sprintf("(%v, %v)", b.opener, b.closer) 60 } 61 62 // bracketPairs is a slice of bracketPairs with a sort.Interface implementation. 63 type bracketPairs []bracketPair 64 65 func (b bracketPairs) Len() int { return len(b) } 66 func (b bracketPairs) Swap(i, j int) { b[i], b[j] = b[j], b[i] } 67 func (b bracketPairs) Less(i, j int) bool { return b[i].opener < b[j].opener } 68 69 // resolvePairedBrackets runs the paired bracket part of the UBA algorithm. 70 // 71 // For each rune, it takes the indexes into the original string, the class the 72 // bracket type (in pairTypes) and the bracket identifier (pairValues). It also 73 // takes the direction type for the start-of-sentence and the embedding level. 74 // 75 // The identifiers for bracket types are the rune of the canonicalized opening 76 // bracket for brackets (open or close) or 0 for runes that are not brackets. 77 func resolvePairedBrackets(s *isolatingRunSequence) { 78 p := bracketPairer{ 79 sos: s.sos, 80 openers: list.New(), 81 codesIsolatedRun: s.types, 82 indexes: s.indexes, 83 } 84 dirEmbed := L 85 if s.level&1 != 0 { 86 dirEmbed = R 87 } 88 p.locateBrackets(s.p.pairTypes, s.p.pairValues) 89 p.resolveBrackets(dirEmbed, s.p.initialTypes) 90 } 91 92 type bracketPairer struct { 93 sos Class // direction corresponding to start of sequence 94 95 // The following is a restatement of BD 16 using non-algorithmic language. 96 // 97 // A bracket pair is a pair of characters consisting of an opening 98 // paired bracket and a closing paired bracket such that the 99 // Bidi_Paired_Bracket property value of the former equals the latter, 100 // subject to the following constraints. 
101 // - both characters of a pair occur in the same isolating run sequence 102 // - the closing character of a pair follows the opening character 103 // - any bracket character can belong at most to one pair, the earliest possible one 104 // - any bracket character not part of a pair is treated like an ordinary character 105 // - pairs may nest properly, but their spans may not overlap otherwise 106 107 // Bracket characters with canonical decompositions are supposed to be 108 // treated as if they had been normalized, to allow normalized and non- 109 // normalized text to give the same result. In this implementation that step 110 // is pushed out to the caller. The caller has to ensure that the pairValue 111 // slices contain the rune of the opening bracket after normalization for 112 // any opening or closing bracket. 113 114 openers *list.List // list of positions for opening brackets 115 116 // bracket pair positions sorted by location of opening bracket 117 pairPositions bracketPairs 118 119 codesIsolatedRun []Class // directional bidi codes for an isolated run 120 indexes []int // array of index values into the original string 121 122 } 123 124 // matchOpener reports whether characters at given positions form a matching 125 // bracket pair. 126 func (p *bracketPairer) matchOpener(pairValues []rune, opener, closer int) bool { 127 return pairValues[p.indexes[opener]] == pairValues[p.indexes[closer]] 128 } 129 130 const maxPairingDepth = 63 131 132 // locateBrackets locates matching bracket pairs according to BD16. 133 // 134 // This implementation uses a linked list instead of a stack, because, while 135 // elements are added at the front (like a push) they are not generally removed 136 // in atomic 'pop' operations, reducing the benefit of the stack archetype. 
137 func (p *bracketPairer) locateBrackets(pairTypes []bracketType, pairValues []rune) { 138 // traverse the run 139 // do that explicitly (not in a for-each) so we can record position 140 for i, index := range p.indexes { 141 142 // look at the bracket type for each character 143 if pairTypes[index] == bpNone || p.codesIsolatedRun[i] != ON { 144 // continue scanning 145 continue 146 } 147 switch pairTypes[index] { 148 case bpOpen: 149 // check if maximum pairing depth reached 150 if p.openers.Len() == maxPairingDepth { 151 p.openers.Init() 152 return 153 } 154 // remember opener location, most recent first 155 p.openers.PushFront(i) 156 157 case bpClose: 158 // see if there is a match 159 count := 0 160 for elem := p.openers.Front(); elem != nil; elem = elem.Next() { 161 count++ 162 opener := elem.Value.(int) 163 if p.matchOpener(pairValues, opener, i) { 164 // if the opener matches, add nested pair to the ordered list 165 p.pairPositions = append(p.pairPositions, bracketPair{opener, i}) 166 // remove up to and including matched opener 167 for ; count > 0; count-- { 168 p.openers.Remove(p.openers.Front()) 169 } 170 break 171 } 172 } 173 sort.Sort(p.pairPositions) 174 // if we get here, the closing bracket matched no openers 175 // and gets ignored 176 } 177 } 178 } 179 180 // Bracket pairs within an isolating run sequence are processed as units so 181 // that both the opening and the closing paired bracket in a pair resolve to 182 // the same direction. 183 // 184 // N0. Process bracket pairs in an isolating run sequence sequentially in 185 // the logical order of the text positions of the opening paired brackets 186 // using the logic given below. Within this scope, bidirectional types EN 187 // and AN are treated as R. 188 // 189 // Identify the bracket pairs in the current isolating run sequence 190 // according to BD16. 
// For each bracket-pair element in the list of pairs of
// text positions:
//
// a Inspect the bidirectional types of the characters enclosed within the
// bracket pair.
//
// b If any strong type (either L or R) matching the embedding direction is
// found, set the type for both brackets in the pair to match the embedding
// direction.
//
// o [ e ] o -> o e e e o
//
// o [ o e ] -> o e o e e
//
// o [ NI e ] -> o e NI e e
//
// c Otherwise, if a strong type (opposite the embedding direction) is
// found, test for adjacent strong types as follows: 1 First, check
// backwards before the opening paired bracket until the first strong type
// (L, R, or sos) is found. If that first preceding strong type is opposite
// the embedding direction, then set the type for both brackets in the pair
// to that type. 2 Otherwise, set the type for both brackets in the pair to
// the embedding direction.
//
// o [ o ] e -> o o o o e
//
// o [ o NI ] o -> o o o NI o o
//
// e [ o ] o -> e e o e o
//
// e [ o ] e -> e e o e e
//
// e ( o [ o ] NI ) e -> e e o o o o NI e e
//
// d Otherwise, do not set the type for the current bracket pair. Note that
// if the enclosed text contains no strong types the paired brackets will
// both resolve to the same level when resolved individually using rules N1
// and N2.
//
// e ( NI ) o -> e ( NI ) o

// getStrongTypeN0 maps a character's directional code to strong type as
// required by rule N0.
//
// TODO: have separate type for "strong" directionality.
235 func (p *bracketPairer) getStrongTypeN0(index int) Class { 236 switch p.codesIsolatedRun[index] { 237 // in the scope of N0, number types are treated as R 238 case EN, AN, AL, R: 239 return R 240 case L: 241 return L 242 default: 243 return ON 244 } 245 } 246 247 // classifyPairContent reports the strong types contained inside a Bracket Pair, 248 // assuming the given embedding direction. 249 // 250 // It returns ON if no strong type is found. If a single strong type is found, 251 // it returns this this type. Otherwise it returns the embedding direction. 252 // 253 // TODO: use separate type for "strong" directionality. 254 func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed Class) Class { 255 dirOpposite := ON 256 for i := loc.opener + 1; i < loc.closer; i++ { 257 dir := p.getStrongTypeN0(i) 258 if dir == ON { 259 continue 260 } 261 if dir == dirEmbed { 262 return dir // type matching embedding direction found 263 } 264 dirOpposite = dir 265 } 266 // return ON if no strong type found, or class opposite to dirEmbed 267 return dirOpposite 268 } 269 270 // classBeforePair determines which strong types are present before a Bracket 271 // Pair. Return R or L if strong type found, otherwise ON. 272 func (p *bracketPairer) classBeforePair(loc bracketPair) Class { 273 for i := loc.opener - 1; i >= 0; i-- { 274 if dir := p.getStrongTypeN0(i); dir != ON { 275 return dir 276 } 277 } 278 // no strong types found, return sos 279 return p.sos 280 } 281 282 // assignBracketType implements rule N0 for a single bracket pair. 
283 func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class, initialTypes []Class) { 284 // rule "N0, a", inspect contents of pair 285 dirPair := p.classifyPairContent(loc, dirEmbed) 286 287 // dirPair is now L, R, or N (no strong type found) 288 289 // the following logical tests are performed out of order compared to 290 // the statement of the rules but yield the same results 291 if dirPair == ON { 292 return // case "d" - nothing to do 293 } 294 295 if dirPair != dirEmbed { 296 // case "c": strong type found, opposite - check before (c.1) 297 dirPair = p.classBeforePair(loc) 298 if dirPair == dirEmbed || dirPair == ON { 299 // no strong opposite type found before - use embedding (c.2) 300 dirPair = dirEmbed 301 } 302 } 303 // else: case "b", strong type found matching embedding, 304 // no explicit action needed, as dirPair is already set to embedding 305 // direction 306 307 // set the bracket types to the type found 308 p.setBracketsToType(loc, dirPair, initialTypes) 309 } 310 311 func (p *bracketPairer) setBracketsToType(loc bracketPair, dirPair Class, initialTypes []Class) { 312 p.codesIsolatedRun[loc.opener] = dirPair 313 p.codesIsolatedRun[loc.closer] = dirPair 314 315 for i := loc.opener + 1; i < loc.closer; i++ { 316 index := p.indexes[i] 317 if initialTypes[index] != NSM { 318 break 319 } 320 p.codesIsolatedRun[i] = dirPair 321 } 322 323 for i := loc.closer + 1; i < len(p.indexes); i++ { 324 index := p.indexes[i] 325 if initialTypes[index] != NSM { 326 break 327 } 328 p.codesIsolatedRun[i] = dirPair 329 } 330 } 331 332 // resolveBrackets implements rule N0 for a list of pairs. 333 func (p *bracketPairer) resolveBrackets(dirEmbed Class, initialTypes []Class) { 334 for _, loc := range p.pairPositions { 335 p.assignBracketType(loc, dirEmbed, initialTypes) 336 } 337 }