// Path: github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/unicode/bidi/bracket.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package bidi

import (
	"container/list"
	"fmt"
	"sort"
)

// This file contains a port of the reference implementation of the
// Bidi Parentheses Algorithm:
// http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java
//
// The implementation in this file covers definitions BD14-BD16 and rule N0
// of UAX#9.
//
// Some preprocessing is done for each rune before data is passed to this
// algorithm:
//  - opening and closing brackets are identified
//  - a bracket pair type, like '(' and ')', is assigned a unique identifier
//    that is identical for the opening and closing bracket. It is left to
//    the caller to do these mappings.
//  - The BPA algorithm requires that bracket characters that are canonical
//    equivalents of each other be able to be substituted for each other.
//    It is the responsibility of the caller to do this canonicalization.
//
// In implementing BD16, this implementation departs slightly from the "logical"
// algorithm defined in UAX#9. In particular, the stack referenced there
// supports operations that go beyond a "basic" stack. An equivalent
// implementation based on a linked list is used here.

// bracketType is the Bidi_Paired_Bracket_Type of a character.
//
// BD14. An opening paired bracket is a character whose
//       Bidi_Paired_Bracket_Type property value is Open.
//
// BD15. A closing paired bracket is a character whose
//       Bidi_Paired_Bracket_Type property value is Close.
type bracketType byte

const (
	bpNone  bracketType = iota // not a paired bracket; skipped when locating pairs
	bpOpen                     // opening paired bracket (BD14)
	bpClose                    // closing paired bracket (BD15)
)

// bracketPair holds a pair of index values for opening and closing bracket
// location of a bracket pair.
51 type bracketPair struct { 52 opener int 53 closer int 54 } 55 56 func (b *bracketPair) String() string { 57 return fmt.Sprintf("(%v, %v)", b.opener, b.closer) 58 } 59 60 // bracketPairs is a slice of bracketPairs with a sort.Interface implementation. 61 type bracketPairs []bracketPair 62 63 func (b bracketPairs) Len() int { return len(b) } 64 func (b bracketPairs) Swap(i, j int) { b[i], b[j] = b[j], b[i] } 65 func (b bracketPairs) Less(i, j int) bool { return b[i].opener < b[j].opener } 66 67 // resolvePairedBrackets runs the paired bracket part of the UBA algorithm. 68 // 69 // For each rune, it takes the indexes into the original string, the class the 70 // bracket type (in pairTypes) and the bracket identifier (pairValues). It also 71 // takes the direction type for the start-of-sentence and the embedding level. 72 // 73 // The identifiers for bracket types are the rune of the canonicalized opening 74 // bracket for brackets (open or close) or 0 for runes that are not brackets. 75 func resolvePairedBrackets(s *isolatingRunSequence) { 76 p := bracketPairer{ 77 sos: s.sos, 78 openers: list.New(), 79 codesIsolatedRun: s.types, 80 indexes: s.indexes, 81 } 82 dirEmbed := _L 83 if s.level&1 != 0 { 84 dirEmbed = _R 85 } 86 p.locateBrackets(s.p.pairTypes, s.p.pairValues) 87 p.resolveBrackets(dirEmbed) 88 } 89 90 type bracketPairer struct { 91 sos class // direction corresponding to start of sequence 92 93 // The following is a restatement of BD 16 using non-algorithmic language. 94 // 95 // A bracket pair is a pair of characters consisting of an opening 96 // paired bracket and a closing paired bracket such that the 97 // Bidi_Paired_Bracket property value of the former equals the latter, 98 // subject to the following constraints. 
99 // - both characters of a pair occur in the same isolating run sequence 100 // - the closing character of a pair follows the opening character 101 // - any bracket character can belong at most to one pair, the earliest possible one 102 // - any bracket character not part of a pair is treated like an ordinary character 103 // - pairs may nest properly, but their spans may not overlap otherwise 104 105 // Bracket characters with canonical decompositions are supposed to be 106 // treated as if they had been normalized, to allow normalized and non- 107 // normalized text to give the same result. In this implementation that step 108 // is pushed out to the caller. The caller has to ensure that the pairValue 109 // slices contain the rune of the opening bracket after normalization for 110 // any opening or closing bracket. 111 112 openers *list.List // list of positions for opening brackets 113 114 // bracket pair positions sorted by location of opening bracket 115 pairPositions bracketPairs 116 117 codesIsolatedRun []class // directional bidi codes for an isolated run 118 indexes []int // array of index values into the original string 119 120 } 121 122 // matchOpener reports whether characters at given positions form a matching 123 // bracket pair. 124 func (p *bracketPairer) matchOpener(pairValues []rune, opener, closer int) bool { 125 return pairValues[p.indexes[opener]] == pairValues[p.indexes[closer]] 126 } 127 128 // locateBrackets locates matching bracket pairs according to BD16. 129 // 130 // This implementation uses a linked list instead of a stack, because, while 131 // elements are added at the front (like a push) they are not generally removed 132 // in atomic 'pop' operations, reducing the benefit of the stack archetype. 
133 func (p *bracketPairer) locateBrackets(pairTypes []bracketType, pairValues []rune) { 134 // traverse the run 135 // do that explicitly (not in a for-each) so we can record position 136 for i, index := range p.indexes { 137 138 // look at the bracket type for each character 139 switch pairTypes[index] { 140 case bpNone: 141 // continue scanning 142 143 case bpOpen: 144 // remember opener location, most recent first 145 p.openers.PushFront(i) 146 147 case bpClose: 148 // see if there is a match 149 count := 0 150 for elem := p.openers.Front(); elem != nil; elem = elem.Next() { 151 count++ 152 opener := elem.Value.(int) 153 if p.matchOpener(pairValues, opener, i) { 154 // if the opener matches, add nested pair to the ordered list 155 p.pairPositions = append(p.pairPositions, bracketPair{opener, i}) 156 // remove up to and including matched opener 157 for ; count > 0; count-- { 158 p.openers.Remove(p.openers.Front()) 159 } 160 break 161 } 162 } 163 sort.Sort(p.pairPositions) 164 // if we get here, the closing bracket matched no openers 165 // and gets ignored 166 } 167 } 168 } 169 170 // Bracket pairs within an isolating run sequence are processed as units so 171 // that both the opening and the closing paired bracket in a pair resolve to 172 // the same direction. 173 // 174 // N0. Process bracket pairs in an isolating run sequence sequentially in 175 // the logical order of the text positions of the opening paired brackets 176 // using the logic given below. Within this scope, bidirectional types EN 177 // and AN are treated as R. 178 // 179 // Identify the bracket pairs in the current isolating run sequence 180 // according to BD16. For each bracket-pair element in the list of pairs of 181 // text positions: 182 // 183 // a Inspect the bidirectional types of the characters enclosed within the 184 // bracket pair. 
185 // 186 // b If any strong type (either L or R) matching the embedding direction is 187 // found, set the type for both brackets in the pair to match the embedding 188 // direction. 189 // 190 // o [ e ] o -> o e e e o 191 // 192 // o [ o e ] -> o e o e e 193 // 194 // o [ NI e ] -> o e NI e e 195 // 196 // c Otherwise, if a strong type (opposite the embedding direction) is 197 // found, test for adjacent strong types as follows: 1 First, check 198 // backwards before the opening paired bracket until the first strong type 199 // (L, R, or sos) is found. If that first preceding strong type is opposite 200 // the embedding direction, then set the type for both brackets in the pair 201 // to that type. 2 Otherwise, set the type for both brackets in the pair to 202 // the embedding direction. 203 // 204 // o [ o ] e -> o o o o e 205 // 206 // o [ o NI ] o -> o o o NI o o 207 // 208 // e [ o ] o -> e e o e o 209 // 210 // e [ o ] e -> e e o e e 211 // 212 // e ( o [ o ] NI ) e -> e e o o o o NI e e 213 // 214 // d Otherwise, do not set the type for the current bracket pair. Note that 215 // if the enclosed text contains no strong types the paired brackets will 216 // both resolve to the same level when resolved individually using rules N1 217 // and N2. 218 // 219 // e ( NI ) o -> e ( NI ) o 220 221 // getStrongTypeN0 maps character's directional code to strong type as required 222 // by rule N0. 223 // 224 // TODO: have separate type for "strong" directionality. 225 func (p *bracketPairer) getStrongTypeN0(index int) class { 226 switch p.codesIsolatedRun[index] { 227 // in the scope of N0, number types are treated as R 228 case _EN, _AN, _AL, _R: 229 return _R 230 case _L: 231 return _L 232 default: 233 return _ON 234 } 235 } 236 237 // classifyPairContent reports the strong types contained inside a Bracket Pair, 238 // assuming the given embedding direction. 239 // 240 // It returns _ON if no strong type is found. 
If a single strong type is found, 241 // it returns this this type. Otherwise it returns the embedding direction. 242 // 243 // TODO: use separate type for "strong" directionality. 244 func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed class) class { 245 dirOpposite := _ON 246 for i := loc.opener + 1; i < loc.closer; i++ { 247 dir := p.getStrongTypeN0(i) 248 if dir == _ON { 249 continue 250 } 251 if dir == dirEmbed { 252 return dir // type matching embedding direction found 253 } 254 dirOpposite = dir 255 } 256 // return ON if no strong type found, or class opposite to dirEmbed 257 return dirOpposite 258 } 259 260 // classBeforePair determines which strong types are present before a Bracket 261 // Pair. Return R or L if strong type found, otherwise ON. 262 func (p *bracketPairer) classBeforePair(loc bracketPair) class { 263 for i := loc.opener - 1; i >= 0; i-- { 264 if dir := p.getStrongTypeN0(i); dir != _ON { 265 return dir 266 } 267 } 268 // no strong types found, return sos 269 return p.sos 270 } 271 272 // assignBracketType implements rule N0 for a single bracket pair. 
273 func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed class) { 274 // rule "N0, a", inspect contents of pair 275 dirPair := p.classifyPairContent(loc, dirEmbed) 276 277 // dirPair is now L, R, or N (no strong type found) 278 279 // the following logical tests are performed out of order compared to 280 // the statement of the rules but yield the same results 281 if dirPair == _ON { 282 return // case "d" - nothing to do 283 } 284 285 if dirPair != dirEmbed { 286 // case "c": strong type found, opposite - check before (c.1) 287 dirPair = p.classBeforePair(loc) 288 if dirPair == dirEmbed || dirPair == _ON { 289 // no strong opposite type found before - use embedding (c.2) 290 dirPair = dirEmbed 291 } 292 } 293 // else: case "b", strong type found matching embedding, 294 // no explicit action needed, as dirPair is already set to embedding 295 // direction 296 297 // set the bracket types to the type found 298 p.codesIsolatedRun[loc.opener] = dirPair 299 p.codesIsolatedRun[loc.closer] = dirPair 300 } 301 302 // resolveBrackets implements rule N0 for a list of pairs. 303 func (p *bracketPairer) resolveBrackets(dirEmbed class) { 304 for _, loc := range p.pairPositions { 305 p.assignBracketType(loc, dirEmbed) 306 } 307 }