golang.org/toolchain@v0.0.1-go1.9rc2.windows-amd64/src/vendor/golang_org/x/text/secure/bidirule/bidirule.go (about) 1 // Code generated by running "go run gen.go -core" in golang.org/x/text. DO NOT EDIT. 2 3 // Copyright 2016 The Go Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 // Package bidirule implements the Bidi Rule defined by RFC 5893. 8 // 9 // This package is under development. The API may change without notice and 10 // without preserving backward compatibility. 11 package bidirule 12 13 import ( 14 "errors" 15 "unicode/utf8" 16 17 "golang_org/x/text/transform" 18 "golang_org/x/text/unicode/bidi" 19 ) 20 21 // This file contains an implementation of RFC 5893: Right-to-Left Scripts for 22 // Internationalized Domain Names for Applications (IDNA) 23 // 24 // A label is an individual component of a domain name. Labels are usually 25 // shown separated by dots; for example, the domain name "www.example.com" is 26 // composed of three labels: "www", "example", and "com". 27 // 28 // An RTL label is a label that contains at least one character of class R, AL, 29 // or AN. An LTR label is any label that is not an RTL label. 30 // 31 // A "Bidi domain name" is a domain name that contains at least one RTL label. 32 // 33 // The following guarantees can be made based on the above: 34 // 35 // o In a domain name consisting of only labels that satisfy the rule, 36 // the requirements of Section 3 are satisfied. Note that even LTR 37 // labels and pure ASCII labels have to be tested. 38 // 39 // o In a domain name consisting of only LDH labels (as defined in the 40 // Definitions document [RFC5890]) and labels that satisfy the rule, 41 // the requirements of Section 3 are satisfied as long as a label 42 // that starts with an ASCII digit does not come after a 43 // right-to-left label. 44 // 45 // No guarantee is given for other combinations. 46 47 // ErrInvalid indicates a label is invalid according to the Bidi Rule. 48 var ErrInvalid = errors.New("bidirule: failed Bidi Rule") 49 50 type ruleState uint8 51 52 const ( 53 ruleInitial ruleState = iota 54 ruleLTR 55 ruleLTRFinal 56 ruleRTL 57 ruleRTLFinal 58 ruleInvalid 59 ) 60 61 type ruleTransition struct { 62 next ruleState 63 mask uint16 64 } 65 66 var transitions = [...][2]ruleTransition{ 67 // [2.1] The first character must be a character with Bidi property L, R, or 68 // AL. If it has the R or AL property, it is an RTL label; if it has the L 69 // property, it is an LTR label. 70 ruleInitial: { 71 {ruleLTRFinal, 1 << bidi.L}, 72 {ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL}, 73 }, 74 ruleRTL: { 75 // [2.3] In an RTL label, the end of the label must be a character with 76 // Bidi property R, AL, EN, or AN, followed by zero or more characters 77 // with Bidi property NSM. 78 {ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN}, 79 80 // [2.2] In an RTL label, only characters with the Bidi properties R, 81 // AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed. 82 // We exclude the entries from [2.3] 83 {ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM}, 84 }, 85 ruleRTLFinal: { 86 // [2.3] In an RTL label, the end of the label must be a character with 87 // Bidi property R, AL, EN, or AN, followed by zero or more characters 88 // with Bidi property NSM. 89 {ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN | 1<<bidi.NSM}, 90 91 // [2.2] In an RTL label, only characters with the Bidi properties R, 92 // AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed. 93 // We exclude the entries from [2.3] and NSM. 94 {ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN}, 95 }, 96 ruleLTR: { 97 // [2.6] In an LTR label, the end of the label must be a character with 98 // Bidi property L or EN, followed by zero or more characters with Bidi 99 // property NSM. 100 {ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN}, 101 102 // [2.5] In an LTR label, only characters with the Bidi properties L, 103 // EN, ES, CS, ET, ON, BN, or NSM are allowed. 104 // We exclude the entries from [2.6]. 105 {ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM}, 106 }, 107 ruleLTRFinal: { 108 // [2.6] In an LTR label, the end of the label must be a character with 109 // Bidi property L or EN, followed by zero or more characters with Bidi 110 // property NSM. 111 {ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN | 1<<bidi.NSM}, 112 113 // [2.5] In an LTR label, only characters with the Bidi properties L, 114 // EN, ES, CS, ET, ON, BN, or NSM are allowed. 115 // We exclude the entries from [2.6]. 116 {ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN}, 117 }, 118 ruleInvalid: { 119 {ruleInvalid, 0}, 120 {ruleInvalid, 0}, 121 }, 122 } 123 124 // [2.4] In an RTL label, if an EN is present, no AN may be present, and 125 // vice versa. 126 const exclusiveRTL = uint16(1<<bidi.EN | 1<<bidi.AN) 127 128 // From RFC 5893 129 // An RTL label is a label that contains at least one character of type 130 // R, AL, or AN. 131 // 132 // An LTR label is any label that is not an RTL label. 133 134 // Direction reports the direction of the given label as defined by RFC 5893. 135 // The Bidi Rule does not have to be applied to labels of the category 136 // LeftToRight. 137 func Direction(b []byte) bidi.Direction { 138 for i := 0; i < len(b); { 139 e, sz := bidi.Lookup(b[i:]) 140 if sz == 0 { 141 i++ 142 } 143 c := e.Class() 144 if c == bidi.R || c == bidi.AL || c == bidi.AN { 145 return bidi.RightToLeft 146 } 147 i += sz 148 } 149 return bidi.LeftToRight 150 } 151 152 // DirectionString reports the direction of the given label as defined by RFC 153 // 5893. The Bidi Rule does not have to be applied to labels of the category 154 // LeftToRight. 155 func DirectionString(s string) bidi.Direction { 156 for i := 0; i < len(s); { 157 e, sz := bidi.LookupString(s[i:]) 158 if sz == 0 { 159 i++ 160 } 161 c := e.Class() 162 if c == bidi.R || c == bidi.AL || c == bidi.AN { 163 return bidi.RightToLeft 164 } 165 i += sz 166 } 167 return bidi.LeftToRight 168 } 169 170 // Valid reports whether b conforms to the BiDi rule. 171 func Valid(b []byte) bool { 172 var t Transformer 173 if n, ok := t.advance(b); !ok || n < len(b) { 174 return false 175 } 176 return t.isFinal() 177 } 178 179 // ValidString reports whether s conforms to the BiDi rule. 180 func ValidString(s string) bool { 181 var t Transformer 182 if n, ok := t.advanceString(s); !ok || n < len(s) { 183 return false 184 } 185 return t.isFinal() 186 } 187 188 // New returns a Transformer that verifies that input adheres to the Bidi Rule. 189 func New() *Transformer { 190 return &Transformer{} 191 } 192 193 // Transformer implements transform.Transform. 194 type Transformer struct { 195 state ruleState 196 hasRTL bool 197 seen uint16 198 } 199 200 // A rule can only be violated for "Bidi Domain names", meaning if one of the 201 // following categories has been observed. 202 func (t *Transformer) isRTL() bool { 203 const isRTL = 1<<bidi.R | 1<<bidi.AL | 1<<bidi.AN 204 return t.seen&isRTL != 0 205 } 206 207 func (t *Transformer) isFinal() bool { 208 if !t.isRTL() { 209 return true 210 } 211 return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial 212 } 213 214 // Reset implements transform.Transformer. 215 func (t *Transformer) Reset() { *t = Transformer{} } 216 217 // Transform implements transform.Transformer. This Transformer has state and 218 // needs to be reset between uses. 219 func (t *Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 220 if len(dst) < len(src) { 221 src = src[:len(dst)] 222 atEOF = false 223 err = transform.ErrShortDst 224 } 225 n, err1 := t.Span(src, atEOF) 226 copy(dst, src[:n]) 227 if err == nil || err1 != nil && err1 != transform.ErrShortSrc { 228 err = err1 229 } 230 return n, n, err 231 } 232 233 // Span returns the first n bytes of src that conform to the Bidi rule. 234 func (t *Transformer) Span(src []byte, atEOF bool) (n int, err error) { 235 if t.state == ruleInvalid && t.isRTL() { 236 return 0, ErrInvalid 237 } 238 n, ok := t.advance(src) 239 switch { 240 case !ok: 241 err = ErrInvalid 242 case n < len(src): 243 if !atEOF { 244 err = transform.ErrShortSrc 245 break 246 } 247 err = ErrInvalid 248 case !t.isFinal(): 249 err = ErrInvalid 250 } 251 return n, err 252 } 253 254 // Precomputing the ASCII values decreases running time for the ASCII fast path 255 // by about 30%. 256 var asciiTable [128]bidi.Properties 257 258 func init() { 259 for i := range asciiTable { 260 p, _ := bidi.LookupRune(rune(i)) 261 asciiTable[i] = p 262 } 263 } 264 265 func (t *Transformer) advance(s []byte) (n int, ok bool) { 266 var e bidi.Properties 267 var sz int 268 for n < len(s) { 269 if s[n] < utf8.RuneSelf { 270 e, sz = asciiTable[s[n]], 1 271 } else { 272 e, sz = bidi.Lookup(s[n:]) 273 if sz <= 1 { 274 if sz == 1 { 275 // We always consider invalid UTF-8 to be invalid, even if 276 // the string has not yet been determined to be RTL. 277 // TODO: is this correct? 278 return n, false 279 } 280 return n, true // incomplete UTF-8 encoding 281 } 282 } 283 // TODO: using CompactClass would result in noticeable speedup. 284 // See unicode/bidi/prop.go:Properties.CompactClass. 285 c := uint16(1 << e.Class()) 286 t.seen |= c 287 if t.seen&exclusiveRTL == exclusiveRTL { 288 t.state = ruleInvalid 289 return n, false 290 } 291 switch tr := transitions[t.state]; { 292 case tr[0].mask&c != 0: 293 t.state = tr[0].next 294 case tr[1].mask&c != 0: 295 t.state = tr[1].next 296 default: 297 t.state = ruleInvalid 298 if t.isRTL() { 299 return n, false 300 } 301 } 302 n += sz 303 } 304 return n, true 305 } 306 307 func (t *Transformer) advanceString(s string) (n int, ok bool) { 308 var e bidi.Properties 309 var sz int 310 for n < len(s) { 311 if s[n] < utf8.RuneSelf { 312 e, sz = asciiTable[s[n]], 1 313 } else { 314 e, sz = bidi.LookupString(s[n:]) 315 if sz <= 1 { 316 if sz == 1 { 317 return n, false // invalid UTF-8 318 } 319 return n, true // incomplete UTF-8 encoding 320 } 321 } 322 // TODO: using CompactClass results in noticeable speedup. 323 // See unicode/bidi/prop.go:Properties.CompactClass. 324 c := uint16(1 << e.Class()) 325 t.seen |= c 326 if t.seen&exclusiveRTL == exclusiveRTL { 327 t.state = ruleInvalid 328 return n, false 329 } 330 switch tr := transitions[t.state]; { 331 case tr[0].mask&c != 0: 332 t.state = tr[0].next 333 case tr[1].mask&c != 0: 334 t.state = tr[1].next 335 default: 336 t.state = ruleInvalid 337 if t.isRTL() { 338 return n, false 339 } 340 } 341 n += sz 342 } 343 return n, true 344 }