github.com/icodeface/tls@v0.0.0-20230910023335-34df9250cd12/internal/x/text/secure/bidirule/bidirule.go (about) 1 // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. 2 3 // Copyright 2016 The Go Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 // Package bidirule implements the Bidi Rule defined by RFC 5893. 8 // 9 // This package is under development. The API may change without notice and 10 // without preserving backward compatibility. 11 package bidirule 12 13 import ( 14 "errors" 15 "unicode/utf8" 16 17 "github.com/icodeface/tls/internal/x/text/transform" 18 "github.com/icodeface/tls/internal/x/text/unicode/bidi" 19 ) 20 21 // This file contains an implementation of RFC 5893: Right-to-Left Scripts for 22 // Internationalized Domain Names for Applications (IDNA) 23 // 24 // A label is an individual component of a domain name. Labels are usually 25 // shown separated by dots; for example, the domain name "www.example.com" is 26 // composed of three labels: "www", "example", and "com". 27 // 28 // An RTL label is a label that contains at least one character of class R, AL, 29 // or AN. An LTR label is any label that is not an RTL label. 30 // 31 // A "Bidi domain name" is a domain name that contains at least one RTL label. 32 // 33 // The following guarantees can be made based on the above: 34 // 35 // o In a domain name consisting of only labels that satisfy the rule, 36 // the requirements of Section 3 are satisfied. Note that even LTR 37 // labels and pure ASCII labels have to be tested. 38 // 39 // o In a domain name consisting of only LDH labels (as defined in the 40 // Definitions document [RFC5890]) and labels that satisfy the rule, 41 // the requirements of Section 3 are satisfied as long as a label 42 // that starts with an ASCII digit does not come after a 43 // right-to-left label. 44 // 45 // No guarantee is given for other combinations. 46 47 // ErrInvalid indicates a label is invalid according to the Bidi Rule. 48 var ErrInvalid = errors.New("bidirule: failed Bidi Rule") 49 50 type ruleState uint8 51 52 const ( 53 ruleInitial ruleState = iota 54 ruleLTR 55 ruleLTRFinal 56 ruleRTL 57 ruleRTLFinal 58 ruleInvalid 59 ) 60 61 type ruleTransition struct { 62 next ruleState 63 mask uint16 64 } 65 66 var transitions = [...][2]ruleTransition{ 67 // [2.1] The first character must be a character with Bidi property L, R, or 68 // AL. If it has the R or AL property, it is an RTL label; if it has the L 69 // property, it is an LTR label. 70 ruleInitial: { 71 {ruleLTRFinal, 1 << bidi.L}, 72 {ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL}, 73 }, 74 ruleRTL: { 75 // [2.3] In an RTL label, the end of the label must be a character with 76 // Bidi property R, AL, EN, or AN, followed by zero or more characters 77 // with Bidi property NSM. 78 {ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN}, 79 80 // [2.2] In an RTL label, only characters with the Bidi properties R, 81 // AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed. 82 // We exclude the entries from [2.3] 83 {ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM}, 84 }, 85 ruleRTLFinal: { 86 // [2.3] In an RTL label, the end of the label must be a character with 87 // Bidi property R, AL, EN, or AN, followed by zero or more characters 88 // with Bidi property NSM. 89 {ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN | 1<<bidi.NSM}, 90 91 // [2.2] In an RTL label, only characters with the Bidi properties R, 92 // AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed. 93 // We exclude the entries from [2.3] and NSM. 94 {ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN}, 95 }, 96 ruleLTR: { 97 // [2.6] In an LTR label, the end of the label must be a character with 98 // Bidi property L or EN, followed by zero or more characters with Bidi 99 // property NSM. 100 {ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN}, 101 102 // [2.5] In an LTR label, only characters with the Bidi properties L, 103 // EN, ES, CS, ET, ON, BN, or NSM are allowed. 104 // We exclude the entries from [2.6]. 105 {ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM}, 106 }, 107 ruleLTRFinal: { 108 // [2.6] In an LTR label, the end of the label must be a character with 109 // Bidi property L or EN, followed by zero or more characters with Bidi 110 // property NSM. 111 {ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN | 1<<bidi.NSM}, 112 113 // [2.5] In an LTR label, only characters with the Bidi properties L, 114 // EN, ES, CS, ET, ON, BN, or NSM are allowed. 115 // We exclude the entries from [2.6]. 116 {ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN}, 117 }, 118 ruleInvalid: { 119 {ruleInvalid, 0}, 120 {ruleInvalid, 0}, 121 }, 122 } 123 124 // [2.4] In an RTL label, if an EN is present, no AN may be present, and 125 // vice versa. 126 const exclusiveRTL = uint16(1<<bidi.EN | 1<<bidi.AN) 127 128 // From RFC 5893 129 // An RTL label is a label that contains at least one character of type 130 // R, AL, or AN. 131 // 132 // An LTR label is any label that is not an RTL label. 133 134 // Direction reports the direction of the given label as defined by RFC 5893. 135 // The Bidi Rule does not have to be applied to labels of the category 136 // LeftToRight. 137 func Direction(b []byte) bidi.Direction { 138 for i := 0; i < len(b); { 139 e, sz := bidi.Lookup(b[i:]) 140 if sz == 0 { 141 i++ 142 } 143 c := e.Class() 144 if c == bidi.R || c == bidi.AL || c == bidi.AN { 145 return bidi.RightToLeft 146 } 147 i += sz 148 } 149 return bidi.LeftToRight 150 } 151 152 // DirectionString reports the direction of the given label as defined by RFC 153 // 5893. The Bidi Rule does not have to be applied to labels of the category 154 // LeftToRight. 155 func DirectionString(s string) bidi.Direction { 156 for i := 0; i < len(s); { 157 e, sz := bidi.LookupString(s[i:]) 158 if sz == 0 { 159 i++ 160 continue 161 } 162 c := e.Class() 163 if c == bidi.R || c == bidi.AL || c == bidi.AN { 164 return bidi.RightToLeft 165 } 166 i += sz 167 } 168 return bidi.LeftToRight 169 } 170 171 // Valid reports whether b conforms to the BiDi rule. 172 func Valid(b []byte) bool { 173 var t Transformer 174 if n, ok := t.advance(b); !ok || n < len(b) { 175 return false 176 } 177 return t.isFinal() 178 } 179 180 // ValidString reports whether s conforms to the BiDi rule. 181 func ValidString(s string) bool { 182 var t Transformer 183 if n, ok := t.advanceString(s); !ok || n < len(s) { 184 return false 185 } 186 return t.isFinal() 187 } 188 189 // New returns a Transformer that verifies that input adheres to the Bidi Rule. 190 func New() *Transformer { 191 return &Transformer{} 192 } 193 194 // Transformer implements transform.Transform. 195 type Transformer struct { 196 state ruleState 197 hasRTL bool 198 seen uint16 199 } 200 201 // A rule can only be violated for "Bidi Domain names", meaning if one of the 202 // following categories has been observed. 203 func (t *Transformer) isRTL() bool { 204 const isRTL = 1<<bidi.R | 1<<bidi.AL | 1<<bidi.AN 205 return t.seen&isRTL != 0 206 } 207 208 func (t *Transformer) isFinal() bool { 209 return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial 210 } 211 212 // Reset implements transform.Transformer. 213 func (t *Transformer) Reset() { *t = Transformer{} } 214 215 // Transform implements transform.Transformer. This Transformer has state and 216 // needs to be reset between uses. 217 func (t *Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 218 if len(dst) < len(src) { 219 src = src[:len(dst)] 220 atEOF = false 221 err = transform.ErrShortDst 222 } 223 n, err1 := t.Span(src, atEOF) 224 copy(dst, src[:n]) 225 if err == nil || err1 != nil && err1 != transform.ErrShortSrc { 226 err = err1 227 } 228 return n, n, err 229 } 230 231 // Span returns the first n bytes of src that conform to the Bidi rule. 232 func (t *Transformer) Span(src []byte, atEOF bool) (n int, err error) { 233 if t.state == ruleInvalid && t.isRTL() { 234 return 0, ErrInvalid 235 } 236 n, ok := t.advance(src) 237 switch { 238 case !ok: 239 err = ErrInvalid 240 case n < len(src): 241 if !atEOF { 242 err = transform.ErrShortSrc 243 break 244 } 245 err = ErrInvalid 246 case !t.isFinal(): 247 err = ErrInvalid 248 } 249 return n, err 250 } 251 252 // Precomputing the ASCII values decreases running time for the ASCII fast path 253 // by about 30%. 254 var asciiTable [128]bidi.Properties 255 256 func init() { 257 for i := range asciiTable { 258 p, _ := bidi.LookupRune(rune(i)) 259 asciiTable[i] = p 260 } 261 } 262 263 func (t *Transformer) advance(s []byte) (n int, ok bool) { 264 var e bidi.Properties 265 var sz int 266 for n < len(s) { 267 if s[n] < utf8.RuneSelf { 268 e, sz = asciiTable[s[n]], 1 269 } else { 270 e, sz = bidi.Lookup(s[n:]) 271 if sz <= 1 { 272 if sz == 1 { 273 // We always consider invalid UTF-8 to be invalid, even if 274 // the string has not yet been determined to be RTL. 275 // TODO: is this correct? 276 return n, false 277 } 278 return n, true // incomplete UTF-8 encoding 279 } 280 } 281 // TODO: using CompactClass would result in noticeable speedup. 282 // See unicode/bidi/prop.go:Properties.CompactClass. 283 c := uint16(1 << e.Class()) 284 t.seen |= c 285 if t.seen&exclusiveRTL == exclusiveRTL { 286 t.state = ruleInvalid 287 return n, false 288 } 289 switch tr := transitions[t.state]; { 290 case tr[0].mask&c != 0: 291 t.state = tr[0].next 292 case tr[1].mask&c != 0: 293 t.state = tr[1].next 294 default: 295 t.state = ruleInvalid 296 if t.isRTL() { 297 return n, false 298 } 299 } 300 n += sz 301 } 302 return n, true 303 } 304 305 func (t *Transformer) advanceString(s string) (n int, ok bool) { 306 var e bidi.Properties 307 var sz int 308 for n < len(s) { 309 if s[n] < utf8.RuneSelf { 310 e, sz = asciiTable[s[n]], 1 311 } else { 312 e, sz = bidi.LookupString(s[n:]) 313 if sz <= 1 { 314 if sz == 1 { 315 return n, false // invalid UTF-8 316 } 317 return n, true // incomplete UTF-8 encoding 318 } 319 } 320 // TODO: using CompactClass results in noticeable speedup. 321 // See unicode/bidi/prop.go:Properties.CompactClass. 322 c := uint16(1 << e.Class()) 323 t.seen |= c 324 if t.seen&exclusiveRTL == exclusiveRTL { 325 t.state = ruleInvalid 326 return n, false 327 } 328 switch tr := transitions[t.state]; { 329 case tr[0].mask&c != 0: 330 t.state = tr[0].next 331 case tr[1].mask&c != 0: 332 t.state = tr[1].next 333 default: 334 t.state = ruleInvalid 335 if t.isRTL() { 336 return n, false 337 } 338 } 339 n += sz 340 } 341 return n, true 342 }