// Source: github.com/thiagoyeds/go-cloud@v0.26.0/internal/escape/escape.go

// Copyright 2019 The Go Cloud Development Kit Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package escape includes helpers for escaping and unescaping strings.
package escape

import (
	"fmt"
	"net/url"
	"strconv"
	"strings"
	"unicode/utf8"
)

// NonUTF8String is a string for which utf8.ValidString returns false.
const NonUTF8String = "\xbd\xb2"

// IsASCIIAlphanumeric returns true iff r is alphanumeric: a-z, A-Z, 0-9.
func IsASCIIAlphanumeric(r rune) bool {
	switch {
	case 'A' <= r && r <= 'Z':
		return true
	case 'a' <= r && r <= 'z':
		return true
	case '0' <= r && r <= '9':
		return true
	}
	return false
}

// HexEscape returns s, with all runes for which shouldEscape returns true
// escaped to "__0xXXX__", where XXX is the hex representation of the rune
// value. For example, " " would escape to "__0x20__".
//
// Non-UTF-8 strings will have their non-UTF-8 characters escaped to
// unicode.ReplacementChar; the original value is lost. Please file an
// issue if you need non-UTF8 support.
//
// Note: shouldEscape takes the whole string as a slice of runes and an
// index. Passing it a single byte or a single rune doesn't provide
// enough context for some escape decisions; for example, the caller might
// want to escape the second "/" in "//" but not the first one.
// We pass a slice of runes instead of the string or a slice of bytes
// because some decisions will be made on a rune basis (e.g., encode
// all non-ASCII runes).
func HexEscape(s string, shouldEscape func(s []rune, i int) bool) string {
	// First pass: record which rune indexes (if any) need escaping, so the
	// common "nothing to escape" case can return s itself, untouched.
	runes := []rune(s)
	var toEscape []int
	for i := range runes {
		if shouldEscape(runes, i) {
			toEscape = append(toEscape, i)
		}
	}
	if len(toEscape) == 0 {
		return s
	}

	// Second pass: copy the runes through, replacing the recorded ones with
	// their "__0x<hex>__" form.
	var b strings.Builder
	// Size hint only; the builder grows on demand if an escape is longer.
	b.Grow(len(s) + len("__0x00__")*len(toEscape))
	n := 0 // current index into toEscape
	for i, r := range runes {
		if n < len(toEscape) && i == toEscape[n] {
			// We were asked to escape this rune. Writes to a
			// strings.Builder never fail, so the error is not checked.
			fmt.Fprintf(&b, "__%#x__", r)
			n++
			continue
		}
		b.WriteRune(r)
	}
	return b.String()
}

// unescape tries to unescape a single "__0x<hex>__" sequence starting at r[i].
// It returns a boolean indicating whether the unescaping was successful,
// and (if true) the unescaped rune and the last index of r that was used
// during unescaping.
//
// Only sequences that denote a valid Unicode code point are accepted. Signed
// values ("__0x-41__"), surrogates and values above U+10FFFF are rejected so
// that the caller leaves such text as it is instead of turning it into
// U+FFFD; HexEscape never produces them.
func unescape(r []rune, i int) (bool, rune, int) {
	// Look for the leading "__0x".
	for _, want := range "__0x" {
		if i >= len(r) || r[i] != want {
			return false, 0, 0
		}
		i++
	}
	// Capture the digits until the next "_" (if any).
	start := i
	for i < len(r) && r[i] != '_' {
		i++
	}
	hexdigits := string(r[start:i])
	// Look for the trailing "__". If the scan above stopped before the end
	// of r, then r[i] is already known to be '_'.
	last := i + 1
	if last >= len(r) || r[last] != '_' {
		return false, 0, 0
	}
	// Parse the hex digits. ParseUint rejects sign prefixes, empty input and
	// anything that does not fit in 32 bits.
	v, err := strconv.ParseUint(hexdigits, 16, 32)
	if err != nil || v > utf8.MaxRune || !utf8.ValidRune(rune(v)) {
		return false, 0, 0
	}
	return true, rune(v), last
}

// HexUnescape reverses HexEscape: every well-formed "__0x<hex>__" sequence
// in s is replaced by the rune it encodes. Text that merely resembles an
// escape sequence but is malformed is copied through unchanged. If s contains
// no escape sequence at all, s itself is returned.
func HexUnescape(s string) string {
	runes := []rune(s)
	var unescaped []rune // stays nil until the first escape sequence is found
	for i := 0; i < len(runes); i++ {
		ok, decoded, last := unescape(runes, i)
		switch {
		case ok:
			if unescaped == nil {
				// This is the first sequence that needed unescaping.
				// Allocate a buffer (the output is never longer than
				// the input) and copy the runes seen so far.
				unescaped = append(make([]rune, 0, len(runes)), runes[:i]...)
			}
			unescaped = append(unescaped, decoded)
			// Skip the rest of the sequence; the loop's i++ then moves
			// to the first rune after it.
			i = last
		case unescaped != nil:
			unescaped = append(unescaped, runes[i])
		}
	}
	if unescaped == nil {
		return s
	}
	return string(unescaped)
}

// URLEscape uses url.PathEscape to escape s.
func URLEscape(s string) string {
	return url.PathEscape(s)
}

// URLUnescape reverses URLEscape using url.PathUnescape. If the unescape
// returns an error, it returns s.
func URLUnescape(s string) string {
	u, err := url.PathUnescape(s)
	if err != nil {
		return s
	}
	return u
}

// makeASCIIString returns a string made of the byte values in [start, end),
// in increasing order, leaving out the ASCII letters and digits.
func makeASCIIString(start, end int) string {
	var s []byte
	for i := start; i < end; i++ {
		switch {
		case 'a' <= i && i <= 'z', 'A' <= i && i <= 'Z', '0' <= i && i <= '9':
			// Alphanumerics are left out.
			continue
		}
		s = append(s, byte(i))
	}
	return string(s)
}

// WeirdStrings are unusual/weird strings for use in testing escaping.
// The keys are descriptive strings, the values are the weird strings.
193 var WeirdStrings = map[string]string{ 194 "fwdslashes": "foo/bar/baz", 195 "repeatedfwdslashes": "foo//bar///baz", 196 "dotdotslash": "../foo/../bar/../../baz../", 197 "backslashes": "foo\\bar\\baz", 198 "repeatedbackslashes": "..\\foo\\\\bar\\\\\\baz", 199 "dotdotbackslash": "..\\foo\\..\\bar\\..\\..\\baz..\\", 200 "quote": "foo\"bar\"baz", 201 "spaces": "foo bar baz", 202 "startwithdigit": "12345", 203 "unicode": strings.Repeat("☺", 3), 204 // The ASCII characters 0-128, split up to avoid the possibly-escaped 205 // versions from getting too long. 206 "ascii-1": makeASCIIString(0, 16), 207 "ascii-2": makeASCIIString(16, 32), 208 "ascii-3": makeASCIIString(32, 48), 209 "ascii-4": makeASCIIString(48, 64), 210 "ascii-5": makeASCIIString(64, 80), 211 "ascii-6": makeASCIIString(80, 96), 212 "ascii-7": makeASCIIString(96, 112), 213 "ascii-8": makeASCIIString(112, 128), 214 }