// Source: github.com/m3db/m3@v1.5.0/src/query/models/strconv/quote.go

// Copyright (c) 2019 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package strconv

import (
	"strconv"
	"unicode/utf8"
)

// NB: predefined strconv constants
const (
	tx = 0x80 // 1000 0000
	t2 = 0xC0 // 1100 0000
	t3 = 0xE0 // 1110 0000
	t4 = 0xF0 // 1111 0000

	maskx = 0x3F // 0011 1111

	rune1Max = 1<<7 - 1
	rune2Max = 1<<11 - 1
	rune3Max = 1<<16 - 1

	runeError = '\uFFFD'     // the "error" Rune or "Unicode replacement character"
	maxRune   = '\U0010FFFF' // Maximum valid Unicode code point.

	// NB: Code points in the surrogate range are not valid for UTF-8.
45 surrogateMin = 0xD800 46 surrogateMax = 0xDFFF 47 48 lowerhex = "0123456789abcdef" 49 50 quote = byte('"') 51 ) 52 53 // EncodeRune writes into src (which must be large enough) the UTF-8 encoding 54 // of the rune at the given index. It returns the number of bytes written. 55 // 56 // NB: based on utf8.encodeRune method, but instead uses indexed insertion 57 // into a predefined buffer. 58 func encodeRune(dst []byte, r rune, idx int) int { 59 // Negative values are erroneous. Making it unsigned addresses the problem. 60 switch i := uint32(r); { 61 case i <= rune1Max: 62 dst[idx] = byte(r) 63 return idx + 1 64 case i <= rune2Max: 65 dst[idx] = t2 | byte(r>>6) 66 dst[idx+1] = tx | byte(r)&maskx 67 return idx + 2 68 case i > maxRune, surrogateMin <= i && i <= surrogateMax: 69 r = runeError 70 fallthrough 71 case i <= rune3Max: 72 dst[idx] = t3 | byte(r>>12) 73 dst[idx+2] = tx | byte(r)&maskx 74 dst[idx+1] = tx | byte(r>>6)&maskx 75 return idx + 3 76 default: 77 dst[idx] = t4 | byte(r>>18) 78 dst[idx+1] = tx | byte(r>>12)&maskx 79 dst[idx+2] = tx | byte(r>>6)&maskx 80 dst[idx+3] = tx | byte(r)&maskx 81 return idx + 4 82 } 83 } 84 85 // It returns the number of bytes written. 
86 func insertEscapedRune(dst []byte, r rune, idx int) int { 87 if r == rune(quote) || r == '\\' { // always backslashed 88 dst[idx] = '\\' 89 dst[idx+1] = byte(r) 90 return idx + 2 91 } 92 93 if strconv.IsPrint(r) { 94 return encodeRune(dst, r, idx) 95 } 96 97 switch r { 98 case '\a': 99 dst[idx] = '\\' 100 dst[idx+1] = 'a' 101 return idx + 2 102 case '\b': 103 dst[idx] = '\\' 104 dst[idx+1] = 'b' 105 return idx + 2 106 case '\f': 107 dst[idx] = '\\' 108 dst[idx+1] = 'f' 109 return idx + 2 110 case '\n': 111 dst[idx] = '\\' 112 dst[idx+1] = 'n' 113 return idx + 2 114 case '\r': 115 dst[idx] = '\\' 116 dst[idx+1] = 'r' 117 return idx + 2 118 case '\t': 119 dst[idx] = '\\' 120 dst[idx+1] = 't' 121 return idx + 2 122 case '\v': 123 dst[idx] = '\\' 124 dst[idx+1] = 'v' 125 return idx + 2 126 default: 127 switch { 128 case r < ' ': 129 dst[idx] = '\\' 130 dst[idx+1] = 'x' 131 dst[idx+2] = lowerhex[byte(r)>>4] 132 dst[idx+3] = lowerhex[byte(r)&0xF] 133 return idx + 4 134 case r > utf8.MaxRune: 135 r = 0xFFFD 136 fallthrough 137 case r < 0x10000: 138 dst[idx] = '\\' 139 dst[idx+1] = 'u' 140 dst[idx+2] = lowerhex[r>>uint(12)&0xF] 141 dst[idx+3] = lowerhex[r>>uint(8)&0xF] 142 dst[idx+4] = lowerhex[r>>uint(4)&0xF] 143 dst[idx+5] = lowerhex[r>>uint(0)&0xF] 144 return idx + 6 145 default: 146 dst[idx] = '\\' 147 dst[idx+1] = 'U' 148 dst[idx+2] = lowerhex[r>>uint(28)&0xF] 149 dst[idx+3] = lowerhex[r>>uint(24)&0xF] 150 dst[idx+4] = lowerhex[r>>uint(20)&0xF] 151 dst[idx+5] = lowerhex[r>>uint(16)&0xF] 152 dst[idx+6] = lowerhex[r>>uint(12)&0xF] 153 dst[idx+7] = lowerhex[r>>uint(8)&0xF] 154 dst[idx+8] = lowerhex[r>>uint(4)&0xF] 155 dst[idx+9] = lowerhex[r>>uint(0)&0xF] 156 return idx + 10 157 } 158 } 159 } 160 161 // Escape copies byte slice src to dst at a given index, adding escaping any 162 // quote or control characters. It returns the index at which the copy finished. 
163 // 164 // NB: ensure that dst is large enough to store src, additional 165 // quotation runes, and any additional escape characters. 166 // as generated by Quote, to dst and returns the extended buffer. 167 func Escape(dst, src []byte, idx int) int { 168 // nolint 169 for width := 0; len(src) > 0; src = src[width:] { 170 r := rune(src[0]) 171 width = 1 172 if r >= utf8.RuneSelf { 173 r, width = utf8.DecodeRune(src) 174 } 175 176 if width == 1 && r == utf8.RuneError { 177 dst[idx] = '\\' 178 dst[idx+1] = 'x' 179 dst[idx+2] = lowerhex[src[0]>>4] 180 dst[idx+3] = lowerhex[src[0]&0xF] 181 idx += 4 182 continue 183 } 184 185 idx = insertEscapedRune(dst, r, idx) 186 } 187 188 return idx 189 } 190 191 // Quote copies byte slice src to dst at a given index, adding 192 // quotation runes around the src slice and escaping any quote or control 193 // characters. It returns the index at which the copy finished. 194 // 195 // NB: ensure that dst is large enough to store src, additional 196 // quotation runes, and any additional escape characters. 197 // as generated by Quote, to dst and returns the extended buffer. 198 // 199 // NB: based on stconv.Quote method, but instead uses indexed insertion 200 // into a predefined buffer. 201 func Quote(dst, src []byte, idx int) int { 202 dst[idx] = quote 203 idx++ 204 idx = Escape(dst, src, idx) 205 dst[idx] = quote 206 return idx + 1 207 } 208 209 // QuoteSimple copies byte slice src to dst at a given index, adding 210 // quotation runes around the src slice, but does not escape any 211 // characters. It returns the index at which the copy finished. 212 // 213 // NB: ensure that dst is large enough to store src and two other characters. 214 func QuoteSimple(dst, src []byte, idx int) int { 215 dst[idx] = quote 216 idx++ 217 idx += copy(dst[idx:], src) 218 dst[idx] = quote 219 return idx + 1 220 } 221 222 // EscapedLength computes the length required for a byte slice to hold 223 // a quoted byte slice. 
224 // 225 // NB: essentially a dry-run of `Escape` that does not write characters, but 226 // instead counts total character counts for the destination byte slice. 227 func EscapedLength(src []byte) int { 228 length := 0 229 // nolint 230 for width := 0; len(src) > 0; src = src[width:] { 231 r := rune(src[0]) 232 width = 1 233 if r >= utf8.RuneSelf { 234 r, width = utf8.DecodeRune(src) 235 } 236 237 if width == 1 && r == utf8.RuneError { 238 length += 4 239 continue 240 } 241 242 length += escapedRuneLength(r) 243 } 244 245 return length 246 } 247 248 // QuotedLength computes the length required for a byte slice to hold 249 // a quoted byte slice. 250 // 251 // NB: essentially a dry-run of `Quote` that does not write characters, but 252 // instead counts total character counts for the destination byte slice. 253 func QuotedLength(src []byte) int { 254 return 2 + EscapedLength(src) // account for opening and closing quotes 255 } 256 257 func escapedRuneLength(r rune) int { 258 if r == rune(quote) || r == '\\' { // always backslashed 259 return 2 260 } 261 262 if strconv.IsPrint(r) { 263 switch i := uint32(r); { 264 case i <= rune1Max: 265 return 1 266 case i <= rune2Max: 267 return 2 268 case i > maxRune, surrogateMin <= i && i <= surrogateMax: 269 fallthrough 270 case i <= rune3Max: 271 return 3 272 default: 273 return 4 274 } 275 } 276 277 switch r { 278 case '\a': 279 return 2 280 case '\b': 281 return 2 282 case '\f': 283 return 2 284 case '\n': 285 return 2 286 case '\r': 287 return 2 288 case '\t': 289 return 2 290 case '\v': 291 return 2 292 default: 293 switch { 294 case r < ' ': 295 return 4 296 case r > utf8.MaxRune: 297 fallthrough 298 case r < 0x10000: 299 return 6 300 default: 301 return 10 302 } 303 } 304 }