github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/soliton/stringutil/string_util.go (about) 1 // Copyright 2020 WHTCORPS INC, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package stringutil 15 16 import ( 17 "bytes" 18 "fmt" 19 "sort" 20 "strings" 21 "unicode/utf8" 22 23 "github.com/whtcorpsinc/errors" 24 "github.com/whtcorpsinc/BerolinaSQL/allegrosql" 25 "github.com/whtcorpsinc/milevadb/soliton/replog" 26 ) 27 28 // ErrSyntax indicates that a value does not have the right syntax for the target type. 29 var ErrSyntax = errors.New("invalid syntax") 30 31 // UnquoteChar decodes the first character or byte in the escaped string 32 // or character literal represented by the string s. 33 // It returns four values: 34 // 35 //1) value, the decoded Unicode code point or byte value; 36 //2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; 37 //3) tail, the remainder of the string after the character; and 38 //4) an error that will be nil if the character is syntactically valid. 39 // 40 // The second argument, quote, specifies the type of literal being parsed 41 // and therefore which escaped quote character is permitted. 42 // If set to a single quote, it permits the sequence \' and disallows unescaped '. 43 // If set to a double quote, it permits \" and disallows unescaped ". 44 // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. 45 // Different with strconv.UnquoteChar, it permits unnecessary backslash. 46 func UnquoteChar(s string, quote byte) (value []byte, tail string, err error) { 47 // easy cases 48 switch c := s[0]; { 49 case c == quote: 50 err = errors.Trace(ErrSyntax) 51 return 52 case c >= utf8.RuneSelf: 53 r, size := utf8.DecodeRuneInString(s) 54 if r == utf8.RuneError { 55 value = append(value, c) 56 return value, s[1:], nil 57 } 58 value = append(value, string(r)...) 59 return value, s[size:], nil 60 case c != '\\': 61 value = append(value, c) 62 return value, s[1:], nil 63 } 64 // hard case: c is backslash 65 if len(s) <= 1 { 66 err = errors.Trace(ErrSyntax) 67 return 68 } 69 c := s[1] 70 s = s[2:] 71 switch c { 72 case 'b': 73 value = append(value, '\b') 74 case 'n': 75 value = append(value, '\n') 76 case 'r': 77 value = append(value, '\r') 78 case 't': 79 value = append(value, '\t') 80 case 'Z': 81 value = append(value, '\032') 82 case '0': 83 value = append(value, '\000') 84 case '_', '%': 85 value = append(value, '\\') 86 value = append(value, c) 87 case '\\': 88 value = append(value, '\\') 89 case '\'', '"': 90 value = append(value, c) 91 default: 92 value = append(value, c) 93 } 94 tail = s 95 return 96 } 97 98 // Unquote interprets s as a single-quoted, double-quoted, 99 // or backquoted Go string literal, returning the string value 100 // that s quotes. For example: test=`"\"\n"` (hex: 22 5c 22 5c 6e 22) 101 // should be converted to `"\n` (hex: 22 0a). 102 func Unquote(s string) (t string, err error) { 103 n := len(s) 104 if n < 2 { 105 return "", errors.Trace(ErrSyntax) 106 } 107 quote := s[0] 108 if quote != s[n-1] { 109 return "", errors.Trace(ErrSyntax) 110 } 111 s = s[1 : n-1] 112 if quote != '"' && quote != '\'' { 113 return "", errors.Trace(ErrSyntax) 114 } 115 // Avoid allocation. No need to convert if there is no '\' 116 if strings.IndexByte(s, '\\') == -1 && strings.IndexByte(s, quote) == -1 { 117 return s, nil 118 } 119 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 120 for len(s) > 0 { 121 mb, ss, err := UnquoteChar(s, quote) 122 if err != nil { 123 return "", errors.Trace(err) 124 } 125 s = ss 126 buf = append(buf, mb...) 127 } 128 return string(buf), nil 129 } 130 131 const ( 132 // PatMatch is the enumeration value for per-character match. 133 PatMatch = iota + 1 134 // PatOne is the enumeration value for '_' match. 135 PatOne 136 // PatAny is the enumeration value for '%' match. 137 PatAny 138 ) 139 140 // CompilePattern handles escapes and wild cards convert pattern characters and 141 // pattern types. 142 func CompilePattern(pattern string, escape byte) (patChars, patTypes []byte) { 143 patChars = make([]byte, len(pattern)) 144 patTypes = make([]byte, len(pattern)) 145 patLen := 0 146 for i := 0; i < len(pattern); i++ { 147 var tp byte 148 var c = pattern[i] 149 switch c { 150 case escape: 151 tp = PatMatch 152 if i < len(pattern)-1 { 153 i++ 154 c = pattern[i] 155 if c == escape || c == '_' || c == '%' { 156 // Valid escape. 157 } else { 158 // Invalid escape, fall back to escape byte. 159 // allegrosql will treat escape character as the origin value even 160 // the escape sequence is invalid in Go or C. 161 // e.g., \m is invalid in Go, but in MyALLEGROSQL we will get "m" for select '\m'. 162 // Following case is correct just for escape \, not for others like +. 163 // TODO: Add more checks for other escapes. 164 i-- 165 c = escape 166 } 167 } 168 case '_': 169 // %_ => _% 170 if patLen > 0 && patTypes[patLen-1] == PatAny { 171 tp = PatAny 172 c = '%' 173 patChars[patLen-1], patTypes[patLen-1] = '_', PatOne 174 } else { 175 tp = PatOne 176 } 177 case '%': 178 // %% => % 179 if patLen > 0 && patTypes[patLen-1] == PatAny { 180 continue 181 } 182 tp = PatAny 183 default: 184 tp = PatMatch 185 } 186 patChars[patLen] = c 187 patTypes[patLen] = tp 188 patLen++ 189 } 190 patChars = patChars[:patLen] 191 patTypes = patTypes[:patLen] 192 return 193 } 194 195 func matchByte(a, b byte) bool { 196 return a == b 197 // We may reuse below code causet when like function go back to case insensitive. 198 /* 199 if a == b { 200 return true 201 } 202 if a >= 'a' && a <= 'z' && a-caseDiff == b { 203 return true 204 } 205 return a >= 'A' && a <= 'Z' && a+caseDiff == b 206 */ 207 } 208 209 // CompileLike2Regexp convert a like `lhs` to a regular memex 210 func CompileLike2Regexp(str string) string { 211 patChars, patTypes := CompilePattern(str, '\\') 212 var result []byte 213 for i := 0; i < len(patChars); i++ { 214 switch patTypes[i] { 215 case PatMatch: 216 result = append(result, patChars[i]) 217 case PatOne: 218 result = append(result, '.') 219 case PatAny: 220 result = append(result, '.', '*') 221 } 222 } 223 return string(result) 224 } 225 226 // DoMatch matches the string with patChars and patTypes. 227 // The algorithm has linear time complexity. 228 // https://research.swtch.com/glob 229 func DoMatch(str string, patChars, patTypes []byte) bool { 230 var sIdx, pIdx, nextSIdx, nextPIdx int 231 for pIdx < len(patChars) || sIdx < len(str) { 232 if pIdx < len(patChars) { 233 switch patTypes[pIdx] { 234 case PatMatch: 235 if sIdx < len(str) && matchByte(str[sIdx], patChars[pIdx]) { 236 pIdx++ 237 sIdx++ 238 continue 239 } 240 case PatOne: 241 if sIdx < len(str) { 242 pIdx++ 243 sIdx++ 244 continue 245 } 246 case PatAny: 247 // Try to match at sIdx. 248 // If that doesn't work out, 249 // restart at sIdx+1 next. 250 nextPIdx = pIdx 251 nextSIdx = sIdx + 1 252 pIdx++ 253 continue 254 } 255 } 256 // Mismatch. Maybe restart. 257 if 0 < nextSIdx && nextSIdx <= len(str) { 258 pIdx = nextPIdx 259 sIdx = nextSIdx 260 continue 261 } 262 return false 263 } 264 // Matched all of pattern to all of name. Success. 265 return true 266 } 267 268 // IsExactMatch return true if no wildcard character 269 func IsExactMatch(patTypes []byte) bool { 270 for _, pt := range patTypes { 271 if pt != PatMatch { 272 return false 273 } 274 } 275 return true 276 } 277 278 // Copy deep copies a string. 279 func Copy(src string) string { 280 return string(replog.Slice(src)) 281 } 282 283 // StringerFunc defines string func implement fmt.Stringer. 284 type StringerFunc func() string 285 286 // String implements fmt.Stringer 287 func (l StringerFunc) String() string { 288 return l() 289 } 290 291 // MemoizeStr returns memoized version of stringFunc. 292 func MemoizeStr(l func() string) fmt.Stringer { 293 return StringerFunc(func() string { 294 return l() 295 }) 296 } 297 298 // StringerStr defines a alias to normal string. 299 // implement fmt.Stringer 300 type StringerStr string 301 302 // String implements fmt.Stringer 303 func (i StringerStr) String() string { 304 return string(i) 305 } 306 307 // Escape the identifier for pretty-printing. 308 // For instance, the identifier "foo `bar`" will become "`foo ``bar```". 309 // The sqlMode controls whether to escape with backquotes (`) or double quotes 310 // (`"`) depending on whether allegrosql.ModeANSIQuotes is enabled. 311 func Escape(str string, sqlMode allegrosql.ALLEGROSQLMode) string { 312 var quote string 313 if sqlMode&allegrosql.ModeANSIQuotes != 0 { 314 quote = `"` 315 } else { 316 quote = "`" 317 } 318 return quote + strings.Replace(str, quote, quote+quote, -1) + quote 319 } 320 321 // BuildStringFromLabels construct config labels into string by following format: 322 // "keyA=valueA,keyB=valueB" 323 func BuildStringFromLabels(labels map[string]string) string { 324 if len(labels) < 1 { 325 return "" 326 } 327 s := make([]string, 0, len(labels)) 328 for k := range labels { 329 s = append(s, k) 330 } 331 sort.Strings(s) 332 r := new(bytes.Buffer) 333 // visit labels by sorted key in order to make sure that result should be consistency 334 for _, key := range s { 335 r.WriteString(fmt.Sprintf("%s=%s,", key, labels[key])) 336 } 337 returned := r.String() 338 return returned[:len(returned)-1] 339 }