github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/soliton/stringutil/string_util.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package stringutil
    15  
import (
	"bytes"
	"fmt"
	"sort"
	"strings"
	"sync"
	"unicode/utf8"

	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/milevadb/soliton/replog"
)
    27  
// ErrSyntax is the sentinel error reported when a value does not have the
// right syntax for the target type. UnquoteChar and Unquote return it
// (wrapped with errors.Trace) for malformed quoted input.
var ErrSyntax = errors.New("invalid syntax")
    30  
    31  // UnquoteChar decodes the first character or byte in the escaped string
    32  // or character literal represented by the string s.
    33  // It returns four values:
    34  //
    35  //1) value, the decoded Unicode code point or byte value;
    36  //2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
    37  //3) tail, the remainder of the string after the character; and
    38  //4) an error that will be nil if the character is syntactically valid.
    39  //
    40  // The second argument, quote, specifies the type of literal being parsed
    41  // and therefore which escaped quote character is permitted.
    42  // If set to a single quote, it permits the sequence \' and disallows unescaped '.
    43  // If set to a double quote, it permits \" and disallows unescaped ".
    44  // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
    45  // Different with strconv.UnquoteChar, it permits unnecessary backslash.
    46  func UnquoteChar(s string, quote byte) (value []byte, tail string, err error) {
    47  	// easy cases
    48  	switch c := s[0]; {
    49  	case c == quote:
    50  		err = errors.Trace(ErrSyntax)
    51  		return
    52  	case c >= utf8.RuneSelf:
    53  		r, size := utf8.DecodeRuneInString(s)
    54  		if r == utf8.RuneError {
    55  			value = append(value, c)
    56  			return value, s[1:], nil
    57  		}
    58  		value = append(value, string(r)...)
    59  		return value, s[size:], nil
    60  	case c != '\\':
    61  		value = append(value, c)
    62  		return value, s[1:], nil
    63  	}
    64  	// hard case: c is backslash
    65  	if len(s) <= 1 {
    66  		err = errors.Trace(ErrSyntax)
    67  		return
    68  	}
    69  	c := s[1]
    70  	s = s[2:]
    71  	switch c {
    72  	case 'b':
    73  		value = append(value, '\b')
    74  	case 'n':
    75  		value = append(value, '\n')
    76  	case 'r':
    77  		value = append(value, '\r')
    78  	case 't':
    79  		value = append(value, '\t')
    80  	case 'Z':
    81  		value = append(value, '\032')
    82  	case '0':
    83  		value = append(value, '\000')
    84  	case '_', '%':
    85  		value = append(value, '\\')
    86  		value = append(value, c)
    87  	case '\\':
    88  		value = append(value, '\\')
    89  	case '\'', '"':
    90  		value = append(value, c)
    91  	default:
    92  		value = append(value, c)
    93  	}
    94  	tail = s
    95  	return
    96  }
    97  
    98  // Unquote interprets s as a single-quoted, double-quoted,
    99  // or backquoted Go string literal, returning the string value
   100  // that s quotes. For example: test=`"\"\n"` (hex: 22 5c 22 5c 6e 22)
   101  // should be converted to `"\n` (hex: 22 0a).
   102  func Unquote(s string) (t string, err error) {
   103  	n := len(s)
   104  	if n < 2 {
   105  		return "", errors.Trace(ErrSyntax)
   106  	}
   107  	quote := s[0]
   108  	if quote != s[n-1] {
   109  		return "", errors.Trace(ErrSyntax)
   110  	}
   111  	s = s[1 : n-1]
   112  	if quote != '"' && quote != '\'' {
   113  		return "", errors.Trace(ErrSyntax)
   114  	}
   115  	// Avoid allocation. No need to convert if there is no '\'
   116  	if strings.IndexByte(s, '\\') == -1 && strings.IndexByte(s, quote) == -1 {
   117  		return s, nil
   118  	}
   119  	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
   120  	for len(s) > 0 {
   121  		mb, ss, err := UnquoteChar(s, quote)
   122  		if err != nil {
   123  			return "", errors.Trace(err)
   124  		}
   125  		s = ss
   126  		buf = append(buf, mb...)
   127  	}
   128  	return string(buf), nil
   129  }
   130  
   131  const (
   132  	// PatMatch is the enumeration value for per-character match.
   133  	PatMatch = iota + 1
   134  	// PatOne is the enumeration value for '_' match.
   135  	PatOne
   136  	// PatAny is the enumeration value for '%' match.
   137  	PatAny
   138  )
   139  
   140  // CompilePattern handles escapes and wild cards convert pattern characters and
   141  // pattern types.
   142  func CompilePattern(pattern string, escape byte) (patChars, patTypes []byte) {
   143  	patChars = make([]byte, len(pattern))
   144  	patTypes = make([]byte, len(pattern))
   145  	patLen := 0
   146  	for i := 0; i < len(pattern); i++ {
   147  		var tp byte
   148  		var c = pattern[i]
   149  		switch c {
   150  		case escape:
   151  			tp = PatMatch
   152  			if i < len(pattern)-1 {
   153  				i++
   154  				c = pattern[i]
   155  				if c == escape || c == '_' || c == '%' {
   156  					// Valid escape.
   157  				} else {
   158  					// Invalid escape, fall back to escape byte.
   159  					// allegrosql will treat escape character as the origin value even
   160  					// the escape sequence is invalid in Go or C.
   161  					// e.g., \m is invalid in Go, but in MyALLEGROSQL we will get "m" for select '\m'.
   162  					// Following case is correct just for escape \, not for others like +.
   163  					// TODO: Add more checks for other escapes.
   164  					i--
   165  					c = escape
   166  				}
   167  			}
   168  		case '_':
   169  			// %_ => _%
   170  			if patLen > 0 && patTypes[patLen-1] == PatAny {
   171  				tp = PatAny
   172  				c = '%'
   173  				patChars[patLen-1], patTypes[patLen-1] = '_', PatOne
   174  			} else {
   175  				tp = PatOne
   176  			}
   177  		case '%':
   178  			// %% => %
   179  			if patLen > 0 && patTypes[patLen-1] == PatAny {
   180  				continue
   181  			}
   182  			tp = PatAny
   183  		default:
   184  			tp = PatMatch
   185  		}
   186  		patChars[patLen] = c
   187  		patTypes[patLen] = tp
   188  		patLen++
   189  	}
   190  	patChars = patChars[:patLen]
   191  	patTypes = patTypes[:patLen]
   192  	return
   193  }
   194  
// matchByte reports whether bytes a and b are equal. The comparison is exact,
// i.e. case-sensitive.
func matchByte(a, b byte) bool {
	return a == b
	// The code below may be reused if the LIKE function goes back to being
	// case-insensitive.
	/*
		if a == b {
			return true
		}
		if a >= 'a' && a <= 'z' && a-caseDiff == b {
			return true
		}
		return a >= 'A' && a <= 'Z' && a+caseDiff == b
	*/
}
   208  
   209  // CompileLike2Regexp convert a like `lhs` to a regular memex
   210  func CompileLike2Regexp(str string) string {
   211  	patChars, patTypes := CompilePattern(str, '\\')
   212  	var result []byte
   213  	for i := 0; i < len(patChars); i++ {
   214  		switch patTypes[i] {
   215  		case PatMatch:
   216  			result = append(result, patChars[i])
   217  		case PatOne:
   218  			result = append(result, '.')
   219  		case PatAny:
   220  			result = append(result, '.', '*')
   221  		}
   222  	}
   223  	return string(result)
   224  }
   225  
   226  // DoMatch matches the string with patChars and patTypes.
   227  // The algorithm has linear time complexity.
   228  // https://research.swtch.com/glob
   229  func DoMatch(str string, patChars, patTypes []byte) bool {
   230  	var sIdx, pIdx, nextSIdx, nextPIdx int
   231  	for pIdx < len(patChars) || sIdx < len(str) {
   232  		if pIdx < len(patChars) {
   233  			switch patTypes[pIdx] {
   234  			case PatMatch:
   235  				if sIdx < len(str) && matchByte(str[sIdx], patChars[pIdx]) {
   236  					pIdx++
   237  					sIdx++
   238  					continue
   239  				}
   240  			case PatOne:
   241  				if sIdx < len(str) {
   242  					pIdx++
   243  					sIdx++
   244  					continue
   245  				}
   246  			case PatAny:
   247  				// Try to match at sIdx.
   248  				// If that doesn't work out,
   249  				// restart at sIdx+1 next.
   250  				nextPIdx = pIdx
   251  				nextSIdx = sIdx + 1
   252  				pIdx++
   253  				continue
   254  			}
   255  		}
   256  		// Mismatch. Maybe restart.
   257  		if 0 < nextSIdx && nextSIdx <= len(str) {
   258  			pIdx = nextPIdx
   259  			sIdx = nextSIdx
   260  			continue
   261  		}
   262  		return false
   263  	}
   264  	// Matched all of pattern to all of name. Success.
   265  	return true
   266  }
   267  
   268  // IsExactMatch return true if no wildcard character
   269  func IsExactMatch(patTypes []byte) bool {
   270  	for _, pt := range patTypes {
   271  		if pt != PatMatch {
   272  			return false
   273  		}
   274  	}
   275  	return true
   276  }
   277  
   278  // Copy deep copies a string.
   279  func Copy(src string) string {
   280  	return string(replog.Slice(src))
   281  }
   282  
   283  // StringerFunc defines string func implement fmt.Stringer.
   284  type StringerFunc func() string
   285  
   286  // String implements fmt.Stringer
   287  func (l StringerFunc) String() string {
   288  	return l()
   289  }
   290  
   291  // MemoizeStr returns memoized version of stringFunc.
   292  func MemoizeStr(l func() string) fmt.Stringer {
   293  	return StringerFunc(func() string {
   294  		return l()
   295  	})
   296  }
   297  
   298  // StringerStr defines a alias to normal string.
   299  // implement fmt.Stringer
   300  type StringerStr string
   301  
   302  // String implements fmt.Stringer
   303  func (i StringerStr) String() string {
   304  	return string(i)
   305  }
   306  
   307  // Escape the identifier for pretty-printing.
   308  // For instance, the identifier "foo `bar`" will become "`foo ``bar```".
   309  // The sqlMode controls whether to escape with backquotes (`) or double quotes
   310  // (`"`) depending on whether allegrosql.ModeANSIQuotes is enabled.
   311  func Escape(str string, sqlMode allegrosql.ALLEGROSQLMode) string {
   312  	var quote string
   313  	if sqlMode&allegrosql.ModeANSIQuotes != 0 {
   314  		quote = `"`
   315  	} else {
   316  		quote = "`"
   317  	}
   318  	return quote + strings.Replace(str, quote, quote+quote, -1) + quote
   319  }
   320  
   321  // BuildStringFromLabels construct config labels into string by following format:
   322  // "keyA=valueA,keyB=valueB"
   323  func BuildStringFromLabels(labels map[string]string) string {
   324  	if len(labels) < 1 {
   325  		return ""
   326  	}
   327  	s := make([]string, 0, len(labels))
   328  	for k := range labels {
   329  		s = append(s, k)
   330  	}
   331  	sort.Strings(s)
   332  	r := new(bytes.Buffer)
   333  	// visit labels by sorted key in order to make sure that result should be consistency
   334  	for _, key := range s {
   335  		r.WriteString(fmt.Sprintf("%s=%s,", key, labels[key]))
   336  	}
   337  	returned := r.String()
   338  	return returned[:len(returned)-1]
   339  }