github.com/matrixorigin/matrixone@v0.7.0/pkg/vectorize/like/like.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package like 16 17 import ( 18 "bytes" 19 "fmt" 20 "regexp" 21 "unicode/utf8" 22 "unsafe" 23 24 "github.com/matrixorigin/matrixone/pkg/common/moerr" 25 "github.com/matrixorigin/matrixone/pkg/container/nulls" 26 ) 27 28 const ( 29 DEFAULT_ESCAPE_CHAR = '\\' 30 ) 31 32 // <source column> like 'rule' 33 // XXX: rs here is the selection list. 34 func BtSliceAndConst(xs []string, expr []byte, rs []bool) ([]bool, error) { 35 return BtSliceNullAndConst(xs, expr, nil, rs) 36 } 37 38 func isNotNull(n *nulls.Nulls, i uint64) bool { 39 if n == nil { 40 return true 41 } 42 return !n.Contains(i) 43 } 44 45 func removeEscapeChar(src []byte, escapeChar byte) []byte { 46 var target []byte 47 max := len(src) 48 for i := 0; i < max; i++ { 49 if src[i] == escapeChar && i+1 < max { 50 i = i + 1 51 } 52 target = append(target, src[i]) 53 } 54 return target 55 } 56 57 func BtSliceNullAndConst(xs []string, expr []byte, ns *nulls.Nulls, rs []bool) ([]bool, error) { 58 // Opt Rule #1: if expr is empty string, only empty string like empty string. 59 n := uint32(len(expr)) 60 if n == 0 { 61 for i, s := range xs { 62 rs[i] = isNotNull(ns, uint64(i)) && len(s) == 0 63 } 64 return rs, nil 65 } 66 67 // Opt Rule #2: anything matches % 68 if n == 1 && expr[0] == '%' { 69 for i := range xs { 70 rs[i] = isNotNull(ns, uint64(i)) 71 } 72 return rs, nil 73 } 74 75 // Opt Rule #3: single char matches _. 76 // XXX in UTF8 world, should we do single RUNE matches _? 77 if n == 1 && expr[0] == '_' { 78 for i, s := range xs { 79 rs[i] = isNotNull(ns, uint64(i)) && len(s) == 1 80 } 81 return rs, nil 82 } 83 84 // Opt Rule #3.1: single char, no wild card, so it is a simple compare eq. 85 if n == 1 && expr[0] != '_' && expr[0] != '%' { 86 for i, s := range xs { 87 rs[i] = isNotNull(ns, uint64(i)) && len(s) == 1 && s[0] == expr[0] 88 } 89 return rs, nil 90 } 91 92 // Opt Rule #4. [_%]somethingInBetween[_%] 93 if n > 1 && !bytes.ContainsAny(expr[1:len(expr)-1], "_%") { 94 c0 := expr[0] // first character 95 c1 := expr[n-1] // last character 96 if n > 2 && expr[n-2] == DEFAULT_ESCAPE_CHAR { 97 c1 = DEFAULT_ESCAPE_CHAR 98 } 99 switch { 100 case !(c0 == '%' || c0 == '_') && !(c1 == '%' || c1 == '_'): 101 // Rule 4.1: no wild card, so it is a simple compare eq. 102 for i, s := range xs { 103 rs[i] = isNotNull(ns, uint64(i)) && uint32(len(s)) == n && bytes.Equal(expr, []byte(s)) 104 } 105 return rs, nil 106 case c0 == '_' && !(c1 == '%' || c1 == '_'): 107 // Rule 4.2: _foobarzoo, 108 for i, s := range xs { 109 rs[i] = isNotNull(ns, uint64(i)) && uint32(len(s)) == n && bytes.Equal(expr[1:], []byte(s)[1:]) 110 } 111 return rs, nil 112 case c0 == '%' && !(c1 == '%' || c1 == '_'): 113 // Rule 4.3, %foobarzoo, it turns into a suffix match. 114 suffix := removeEscapeChar(expr[1:], DEFAULT_ESCAPE_CHAR) 115 for i, s := range xs { 116 rs[i] = isNotNull(ns, uint64(i)) && bytes.HasSuffix([]byte(s), suffix) 117 } 118 return rs, nil 119 case c1 == '_' && !(c0 == '%' || c0 == '_'): 120 // Rule 4.4, foobarzoo_, it turns into eq ingoring last char. 121 prefix := removeEscapeChar(expr[:n-1], DEFAULT_ESCAPE_CHAR) 122 for i, s := range xs { 123 rs[i] = isNotNull(ns, uint64(i)) && uint32(len(s)) == n && bytes.Equal(prefix, []byte(s)[:n-1]) 124 } 125 return rs, nil 126 case c1 == '%' && !(c0 == '%' || c0 == '_'): 127 // Rule 4.5 foobarzoo%, prefix match 128 prefix := removeEscapeChar(expr[:n-1], DEFAULT_ESCAPE_CHAR) 129 for i, s := range xs { 130 rs[i] = isNotNull(ns, uint64(i)) && bytes.HasPrefix([]byte(s), prefix) 131 } 132 return rs, nil 133 case c0 == '%' && c1 == '%': 134 // Rule 4.6 %foobarzoo%, now it is contains 135 substr := removeEscapeChar(expr[1:n-1], DEFAULT_ESCAPE_CHAR) 136 for i, s := range xs { 137 rs[i] = isNotNull(ns, uint64(i)) && bytes.Contains([]byte(s), substr) 138 } 139 return rs, nil 140 case c0 == '%' && c1 == '_': 141 // Rule 4.7 %foobarzoo_, 142 suffix := removeEscapeChar(expr[1:n-1], DEFAULT_ESCAPE_CHAR) 143 for i, s := range xs { 144 bs := []byte(s) 145 rs[i] = isNotNull(ns, uint64(i)) && len(s) > 0 && bytes.HasSuffix(bs[:len(bs)-1], suffix) 146 } 147 return rs, nil 148 case c0 == '_' && c1 == '%': 149 // Rule 4.8 _foobarzoo% 150 prefix := removeEscapeChar(expr[1:n-1], DEFAULT_ESCAPE_CHAR) 151 for i, s := range xs { 152 rs[i] = isNotNull(ns, uint64(i)) && len(s) > 0 && bytes.HasPrefix([]byte(s)[1:], prefix) 153 } 154 return rs, nil 155 } 156 } 157 158 // Done opt rules, fall back to regexp 159 reg, err := regexp.Compile(convert(expr)) 160 if err != nil { 161 return nil, err 162 } 163 for i, s := range xs { 164 rs[i] = isNotNull(ns, uint64(i)) && reg.MatchString(s) 165 } 166 return rs, nil 167 } 168 169 // 'source' like 'rule' 170 func BtConstAndConst(s string, expr []byte) (bool, error) { 171 ss := []string{s} 172 rs := []bool{false} 173 rs, err := BtSliceAndConst(ss, expr, rs) 174 if err != nil { 175 return false, err 176 } 177 return rs[0], nil 178 } 179 180 // <source column> like <rule column> 181 func BtSliceAndSlice(xs []string, exprs [][]byte, rs []bool) ([]bool, error) { 182 if len(xs) != len(exprs) { 183 return nil, moerr.NewInternalErrorNoCtx("unexpected error when LIKE operator") 184 } 185 186 for i := range xs { 187 isLike, err := BtConstAndConst(xs[i], exprs[i]) 188 if err != nil { 189 return nil, err 190 } 191 rs[i] = isLike 192 } 193 return rs, nil 194 } 195 196 // 'source' like <rule column> 197 func BtConstAndSliceNull(p string, exprs [][]byte, ns *nulls.Nulls, rs []bool) ([]bool, error) { 198 for i, ex := range exprs { 199 rs[i] = false 200 if isNotNull(ns, uint64(i)) { 201 k, err := BtConstAndConst(p, ex) 202 if err != nil { 203 return nil, err 204 } 205 rs[i] = k 206 } 207 } 208 return rs, nil 209 } 210 211 // <source column may contains null> like 212 func BtSliceNullAndSliceNull(xs []string, exprs [][]byte, ns *nulls.Nulls, rs []bool) ([]bool, error) { 213 for i := range xs { 214 rs[i] = false 215 if isNotNull(ns, uint64(i)) { 216 k, err := BtConstAndConst(xs[i], exprs[i]) 217 if err != nil { 218 return nil, err 219 } 220 rs[i] = k 221 } 222 } 223 return rs, nil 224 } 225 226 func convert(expr []byte) string { 227 return fmt.Sprintf("^(?s:%s)$", replace(*(*string)(unsafe.Pointer(&expr)))) 228 } 229 230 func replace(s string) string { 231 var oldCharactor rune 232 233 r := make([]byte, len(s)*2) 234 w := 0 235 start := 0 236 for len(s) > start { 237 character, wid := utf8.DecodeRuneInString(s[start:]) 238 if oldCharactor == '\\' { 239 w += copy(r[w:], s[start:start+wid]) 240 start += wid 241 oldCharactor = 0 242 continue 243 } 244 switch character { 245 case '_': 246 w += copy(r[w:], []byte{'.'}) 247 case '%': 248 w += copy(r[w:], []byte{'.', '*'}) 249 case '(': 250 w += copy(r[w:], []byte{'\\', '('}) 251 case ')': 252 w += copy(r[w:], []byte{'\\', ')'}) 253 case '\\': 254 default: 255 w += copy(r[w:], s[start:start+wid]) 256 } 257 start += wid 258 oldCharactor = character 259 } 260 return string(r[:w]) 261 }