github.com/matrixorigin/matrixone@v1.2.0/pkg/container/vector/search.go (about) 1 // Copyright 2023 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package vector 16 17 import ( 18 "bytes" 19 "sort" 20 21 "github.com/matrixorigin/matrixone/pkg/container/types" 22 ) 23 24 const kMinLenForSubVector = 4 25 const kMaxLenForBinarySearch = 64 26 27 func OrderedBinarySearchOffsetByValFactory[T types.OrderedT](vals []T) func(*Vector) []int32 { 28 return func(vec *Vector) []int32 { 29 var sels []int32 30 rows := MustFixedCol[T](vec) 31 subVals := vals 32 if len(vals) >= kMinLenForSubVector { 33 minVal := rows[0] 34 maxVal := rows[len(rows)-1] 35 lowerBound := sort.Search(len(vals), func(i int) bool { 36 return minVal <= vals[i] 37 }) 38 upperBound := sort.Search(len(vals), func(i int) bool { 39 return maxVal < vals[i] 40 }) 41 subVals = vals[lowerBound:upperBound] 42 } 43 44 if len(subVals) <= kMaxLenForBinarySearch { 45 offset := 0 46 for i := range subVals { 47 idx := sort.Search(len(rows), func(idx int) bool { 48 return rows[idx] >= subVals[i] 49 }) 50 if idx < len(rows) { 51 if rows[idx] == subVals[i] { 52 sels = append(sels, int32(offset+idx)) 53 } 54 offset += idx 55 rows = rows[idx:] 56 } else { 57 break 58 } 59 } 60 } else { 61 n1, n2 := len(rows), len(subVals) 62 i1, i2 := 0, 0 63 for i1 < n1 && i2 < n2 { 64 if rows[i1] == subVals[i2] { 65 sels = append(sels, int32(i1)) 66 i1++ 67 i2++ 68 } else if rows[i1] < subVals[i2] { 69 i1++ 70 } else { 71 i2++ 72 } 73 } 74 } 75 76 return sels 77 } 78 } 79 80 func VarlenBinarySearchOffsetByValFactory(vals [][]byte) func(*Vector) []int32 { 81 return func(vec *Vector) []int32 { 82 var sels []int32 83 n1 := vec.Length() 84 if n1 == 0 { 85 return sels 86 } 87 subVals := vals 88 if len(vals) >= kMinLenForSubVector { 89 lowerBound := sort.Search(len(vals), func(i int) bool { 90 return bytes.Compare(vec.GetBytesAt(0), vals[i]) <= 0 91 }) 92 upperBound := sort.Search(len(vals), func(i int) bool { 93 return bytes.Compare(vec.GetBytesAt(n1-1), vals[i]) < 0 94 }) 95 subVals = vals[lowerBound:upperBound] 96 } 97 98 if len(subVals) <= kMaxLenForBinarySearch { 99 offset := 0 100 for i := range subVals { 101 idx, found := sort.Find(n1, func(idx int) int { 102 return bytes.Compare(subVals[i], vec.GetBytesAt(offset+idx)) 103 }) 104 if idx < n1 { 105 if found { 106 sels = append(sels, int32(offset+idx)) 107 } 108 offset += idx 109 n1 -= idx 110 } else { 111 break 112 } 113 } 114 } else { 115 n2 := len(subVals) 116 i1, i2 := 0, 0 117 varlenas := MustFixedCol[types.Varlena](vec) 118 s1 := varlenas[0].GetByteSlice(vec.GetArea()) 119 for i2 < n2 { 120 ord := bytes.Compare(s1, subVals[i2]) 121 if ord == 0 { 122 sels = append(sels, int32(i1)) 123 i1++ 124 if i1 == n1 { 125 break 126 } 127 i2++ 128 s1 = varlenas[i1].GetByteSlice(vec.GetArea()) 129 } else if ord < 0 { 130 i1++ 131 if i1 == n1 { 132 break 133 } 134 s1 = varlenas[i1].GetByteSlice(vec.GetArea()) 135 } else { 136 i2++ 137 } 138 } 139 } 140 141 return sels 142 } 143 } 144 145 func FixedSizedBinarySearchOffsetByValFactory[T any](vals []T, cmp func(T, T) int) func(*Vector) []int32 { 146 return func(vec *Vector) []int32 { 147 var sels []int32 148 rows := MustFixedCol[T](vec) 149 150 subVals := vals 151 if len(vals) >= kMinLenForSubVector { 152 minVal := rows[0] 153 maxVal := rows[len(rows)-1] 154 lowerBound := sort.Search(len(vals), func(i int) bool { 155 return cmp(minVal, vals[i]) <= 0 156 }) 157 upperBound := sort.Search(len(vals), func(i int) bool { 158 return cmp(maxVal, vals[i]) < 0 159 }) 160 subVals = vals[lowerBound:upperBound] 161 } 162 163 if len(subVals) <= kMaxLenForBinarySearch { 164 offset := 0 165 for i := range subVals { 166 idx, found := sort.Find(len(rows), func(idx int) int { 167 return cmp(subVals[i], rows[i]) 168 }) 169 if idx < len(rows) { 170 if found { 171 sels = append(sels, int32(offset+idx)) 172 } 173 offset += idx 174 rows = rows[idx:] 175 } else { 176 break 177 } 178 } 179 } else { 180 n1, n2 := len(rows), len(subVals) 181 i1, i2 := 0, 0 182 for i1 < n1 && i2 < n2 { 183 ord := cmp(rows[i1], subVals[i2]) 184 if ord == 0 { 185 sels = append(sels, int32(i1)) 186 i1++ 187 i2++ 188 } else if ord < 0 { 189 i1++ 190 } else { 191 i2++ 192 } 193 } 194 } 195 196 return sels 197 } 198 } 199 200 func CollectOffsetsByPrefixEqFactory(val []byte) func(*Vector) []int32 { 201 return func(lvec *Vector) []int32 { 202 lvlen := lvec.Length() 203 if lvlen == 0 { 204 return nil 205 } 206 lcol, larea := MustVarlenaRawData(lvec) 207 start, _ := sort.Find(lvlen, func(i int) int { 208 return bytes.Compare(val, lcol[i].GetByteSlice(larea)) 209 }) 210 end := start 211 for end < lvlen && bytes.HasPrefix(lcol[end].GetByteSlice(larea), val) { 212 end++ 213 } 214 if start == end { 215 return nil 216 } 217 sels := make([]int32, end-start) 218 for i := start; i < end; i++ { 219 sels[i-start] = int32(i) 220 } 221 return sels 222 } 223 } 224 225 func CollectOffsetsByPrefixBetweenFactory(lval, rval []byte) func(*Vector) []int32 { 226 return func(lvec *Vector) []int32 { 227 lvlen := lvec.Length() 228 if lvlen == 0 { 229 return nil 230 } 231 lcol, larea := MustVarlenaRawData(lvec) 232 start := sort.Search(lvlen, func(i int) bool { 233 return bytes.Compare(lcol[i].GetByteSlice(larea), lval) >= 0 234 }) 235 if start == lvlen { 236 return nil 237 } 238 end := sort.Search(lvlen, func(i int) bool { 239 return types.PrefixCompare(lcol[i].GetByteSlice(larea), rval) > 0 240 }) 241 if start == end { 242 return nil 243 } 244 sels := make([]int32, end-start) 245 for i := start; i < end; i++ { 246 sels[i-start] = int32(i) 247 } 248 return sels 249 } 250 } 251 252 func CollectOffsetsByPrefixInFactory(rvec *Vector) func(*Vector) []int32 { 253 return func(lvec *Vector) []int32 { 254 lvlen := lvec.Length() 255 if lvlen == 0 { 256 return nil 257 } 258 259 lcol, larea := MustVarlenaRawData(lvec) 260 rcol, rarea := MustVarlenaRawData(rvec) 261 262 rval := rcol[0].GetByteSlice(rarea) 263 rpos := 0 264 rvlen := rvec.Length() 265 266 sels := make([]int32, 0, rvlen) 267 for i := 0; i < lvlen; i++ { 268 lval := lcol[i].GetByteSlice(larea) 269 for types.PrefixCompare(lval, rval) > 0 { 270 rpos++ 271 if rpos == rvlen { 272 return sels 273 } 274 275 rval = rcol[rpos].GetByteSlice(rarea) 276 } 277 278 if bytes.HasPrefix(lval, rval) { 279 sels = append(sels, int32(i)) 280 } 281 } 282 283 return sels 284 } 285 }