github.com/matrixorigin/matrixone@v1.2.0/pkg/container/vector/search.go (about)

     1  // Copyright 2023 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package vector
    16  
    17  import (
    18  	"bytes"
    19  	"sort"
    20  
    21  	"github.com/matrixorigin/matrixone/pkg/container/types"
    22  )
    23  
    24  const kMinLenForSubVector = 4
    25  const kMaxLenForBinarySearch = 64
    26  
    27  func OrderedBinarySearchOffsetByValFactory[T types.OrderedT](vals []T) func(*Vector) []int32 {
    28  	return func(vec *Vector) []int32 {
    29  		var sels []int32
    30  		rows := MustFixedCol[T](vec)
    31  		subVals := vals
    32  		if len(vals) >= kMinLenForSubVector {
    33  			minVal := rows[0]
    34  			maxVal := rows[len(rows)-1]
    35  			lowerBound := sort.Search(len(vals), func(i int) bool {
    36  				return minVal <= vals[i]
    37  			})
    38  			upperBound := sort.Search(len(vals), func(i int) bool {
    39  				return maxVal < vals[i]
    40  			})
    41  			subVals = vals[lowerBound:upperBound]
    42  		}
    43  
    44  		if len(subVals) <= kMaxLenForBinarySearch {
    45  			offset := 0
    46  			for i := range subVals {
    47  				idx := sort.Search(len(rows), func(idx int) bool {
    48  					return rows[idx] >= subVals[i]
    49  				})
    50  				if idx < len(rows) {
    51  					if rows[idx] == subVals[i] {
    52  						sels = append(sels, int32(offset+idx))
    53  					}
    54  					offset += idx
    55  					rows = rows[idx:]
    56  				} else {
    57  					break
    58  				}
    59  			}
    60  		} else {
    61  			n1, n2 := len(rows), len(subVals)
    62  			i1, i2 := 0, 0
    63  			for i1 < n1 && i2 < n2 {
    64  				if rows[i1] == subVals[i2] {
    65  					sels = append(sels, int32(i1))
    66  					i1++
    67  					i2++
    68  				} else if rows[i1] < subVals[i2] {
    69  					i1++
    70  				} else {
    71  					i2++
    72  				}
    73  			}
    74  		}
    75  
    76  		return sels
    77  	}
    78  }
    79  
    80  func VarlenBinarySearchOffsetByValFactory(vals [][]byte) func(*Vector) []int32 {
    81  	return func(vec *Vector) []int32 {
    82  		var sels []int32
    83  		n1 := vec.Length()
    84  		if n1 == 0 {
    85  			return sels
    86  		}
    87  		subVals := vals
    88  		if len(vals) >= kMinLenForSubVector {
    89  			lowerBound := sort.Search(len(vals), func(i int) bool {
    90  				return bytes.Compare(vec.GetBytesAt(0), vals[i]) <= 0
    91  			})
    92  			upperBound := sort.Search(len(vals), func(i int) bool {
    93  				return bytes.Compare(vec.GetBytesAt(n1-1), vals[i]) < 0
    94  			})
    95  			subVals = vals[lowerBound:upperBound]
    96  		}
    97  
    98  		if len(subVals) <= kMaxLenForBinarySearch {
    99  			offset := 0
   100  			for i := range subVals {
   101  				idx, found := sort.Find(n1, func(idx int) int {
   102  					return bytes.Compare(subVals[i], vec.GetBytesAt(offset+idx))
   103  				})
   104  				if idx < n1 {
   105  					if found {
   106  						sels = append(sels, int32(offset+idx))
   107  					}
   108  					offset += idx
   109  					n1 -= idx
   110  				} else {
   111  					break
   112  				}
   113  			}
   114  		} else {
   115  			n2 := len(subVals)
   116  			i1, i2 := 0, 0
   117  			varlenas := MustFixedCol[types.Varlena](vec)
   118  			s1 := varlenas[0].GetByteSlice(vec.GetArea())
   119  			for i2 < n2 {
   120  				ord := bytes.Compare(s1, subVals[i2])
   121  				if ord == 0 {
   122  					sels = append(sels, int32(i1))
   123  					i1++
   124  					if i1 == n1 {
   125  						break
   126  					}
   127  					i2++
   128  					s1 = varlenas[i1].GetByteSlice(vec.GetArea())
   129  				} else if ord < 0 {
   130  					i1++
   131  					if i1 == n1 {
   132  						break
   133  					}
   134  					s1 = varlenas[i1].GetByteSlice(vec.GetArea())
   135  				} else {
   136  					i2++
   137  				}
   138  			}
   139  		}
   140  
   141  		return sels
   142  	}
   143  }
   144  
   145  func FixedSizedBinarySearchOffsetByValFactory[T any](vals []T, cmp func(T, T) int) func(*Vector) []int32 {
   146  	return func(vec *Vector) []int32 {
   147  		var sels []int32
   148  		rows := MustFixedCol[T](vec)
   149  
   150  		subVals := vals
   151  		if len(vals) >= kMinLenForSubVector {
   152  			minVal := rows[0]
   153  			maxVal := rows[len(rows)-1]
   154  			lowerBound := sort.Search(len(vals), func(i int) bool {
   155  				return cmp(minVal, vals[i]) <= 0
   156  			})
   157  			upperBound := sort.Search(len(vals), func(i int) bool {
   158  				return cmp(maxVal, vals[i]) < 0
   159  			})
   160  			subVals = vals[lowerBound:upperBound]
   161  		}
   162  
   163  		if len(subVals) <= kMaxLenForBinarySearch {
   164  			offset := 0
   165  			for i := range subVals {
   166  				idx, found := sort.Find(len(rows), func(idx int) int {
   167  					return cmp(subVals[i], rows[i])
   168  				})
   169  				if idx < len(rows) {
   170  					if found {
   171  						sels = append(sels, int32(offset+idx))
   172  					}
   173  					offset += idx
   174  					rows = rows[idx:]
   175  				} else {
   176  					break
   177  				}
   178  			}
   179  		} else {
   180  			n1, n2 := len(rows), len(subVals)
   181  			i1, i2 := 0, 0
   182  			for i1 < n1 && i2 < n2 {
   183  				ord := cmp(rows[i1], subVals[i2])
   184  				if ord == 0 {
   185  					sels = append(sels, int32(i1))
   186  					i1++
   187  					i2++
   188  				} else if ord < 0 {
   189  					i1++
   190  				} else {
   191  					i2++
   192  				}
   193  			}
   194  		}
   195  
   196  		return sels
   197  	}
   198  }
   199  
   200  func CollectOffsetsByPrefixEqFactory(val []byte) func(*Vector) []int32 {
   201  	return func(lvec *Vector) []int32 {
   202  		lvlen := lvec.Length()
   203  		if lvlen == 0 {
   204  			return nil
   205  		}
   206  		lcol, larea := MustVarlenaRawData(lvec)
   207  		start, _ := sort.Find(lvlen, func(i int) int {
   208  			return bytes.Compare(val, lcol[i].GetByteSlice(larea))
   209  		})
   210  		end := start
   211  		for end < lvlen && bytes.HasPrefix(lcol[end].GetByteSlice(larea), val) {
   212  			end++
   213  		}
   214  		if start == end {
   215  			return nil
   216  		}
   217  		sels := make([]int32, end-start)
   218  		for i := start; i < end; i++ {
   219  			sels[i-start] = int32(i)
   220  		}
   221  		return sels
   222  	}
   223  }
   224  
   225  func CollectOffsetsByPrefixBetweenFactory(lval, rval []byte) func(*Vector) []int32 {
   226  	return func(lvec *Vector) []int32 {
   227  		lvlen := lvec.Length()
   228  		if lvlen == 0 {
   229  			return nil
   230  		}
   231  		lcol, larea := MustVarlenaRawData(lvec)
   232  		start := sort.Search(lvlen, func(i int) bool {
   233  			return bytes.Compare(lcol[i].GetByteSlice(larea), lval) >= 0
   234  		})
   235  		if start == lvlen {
   236  			return nil
   237  		}
   238  		end := sort.Search(lvlen, func(i int) bool {
   239  			return types.PrefixCompare(lcol[i].GetByteSlice(larea), rval) > 0
   240  		})
   241  		if start == end {
   242  			return nil
   243  		}
   244  		sels := make([]int32, end-start)
   245  		for i := start; i < end; i++ {
   246  			sels[i-start] = int32(i)
   247  		}
   248  		return sels
   249  	}
   250  }
   251  
   252  func CollectOffsetsByPrefixInFactory(rvec *Vector) func(*Vector) []int32 {
   253  	return func(lvec *Vector) []int32 {
   254  		lvlen := lvec.Length()
   255  		if lvlen == 0 {
   256  			return nil
   257  		}
   258  
   259  		lcol, larea := MustVarlenaRawData(lvec)
   260  		rcol, rarea := MustVarlenaRawData(rvec)
   261  
   262  		rval := rcol[0].GetByteSlice(rarea)
   263  		rpos := 0
   264  		rvlen := rvec.Length()
   265  
   266  		sels := make([]int32, 0, rvlen)
   267  		for i := 0; i < lvlen; i++ {
   268  			lval := lcol[i].GetByteSlice(larea)
   269  			for types.PrefixCompare(lval, rval) > 0 {
   270  				rpos++
   271  				if rpos == rvlen {
   272  					return sels
   273  				}
   274  
   275  				rval = rcol[rpos].GetByteSlice(rarea)
   276  			}
   277  
   278  			if bytes.HasPrefix(lval, rval) {
   279  				sels = append(sels, int32(i))
   280  			}
   281  		}
   282  
   283  		return sels
   284  	}
   285  }