github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/search.go (about) 1 package parquet 2 3 // CompareNullsFirst constructs a comparison function which assumes that null 4 // values are smaller than all other values. 5 func CompareNullsFirst(cmp func(Value, Value) int) func(Value, Value) int { 6 return func(a, b Value) int { 7 switch { 8 case a.IsNull(): 9 if b.IsNull() { 10 return 0 11 } 12 return -1 13 case b.IsNull(): 14 return +1 15 default: 16 return cmp(a, b) 17 } 18 } 19 } 20 21 // CompareNullsLast constructs a comparison function which assumes that null 22 // values are greater than all other values. 23 func CompareNullsLast(cmp func(Value, Value) int) func(Value, Value) int { 24 return func(a, b Value) int { 25 switch { 26 case a.IsNull(): 27 if b.IsNull() { 28 return 0 29 } 30 return +1 31 case b.IsNull(): 32 return -1 33 default: 34 return cmp(a, b) 35 } 36 } 37 } 38 39 // Search is like Find, but uses the default ordering of the given type. 40 func Search(index ColumnIndex, value Value, typ Type) int { 41 return Find(index, value, CompareNullsLast(typ.Compare)) 42 } 43 44 // Find uses the column index passed as argument to find the page that the 45 // given value is expected to be found in. 46 // 47 // The function returns the index of the first page that might contain the 48 // value. If the function determines that the value does not exist in the 49 // index, NumPages is returned. 50 // 51 // The comparison function passed as last argument is used to determine the 52 // relative order of values. This should generally be the Compare method of 53 // the column type, but can sometimes be customized to modify how null values 54 // are interpreted, for example: 55 // 56 // pageIndex := parquet.Find(columnIndex, value, 57 // parquet.CompareNullsFirst(typ.Compare), 58 // ) 59 // 60 func Find(index ColumnIndex, value Value, cmp func(Value, Value) int) int { 61 switch { 62 case index.IsAscending(): 63 return binarySearch(index, value, cmp) 64 default: 65 return linearSearch(index, value, cmp) 66 } 67 } 68 69 func binarySearch(index ColumnIndex, value Value, cmp func(Value, Value) int) int { 70 n := index.NumPages() 71 i := 0 72 j := n 73 74 for (j - i) > 1 { 75 k := ((j - i) / 2) + i 76 c := cmp(value, index.MinValue(k)) 77 78 switch { 79 case c < 0: 80 j = k 81 case c > 0: 82 i = k 83 default: 84 return k 85 } 86 } 87 88 if i < n { 89 min := index.MinValue(i) 90 max := index.MaxValue(i) 91 92 if cmp(value, min) < 0 || cmp(max, value) < 0 { 93 i = n 94 } 95 } 96 97 return i 98 } 99 100 func linearSearch(index ColumnIndex, value Value, cmp func(Value, Value) int) int { 101 n := index.NumPages() 102 103 for i := 0; i < n; i++ { 104 min := index.MinValue(i) 105 max := index.MaxValue(i) 106 107 if cmp(min, value) <= 0 && cmp(value, max) <= 0 { 108 return i 109 } 110 } 111 112 return n 113 }