github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/search.go (about)

     1  package parquet
     2  
     3  // CompareNullsFirst constructs a comparison function which assumes that null
     4  // values are smaller than all other values.
     5  func CompareNullsFirst(cmp func(Value, Value) int) func(Value, Value) int {
     6  	return func(a, b Value) int {
     7  		switch {
     8  		case a.IsNull():
     9  			if b.IsNull() {
    10  				return 0
    11  			}
    12  			return -1
    13  		case b.IsNull():
    14  			return +1
    15  		default:
    16  			return cmp(a, b)
    17  		}
    18  	}
    19  }
    20  
    21  // CompareNullsLast constructs a comparison function which assumes that null
    22  // values are greater than all other values.
    23  func CompareNullsLast(cmp func(Value, Value) int) func(Value, Value) int {
    24  	return func(a, b Value) int {
    25  		switch {
    26  		case a.IsNull():
    27  			if b.IsNull() {
    28  				return 0
    29  			}
    30  			return +1
    31  		case b.IsNull():
    32  			return -1
    33  		default:
    34  			return cmp(a, b)
    35  		}
    36  	}
    37  }
    38  
    39  // Search is like Find, but uses the default ordering of the given type.
    40  func Search(index ColumnIndex, value Value, typ Type) int {
    41  	return Find(index, value, CompareNullsLast(typ.Compare))
    42  }
    43  
    44  // Find uses the column index passed as argument to find the page that the
    45  // given value is expected to be found in.
    46  //
    47  // The function returns the index of the first page that might contain the
    48  // value. If the function determines that the value does not exist in the
    49  // index, NumPages is returned.
    50  //
    51  // The comparison function passed as last argument is used to determine the
    52  // relative order of values. This should generally be the Compare method of
    53  // the column type, but can sometimes be customized to modify how null values
    54  // are interpreted, for example:
    55  //
    56  //	pageIndex := parquet.Find(columnIndex, value,
    57  //		parquet.CompareNullsFirst(typ.Compare),
    58  //	)
    59  //
    60  func Find(index ColumnIndex, value Value, cmp func(Value, Value) int) int {
    61  	switch {
    62  	case index.IsAscending():
    63  		return binarySearch(index, value, cmp)
    64  	default:
    65  		return linearSearch(index, value, cmp)
    66  	}
    67  }
    68  
    69  func binarySearch(index ColumnIndex, value Value, cmp func(Value, Value) int) int {
    70  	n := index.NumPages()
    71  	i := 0
    72  	j := n
    73  
    74  	for (j - i) > 1 {
    75  		k := ((j - i) / 2) + i
    76  		c := cmp(value, index.MinValue(k))
    77  
    78  		switch {
    79  		case c < 0:
    80  			j = k
    81  		case c > 0:
    82  			i = k
    83  		default:
    84  			return k
    85  		}
    86  	}
    87  
    88  	if i < n {
    89  		min := index.MinValue(i)
    90  		max := index.MaxValue(i)
    91  
    92  		if cmp(value, min) < 0 || cmp(max, value) < 0 {
    93  			i = n
    94  		}
    95  	}
    96  
    97  	return i
    98  }
    99  
   100  func linearSearch(index ColumnIndex, value Value, cmp func(Value, Value) int) int {
   101  	n := index.NumPages()
   102  
   103  	for i := 0; i < n; i++ {
   104  		min := index.MinValue(i)
   105  		max := index.MaxValue(i)
   106  
   107  		if cmp(min, value) <= 0 && cmp(value, max) <= 0 {
   108  			return i
   109  		}
   110  	}
   111  
   112  	return n
   113  }