github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/sort.go (about)

     1  package parquet
     2  
     3  // The SortConfig type carries configuration options used to generate sorting
     4  // functions.
     5  //
     6  // SortConfig implements the SortOption interface so it can be used directly as
     7  // argument to the SortFuncOf function, for example:
     8  //
     9  //	sortFunc := parquet.SortFuncOf(columnType, &parquet.SortConfig{
    10  //		Descending: true,
    11  //		NullsFirst: true,
    12  //	})
    13  //
    14  type SortConfig struct {
    15  	MaxRepetitionLevel int
    16  	MaxDefinitionLevel int
    17  	Descending         bool
    18  	NullsFirst         bool
    19  }
    20  
    21  // Apply applies options to c.
    22  func (c *SortConfig) Apply(options ...SortOption) {
    23  	for _, opt := range options {
    24  		opt.ConfigureSort(c)
    25  	}
    26  }
    27  
    28  // ConfigureSort satisfies the SortOption interface.
    29  func (c *SortConfig) ConfigureSort(config *SortConfig) {
    30  	*c = *config
    31  }
    32  
    33  // SortMaxRepetitionLevel constructs a configuration option which sets the
    34  // maximum repetition level known to a sorting function.
    35  //
    36  // Defaults to zero, which represents a non-repeated column.
    37  func SortMaxRepetitionLevel(level int) SortOption {
    38  	return sortingOption(func(c *SortConfig) { c.MaxRepetitionLevel = level })
    39  }
    40  
    41  // SortMaxDefinitionLevel constructs a configuration option which sets the
    42  // maximum definition level known to a sorting function.
    43  //
    44  // Defaults to zero, which represents a non-nullable column.
    45  func SortMaxDefinitionLevel(level int) SortOption {
    46  	return sortingOption(func(c *SortConfig) { c.MaxDefinitionLevel = level })
    47  }
    48  
    49  // SortDescending constructs a configuration option which inverts the order of a
    50  // sorting function.
    51  //
    52  // Defaults to false, which means values are sorted in ascending order.
    53  func SortDescending(descending bool) SortOption {
    54  	return sortingOption(func(c *SortConfig) { c.Descending = descending })
    55  }
    56  
    57  // SortNullsFirst constructs a configuration option which places the null values
    58  // first or last.
    59  //
    60  // Defaults to false, which means null values are placed last.
    61  func SortNullsFirst(nullsFirst bool) SortOption {
    62  	return sortingOption(func(c *SortConfig) { c.NullsFirst = nullsFirst })
    63  }
    64  
    65  // SortOption is an interface implemented by types that carry configuration
    66  // options for sorting functions.
    67  type SortOption interface {
    68  	ConfigureSort(*SortConfig)
    69  }
    70  
    71  type sortingOption func(*SortConfig)
    72  
    73  func (f sortingOption) ConfigureSort(c *SortConfig) { f(c) }
    74  
    75  // SortFunc is a function type which compares two sets of column values.
    76  //
    77  // Slices with exactly one value must be passed to the function when comparing
    78  // values of non-repeated columns. For repeated columns, there may be zero or
    79  // more values in each slice, and the parameters may have different lengths.
    80  //
    81  // SortFunc is a low-level API which is usually useful to construct customize
    82  // implementations of the RowGroup interface.
    83  type SortFunc func(a, b []Value) int
    84  
    85  // SortFuncOf constructs a sorting function for values of the given type.
    86  //
    87  // The list of options contains the configuration used to construct the sorting
    88  // function.
    89  func SortFuncOf(t Type, options ...SortOption) SortFunc {
    90  	config := new(SortConfig)
    91  	config.Apply(options...)
    92  	return sortFuncOf(t, config)
    93  }
    94  
    95  func sortFuncOf(t Type, config *SortConfig) (sort SortFunc) {
    96  	sort = sortFuncOfRequired(t)
    97  
    98  	if config.Descending {
    99  		sort = sortFuncOfDescending(sort)
   100  	}
   101  
   102  	switch {
   103  	case makeRepetitionLevel(config.MaxRepetitionLevel) > 0:
   104  		sort = sortFuncOfRepeated(sort, config)
   105  	case makeDefinitionLevel(config.MaxDefinitionLevel) > 0:
   106  		sort = sortFuncOfOptional(sort, config)
   107  	}
   108  
   109  	return sort
   110  }
   111  
   112  //go:noinline
   113  func sortFuncOfDescending(sort SortFunc) SortFunc {
   114  	return func(a, b []Value) int { return -sort(a, b) }
   115  }
   116  
   117  func sortFuncOfOptional(sort SortFunc, config *SortConfig) SortFunc {
   118  	if config.NullsFirst {
   119  		return sortFuncOfOptionalNullsFirst(sort)
   120  	} else {
   121  		return sortFuncOfOptionalNullsLast(sort)
   122  	}
   123  }
   124  
   125  //go:noinline
   126  func sortFuncOfOptionalNullsFirst(sort SortFunc) SortFunc {
   127  	return func(a, b []Value) int {
   128  		switch {
   129  		case a[0].IsNull():
   130  			if b[0].IsNull() {
   131  				return 0
   132  			}
   133  			return -1
   134  		case b[0].IsNull():
   135  			return +1
   136  		default:
   137  			return sort(a, b)
   138  		}
   139  	}
   140  }
   141  
   142  //go:noinline
   143  func sortFuncOfOptionalNullsLast(sort SortFunc) SortFunc {
   144  	return func(a, b []Value) int {
   145  		switch {
   146  		case a[0].IsNull():
   147  			if b[0].IsNull() {
   148  				return 0
   149  			}
   150  			return +1
   151  		case b[0].IsNull():
   152  			return -1
   153  		default:
   154  			return sort(a, b)
   155  		}
   156  	}
   157  }
   158  
   159  //go:noinline
   160  func sortFuncOfRepeated(sort SortFunc, config *SortConfig) SortFunc {
   161  	sort = sortFuncOfOptional(sort, config)
   162  	return func(a, b []Value) int {
   163  		n := len(a)
   164  		if n > len(b) {
   165  			n = len(b)
   166  		}
   167  
   168  		for i := 0; i < n; i++ {
   169  			k := sort(a[i:i+1], b[i:i+1])
   170  			if k != 0 {
   171  				return k
   172  			}
   173  		}
   174  
   175  		return len(a) - len(b)
   176  	}
   177  }
   178  
   179  //go:noinline
   180  func sortFuncOfRequired(t Type) SortFunc {
   181  	return func(a, b []Value) int { return t.Compare(a[0], b[0]) }
   182  }