github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/sort.go (about) 1 package parquet 2 3 // The SortConfig type carries configuration options used to generate sorting 4 // functions. 5 // 6 // SortConfig implements the SortOption interface so it can be used directly as 7 // argument to the SortFuncOf function, for example: 8 // 9 // sortFunc := parquet.SortFuncOf(columnType, &parquet.SortConfig{ 10 // Descending: true, 11 // NullsFirst: true, 12 // }) 13 // 14 type SortConfig struct { 15 MaxRepetitionLevel int 16 MaxDefinitionLevel int 17 Descending bool 18 NullsFirst bool 19 } 20 21 // Apply applies options to c. 22 func (c *SortConfig) Apply(options ...SortOption) { 23 for _, opt := range options { 24 opt.ConfigureSort(c) 25 } 26 } 27 28 // ConfigureSort satisfies the SortOption interface. 29 func (c *SortConfig) ConfigureSort(config *SortConfig) { 30 *c = *config 31 } 32 33 // SortMaxRepetitionLevel constructs a configuration option which sets the 34 // maximum repetition level known to a sorting function. 35 // 36 // Defaults to zero, which represents a non-repeated column. 37 func SortMaxRepetitionLevel(level int) SortOption { 38 return sortingOption(func(c *SortConfig) { c.MaxRepetitionLevel = level }) 39 } 40 41 // SortMaxDefinitionLevel constructs a configuration option which sets the 42 // maximum definition level known to a sorting function. 43 // 44 // Defaults to zero, which represents a non-nullable column. 45 func SortMaxDefinitionLevel(level int) SortOption { 46 return sortingOption(func(c *SortConfig) { c.MaxDefinitionLevel = level }) 47 } 48 49 // SortDescending constructs a configuration option which inverts the order of a 50 // sorting function. 51 // 52 // Defaults to false, which means values are sorted in ascending order. 53 func SortDescending(descending bool) SortOption { 54 return sortingOption(func(c *SortConfig) { c.Descending = descending }) 55 } 56 57 // SortNullsFirst constructs a configuration option which places the null values 58 // first or last. 59 // 60 // Defaults to false, which means null values are placed last. 61 func SortNullsFirst(nullsFirst bool) SortOption { 62 return sortingOption(func(c *SortConfig) { c.NullsFirst = nullsFirst }) 63 } 64 65 // SortOption is an interface implemented by types that carry configuration 66 // options for sorting functions. 67 type SortOption interface { 68 ConfigureSort(*SortConfig) 69 } 70 71 type sortingOption func(*SortConfig) 72 73 func (f sortingOption) ConfigureSort(c *SortConfig) { f(c) } 74 75 // SortFunc is a function type which compares two sets of column values. 76 // 77 // Slices with exactly one value must be passed to the function when comparing 78 // values of non-repeated columns. For repeated columns, there may be zero or 79 // more values in each slice, and the parameters may have different lengths. 80 // 81 // SortFunc is a low-level API which is usually useful to construct customize 82 // implementations of the RowGroup interface. 83 type SortFunc func(a, b []Value) int 84 85 // SortFuncOf constructs a sorting function for values of the given type. 86 // 87 // The list of options contains the configuration used to construct the sorting 88 // function. 89 func SortFuncOf(t Type, options ...SortOption) SortFunc { 90 config := new(SortConfig) 91 config.Apply(options...) 92 return sortFuncOf(t, config) 93 } 94 95 func sortFuncOf(t Type, config *SortConfig) (sort SortFunc) { 96 sort = sortFuncOfRequired(t) 97 98 if config.Descending { 99 sort = sortFuncOfDescending(sort) 100 } 101 102 switch { 103 case makeRepetitionLevel(config.MaxRepetitionLevel) > 0: 104 sort = sortFuncOfRepeated(sort, config) 105 case makeDefinitionLevel(config.MaxDefinitionLevel) > 0: 106 sort = sortFuncOfOptional(sort, config) 107 } 108 109 return sort 110 } 111 112 //go:noinline 113 func sortFuncOfDescending(sort SortFunc) SortFunc { 114 return func(a, b []Value) int { return -sort(a, b) } 115 } 116 117 func sortFuncOfOptional(sort SortFunc, config *SortConfig) SortFunc { 118 if config.NullsFirst { 119 return sortFuncOfOptionalNullsFirst(sort) 120 } else { 121 return sortFuncOfOptionalNullsLast(sort) 122 } 123 } 124 125 //go:noinline 126 func sortFuncOfOptionalNullsFirst(sort SortFunc) SortFunc { 127 return func(a, b []Value) int { 128 switch { 129 case a[0].IsNull(): 130 if b[0].IsNull() { 131 return 0 132 } 133 return -1 134 case b[0].IsNull(): 135 return +1 136 default: 137 return sort(a, b) 138 } 139 } 140 } 141 142 //go:noinline 143 func sortFuncOfOptionalNullsLast(sort SortFunc) SortFunc { 144 return func(a, b []Value) int { 145 switch { 146 case a[0].IsNull(): 147 if b[0].IsNull() { 148 return 0 149 } 150 return +1 151 case b[0].IsNull(): 152 return -1 153 default: 154 return sort(a, b) 155 } 156 } 157 } 158 159 //go:noinline 160 func sortFuncOfRepeated(sort SortFunc, config *SortConfig) SortFunc { 161 sort = sortFuncOfOptional(sort, config) 162 return func(a, b []Value) int { 163 n := len(a) 164 if n > len(b) { 165 n = len(b) 166 } 167 168 for i := 0; i < n; i++ { 169 k := sort(a[i:i+1], b[i:i+1]) 170 if k != 0 { 171 return k 172 } 173 } 174 175 return len(a) - len(b) 176 } 177 } 178 179 //go:noinline 180 func sortFuncOfRequired(t Type) SortFunc { 181 return func(a, b []Value) int { return t.Compare(a[0], b[0]) } 182 }