github.com/bytedance/sonic@v1.11.7-0.20240517092252-d2edb31b167b/internal/encoder/sort.go (about) 1 /* 2 * Copyright 2021 ByteDance Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package encoder 18 19 // Algorithm 3-way Radix Quicksort, d means the radix. 20 // Reference: https://algs4.cs.princeton.edu/51radix/Quick3string.java.html 21 func radixQsort(kvs []_MapPair, d, maxDepth int) { 22 for len(kvs) > 11 { 23 // To avoid the worst case of quickSort (time: O(n^2)), use introsort here. 24 // Reference: https://en.wikipedia.org/wiki/Introsort and 25 // https://github.com/golang/go/issues/467 26 if maxDepth == 0 { 27 heapSort(kvs, 0, len(kvs)) 28 return 29 } 30 maxDepth-- 31 32 p := pivot(kvs, d) 33 lt, i, gt := 0, 0, len(kvs) 34 for i < gt { 35 c := byteAt(kvs[i].k, d) 36 if c < p { 37 swap(kvs, lt, i) 38 i++ 39 lt++ 40 } else if c > p { 41 gt-- 42 swap(kvs, i, gt) 43 } else { 44 i++ 45 } 46 } 47 48 // kvs[0:lt] < v = kvs[lt:gt] < kvs[gt:len(kvs)] 49 // Native implemention: 50 // radixQsort(kvs[:lt], d, maxDepth) 51 // if p > -1 { 52 // radixQsort(kvs[lt:gt], d+1, maxDepth) 53 // } 54 // radixQsort(kvs[gt:], d, maxDepth) 55 // Optimize as follows: make recursive calls only for the smaller parts. 56 // Reference: https://www.geeksforgeeks.org/quicksort-tail-call-optimization-reducing-worst-case-space-log-n/ 57 if p == -1 { 58 if lt > len(kvs) - gt { 59 radixQsort(kvs[gt:], d, maxDepth) 60 kvs = kvs[:lt] 61 } else { 62 radixQsort(kvs[:lt], d, maxDepth) 63 kvs = kvs[gt:] 64 } 65 } else { 66 ml := maxThree(lt, gt-lt, len(kvs)-gt) 67 if ml == lt { 68 radixQsort(kvs[lt:gt], d+1, maxDepth) 69 radixQsort(kvs[gt:], d, maxDepth) 70 kvs = kvs[:lt] 71 } else if ml == gt-lt { 72 radixQsort(kvs[:lt], d, maxDepth) 73 radixQsort(kvs[gt:], d, maxDepth) 74 kvs = kvs[lt:gt] 75 d += 1 76 } else { 77 radixQsort(kvs[:lt], d, maxDepth) 78 radixQsort(kvs[lt:gt], d+1, maxDepth) 79 kvs = kvs[gt:] 80 } 81 } 82 } 83 insertRadixSort(kvs, d) 84 } 85 86 func insertRadixSort(kvs []_MapPair, d int) { 87 for i := 1; i < len(kvs); i++ { 88 for j := i; j > 0 && lessFrom(kvs[j].k, kvs[j-1].k, d); j-- { 89 swap(kvs, j, j-1) 90 } 91 } 92 } 93 94 func pivot(kvs []_MapPair, d int) int { 95 m := len(kvs) >> 1 96 if len(kvs) > 40 { 97 // Tukey's ``Ninther,'' median of three mediankvs of three. 98 t := len(kvs) / 8 99 return medianThree( 100 medianThree(byteAt(kvs[0].k, d), byteAt(kvs[t].k, d), byteAt(kvs[2*t].k, d)), 101 medianThree(byteAt(kvs[m].k, d), byteAt(kvs[m-t].k, d), byteAt(kvs[m+t].k, d)), 102 medianThree(byteAt(kvs[len(kvs)-1].k, d), 103 byteAt(kvs[len(kvs)-1-t].k, d), 104 byteAt(kvs[len(kvs)-1-2*t].k, d))) 105 } 106 return medianThree(byteAt(kvs[0].k, d), byteAt(kvs[m].k, d), byteAt(kvs[len(kvs)-1].k, d)) 107 } 108 109 func medianThree(i, j, k int) int { 110 if i > j { 111 i, j = j, i 112 } // i < j 113 if k < i { 114 return i 115 } 116 if k > j { 117 return j 118 } 119 return k 120 } 121 122 func maxThree(i, j, k int) int { 123 max := i 124 if max < j { 125 max = j 126 } 127 if max < k { 128 max = k 129 } 130 return max 131 } 132 133 // maxDepth returns a threshold at which quicksort should switch 134 // to heapsort. It returnkvs 2*ceil(lg(n+1)). 135 func maxDepth(n int) int { 136 var depth int 137 for i := n; i > 0; i >>= 1 { 138 depth++ 139 } 140 return depth * 2 141 } 142 143 // siftDown implements the heap property on kvs[lo:hi]. 144 // first is an offset into the array where the root of the heap lies. 145 func siftDown(kvs []_MapPair, lo, hi, first int) { 146 root := lo 147 for { 148 child := 2*root + 1 149 if child >= hi { 150 break 151 } 152 if child+1 < hi && kvs[first+child].k < kvs[first+child+1].k { 153 child++ 154 } 155 if kvs[first+root].k >= kvs[first+child].k { 156 return 157 } 158 swap(kvs, first+root, first+child) 159 root = child 160 } 161 } 162 163 func heapSort(kvs []_MapPair, a, b int) { 164 first := a 165 lo := 0 166 hi := b - a 167 168 // Build heap with the greatest element at top. 169 for i := (hi - 1) / 2; i >= 0; i-- { 170 siftDown(kvs, i, hi, first) 171 } 172 173 // Pop elements, the largest first, into end of kvs. 174 for i := hi - 1; i >= 0; i-- { 175 swap(kvs, first, first+i) 176 siftDown(kvs, lo, i, first) 177 } 178 } 179 180 // Note that _MapPair.k is NOT pointed to _MapPair.m when map key is integer after swap 181 func swap(kvs []_MapPair, a, b int) { 182 kvs[a].k, kvs[b].k = kvs[b].k, kvs[a].k 183 kvs[a].v, kvs[b].v = kvs[b].v, kvs[a].v 184 } 185 186 // Compare two strings from the pos d. 187 func lessFrom(a, b string, d int) bool { 188 l := len(a) 189 if l > len(b) { 190 l = len(b) 191 } 192 for i := d; i < l; i++ { 193 if a[i] == b[i] { 194 continue 195 } 196 return a[i] < b[i] 197 } 198 return len(a) < len(b) 199 } 200 201 func byteAt(b string, p int) int { 202 if p < len(b) { 203 return int(b[p]) 204 } 205 return -1 206 }