github.com/bytedance/sonic@v1.11.7-0.20240517092252-d2edb31b167b/internal/encoder/sort.go (about)

     1  /*
     2   * Copyright 2021 ByteDance Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package encoder
    18  
    19  // Algorithm 3-way Radix Quicksort, d means the radix.
    20  // Reference: https://algs4.cs.princeton.edu/51radix/Quick3string.java.html
    21  func radixQsort(kvs []_MapPair, d, maxDepth int) {
    22      for len(kvs) > 11 {
    23          // To avoid the worst case of quickSort (time: O(n^2)), use introsort here.
    24          // Reference: https://en.wikipedia.org/wiki/Introsort and
    25          // https://github.com/golang/go/issues/467
    26          if maxDepth == 0 {
    27              heapSort(kvs, 0, len(kvs))
    28              return
    29          }
    30          maxDepth--
    31  
    32          p := pivot(kvs, d)
    33          lt, i, gt := 0, 0, len(kvs)
    34          for i < gt {
    35              c := byteAt(kvs[i].k, d)
    36              if c < p {
    37                  swap(kvs, lt, i)
    38                  i++
    39                  lt++
    40              } else if c > p {
    41                  gt--
    42                  swap(kvs, i, gt)
    43              } else {
    44                  i++
    45              }
    46          }
    47  
    48          // kvs[0:lt] < v = kvs[lt:gt] < kvs[gt:len(kvs)]
    49          // Native implemention:
    50          //     radixQsort(kvs[:lt], d, maxDepth)
    51          //     if p > -1 {
    52          //         radixQsort(kvs[lt:gt], d+1, maxDepth)
    53          //     }
    54          //     radixQsort(kvs[gt:], d, maxDepth)
    55          // Optimize as follows: make recursive calls only for the smaller parts.
    56          // Reference: https://www.geeksforgeeks.org/quicksort-tail-call-optimization-reducing-worst-case-space-log-n/
    57          if p == -1 {
    58              if lt > len(kvs) - gt {
    59                  radixQsort(kvs[gt:], d, maxDepth)
    60                  kvs = kvs[:lt]
    61              } else {
    62                  radixQsort(kvs[:lt], d, maxDepth)
    63                  kvs = kvs[gt:]
    64              }
    65          } else {
    66              ml := maxThree(lt, gt-lt, len(kvs)-gt)
    67              if ml == lt {
    68                  radixQsort(kvs[lt:gt], d+1, maxDepth)
    69                  radixQsort(kvs[gt:], d, maxDepth)
    70                  kvs = kvs[:lt]
    71              } else if ml == gt-lt {
    72                  radixQsort(kvs[:lt], d, maxDepth)
    73                  radixQsort(kvs[gt:], d, maxDepth)
    74                  kvs = kvs[lt:gt]
    75                  d += 1
    76              } else {
    77                  radixQsort(kvs[:lt], d, maxDepth)
    78                  radixQsort(kvs[lt:gt], d+1, maxDepth)
    79                  kvs = kvs[gt:] 
    80              }
    81          }
    82      }
    83      insertRadixSort(kvs, d)
    84  }
    85  
    86  func insertRadixSort(kvs []_MapPair, d int) {
    87      for i := 1; i < len(kvs); i++ {
    88          for j := i; j > 0 && lessFrom(kvs[j].k, kvs[j-1].k, d); j-- {
    89              swap(kvs, j, j-1)
    90          }
    91      }
    92  }
    93  
    94  func pivot(kvs []_MapPair, d int) int {
    95      m := len(kvs) >> 1
    96      if len(kvs) > 40 {
    97          // Tukey's ``Ninther,'' median of three mediankvs of three.
    98          t := len(kvs) / 8
    99          return medianThree(
   100              medianThree(byteAt(kvs[0].k, d), byteAt(kvs[t].k, d), byteAt(kvs[2*t].k, d)),
   101              medianThree(byteAt(kvs[m].k, d), byteAt(kvs[m-t].k, d), byteAt(kvs[m+t].k, d)),
   102              medianThree(byteAt(kvs[len(kvs)-1].k, d),
   103                  byteAt(kvs[len(kvs)-1-t].k, d),
   104                  byteAt(kvs[len(kvs)-1-2*t].k, d)))
   105      }
   106      return medianThree(byteAt(kvs[0].k, d), byteAt(kvs[m].k, d), byteAt(kvs[len(kvs)-1].k, d))
   107  }
   108  
   109  func medianThree(i, j, k int) int {
   110      if i > j {
   111          i, j = j, i
   112      } // i < j
   113      if k < i {
   114          return i
   115      }
   116      if k > j {
   117          return j
   118      }
   119      return k
   120  }
   121  
   122  func maxThree(i, j, k int) int {
   123      max := i
   124      if max < j {
   125          max = j
   126      }
   127      if max < k {
   128          max = k
   129      }
   130      return max
   131  }
   132  
   133  // maxDepth returns a threshold at which quicksort should switch
   134  // to heapsort. It returnkvs 2*ceil(lg(n+1)).
   135  func maxDepth(n int) int {
   136      var depth int
   137      for i := n; i > 0; i >>= 1 {
   138          depth++
   139      }
   140      return depth * 2
   141  }
   142  
   143  // siftDown implements the heap property on kvs[lo:hi].
   144  // first is an offset into the array where the root of the heap lies.
   145  func siftDown(kvs []_MapPair, lo, hi, first int) {
   146      root := lo
   147      for {
   148          child := 2*root + 1
   149          if child >= hi {
   150              break
   151          }
   152          if child+1 < hi && kvs[first+child].k < kvs[first+child+1].k {
   153              child++
   154          }
   155          if kvs[first+root].k >= kvs[first+child].k {
   156              return
   157          }
   158          swap(kvs, first+root, first+child)
   159          root = child
   160      }
   161  }
   162  
   163  func heapSort(kvs []_MapPair, a, b int) {
   164      first := a
   165      lo := 0
   166      hi := b - a
   167  
   168      // Build heap with the greatest element at top.
   169      for i := (hi - 1) / 2; i >= 0; i-- {
   170          siftDown(kvs, i, hi, first)
   171      }
   172  
   173      // Pop elements, the largest first, into end of kvs.
   174      for i := hi - 1; i >= 0; i-- {
   175          swap(kvs, first, first+i)
   176          siftDown(kvs, lo, i, first)
   177      }
   178  }
   179  
   180  // Note that _MapPair.k is NOT pointed to _MapPair.m when map key is integer after swap
   181  func swap(kvs []_MapPair, a, b int) {
   182      kvs[a].k, kvs[b].k = kvs[b].k, kvs[a].k
   183      kvs[a].v, kvs[b].v = kvs[b].v, kvs[a].v
   184  }
   185  
   186  // Compare two strings from the pos d.
   187  func lessFrom(a, b string, d int) bool {
   188      l := len(a)
   189      if l > len(b) {
   190          l = len(b)
   191      }
   192      for i := d; i < l; i++ {
   193          if a[i] == b[i] {
   194              continue
   195          }
   196          return a[i] < b[i]
   197      }
   198      return len(a) < len(b)
   199  }
   200  
   201  func byteAt(b string, p int) int {
   202      if p < len(b) {
   203          return int(b[p])
   204      }
   205      return -1
   206  }