github.com/goshafaq/sonic@v0.0.0-20231026082336-871835fb94c6/internal/encoder/sort.go (about)

     1  /*
     2   * Copyright 2021 ByteDance Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package encoder
    18  
    19  // Algorithm 3-way Radix Quicksort, d means the radix.
    20  // Reference: https://algs4.cs.princeton.edu/51radix/Quick3string.java.html
    21  func radixQsort(kvs []_MapPair, d, maxDepth int) {
    22  	for len(kvs) > 11 {
    23  		// To avoid the worst case of quickSort (time: O(n^2)), use introsort here.
    24  		// Reference: https://en.wikipedia.org/wiki/Introsort and
    25  		// https://github.com/golang/go/issues/467
    26  		if maxDepth == 0 {
    27  			heapSort(kvs, 0, len(kvs))
    28  			return
    29  		}
    30  		maxDepth--
    31  
    32  		p := pivot(kvs, d)
    33  		lt, i, gt := 0, 0, len(kvs)
    34  		for i < gt {
    35  			c := byteAt(kvs[i].k, d)
    36  			if c < p {
    37  				swap(kvs, lt, i)
    38  				i++
    39  				lt++
    40  			} else if c > p {
    41  				gt--
    42  				swap(kvs, i, gt)
    43  			} else {
    44  				i++
    45  			}
    46  		}
    47  
    48  		// kvs[0:lt] < v = kvs[lt:gt] < kvs[gt:len(kvs)]
    49  		// Native implemention:
    50  		//     radixQsort(kvs[:lt], d, maxDepth)
    51  		//     if p > -1 {
    52  		//         radixQsort(kvs[lt:gt], d+1, maxDepth)
    53  		//     }
    54  		//     radixQsort(kvs[gt:], d, maxDepth)
    55  		// Optimize as follows: make recursive calls only for the smaller parts.
    56  		// Reference: https://www.geeksforgeeks.org/quicksort-tail-call-optimization-reducing-worst-case-space-log-n/
    57  		if p == -1 {
    58  			if lt > len(kvs)-gt {
    59  				radixQsort(kvs[gt:], d, maxDepth)
    60  				kvs = kvs[:lt]
    61  			} else {
    62  				radixQsort(kvs[:lt], d, maxDepth)
    63  				kvs = kvs[gt:]
    64  			}
    65  		} else {
    66  			ml := maxThree(lt, gt-lt, len(kvs)-gt)
    67  			if ml == lt {
    68  				radixQsort(kvs[lt:gt], d+1, maxDepth)
    69  				radixQsort(kvs[gt:], d, maxDepth)
    70  				kvs = kvs[:lt]
    71  			} else if ml == gt-lt {
    72  				radixQsort(kvs[:lt], d, maxDepth)
    73  				radixQsort(kvs[gt:], d, maxDepth)
    74  				kvs = kvs[lt:gt]
    75  				d += 1
    76  			} else {
    77  				radixQsort(kvs[:lt], d, maxDepth)
    78  				radixQsort(kvs[lt:gt], d+1, maxDepth)
    79  				kvs = kvs[gt:]
    80  			}
    81  		}
    82  	}
    83  	insertRadixSort(kvs, d)
    84  }
    85  
    86  func insertRadixSort(kvs []_MapPair, d int) {
    87  	for i := 1; i < len(kvs); i++ {
    88  		for j := i; j > 0 && lessFrom(kvs[j].k, kvs[j-1].k, d); j-- {
    89  			swap(kvs, j, j-1)
    90  		}
    91  	}
    92  }
    93  
    94  func pivot(kvs []_MapPair, d int) int {
    95  	m := len(kvs) >> 1
    96  	if len(kvs) > 40 {
    97  		// Tukey's ``Ninther,'' median of three mediankvs of three.
    98  		t := len(kvs) / 8
    99  		return medianThree(
   100  			medianThree(byteAt(kvs[0].k, d), byteAt(kvs[t].k, d), byteAt(kvs[2*t].k, d)),
   101  			medianThree(byteAt(kvs[m].k, d), byteAt(kvs[m-t].k, d), byteAt(kvs[m+t].k, d)),
   102  			medianThree(byteAt(kvs[len(kvs)-1].k, d),
   103  				byteAt(kvs[len(kvs)-1-t].k, d),
   104  				byteAt(kvs[len(kvs)-1-2*t].k, d)))
   105  	}
   106  	return medianThree(byteAt(kvs[0].k, d), byteAt(kvs[m].k, d), byteAt(kvs[len(kvs)-1].k, d))
   107  }
   108  
   109  func medianThree(i, j, k int) int {
   110  	if i > j {
   111  		i, j = j, i
   112  	} // i < j
   113  	if k < i {
   114  		return i
   115  	}
   116  	if k > j {
   117  		return j
   118  	}
   119  	return k
   120  }
   121  
   122  func maxThree(i, j, k int) int {
   123  	max := i
   124  	if max < j {
   125  		max = j
   126  	}
   127  	if max < k {
   128  		max = k
   129  	}
   130  	return max
   131  }
   132  
   133  // maxDepth returns a threshold at which quicksort should switch
   134  // to heapsort. It returnkvs 2*ceil(lg(n+1)).
   135  func maxDepth(n int) int {
   136  	var depth int
   137  	for i := n; i > 0; i >>= 1 {
   138  		depth++
   139  	}
   140  	return depth * 2
   141  }
   142  
   143  // siftDown implements the heap property on kvs[lo:hi].
   144  // first is an offset into the array where the root of the heap lies.
   145  func siftDown(kvs []_MapPair, lo, hi, first int) {
   146  	root := lo
   147  	for {
   148  		child := 2*root + 1
   149  		if child >= hi {
   150  			break
   151  		}
   152  		if child+1 < hi && kvs[first+child].k < kvs[first+child+1].k {
   153  			child++
   154  		}
   155  		if kvs[first+root].k >= kvs[first+child].k {
   156  			return
   157  		}
   158  		swap(kvs, first+root, first+child)
   159  		root = child
   160  	}
   161  }
   162  
   163  func heapSort(kvs []_MapPair, a, b int) {
   164  	first := a
   165  	lo := 0
   166  	hi := b - a
   167  
   168  	// Build heap with the greatest element at top.
   169  	for i := (hi - 1) / 2; i >= 0; i-- {
   170  		siftDown(kvs, i, hi, first)
   171  	}
   172  
   173  	// Pop elements, the largest first, into end of kvs.
   174  	for i := hi - 1; i >= 0; i-- {
   175  		swap(kvs, first, first+i)
   176  		siftDown(kvs, lo, i, first)
   177  	}
   178  }
   179  
   180  // Note that _MapPair.k is NOT pointed to _MapPair.m when map key is integer after swap
   181  func swap(kvs []_MapPair, a, b int) {
   182  	kvs[a].k, kvs[b].k = kvs[b].k, kvs[a].k
   183  	kvs[a].v, kvs[b].v = kvs[b].v, kvs[a].v
   184  }
   185  
   186  // Compare two strings from the pos d.
   187  func lessFrom(a, b string, d int) bool {
   188  	l := len(a)
   189  	if l > len(b) {
   190  		l = len(b)
   191  	}
   192  	for i := d; i < l; i++ {
   193  		if a[i] == b[i] {
   194  			continue
   195  		}
   196  		return a[i] < b[i]
   197  	}
   198  	return len(a) < len(b)
   199  }
   200  
   201  func byteAt(b string, p int) int {
   202  	if p < len(b) {
   203  		return int(b[p])
   204  	}
   205  	return -1
   206  }