github.com/egonelbre/exp@v0.0.0-20240430123955-ed1d3aa93911/sorts/dpsort/sort_test.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package dpsort_test
     6  
     7  import (
     8  	"flag"
     9  	"fmt"
    10  	"math"
    11  	"math/rand"
    12  	"sort"
    13  	"strconv"
    14  	"testing"
    15  
    16  	. "github.com/egonelbre/exp/sorts/dpsort"
    17  )
    18  
// skipAdversial holds the value of the -skip-adversial command-line flag
// (default false). When it is set, testBentleyMcIlroy and TestAdversary call
// t.Skip instead of running.
var skipAdversial = flag.Bool("skip-adversial", false, "skip adversial tests")
    20  
    21  var ints = [...]int{74, 59, 238, -784, 9845, 959, 905, 0, 0, 42, 7586, -5467984, 7586}
    22  var float64s = [...]float64{74.3, 59.0, math.Inf(1), 238.2, -784.0, 2.3, math.NaN(), math.NaN(), math.Inf(-1), 9845.768, -959.7485, 905, 7.8, 7.8}
    23  var strings = [...]string{"", "Hello", "foo", "bar", "foo", "f00", "%*&^*&^&", "***"}
    24  
    25  func TestSortIntSlice(t *testing.T) {
    26  	data := ints
    27  	a := sort.IntSlice(data[0:])
    28  	Sort(a)
    29  	if !sort.IsSorted(a) {
    30  		t.Errorf("sorted %v", ints)
    31  		t.Errorf("   got %v", data)
    32  	}
    33  }
    34  
    35  func TestSortFloat64Slice(t *testing.T) {
    36  	data := float64s
    37  	a := sort.Float64Slice(data[0:])
    38  	Sort(a)
    39  	if !sort.IsSorted(a) {
    40  		t.Errorf("sorted %v", float64s)
    41  		t.Errorf("   got %v", data)
    42  	}
    43  }
    44  
    45  func TestSortStringSlice(t *testing.T) {
    46  	data := strings
    47  	a := sort.StringSlice(data[0:])
    48  	Sort(a)
    49  	if !sort.IsSorted(a) {
    50  		t.Errorf("sorted %v", strings)
    51  		t.Errorf("   got %v", data)
    52  	}
    53  }
    54  
    55  type nonDeterministicTestingData struct {
    56  	r *rand.Rand
    57  }
    58  
    59  func (t *nonDeterministicTestingData) Len() int {
    60  	return 500
    61  }
    62  func (t *nonDeterministicTestingData) Less(i, j int) bool {
    63  	if i < 0 || j < 0 || i >= t.Len() || j >= t.Len() {
    64  		panic("nondeterministic comparison out of bounds")
    65  	}
    66  	return t.r.Float32() < 0.5
    67  }
    68  func (t *nonDeterministicTestingData) Swap(i, j int) {
    69  	if i < 0 || j < 0 || i >= t.Len() || j >= t.Len() {
    70  		panic("nondeterministic comparison out of bounds")
    71  	}
    72  }
    73  
    74  func TestNonDeterministicComparison(t *testing.T) {
    75  	// Ensure that sort.Sort does not panic when Less returns inconsistent results.
    76  	// See https://golang.org/issue/14377.
    77  	defer func() {
    78  		if r := recover(); r != nil {
    79  			t.Error(r)
    80  		}
    81  	}()
    82  
    83  	td := &nonDeterministicTestingData{
    84  		r: rand.New(rand.NewSource(0)),
    85  	}
    86  
    87  	for i := 0; i < 10; i++ {
    88  		Sort(td)
    89  	}
    90  }
    91  
    92  func BenchmarkSortString1K(b *testing.B) {
    93  	b.StopTimer()
    94  	unsorted := make([]string, 1<<10)
    95  	for i := range unsorted {
    96  		unsorted[i] = strconv.Itoa(i ^ 0x2cc)
    97  	}
    98  	data := make([]string, len(unsorted))
    99  
   100  	for i := 0; i < b.N; i++ {
   101  		copy(data, unsorted)
   102  		b.StartTimer()
   103  		Sort(sort.StringSlice(data))
   104  		b.StopTimer()
   105  	}
   106  }
   107  
   108  func BenchmarkSortInt1K(b *testing.B) {
   109  	b.StopTimer()
   110  	for i := 0; i < b.N; i++ {
   111  		data := make([]int, 1<<10)
   112  		for i := 0; i < len(data); i++ {
   113  			data[i] = i ^ 0x2cc
   114  		}
   115  		b.StartTimer()
   116  		Sort(sort.IntSlice(data))
   117  		b.StopTimer()
   118  	}
   119  }
   120  
   121  func BenchmarkSortInt64K(b *testing.B) {
   122  	b.StopTimer()
   123  	for i := 0; i < b.N; i++ {
   124  		data := make([]int, 1<<16)
   125  		for i := 0; i < len(data); i++ {
   126  			data[i] = i ^ 0xcccc
   127  		}
   128  		b.StartTimer()
   129  		Sort(sort.IntSlice(data))
   130  		b.StopTimer()
   131  	}
   132  }
   133  
// Input distributions generated by testBentleyMcIlroy. The values index the
// distribution-name table in that function, so the order here must match it.
const (
	_Sawtooth = iota // data[i] = i % m
	_Rand            // data[i] = rand.Intn(m)
	_Stagger         // data[i] = (i*m + i) % n
	_Plateau         // data[i] = min(i, m)
	_Shuffle         // random interleaving of an even and an odd increasing sequence
	_NDist           // number of distributions; used as the loop bound
)
   142  
// Modifications applied by testBentleyMcIlroy to each generated distribution
// before sorting. The values index the mode-name table in that function, so
// the order here must match it.
const (
	_Copy              = iota // use the data unchanged
	_Reverse                  // reverse the whole slice
	_ReverseFirstHalf         // reverse only the first n/2 elements
	_ReverseSecondHalf        // reverse only the elements from n/2 onward
	_Sorted                   // sort the data before handing it to the sort under test
	_Dither                   // add i%5 to element i
	_NMode                    // number of modes; used as the loop bound
)
   152  
   153  type testingData struct {
   154  	desc        string
   155  	t           *testing.T
   156  	data        []int
   157  	maxswap     int // number of swaps allowed
   158  	ncmp, nswap int
   159  }
   160  
   161  func (d *testingData) Len() int { return len(d.data) }
   162  func (d *testingData) Less(i, j int) bool {
   163  	d.ncmp++
   164  	return d.data[i] < d.data[j]
   165  }
   166  func (d *testingData) Swap(i, j int) {
   167  	if d.nswap >= d.maxswap {
   168  		d.t.Errorf("%s: used %d swaps sorting slice of %d", d.desc, d.nswap, len(d.data))
   169  		d.t.FailNow()
   170  	}
   171  	d.nswap++
   172  	d.data[i], d.data[j] = d.data[j], d.data[i]
   173  }
   174  
   175  func min(a, b int) int {
   176  	if a < b {
   177  		return a
   178  	}
   179  	return b
   180  }
   181  
   182  func lg(n int) int {
   183  	i := 0
   184  	for 1<<uint(i) < n {
   185  		i++
   186  	}
   187  	return i
   188  }
   189  
   190  func testBentleyMcIlroy(t *testing.T, sortfn func(sort.Interface), maxswap func(int) int) {
   191  	if *skipAdversial {
   192  		t.Skip()
   193  		return
   194  	}
   195  	sizes := []int{100, 1023, 1024, 1025}
   196  	if testing.Short() {
   197  		sizes = []int{100, 127, 128, 129}
   198  	}
   199  	dists := []string{"sawtooth", "rand", "stagger", "plateau", "shuffle"}
   200  	modes := []string{"copy", "reverse", "reverse1", "reverse2", "sort", "dither"}
   201  	var tmp1, tmp2 [1025]int
   202  	for _, n := range sizes {
   203  		for m := 1; m < 2*n; m *= 2 {
   204  			for dist := 0; dist < _NDist; dist++ {
   205  				j := 0
   206  				k := 1
   207  				data := tmp1[0:n]
   208  				for i := 0; i < n; i++ {
   209  					switch dist {
   210  					case _Sawtooth:
   211  						data[i] = i % m
   212  					case _Rand:
   213  						data[i] = rand.Intn(m)
   214  					case _Stagger:
   215  						data[i] = (i*m + i) % n
   216  					case _Plateau:
   217  						data[i] = min(i, m)
   218  					case _Shuffle:
   219  						if rand.Intn(m) != 0 {
   220  							j += 2
   221  							data[i] = j
   222  						} else {
   223  							k += 2
   224  							data[i] = k
   225  						}
   226  					}
   227  				}
   228  
   229  				mdata := tmp2[0:n]
   230  				for mode := 0; mode < _NMode; mode++ {
   231  					switch mode {
   232  					case _Copy:
   233  						for i := 0; i < n; i++ {
   234  							mdata[i] = data[i]
   235  						}
   236  					case _Reverse:
   237  						for i := 0; i < n; i++ {
   238  							mdata[i] = data[n-i-1]
   239  						}
   240  					case _ReverseFirstHalf:
   241  						for i := 0; i < n/2; i++ {
   242  							mdata[i] = data[n/2-i-1]
   243  						}
   244  						for i := n / 2; i < n; i++ {
   245  							mdata[i] = data[i]
   246  						}
   247  					case _ReverseSecondHalf:
   248  						for i := 0; i < n/2; i++ {
   249  							mdata[i] = data[i]
   250  						}
   251  						for i := n / 2; i < n; i++ {
   252  							mdata[i] = data[n-(i-n/2)-1]
   253  						}
   254  					case _Sorted:
   255  						for i := 0; i < n; i++ {
   256  							mdata[i] = data[i]
   257  						}
   258  						// Ints is known to be correct
   259  						// because mode Sort runs after mode _Copy.
   260  						Sort(sort.IntSlice(mdata))
   261  					case _Dither:
   262  						for i := 0; i < n; i++ {
   263  							mdata[i] = data[i] + i%5
   264  						}
   265  					}
   266  
   267  					desc := fmt.Sprintf("n=%d m=%d dist=%s mode=%s", n, m, dists[dist], modes[mode])
   268  					d := &testingData{desc: desc, t: t, data: mdata[0:n], maxswap: maxswap(n)}
   269  					sortfn(d)
   270  					// Uncomment if you are trying to improve the number of compares/swaps.
   271  					//t.Logf("%s: ncmp=%d, nswp=%d", desc, d.ncmp, d.nswap)
   272  
   273  					// If we were testing C qsort, we'd have to make a copy
   274  					// of the slice and sort it ourselves and then compare
   275  					// x against it, to ensure that qsort was only permuting
   276  					// the data, not (for example) overwriting it with zeros.
   277  					//
   278  					// In go, we don't have to be so paranoid: since the only
   279  					// mutating method Sort can call is TestingData.swap,
   280  					// it suffices here just to check that the final slice is sorted.
   281  					if !sort.IntsAreSorted(mdata) {
   282  						t.Errorf("%s: ints not sorted", desc)
   283  						t.Errorf("\t%v", mdata)
   284  						t.FailNow()
   285  					}
   286  				}
   287  			}
   288  		}
   289  	}
   290  }
   291  
   292  func TestSortBM(t *testing.T) {
   293  	testBentleyMcIlroy(t, Sort, func(n int) int { return n * lg(n) * 12 / 10 })
   294  }
   295  
   296  // This is based on the "antiquicksort" implementation by M. Douglas McIlroy.
   297  // See http://www.cs.dartmouth.edu/~doug/mdmspe.pdf for more info.
   298  type adversaryTestingData struct {
   299  	t         *testing.T
   300  	data      []int // item values, initialized to special gas value and changed by Less
   301  	maxcmp    int   // number of comparisons allowed
   302  	ncmp      int   // number of comparisons (calls to Less)
   303  	nsolid    int   // number of elements that have been set to non-gas values
   304  	candidate int   // guess at current pivot
   305  	gas       int   // special value for unset elements, higher than everything else
   306  }
   307  
   308  func (d *adversaryTestingData) Len() int { return len(d.data) }
   309  
   310  func (d *adversaryTestingData) Less(i, j int) bool {
   311  	if d.ncmp >= d.maxcmp {
   312  		d.t.Fatalf("used %d comparisons sorting adversary data with size %d", d.ncmp, len(d.data))
   313  	}
   314  	d.ncmp++
   315  
   316  	if d.data[i] == d.gas && d.data[j] == d.gas {
   317  		if i == d.candidate {
   318  			// freeze i
   319  			d.data[i] = d.nsolid
   320  			d.nsolid++
   321  		} else {
   322  			// freeze j
   323  			d.data[j] = d.nsolid
   324  			d.nsolid++
   325  		}
   326  	}
   327  
   328  	if d.data[i] == d.gas {
   329  		d.candidate = i
   330  	} else if d.data[j] == d.gas {
   331  		d.candidate = j
   332  	}
   333  
   334  	return d.data[i] < d.data[j]
   335  }
   336  
   337  func (d *adversaryTestingData) Swap(i, j int) {
   338  	d.data[i], d.data[j] = d.data[j], d.data[i]
   339  }
   340  
   341  func newAdversaryTestingData(t *testing.T, size int, maxcmp int) *adversaryTestingData {
   342  	gas := size - 1
   343  	data := make([]int, size)
   344  	for i := 0; i < size; i++ {
   345  		data[i] = gas
   346  	}
   347  	return &adversaryTestingData{t: t, data: data, maxcmp: maxcmp, gas: gas}
   348  }
   349  
   350  func TestAdversary(t *testing.T) {
   351  	if *skipAdversial {
   352  		t.Skip()
   353  		return
   354  	}
   355  
   356  	const size = 10000            // large enough to distinguish between O(n^2) and O(n*log(n))
   357  	maxcmp := size * lg(size) * 4 // the factor 4 was found by trial and error
   358  	d := newAdversaryTestingData(t, size, maxcmp)
   359  	Sort(d) // This should degenerate to heapsort.
   360  	// Check data is fully populated and sorted.
   361  	for i, v := range d.data {
   362  		if v != i {
   363  			t.Errorf("adversary data not fully sorted")
   364  			t.FailNow()
   365  		}
   366  	}
   367  }
   368  
   369  type intPairs []struct {
   370  	a, b int
   371  }
   372  
   373  // IntPairs compare on a only.
   374  func (d intPairs) Len() int           { return len(d) }
   375  func (d intPairs) Less(i, j int) bool { return d[i].a < d[j].a }
   376  func (d intPairs) Swap(i, j int)      { d[i], d[j] = d[j], d[i] }
   377  
   378  // Record initial order in B.
   379  func (d intPairs) initB() {
   380  	for i := range d {
   381  		d[i].b = i
   382  	}
   383  }
   384  
   385  // InOrder checks if a-equal elements were not reordered.
   386  func (d intPairs) inOrder() bool {
   387  	lastA, lastB := -1, 0
   388  	for i := 0; i < len(d); i++ {
   389  		if lastA != d[i].a {
   390  			lastA = d[i].a
   391  			lastB = d[i].b
   392  			continue
   393  		}
   394  		if d[i].b <= lastB {
   395  			return false
   396  		}
   397  		lastB = d[i].b
   398  	}
   399  	return true
   400  }
   401  
   402  var countOpsSizes = []int{1e2, 3e2, 1e3, 3e3, 1e4, 3e4, 1e5, 3e5, 1e6}
   403  
   404  func countOps(t *testing.T, algo func(sort.Interface), name string) {
   405  	sizes := countOpsSizes
   406  	if testing.Short() {
   407  		sizes = sizes[:5]
   408  	}
   409  	if !testing.Verbose() {
   410  		t.Skip("Counting skipped as non-verbose mode.")
   411  	}
   412  	for _, n := range sizes {
   413  		td := testingData{
   414  			desc:    name,
   415  			t:       t,
   416  			data:    make([]int, n),
   417  			maxswap: 1<<31 - 1,
   418  		}
   419  		for i := 0; i < n; i++ {
   420  			td.data[i] = rand.Intn(n / 5)
   421  		}
   422  		algo(&td)
   423  		t.Logf("%s %8d elements: %11d Swap, %10d Less", name, n, td.nswap, td.ncmp)
   424  	}
   425  }
   426  
   427  func TestCountSortOps(t *testing.T) { countOps(t, Sort, "Sort  ") }
   428  
   429  func bench(b *testing.B, size int, algo func(sort.Interface), name string) {
   430  	b.StopTimer()
   431  	data := make(intPairs, size)
   432  	x := ^uint32(0)
   433  	for i := 0; i < b.N; i++ {
   434  		for n := size - 3; n <= size+3; n++ {
   435  			for i := 0; i < len(data); i++ {
   436  				x += x
   437  				x ^= 1
   438  				if int32(x) < 0 {
   439  					x ^= 0x88888eef
   440  				}
   441  				data[i].a = int(x % uint32(n/5))
   442  			}
   443  			data.initB()
   444  			b.StartTimer()
   445  			algo(data)
   446  			b.StopTimer()
   447  			if !sort.IsSorted(data) {
   448  				b.Errorf("%s did not sort %d ints", name, n)
   449  			}
   450  		}
   451  	}
   452  }
   453  
   454  func BenchmarkSort1e2(b *testing.B) { bench(b, 1e2, Sort, "Sort") }
   455  func BenchmarkSort1e4(b *testing.B) { bench(b, 1e4, Sort, "Sort") }
   456  func BenchmarkSort1e6(b *testing.B) { bench(b, 1e6, Sort, "Sort") }