github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/intervalmap/intervalmap_test.go

github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/intervalmap/intervalmap_test.go (about)

     1  package intervalmap
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/gob"
     6  	"fmt"
     7  	"math/rand"
     8  	"sort"
     9  	"strings"
    10  	"testing"
    11  
    12  	"github.com/biogo/store/interval"
    13  	"github.com/grailbio/testutil/assert"
    14  	"github.com/grailbio/testutil/expect"
    15  )
    16  
    17  func TestInterval(t *testing.T) {
    18  	expect.False(t, Interval{1, 1}.Intersects(Interval{1, 2}))
    19  	expect.True(t, Interval{1, 2}.Intersects(Interval{1, 2}))
    20  	expect.True(t, Interval{1, 2}.Intersects(Interval{1, 3}))
    21  	expect.True(t, Interval{1, 2}.Intersects(Interval{-1, 2}))
    22  	expect.True(t, Interval{1, 2}.Intersects(Interval{-1, 3}))
    23  	expect.False(t, Interval{1, 2}.Intersects(Interval{2, 3}))
    24  	expect.False(t, Interval{1, 2}.Intersects(Interval{3, 4}))
    25  	expect.EQ(t, Interval{1, 2}.Span(Interval{3, 4}), Interval{1, 4})
    26  	expect.EQ(t, Interval{1, 4}.Span(Interval{2, 3}), Interval{1, 4})
    27  
    28  	expect.EQ(t, Interval{1, 4}.Span(Interval{3, 2}), Interval{1, 4})
    29  	expect.EQ(t, Interval{10, 14}.Span(Interval{3, 2}), Interval{10, 14})
    30  	expect.EQ(t, Interval{4, 1}.Span(Interval{2, 3}), Interval{2, 3})
    31  	expect.EQ(t, Interval{4, 1}.Span(Interval{12, 13}), Interval{12, 13})
    32  }
    33  
    34  // Sort the given intervals in place.
    35  func sortIntervals(matches []Interval) []Interval {
    36  	sort.Slice(matches, func(i, j int) bool {
    37  		e0 := matches[i]
    38  		e1 := matches[j]
    39  		if e0.Start != e1.Start {
    40  			return e0.Start < e1.Start
    41  		}
    42  		if e0.Limit != e1.Limit {
    43  			return e0.Limit < e1.Limit
    44  		}
    45  		return false
    46  	})
    47  	return matches
    48  }
    49  
    50  // slowModel is a slow, simple intervalmap.
    51  type slowModel []Interval
    52  
    53  func (m *slowModel) insert(start, limit Key) {
    54  	*m = append(*m, Interval{start, limit})
    55  }
    56  
    57  func (m slowModel) get(start, limit Key) []Interval {
    58  	matches := []Interval{}
    59  	for _, i := range m {
    60  		if i.Intersects(Interval{start, limit}) {
    61  			matches = append(matches, i)
    62  		}
    63  	}
    64  	return matches
    65  }
    66  
    67  // biogoModel is a intervalmap using biogo inttree.
    68  type biogoModel interval.IntTree
    69  
    70  func (m *biogoModel) insert(start, limit Key) {
    71  	ii := testInterval{
    72  		start: start,
    73  		limit: limit,
    74  		id:    uintptr(((*interval.IntTree)(m)).Len() + 100),
    75  	}
    76  	if err := ((*interval.IntTree)(m)).Insert(ii, false); err != nil {
    77  		panic(err)
    78  	}
    79  }
    80  
    81  func (m *biogoModel) get(start, limit Key) []Interval {
    82  	matches := []Interval{}
    83  	for _, match := range ((*interval.IntTree)(m)).Get(testInterval{start: start, limit: limit}) {
    84  		matches = append(matches, Interval{Start: int64(match.Range().Start), Limit: int64(match.Range().End)})
    85  	}
    86  	return matches
    87  }
    88  
    89  func testGet(tree *T, start, limit Key) []Interval {
    90  	p := []*Entry{}
    91  	tree.Get(Interval{start, limit}, &p)
    92  	matches := make([]Interval, len(p))
    93  	for i, e := range p {
    94  		payload := e.Data.(string)
    95  		if payload != fmt.Sprintf("[%d,%d)", e.Interval.Start, e.Interval.Limit) {
    96  			panic(e)
    97  		}
    98  		matches[i] = e.Interval
    99  	}
   100  	return matches
   101  }
   102  
   103  func newEntry(start, limit Key) Entry {
   104  	return Entry{
   105  		Interval: Interval{start, limit},
   106  		Data:     fmt.Sprintf("[%d,%d)", start, limit),
   107  	}
   108  }
   109  
   110  func TestEmpty(t *testing.T) {
   111  	expect.EQ(t, testGet(New(nil), 1, 2), []Interval{})
   112  }
   113  
   114  func TestSmall(t *testing.T) {
   115  	tree := New([]Entry{newEntry(1, 2), newEntry(10, 15)})
   116  	expect.EQ(t, testGet(tree, -1, 0), []Interval{})
   117  	expect.EQ(t, testGet(tree, 0, 2), []Interval{Interval{1, 2}})
   118  	expect.EQ(t, testGet(tree, 0, 10), []Interval{Interval{1, 2}})
   119  	expect.EQ(t, sortIntervals(testGet(tree, 0, 11)), []Interval{Interval{1, 2}, Interval{10, 15}})
   120  }
   121  
   122  func randInterval(r *rand.Rand, max Key, width float64) (Key, Key) {
   123  	for {
   124  		start := Key(r.Intn(int(max)))
   125  		limit := start + Key(r.ExpFloat64()*width)
   126  		if start == limit {
   127  			continue
   128  		}
   129  		if start > limit {
   130  			start, limit = limit, start
   131  		}
   132  		return start, limit
   133  	}
   134  }
   135  
   136  func testRandom(t *testing.T, seed int, nElem int, max Key, width float64) {
   137  	r := rand.New(rand.NewSource(int64(seed)))
   138  	m0 := slowModel{}
   139  	m1 := &biogoModel{}
   140  
   141  	entries := []Entry{}
   142  	for i := 0; i < nElem; i++ {
   143  		start, limit := randInterval(r, max, width)
   144  		m0.insert(start, limit)
   145  		m1.insert(start, limit)
   146  		entries = append(entries, newEntry(start, limit))
   147  	}
   148  	tree := New(entries)
   149  	tree2 := gobEncodeAndDecode(t, tree)
   150  
   151  	for i := 0; i < 1000; i++ {
   152  		start, limit := randInterval(r, max, width)
   153  
   154  		r0 := sortIntervals(m0.get(start, limit))
   155  		r1 := sortIntervals(m1.get(start, limit))
   156  		result := sortIntervals(testGet(tree, start, limit))
   157  		assert.EQ(t, result, r0, "seed=%d, i=%d, search=[%d,%d)", seed, i, start, limit)
   158  		assert.EQ(t, result, r1, "seed=%d, i=%d, search=[%d,%d)", seed, i, start, limit)
   159  		assert.EQ(t, result, sortIntervals(testGet(tree2, start, limit)))
   160  	}
   161  }
   162  
   163  func TestRandom0(t *testing.T) { testRandom(t, 0, 128, 1024, 10) }
   164  func TestRandom1(t *testing.T) { testRandom(t, 1, 128, 1024, 100) }
   165  func TestRandom2(t *testing.T) { testRandom(t, 1, 1000, 8192, 1000) }
   166  
   167  func gobEncodeAndDecode(t *testing.T, tree *T) *T {
   168  	buf := bytes.Buffer{}
   169  	e := gob.NewEncoder(&buf)
   170  	assert.NoError(t, e.Encode(tree))
   171  
   172  	d := gob.NewDecoder(&buf)
   173  	var tree2 *T
   174  	assert.NoError(t, d.Decode(&tree2))
   175  	return tree2
   176  }
   177  
   178  func TestGobEmpty(t *testing.T) {
   179  	tree := New(nil)
   180  	tree2 := gobEncodeAndDecode(t, tree)
   181  	expect.EQ(t, testGet(tree2, 1, 2), []Interval{})
   182  }
   183  
   184  func TestGobSmall(t *testing.T) {
   185  	tree := gobEncodeAndDecode(t, New([]Entry{newEntry(1, 2), newEntry(10, 15)}))
   186  	expect.EQ(t, testGet(tree, -1, 0), []Interval{})
   187  	expect.EQ(t, testGet(tree, 0, 2), []Interval{Interval{1, 2}})
   188  	expect.EQ(t, testGet(tree, 0, 10), []Interval{Interval{1, 2}})
   189  	expect.EQ(t, sortIntervals(testGet(tree, 0, 11)), []Interval{Interval{1, 2}, Interval{10, 15}})
   190  }
   191  
   192  func benchmarkRandom(b *testing.B, seed int, nElem int, max Key, width float64) {
   193  	b.StopTimer()
   194  	r := rand.New(rand.NewSource(int64(seed)))
   195  	entries := []Entry{}
   196  	for i := 0; i < nElem; i++ {
   197  		start, limit := randInterval(r, max, width)
   198  		entries = append(entries, newEntry(start, limit))
   199  	}
   200  	tree := New(entries)
   201  	b.Logf("Tree stats: %+v", tree.Stats())
   202  	b.StartTimer()
   203  	p := []*Entry{}
   204  	for i := 0; i < b.N; i++ {
   205  		start, limit := randInterval(r, max, width)
   206  		p = p[:0]
   207  		tree.Get(Interval{start, limit}, &p)
   208  	}
   209  }
   210  
   211  func BenchmarkRandom0(b *testing.B) {
   212  	benchmarkRandom(b, 0, 100, 10000, 10)
   213  }
   214  
   215  func BenchmarkRandom1(b *testing.B) {
   216  	benchmarkRandom(b, 0, 1000, 200000, 100)
   217  }
   218  
   219  func BenchmarkRandom2(b *testing.B) {
   220  	benchmarkRandom(b, 0, 1000, 1000000, 100)
   221  }
   222  
   223  type testInterval struct {
   224  	id           uintptr
   225  	start, limit Key
   226  }
   227  
   228  func (i testInterval) Overlap(b interval.IntRange) bool {
   229  	return i.limit > Key(b.Start) && i.start < Key(b.End)
   230  }
   231  
   232  // ID implements interval.IntInterface.
   233  func (i testInterval) ID() uintptr { return i.id }
   234  
   235  // Range implements interval.IntInterface.
   236  func (i testInterval) Range() interval.IntRange {
   237  	return interval.IntRange{Start: int(i.start), End: int(i.limit)}
   238  }
   239  
   240  // String implements interval.IntInterface
   241  func (i testInterval) String() string { return fmt.Sprintf("[%d,%d)#%d", i.start, i.limit, i.id) }
   242  
   243  func benchmarkBiogoRandom(b *testing.B, seed int, nElem int, max Key, width float64) {
   244  	b.StopTimer()
   245  	r := rand.New(rand.NewSource(int64(seed)))
   246  	tree := interval.IntTree{}
   247  
   248  	for i := 0; i < nElem; i++ {
   249  		start, limit := randInterval(r, max, width)
   250  		ii := testInterval{
   251  			start: start,
   252  			limit: limit,
   253  			id:    uintptr(i),
   254  		}
   255  		if err := tree.Insert(ii, false); err != nil {
   256  			b.Fatal(err)
   257  		}
   258  	}
   259  	b.StartTimer()
   260  	for i := 0; i < b.N; i++ {
   261  		start, limit := randInterval(r, max, width)
   262  		tree.Get(testInterval{start: start, limit: limit})
   263  	}
   264  }
   265  
   266  func BenchmarkBiogoRandom0(b *testing.B) {
   267  	benchmarkBiogoRandom(b, 0, 100, 10000, 10)
   268  }
   269  
   270  func BenchmarkBiogoRandom1(b *testing.B) {
   271  	benchmarkBiogoRandom(b, 0, 1000, 200000, 100)
   272  }
   273  
   274  func BenchmarkBiogoRandom2(b *testing.B) {
   275  	benchmarkBiogoRandom(b, 0, 1000, 1000000, 100)
   276  }
   277  
   278  func Example() {
   279  	newEntry := func(start, limit Key) Entry {
   280  		return Entry{
   281  			Interval: Interval{start, limit},
   282  			Data:     fmt.Sprintf("[%d,%d)", start, limit),
   283  		}
   284  	}
   285  
   286  	doGet := func(tree *T, start, limit Key) string {
   287  		matches := []*Entry{}
   288  		tree.Get(Interval{start, limit}, &matches)
   289  		s := []string{}
   290  		for _, m := range matches {
   291  			s = append(s, m.Data.(string))
   292  		}
   293  		sort.Strings(s)
   294  		return strings.Join(s, ",")
   295  	}
   296  
   297  	tree := New([]Entry{newEntry(1, 4), newEntry(3, 5), newEntry(6, 7)})
   298  	fmt.Println(doGet(tree, 0, 2))
   299  	fmt.Println(doGet(tree, 0, 4))
   300  	fmt.Println(doGet(tree, 4, 6))
   301  	fmt.Println(doGet(tree, 4, 7))
   302  	// Output:
   303  	// [1,4)
   304  	// [1,4),[3,5)
   305  	// [3,5)
   306  	// [3,5),[6,7)
   307  }
   308  
   309  // Example_gob is an example of serializing an intervalmap using Gob.
   310  func Example_gob() {
   311  	newEntry := func(start, limit Key) Entry {
   312  		return Entry{
   313  			Interval: Interval{start, limit},
   314  			Data:     fmt.Sprintf("[%d,%d)", start, limit),
   315  		}
   316  	}
   317  
   318  	tree := New([]Entry{newEntry(1, 4), newEntry(3, 5), newEntry(6, 7)})
   319  
   320  	buf := bytes.Buffer{}
   321  	enc := gob.NewEncoder(&buf)
   322  	if err := enc.Encode(tree); err != nil {
   323  		panic(err)
   324  	}
   325  	dec := gob.NewDecoder(&buf)
   326  	var tree2 *T
   327  	if err := dec.Decode(&tree2); err != nil {
   328  		panic(err)
   329  	}
   330  
   331  	doGet := func(tree *T, start, limit Key) string {
   332  		matches := []*Entry{}
   333  		tree.Get(Interval{start, limit}, &matches)
   334  		s := []string{}
   335  		for _, m := range matches {
   336  			s = append(s, m.Data.(string))
   337  		}
   338  		sort.Strings(s)
   339  		return strings.Join(s, ",")
   340  	}
   341  
   342  	fmt.Println(doGet(tree2, 0, 2))
   343  	fmt.Println(doGet(tree2, 0, 4))
   344  	fmt.Println(doGet(tree2, 4, 6))
   345  	fmt.Println(doGet(tree2, 4, 7))
   346  	// Output:
   347  	// [1,4)
   348  	// [1,4),[3,5)
   349  	// [3,5)
   350  	// [3,5),[6,7)
   351  }