github.com/VertebrateResequencing/muxfys@v3.0.5+incompatible/interval_test.go (about)

     1  // Copyright © 2017, 2018 Genome Research Limited
     2  // Author: Sendu Bala <sb10@sanger.ac.uk>.
     3  //
     4  //  This file is part of muxfys.
     5  //
     6  //  muxfys is free software: you can redistribute it and/or modify
     7  //  it under the terms of the GNU Lesser General Public License as published by
     8  //  the Free Software Foundation, either version 3 of the License, or
     9  //  (at your option) any later version.
    10  //
    11  //  muxfys is distributed in the hope that it will be useful,
    12  //  but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14  //  GNU Lesser General Public License for more details.
    15  //
    16  //  You should have received a copy of the GNU Lesser General Public License
    17  //  along with muxfys. If not, see <http://www.gnu.org/licenses/>.
    18  
    19  package muxfys
    20  
    21  import (
    22  	"math"
    23  	"math/rand"
    24  	"testing"
    25  	"time"
    26  
    27  	. "github.com/smartystreets/goconvey/convey"
    28  )
    29  
    30  func TestIntervals(t *testing.T) {
    31  	Convey("You can create intervals a number of different ways", t, func() {
    32  		oneThree := NewInterval(int64(1), 3)
    33  		So(oneThree.Start, ShouldEqual, 1)
    34  		So(oneThree.End, ShouldEqual, 3)
    35  		twoSix := NewInterval(int64(2), 5)
    36  		So(twoSix.Start, ShouldEqual, 2)
    37  		So(twoSix.End, ShouldEqual, 6)
    38  		eightTen := Interval{8, 10}
    39  		So(eightTen.Start, ShouldEqual, 8)
    40  		So(eightTen.End, ShouldEqual, 10)
    41  		fifteenEighteen := Interval{15, 18}
    42  		fiveTen := Interval{5, 10}
    43  		tenEighteen := Interval{10, 18}
    44  		fourSix := Interval{4, 6}
    45  		sevenTen := Interval{Start: 7, End: 10}
    46  		elevenEighteen := Interval{Start: 11, End: 18}
    47  		twentyThirty := Interval{20, 30}
    48  		oneSix := Interval{1, 6}
    49  		oneEighteen := Interval{1, 18}
    50  		fourtyFifty := Interval{40, 50}
    51  
    52  		Convey("Length works", func() {
    53  			So(oneThree.Length(), ShouldEqual, 3)
    54  			So(twoSix.Length(), ShouldEqual, 5)
    55  			So(fifteenEighteen.Length(), ShouldEqual, 4)
    56  		})
    57  
    58  		Convey("Merging in order works", func() {
    59  			var ivs, newIvs Intervals
    60  			newIvs = ivs.Difference(oneThree)
    61  			So(newIvs, ShouldResemble, Intervals{oneThree})
    62  			ivs = ivs.Merge(oneThree)
    63  			So(len(ivs), ShouldEqual, 1)
    64  
    65  			newIvs = ivs.Difference(twoSix)
    66  			So(newIvs, ShouldResemble, Intervals{Interval{4, 6}})
    67  			ivs = ivs.Merge(twoSix)
    68  			So(len(ivs), ShouldEqual, 1)
    69  
    70  			newIvs = ivs.Difference(eightTen)
    71  			So(newIvs, ShouldResemble, Intervals{eightTen})
    72  			ivs = ivs.Merge(eightTen)
    73  			So(len(ivs), ShouldEqual, 2)
    74  
    75  			newIvs = ivs.Difference(fifteenEighteen)
    76  			So(newIvs, ShouldResemble, Intervals{fifteenEighteen})
    77  			ivs = ivs.Merge(fifteenEighteen)
    78  			So(len(ivs), ShouldEqual, 3)
    79  
    80  			expected := Intervals{oneSix, eightTen, fifteenEighteen}
    81  			So(ivs, ShouldResemble, expected)
    82  		})
    83  
    84  		Convey("Merging out of order works", func() {
    85  			var ivs, newIvs Intervals
    86  			newIvs = ivs.Difference(twoSix)
    87  			So(newIvs, ShouldResemble, Intervals{twoSix})
    88  			ivs = ivs.Merge(twoSix)
    89  			So(len(ivs), ShouldEqual, 1)
    90  
    91  			newIvs = ivs.Difference(fifteenEighteen)
    92  			So(newIvs, ShouldResemble, Intervals{fifteenEighteen})
    93  			ivs = ivs.Merge(fifteenEighteen)
    94  			So(len(ivs), ShouldEqual, 2)
    95  
    96  			newIvs = ivs.Difference(eightTen)
    97  			So(newIvs, ShouldResemble, Intervals{eightTen})
    98  			ivs = ivs.Merge(eightTen)
    99  			So(len(ivs), ShouldEqual, 3)
   100  
   101  			newIvs = ivs.Difference(oneThree)
   102  			So(newIvs, ShouldResemble, Intervals{Interval{1, 1}})
   103  			ivs = ivs.Merge(oneThree)
   104  			So(len(ivs), ShouldEqual, 3)
   105  
   106  			expected := Intervals{oneSix, eightTen, fifteenEighteen}
   107  			So(ivs, ShouldResemble, expected)
   108  		})
   109  
   110  		Convey("Merging where everything merges together works", func() {
   111  			var ivs, newIvs Intervals
   112  			newIvs = ivs.Difference(oneThree)
   113  			So(newIvs, ShouldResemble, Intervals{oneThree})
   114  			ivs = ivs.Merge(oneThree)
   115  			So(len(ivs), ShouldEqual, 1)
   116  
   117  			newIvs = ivs.Difference(twoSix)
   118  			So(newIvs, ShouldResemble, Intervals{Interval{4, 6}})
   119  			ivs = ivs.Merge(twoSix)
   120  			So(len(ivs), ShouldEqual, 1)
   121  
   122  			newIvs = ivs.Difference(fiveTen)
   123  			So(newIvs, ShouldResemble, Intervals{Interval{7, 10}})
   124  			ivs = ivs.Merge(fiveTen)
   125  			So(len(ivs), ShouldEqual, 1)
   126  
   127  			newIvs = ivs.Difference(tenEighteen)
   128  			So(newIvs, ShouldResemble, Intervals{Interval{11, 18}})
   129  			ivs = ivs.Merge(tenEighteen)
   130  			So(len(ivs), ShouldEqual, 1)
   131  
   132  			expected := Intervals{oneEighteen}
   133  			So(ivs, ShouldResemble, expected)
   134  		})
   135  
   136  		Convey("Merging unsorted where everything merges together works", func() {
   137  			var ivs, newIvs Intervals
   138  			newIvs = ivs.Difference(twoSix)
   139  			So(newIvs, ShouldResemble, Intervals{twoSix})
   140  			ivs = ivs.Merge(twoSix)
   141  			So(len(ivs), ShouldEqual, 1)
   142  
   143  			newIvs = ivs.Difference(oneThree)
   144  			So(newIvs, ShouldResemble, Intervals{Interval{1, 1}})
   145  			ivs = ivs.Merge(oneThree)
   146  			So(len(ivs), ShouldEqual, 1)
   147  
   148  			newIvs = ivs.Difference(tenEighteen)
   149  			So(newIvs, ShouldResemble, Intervals{tenEighteen})
   150  			ivs = ivs.Merge(tenEighteen)
   151  			So(len(ivs), ShouldEqual, 2)
   152  
   153  			newIvs = ivs.Difference(fiveTen)
   154  			So(newIvs, ShouldResemble, Intervals{Interval{7, 9}})
   155  			ivs = ivs.Merge(fiveTen)
   156  			So(len(ivs), ShouldEqual, 1)
   157  
   158  			expected := Intervals{oneEighteen}
   159  			So(ivs, ShouldResemble, expected)
   160  		})
   161  
   162  		Convey("Merging where nothing merges together works", func() {
   163  			var ivs, newIvs Intervals
   164  			newIvs = ivs.Difference(oneThree)
   165  			So(newIvs, ShouldResemble, Intervals{oneThree})
   166  			ivs = ivs.Merge(oneThree)
   167  			So(len(ivs), ShouldEqual, 1)
   168  
   169  			newIvs = ivs.Difference(sevenTen)
   170  			So(newIvs, ShouldResemble, Intervals{sevenTen})
   171  			ivs = ivs.Merge(sevenTen)
   172  			So(len(ivs), ShouldEqual, 2)
   173  
   174  			newIvs = ivs.Difference(twentyThirty)
   175  			So(newIvs, ShouldResemble, Intervals{twentyThirty})
   176  			ivs = ivs.Merge(twentyThirty)
   177  			So(len(ivs), ShouldEqual, 3)
   178  
   179  			expected := Intervals{oneThree, sevenTen, twentyThirty}
   180  			So(ivs, ShouldResemble, expected)
   181  		})
   182  
   183  		Convey("Merging adjacent intervals works", func() {
   184  			var ivs, newIvs Intervals
   185  			newIvs = ivs.Difference(oneThree)
   186  			So(newIvs, ShouldResemble, Intervals{oneThree})
   187  			ivs = ivs.Merge(oneThree)
   188  			So(len(ivs), ShouldEqual, 1)
   189  
   190  			newIvs = ivs.Difference(fourSix)
   191  			So(newIvs, ShouldResemble, Intervals{fourSix})
   192  			ivs = ivs.Merge(fourSix)
   193  			So(len(ivs), ShouldEqual, 1)
   194  
   195  			newIvs = ivs.Difference(sevenTen)
   196  			So(newIvs, ShouldResemble, Intervals{sevenTen})
   197  			ivs = ivs.Merge(sevenTen)
   198  			So(len(ivs), ShouldEqual, 1)
   199  
   200  			newIvs = ivs.Difference(elevenEighteen)
   201  			So(newIvs, ShouldResemble, Intervals{elevenEighteen})
   202  			ivs = ivs.Merge(elevenEighteen)
   203  			So(len(ivs), ShouldEqual, 1)
   204  
   205  			expected := Intervals{oneEighteen}
   206  			So(ivs, ShouldResemble, expected)
   207  		})
   208  
   209  		Convey("Merging unsorted adjacent intervals works", func() {
   210  			var ivs, newIvs Intervals
   211  			newIvs = ivs.Difference(fourSix)
   212  			So(newIvs, ShouldResemble, Intervals{fourSix})
   213  			ivs = ivs.Merge(fourSix)
   214  			So(len(ivs), ShouldEqual, 1)
   215  
   216  			newIvs = ivs.Difference(elevenEighteen)
   217  			So(newIvs, ShouldResemble, Intervals{elevenEighteen})
   218  			ivs = ivs.Merge(elevenEighteen)
   219  			So(len(ivs), ShouldEqual, 2)
   220  
   221  			newIvs = ivs.Difference(oneThree)
   222  			So(newIvs, ShouldResemble, Intervals{oneThree})
   223  			ivs = ivs.Merge(oneThree)
   224  			So(len(ivs), ShouldEqual, 2)
   225  
   226  			newIvs = ivs.Difference(sevenTen)
   227  			So(newIvs, ShouldResemble, Intervals{sevenTen})
   228  			ivs = ivs.Merge(sevenTen)
   229  			So(len(ivs), ShouldEqual, 1)
   230  
   231  			expected := Intervals{oneEighteen}
   232  			So(ivs, ShouldResemble, expected)
   233  		})
   234  
   235  		Convey("Merging subsumable intervals works", func() {
   236  			var ivs, newIvs Intervals
   237  			newIvs = ivs.Difference(oneThree)
   238  			So(newIvs, ShouldResemble, Intervals{oneThree})
   239  			ivs = ivs.Merge(oneThree)
   240  			So(len(ivs), ShouldEqual, 1)
   241  
   242  			newIvs = ivs.Difference(fourSix)
   243  			So(newIvs, ShouldResemble, Intervals{fourSix})
   244  			ivs = ivs.Merge(fourSix)
   245  			So(len(ivs), ShouldEqual, 1)
   246  
   247  			newIvs = ivs.Difference(twoSix)
   248  			So(newIvs, ShouldBeEmpty)
   249  			ivs = ivs.Merge(twoSix)
   250  			So(len(ivs), ShouldEqual, 1)
   251  
   252  			newIvs = ivs.Difference(oneThree)
   253  			So(newIvs, ShouldBeEmpty)
   254  			ivs = ivs.Merge(oneThree)
   255  			So(len(ivs), ShouldEqual, 1)
   256  
   257  			oneSeven := Interval{1, 7}
   258  
   259  			newIvs = ivs.Difference(oneSix)
   260  			So(newIvs, ShouldBeEmpty)
   261  			ivs = ivs.Merge(oneSix)
   262  			So(len(ivs), ShouldEqual, 1)
   263  
   264  			newIvs = ivs.Difference(oneSeven)
   265  			So(newIvs, ShouldResemble, Intervals{Interval{7, 7}})
   266  			ivs = ivs.Merge(oneSeven)
   267  			So(len(ivs), ShouldEqual, 1)
   268  
   269  			expected := Intervals{oneSeven}
   270  			So(ivs, ShouldResemble, expected)
   271  		})
   272  
   273  		Convey("Difference works across multiple intervals", func() {
   274  			var ivs, newIvs Intervals
   275  			newIvs = ivs.Difference(oneThree)
   276  			So(newIvs, ShouldResemble, Intervals{oneThree})
   277  			ivs = ivs.Merge(oneThree)
   278  			So(len(ivs), ShouldEqual, 1)
   279  
   280  			newIvs = ivs.Difference(sevenTen)
   281  			So(newIvs, ShouldResemble, Intervals{sevenTen})
   282  			ivs = ivs.Merge(sevenTen)
   283  			So(len(ivs), ShouldEqual, 2)
   284  
   285  			newIvs = ivs.Difference(fifteenEighteen)
   286  			So(newIvs, ShouldResemble, Intervals{fifteenEighteen})
   287  			ivs = ivs.Merge(fifteenEighteen)
   288  			So(len(ivs), ShouldEqual, 3)
   289  
   290  			newIvs = ivs.Difference(twentyThirty)
   291  			So(newIvs, ShouldResemble, Intervals{twentyThirty})
   292  			ivs = ivs.Merge(twentyThirty)
   293  			So(len(ivs), ShouldEqual, 4)
   294  
   295  			newIvs = ivs.Difference(fourtyFifty)
   296  			So(newIvs, ShouldResemble, Intervals{fourtyFifty})
   297  			ivs = ivs.Merge(fourtyFifty)
   298  			So(len(ivs), ShouldEqual, 5)
   299  
   300  			fiveTwentyFive := Interval{5, 25}
   301  			newIvs = ivs.Difference(fiveTwentyFive)
   302  			So(newIvs, ShouldResemble, Intervals{Interval{5, 6}, Interval{11, 14}, Interval{19, 19}})
   303  			ivs = ivs.Merge(fiveTwentyFive)
   304  			So(len(ivs), ShouldEqual, 3)
   305  
   306  			expected := Intervals{oneThree, Interval{5, 30}, fourtyFifty}
   307  			So(ivs, ShouldResemble, expected)
   308  		})
   309  
   310  		Convey("Difference works across multiple intervals that it subsumes", func() {
   311  			var ivs, newIvs Intervals
   312  			newIvs = ivs.Difference(oneThree)
   313  			So(newIvs, ShouldResemble, Intervals{oneThree})
   314  			ivs = ivs.Merge(oneThree)
   315  			So(len(ivs), ShouldEqual, 1)
   316  
   317  			newIvs = ivs.Difference(sevenTen)
   318  			So(newIvs, ShouldResemble, Intervals{sevenTen})
   319  			ivs = ivs.Merge(sevenTen)
   320  			So(len(ivs), ShouldEqual, 2)
   321  
   322  			newIvs = ivs.Difference(fifteenEighteen)
   323  			So(newIvs, ShouldResemble, Intervals{fifteenEighteen})
   324  			ivs = ivs.Merge(fifteenEighteen)
   325  			So(len(ivs), ShouldEqual, 3)
   326  
   327  			newIvs = ivs.Difference(twentyThirty)
   328  			So(newIvs, ShouldResemble, Intervals{twentyThirty})
   329  			ivs = ivs.Merge(twentyThirty)
   330  			So(len(ivs), ShouldEqual, 4)
   331  
   332  			newIvs = ivs.Difference(fourtyFifty)
   333  			So(newIvs, ShouldResemble, Intervals{fourtyFifty})
   334  			ivs = ivs.Merge(fourtyFifty)
   335  			So(len(ivs), ShouldEqual, 5)
   336  
   337  			fiveThirtyTwo := Interval{5, 32}
   338  			newIvs = ivs.Difference(fiveThirtyTwo)
   339  			So(newIvs, ShouldResemble, Intervals{Interval{5, 6}, Interval{11, 14}, Interval{19, 19}, Interval{31, 32}})
   340  			ivs = ivs.Merge(fiveThirtyTwo)
   341  			So(len(ivs), ShouldEqual, 3)
   342  
   343  			expected := Intervals{oneThree, fiveThirtyTwo, fourtyFifty}
   344  			So(ivs, ShouldResemble, expected)
   345  		})
   346  
   347  		Convey("Difference works across multiple out-of-order intervals that it subsumes", func() {
   348  			var ivs, newIvs Intervals
   349  			newIvs = ivs.Difference(sevenTen)
   350  			So(newIvs, ShouldResemble, Intervals{sevenTen})
   351  			ivs = ivs.Merge(sevenTen)
   352  			So(len(ivs), ShouldEqual, 1)
   353  
   354  			newIvs = ivs.Difference(twentyThirty)
   355  			So(newIvs, ShouldResemble, Intervals{twentyThirty})
   356  			ivs = ivs.Merge(twentyThirty)
   357  			So(len(ivs), ShouldEqual, 2)
   358  
   359  			newIvs = ivs.Difference(oneThree)
   360  			So(newIvs, ShouldResemble, Intervals{oneThree})
   361  			ivs = ivs.Merge(oneThree)
   362  			So(len(ivs), ShouldEqual, 3)
   363  
   364  			newIvs = ivs.Difference(fourtyFifty)
   365  			So(newIvs, ShouldResemble, Intervals{fourtyFifty})
   366  			ivs = ivs.Merge(fourtyFifty)
   367  			So(len(ivs), ShouldEqual, 4)
   368  
   369  			newIvs = ivs.Difference(fifteenEighteen)
   370  			So(newIvs, ShouldResemble, Intervals{fifteenEighteen})
   371  			ivs = ivs.Merge(fifteenEighteen)
   372  			So(len(ivs), ShouldEqual, 5)
   373  
   374  			fiveThirtyTwo := Interval{5, 32}
   375  			newIvs = ivs.Difference(fiveThirtyTwo)
   376  			So(newIvs, ShouldResemble, Intervals{Interval{5, 6}, Interval{11, 14}, Interval{19, 19}, Interval{31, 32}})
   377  			ivs = ivs.Merge(fiveThirtyTwo)
   378  			So(len(ivs), ShouldEqual, 3)
   379  
   380  			expected := Intervals{oneThree, fiveThirtyTwo, fourtyFifty}
   381  			So(ivs, ShouldResemble, expected)
   382  		})
   383  
   384  		Convey("Difference works when the interval overlaps and extends past the last interval in the set", func() {
   385  			var ivs, newIvs Intervals
   386  			ivs = ivs.Merge(Interval{0, 147455})
   387  			So(len(ivs), ShouldEqual, 1)
   388  
   389  			ivs = ivs.Merge(Interval{348160, 356351})
   390  			So(len(ivs), ShouldEqual, 2)
   391  
   392  			newIvs = ivs.Difference(Interval{352256, 368639})
   393  			So(newIvs, ShouldResemble, Intervals{Interval{356352, 368639}})
   394  			ivs = ivs.Merge(Interval{352256, 368639})
   395  			So(len(ivs), ShouldEqual, 2)
   396  		})
   397  
   398  		Convey("Truncate works", func() {
   399  			var ivs, newIvs Intervals
   400  			newIvs = ivs.Difference(sevenTen)
   401  			So(newIvs, ShouldResemble, Intervals{sevenTen})
   402  			ivs = ivs.Merge(sevenTen)
   403  			So(len(ivs), ShouldEqual, 1)
   404  
   405  			newIvs = ivs.Difference(twentyThirty)
   406  			So(newIvs, ShouldResemble, Intervals{twentyThirty})
   407  			ivs = ivs.Merge(twentyThirty)
   408  			So(len(ivs), ShouldEqual, 2)
   409  
   410  			newIvs = ivs.Difference(oneThree)
   411  			So(newIvs, ShouldResemble, Intervals{oneThree})
   412  			ivs = ivs.Merge(oneThree)
   413  			So(len(ivs), ShouldEqual, 3)
   414  
   415  			newIvs = ivs.Difference(fourtyFifty)
   416  			So(newIvs, ShouldResemble, Intervals{fourtyFifty})
   417  			ivs = ivs.Merge(fourtyFifty)
   418  			So(len(ivs), ShouldEqual, 4)
   419  
   420  			newIvs = ivs.Difference(fifteenEighteen)
   421  			So(newIvs, ShouldResemble, Intervals{fifteenEighteen})
   422  			ivs = ivs.Merge(fifteenEighteen)
   423  			So(len(ivs), ShouldEqual, 5)
   424  
   425  			ivs = ivs.Truncate(17)
   426  
   427  			expected := Intervals{oneThree, sevenTen, Interval{15, 17}}
   428  			So(ivs, ShouldResemble, expected)
   429  
   430  			ivs = ivs.Truncate(13)
   431  
   432  			expected = Intervals{oneThree, sevenTen}
   433  			So(ivs, ShouldResemble, expected)
   434  
   435  			ivs = ivs.Truncate(0)
   436  			So(ivs, ShouldResemble, Intervals{})
   437  		})
   438  	})
   439  
   440  	Convey("Merging many intervals is fast", t, func() {
   441  		// we will simulate reading a 1000000000 byte file 10000 bytes at a
   442  		// time. First we read the second half of the file, then we read the
   443  		// whole file. Within each half most of the reads are sequential, but a
   444  		// handful of them are swapped out of order, as happens in reality (for
   445  		// some unknown reason)
   446  		fileSize := 1000000000 - 1
   447  		halfSize := (fileSize / 2) + 1
   448  		readSize := 10000
   449  		var inputs []int
   450  		var exepectedNew []bool
   451  		for i := halfSize; i < fileSize; i += readSize {
   452  			inputs = append(inputs, i)
   453  			exepectedNew = append(exepectedNew, true)
   454  		}
   455  		for i := 0; i < halfSize; i += readSize {
   456  			inputs = append(inputs, i)
   457  			exepectedNew = append(exepectedNew, true)
   458  		}
   459  		for i := halfSize; i < fileSize; i += readSize {
   460  			inputs = append(inputs, i)
   461  			exepectedNew = append(exepectedNew, false)
   462  		}
   463  
   464  		// swap 10% of intervals with their neighbours
   465  		toSwap := int(math.Ceil((float64(len(inputs)) / 100.0) * 10.0))
   466  		doneSwaps := 0
   467  		swapped := make(map[int]bool)
   468  		for {
   469  			swap := rand.Intn(len(inputs))
   470  			if _, done := swapped[swap]; done {
   471  				continue
   472  			}
   473  			inputs[swap], inputs[swap+1] = inputs[swap+1], inputs[swap]
   474  			exepectedNew[swap], exepectedNew[swap+1] = exepectedNew[swap+1], exepectedNew[swap]
   475  			swapped[swap] = true
   476  			swapped[swap+1] = true
   477  			doneSwaps++
   478  			if doneSwaps == toSwap {
   479  				break
   480  			}
   481  		}
   482  
   483  		var ivs Intervals
   484  		errors := 0
   485  		t := time.Now()
   486  		for i, input := range inputs {
   487  			iv := NewInterval(int64(input), int64(readSize))
   488  			newIvs := ivs.Difference(iv)
   489  			if (len(newIvs) == 1) != exepectedNew[i] {
   490  				errors++
   491  			}
   492  			ivs = ivs.Merge(iv)
   493  		}
   494  		// fmt.Printf("\ntook %s\n", time.Since(t))
   495  		So(errors, ShouldEqual, 0)
   496  		So(len(ivs), ShouldEqual, 1)
   497  		So(time.Since(t).Seconds(), ShouldBeLessThan, 1) // 30ms on my machine
   498  	})
   499  }