github.com/biogo/biogo@v1.0.4/align/pals/piler.go (about) 1 // Copyright ©2012 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package pals 6 7 import ( 8 "fmt" 9 "log" 10 11 "github.com/biogo/biogo/feat" 12 "github.com/biogo/store/interval" 13 ) 14 15 var duplicatePair = fmt.Errorf("pals: attempt to add duplicate feature pair to pile") 16 17 // Note Location must be comparable according to http://golang.org/ref/spec#Comparison_operators. 18 type pileInterval struct { 19 id uintptr 20 start, end int 21 pile *Pile 22 location feat.Feature 23 images []*Feature 24 overlap int 25 } 26 27 func (i *pileInterval) Overlap(b interval.IntRange) bool { 28 return i.end-i.overlap >= b.Start && i.start <= b.End-i.overlap 29 } 30 func (i *pileInterval) ID() uintptr { return i.id } 31 func (i *pileInterval) Range() interval.IntRange { 32 return interval.IntRange{Start: i.start + i.overlap, End: i.end - i.overlap} 33 } 34 35 // A Piler performs the aggregation of feature pairs according to the description in section 2.3 36 // of Edgar and Myers (2005) using an interval tree, giving O(nlogn) time but better space complexity 37 // and flexibility with feature overlap. 38 type Piler struct { 39 // Logger logs pile construction during 40 // Piles calls if non-nil. 41 Logger *log.Logger 42 // LogFreq specifies how frequently 43 // log lines are witten if not zero. 44 LogFreq int 45 46 intervals map[feat.Feature]*interval.IntTree 47 seen map[[2]sf]struct{} 48 overlap int 49 piled bool 50 51 // next provides the next ID 52 // for merged intervals. IDs 53 // are unique across all intervals. 54 next uintptr 55 } 56 57 type sf struct { 58 loc feat.Feature 59 s, e int 60 } 61 62 // NewPiler creates a Piler object ready for piling feature pairs. 63 func NewPiler(overlap int) *Piler { 64 return &Piler{ 65 intervals: make(map[feat.Feature]*interval.IntTree), 66 seen: make(map[[2]sf]struct{}), 67 overlap: overlap, 68 } 69 } 70 71 // Add adds a feature pair to the piler incorporating the features into piles where appropriate. 72 func (p *Piler) Add(fp *Pair) error { 73 a := sf{loc: fp.A.Location(), s: fp.A.Start(), e: fp.A.End()} 74 b := sf{loc: fp.B.Location(), s: fp.B.Start(), e: fp.B.End()} 75 ab, ba := [2]sf{a, b}, [2]sf{b, a} 76 77 if _, ok := p.seen[ab]; ok { 78 return duplicatePair 79 } 80 if _, ok := p.seen[ba]; ok { 81 return duplicatePair 82 } 83 84 p.merge(&pileInterval{id: p.nextID(), start: fp.A.Start(), end: fp.A.End(), location: fp.A.Location(), images: []*Feature{fp.A}, overlap: p.overlap}) 85 p.merge(&pileInterval{id: p.nextID(), start: fp.B.Start(), end: fp.B.End(), location: fp.B.Location(), images: []*Feature{fp.B}, overlap: p.overlap}) 86 p.seen[ab] = struct{}{} 87 88 return nil 89 } 90 91 func (p *Piler) nextID() uintptr { 92 id := p.next 93 p.next++ 94 return id 95 } 96 97 func min(a, b int) int { 98 if a < b { 99 return a 100 } 101 return b 102 } 103 104 func max(a, b int) int { 105 if a > b { 106 return a 107 } 108 return b 109 } 110 111 // merge merges an interval into the tree moving location meta data from the replaced intervals 112 // into the new interval. 113 func (p *Piler) merge(pi *pileInterval) { 114 var ( 115 f = true 116 r []interval.IntInterface 117 qi = &pileInterval{start: pi.start, end: pi.end} 118 ) 119 t, ok := p.intervals[pi.location] 120 if !ok { 121 t = &interval.IntTree{} 122 p.intervals[pi.location] = t 123 } 124 t.DoMatching( 125 func(e interval.IntInterface) (done bool) { 126 r = append(r, e) 127 iv := e.(*pileInterval) 128 pi.images = append(pi.images, iv.images...) 129 if f { 130 pi.start = min(iv.start, pi.start) 131 f = false 132 } 133 pi.end = max(iv.end, pi.end) 134 return 135 }, 136 qi, 137 ) 138 for _, d := range r { 139 t.Delete(d, false) 140 } 141 t.Insert(pi, false) 142 } 143 144 // A PairFilter is used to determine whether a Pair's images are included in a Pile. 145 type PairFilter func(*Pair) bool 146 147 // Piles returns a slice of piles determined by application of the filter function f to 148 // the feature pairs that have been added to the piler. Piles may be called more than once, 149 // but the piles returned in earlier invocations will be altered by subsequent calls. 150 func (p *Piler) Piles(f PairFilter) []*Pile { 151 var n int 152 if !p.piled { 153 for _, t := range p.intervals { 154 t.Do(func(e interval.IntInterface) (done bool) { 155 pa := e.(*pileInterval) 156 if pa.pile == nil { 157 pa.pile = &Pile{Loc: pa.location, From: pa.start, To: pa.end} 158 } 159 for _, im := range pa.images { 160 if checkSanity { 161 assertPileSanity(t, im, pa.pile) 162 } 163 im.Loc = pa.pile 164 } 165 return 166 }) 167 n++ 168 if p.Logger != nil && p.LogFreq != 0 && n%p.LogFreq == 0 { 169 p.Logger.Printf("piled %d intervals of %d", n, len(p.intervals)) 170 } 171 } 172 p.piled = true 173 } 174 175 n = 0 176 var piles []*Pile 177 for _, t := range p.intervals { 178 t.Do(func(e interval.IntInterface) (done bool) { 179 pa := e.(*pileInterval) 180 pa.pile.Images = pa.pile.Images[:0] 181 for _, im := range pa.images { 182 if f != nil && !f(im.Pair) { 183 continue 184 } 185 if checkSanity { 186 assertPairSanity(im) 187 } 188 pa.pile.Images = append(pa.pile.Images, im) 189 } 190 piles = append(piles, pa.pile) 191 return 192 }) 193 n++ 194 if p.Logger != nil && p.LogFreq != 0 && n%p.LogFreq == 0 { 195 p.Logger.Printf("filtered %d intervals of %d", n, len(p.intervals)) 196 } 197 } 198 199 return piles 200 } 201 202 const checkSanity = false 203 204 func assertPileSanity(t *interval.IntTree, im *Feature, pi *Pile) { 205 if im.Start() < pi.Start() || im.End() > pi.End() { 206 panic(fmt.Sprintf("image extends beyond pile: %#v", im)) 207 } 208 if foundPiles := t.Get(&pileInterval{start: im.Start(), end: im.End()}); len(foundPiles) > 1 { 209 var containing int 210 for _, pile := range foundPiles { 211 r := pile.Range() 212 if (r.Start <= im.Start() && r.End > im.End()) || (r.Start < im.Start() && r.End >= im.End()) { 213 containing++ 214 } 215 } 216 if containing > 1 { 217 panic(fmt.Sprintf("found too many piles for %#v", im)) 218 } 219 } 220 } 221 222 func assertPairSanity(im *Feature) { 223 if _, ok := im.Loc.(*Pile); !ok { 224 panic(fmt.Sprintf("image not allocated to pile %#v", im)) 225 } 226 if _, ok := im.Mate().Loc.(*Pile); !ok { 227 panic(fmt.Sprintf("image mate not allocated to pile %#v", im.Mate())) 228 } 229 }