github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/search/booleanScorer.go (about) 1 package search 2 3 import ( 4 "github.com/balzaczyy/golucene/core/index" 5 ) 6 7 type BooleanScorerCollector struct { 8 bucketTable *BucketTable 9 mask int 10 scorer Scorer 11 } 12 13 func newBooleanScorerCollector(mask int, bucketTable *BucketTable) *BooleanScorerCollector { 14 return &BooleanScorerCollector{ 15 mask: mask, 16 bucketTable: bucketTable, 17 } 18 } 19 20 func (c *BooleanScorerCollector) Collect(doc int) (err error) { 21 table := c.bucketTable 22 i := doc & BUCKET_TABLE_MASK 23 bucket := table.buckets[i] 24 25 var score float32 26 if bucket.doc != doc { 27 bucket.doc = doc 28 if score, err = c.scorer.Score(); err != nil { 29 return err 30 } 31 bucket.score = float64(score) 32 bucket.bits = c.mask 33 bucket.coord = 1 34 35 bucket.next = table.first 36 table.first = bucket 37 } else { 38 if score, err = c.scorer.Score(); err != nil { 39 return err 40 } 41 bucket.score += float64(score) 42 bucket.bits |= c.mask 43 bucket.coord++ 44 } 45 46 return nil 47 } 48 49 func (c *BooleanScorerCollector) SetNextReader(*index.AtomicReaderContext) {} 50 func (c *BooleanScorerCollector) SetScorer(scorer Scorer) { c.scorer = scorer } 51 func (c *BooleanScorerCollector) AcceptsDocsOutOfOrder() bool { return true } 52 53 type Bucket struct { 54 doc int // tells if bucket is valid 55 score float64 56 bits int 57 coord int 58 next *Bucket 59 } 60 61 func newBucket() *Bucket { 62 return &Bucket{ 63 doc: -1, 64 } 65 } 66 67 const BUCKET_TABLE_SIZE = 1 << 11 68 const BUCKET_TABLE_MASK = BUCKET_TABLE_SIZE - 1 69 70 type BucketTable struct { 71 buckets []*Bucket 72 first *Bucket // head of valid list 73 } 74 75 func newBucketTable() *BucketTable { 76 ans := &BucketTable{ 77 buckets: make([]*Bucket, BUCKET_TABLE_SIZE), 78 } 79 // Pre-fill to save the lazy init when collecting each sub: 80 for i, _ := range ans.buckets { 81 ans.buckets[i] = newBucket() 82 } 83 return ans 84 } 85 86 func (t *BucketTable) newCollector(mask int) Collector { 87 return newBooleanScorerCollector(mask, t) 88 } 89 90 type SubScorer struct { 91 scorer BulkScorer 92 prohibited bool 93 collector Collector 94 next *SubScorer 95 more bool 96 } 97 98 func newSubScorer(scorer BulkScorer, required, prohibited bool, 99 collector Collector, next *SubScorer) *SubScorer { 100 101 assert2(!required, "this scorer cannot handle required=true") 102 return &SubScorer{ 103 scorer: scorer, 104 more: true, 105 prohibited: prohibited, 106 collector: collector, 107 next: next, 108 } 109 } 110 111 /* Any time a prohibited clause matches we set bit 0: */ 112 const PROHIBITED_MASK = 1 113 114 type BooleanScorer struct { 115 *BulkScorerImpl 116 scorers *SubScorer 117 bucketTable *BucketTable 118 coordFactors []float32 119 minNrShouldMatch int 120 end int 121 current *Bucket 122 weight Weight 123 } 124 125 func newBooleanScorer(weight *BooleanWeight, 126 disableCoord bool, minNrShouldMatch int, 127 optionalScorers, prohibitedScorers []BulkScorer, 128 maxCoord int) *BooleanScorer { 129 130 ans := &BooleanScorer{ 131 bucketTable: newBucketTable(), 132 minNrShouldMatch: minNrShouldMatch, 133 weight: weight, 134 } 135 ans.BulkScorerImpl = newBulkScorer(ans) 136 137 for _, scorer := range optionalScorers { 138 ans.scorers = newSubScorer(scorer, false, false, 139 ans.bucketTable.newCollector(0), ans.scorers) 140 } 141 142 for _, scorer := range prohibitedScorers { 143 ans.scorers = newSubScorer(scorer, false, true, 144 ans.bucketTable.newCollector(PROHIBITED_MASK), ans.scorers) 145 } 146 147 ans.coordFactors = make([]float32, len(optionalScorers)+1) 148 for i, _ := range ans.coordFactors { 149 if disableCoord { 150 ans.coordFactors[i] = 1 151 } else { 152 ans.coordFactors[i] = weight.coord(i, maxCoord) 153 } 154 } 155 156 return ans 157 } 158 159 func (s *BooleanScorer) ScoreAndCollectUpto(collector Collector, max int) (more bool, err error) { 160 fs := newFakeScorer() 161 162 // The internal loop will set the score and doc before calling collect. 163 collector.SetScorer(fs) 164 for { 165 s.bucketTable.first = nil 166 167 for s.current != nil { // more queued 168 // check prohibited & required 169 if (s.current.bits & PROHIBITED_MASK) == 0 { 170 171 if s.current.doc >= max { 172 panic("not implemented yet") 173 } 174 175 if s.current.coord >= s.minNrShouldMatch { 176 fs.score = float32(s.current.score * float64(s.coordFactors[s.current.coord])) 177 fs.doc = s.current.doc 178 fs.freq = s.current.coord 179 if err = collector.Collect(s.current.doc); err != nil { 180 return false, err 181 } 182 } 183 } 184 185 s.current = s.current.next // pop the queue 186 } 187 188 if s.bucketTable.first != nil { 189 panic("not implemented yet") 190 } 191 192 // refill the queue 193 more = false 194 s.end += BUCKET_TABLE_SIZE 195 for sub := s.scorers; sub != nil; sub = sub.next { 196 if sub.more { 197 if sub.more, err = sub.scorer.ScoreAndCollectUpto(sub.collector, s.end); err != nil { 198 return false, err 199 } 200 more = more || sub.more 201 } 202 } 203 s.current = s.bucketTable.first 204 205 if s.current == nil && !more { 206 break 207 } 208 } 209 return false, nil 210 } 211 212 func (s *BooleanScorer) String() string { 213 panic("not implemented yet") 214 } 215 216 type FakeScorer struct { 217 *abstractScorer 218 score float32 219 doc int 220 freq int 221 } 222 223 func newFakeScorer() *FakeScorer { 224 ans := &FakeScorer{ 225 doc: -1, 226 freq: -1, 227 } 228 ans.abstractScorer = newScorer(ans, nil) 229 return ans 230 } 231 232 func (s *FakeScorer) Advance(int) (int, error) { panic("FakeScorer doesn't support advance(int)") } 233 func (s *FakeScorer) DocId() int { return s.doc } 234 func (s *FakeScorer) Freq() (int, error) { return s.freq, nil } 235 func (s *FakeScorer) NextDoc() (int, error) { panic("FakeScorer doesn't support nextDoc()") } 236 func (s *FakeScorer) Score() (float32, error) { return s.score, nil } 237 238 // func (s *FakeScorer) Cost() int64 { return 1 } 239 // func (s *FakeScorer) Weight() Weight { panic("not supported") } 240 // func (s *FakeScorer) Children() []ChildScorer { panic("not supported") }