github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/search/terms.go (about) 1 package search 2 3 import ( 4 "bytes" 5 "fmt" 6 "github.com/balzaczyy/golucene/core/index" 7 . "github.com/balzaczyy/golucene/core/index/model" 8 . "github.com/balzaczyy/golucene/core/search/model" 9 "github.com/balzaczyy/golucene/core/util" 10 "reflect" 11 ) 12 13 type TermQuery struct { 14 *AbstractQuery 15 term *index.Term 16 docFreq int 17 perReaderTermState *index.TermContext 18 } 19 20 func NewTermQuery(t *index.Term) *TermQuery { 21 return NewTermQueryWithDocFreq(t, -1) 22 } 23 24 func NewTermQueryWithDocFreq(t *index.Term, docFreq int) *TermQuery { 25 ans := &TermQuery{} 26 ans.AbstractQuery = NewAbstractQuery(ans) 27 ans.term = t 28 ans.docFreq = docFreq 29 return ans 30 } 31 32 func (q *TermQuery) CreateWeight(ss *IndexSearcher) (w Weight, err error) { 33 ctx := ss.TopReaderContext() 34 var termState *index.TermContext 35 if q.perReaderTermState == nil || q.perReaderTermState.TopReaderContext != ctx { 36 // make TermQuery single-pass if we don't have a PRTS or if the context differs! 37 termState, err = index.NewTermContextFromTerm(ctx, q.term) 38 if err != nil { 39 return nil, err 40 } 41 } else { 42 // PRTS was pre-build for this IS 43 termState = q.perReaderTermState 44 } 45 46 // we must not ignore the given docFreq - if set use the given value (lie) 47 if q.docFreq != -1 { 48 termState.DocFreq = q.docFreq 49 } 50 51 return NewTermWeight(q, ss, termState), nil 52 } 53 54 func (q *TermQuery) ToString(field string) string { 55 var buf bytes.Buffer 56 if q.term.Field != field { 57 buf.WriteString(q.term.Field) 58 buf.WriteRune(':') 59 } 60 buf.WriteString(string(q.term.Bytes)) 61 if q.boost != 1.0 { 62 buf.WriteString(fmt.Sprintf("^%v", q.boost)) 63 } 64 return buf.String() 65 } 66 67 type TermWeight struct { 68 *WeightImpl 69 *TermQuery 70 similarity Similarity 71 stats SimWeight 72 termStates *index.TermContext 73 } 74 75 func NewTermWeight(owner *TermQuery, ss *IndexSearcher, termStates *index.TermContext) *TermWeight { 76 assert(termStates != nil) 77 sim := ss.similarity 78 ans := &TermWeight{ 79 TermQuery: owner, 80 similarity: sim, 81 stats: sim.computeWeight( 82 owner.boost, 83 ss.CollectionStatistics(owner.term.Field), 84 ss.TermStatistics(owner.term, termStates)), 85 termStates: termStates, 86 } 87 ans.WeightImpl = newWeightImpl(ans) 88 return ans 89 } 90 91 func (tw *TermWeight) String() string { 92 return fmt.Sprintf("weight(%v)", tw.TermQuery) 93 } 94 95 func (tw *TermWeight) ValueForNormalization() float32 { 96 return tw.stats.ValueForNormalization() 97 } 98 99 func (tw *TermWeight) Normalize(norm float32, topLevelBoost float32) { 100 tw.stats.Normalize(norm, topLevelBoost) 101 } 102 103 func (tw *TermWeight) IsScoresDocsOutOfOrder() bool { 104 return false 105 } 106 107 func (tw *TermWeight) Scorer(context *index.AtomicReaderContext, 108 acceptDocs util.Bits) (Scorer, error) { 109 110 assert2(tw.termStates.TopReaderContext == index.TopLevelContext(context), 111 "The top-reader used to create Weight (%v) is not the same as the current reader's top-reader (%v)", 112 tw.termStates.TopReaderContext, index.TopLevelContext(context)) 113 termsEnum, err := tw.termsEnum(context) 114 if termsEnum == nil || err != nil { 115 return nil, err 116 } 117 assert(termsEnum != nil) 118 docs, err := termsEnum.Docs(acceptDocs, nil) 119 if err != nil { 120 return nil, err 121 } 122 assert(docs != nil) 123 simScorer, err := tw.similarity.simScorer(tw.stats, context) 124 if err != nil { 125 return nil, err 126 } 127 return newTermScorer(tw, docs, simScorer), nil 128 } 129 130 func (tw *TermWeight) termsEnum(ctx *index.AtomicReaderContext) (TermsEnum, error) { 131 state := tw.termStates.State(ctx.Ord) 132 if state == nil { // term is not present in that reader 133 assert2(tw.termNotInReader(ctx.Reader(), tw.term), 134 "no termstate found but term exists in reader term=%v", tw.term) 135 return nil, nil 136 } 137 terms := ctx.Reader().(index.AtomicReader).Terms(tw.term.Field) 138 te := terms.Iterator(nil) 139 err := te.SeekExactFromLast(tw.term.Bytes, state) 140 return te, err 141 } 142 143 func (tw *TermWeight) termNotInReader(reader index.IndexReader, term *index.Term) bool { 144 n, err := reader.DocFreq(term) 145 assert(err == nil) 146 return n == 0 147 } 148 149 func (tw *TermWeight) Explain(ctx *index.AtomicReaderContext, doc int) (Explanation, error) { 150 scorer, err := tw.Scorer(ctx, ctx.Reader().(index.AtomicReader).LiveDocs()) 151 if err != nil { 152 return nil, err 153 } 154 if scorer != nil { 155 newDoc, err := scorer.Advance(doc) 156 if err != nil { 157 return nil, err 158 } 159 if newDoc == doc { 160 freq, err := scorer.Freq() 161 if err != nil { 162 return nil, err 163 } 164 docScorer, err := tw.similarity.simScorer(tw.stats, ctx) 165 if err != nil { 166 return nil, err 167 } 168 scoreExplanation := docScorer.explain(doc, 169 newExplanation(float32(freq), fmt.Sprintf("termFreq=%v", freq))) 170 ans := newComplexExplanation(true, 171 scoreExplanation.(*ExplanationImpl).value, 172 fmt.Sprintf("weight(%v in %v) [%v], result of:", 173 tw.TermQuery, doc, reflect.TypeOf(tw.similarity))) 174 ans.details = []Explanation{scoreExplanation} 175 return ans, nil 176 } 177 } 178 return newComplexExplanation(false, 0, "no matching term"), nil 179 } 180 181 // search/TermScorer.java 182 /** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>. 183 */ 184 type TermScorer struct { 185 *abstractScorer 186 docsEnum DocsEnum 187 docScorer SimScorer 188 } 189 190 func newTermScorer(w Weight, td DocsEnum, docScorer SimScorer) *TermScorer { 191 ans := &TermScorer{docsEnum: td, docScorer: docScorer} 192 ans.abstractScorer = newScorer(ans, w) 193 return ans 194 } 195 196 func (ts *TermScorer) DocId() int { 197 return ts.docsEnum.DocId() 198 } 199 200 func (ts *TermScorer) Freq() (int, error) { 201 return ts.docsEnum.Freq() 202 } 203 204 /** 205 * Advances to the next document matching the query. <br> 206 * 207 * @return the document matching the query or NO_MORE_DOCS if there are no more documents. 208 */ 209 func (ts *TermScorer) NextDoc() (d int, err error) { 210 return ts.docsEnum.NextDoc() 211 } 212 213 func (ts *TermScorer) Score() (s float32, err error) { 214 assert(ts.DocId() != NO_MORE_DOCS) 215 freq, err := ts.docsEnum.Freq() 216 if err != nil { 217 return 0, err 218 } 219 return ts.docScorer.Score(ts.docsEnum.DocId(), float32(freq)), nil 220 } 221 222 /* 223 Advances to the first match beyond the current whose document number 224 is greater than or equal to a given target. 225 */ 226 func (ts *TermScorer) Advance(target int) (int, error) { 227 return ts.docsEnum.Advance(target) 228 } 229 230 func (ts *TermScorer) String() string { 231 return fmt.Sprintf("scorer(%v)", ts.weight) 232 }