github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/compositeReader.go (about) 1 package index 2 3 import ( 4 "bytes" 5 "container/list" 6 "fmt" 7 . "github.com/balzaczyy/golucene/core/codec/spi" 8 "reflect" 9 ) 10 11 type CompositeReaderSPI interface { 12 getSequentialSubReaders() []IndexReader 13 } 14 15 type CompositeReader interface { 16 IndexReader 17 CompositeReaderSPI 18 } 19 20 type CompositeReaderImpl struct { 21 *IndexReaderImpl 22 CompositeReaderSPI 23 readerContext *CompositeReaderContext // lazy load 24 } 25 26 func newCompositeReader(spi CompositeReaderSPI, self IndexReaderImplSPI) *CompositeReaderImpl { 27 return &CompositeReaderImpl{ 28 IndexReaderImpl: newIndexReader(self), 29 CompositeReaderSPI: spi, 30 } 31 } 32 33 func (r *CompositeReaderImpl) String() string { 34 var buf bytes.Buffer 35 class := reflect.TypeOf(r.IndexReaderImplSPI).Name() 36 if class != "" { 37 buf.WriteString(class) 38 } else { 39 buf.WriteString("CompositeReader") 40 } 41 buf.WriteString("(") 42 subReaders := r.getSequentialSubReaders() 43 if len(subReaders) > 0 { 44 fmt.Fprintf(&buf, "%v", subReaders[0]) 45 for i, v := range subReaders { 46 if i > 0 { 47 fmt.Fprintf(&buf, " %v", v) 48 } 49 } 50 } 51 buf.WriteString(")") 52 return buf.String() 53 } 54 55 func (r *CompositeReaderImpl) Context() IndexReaderContext { 56 r.ensureOpen() 57 // lazy init without thread safety for perf reasons: Building the readerContext twice does not hurt! 58 if r.readerContext == nil { 59 // log.Print("Obtaining context for: ", r) 60 // assert getSequentialSubReaders() != null; 61 r.readerContext = newCompositeReaderContext(r) 62 } 63 return r.readerContext 64 } 65 66 type CompositeReaderContext struct { 67 *IndexReaderContextImpl 68 children []IndexReaderContext 69 leaves *list.List // operated by builder 70 reader CompositeReader 71 } 72 73 func newCompositeReaderContext(r CompositeReader) *CompositeReaderContext { 74 return newCompositeReaderContextBuilder(r).build() 75 } 76 77 func newCompositeReaderContext3(reader CompositeReader, 78 children []IndexReaderContext, leaves *list.List) *CompositeReaderContext { 79 return newCompositeReaderContext6(nil, reader, 0, 0, children, leaves) 80 } 81 82 func newCompositeReaderContext5(parent *CompositeReaderContext, reader CompositeReader, 83 ordInParent, docBaseInParent int, children []IndexReaderContext) *CompositeReaderContext { 84 return newCompositeReaderContext6(parent, reader, ordInParent, docBaseInParent, children, list.New()) 85 } 86 87 func newCompositeReaderContext6(parent *CompositeReaderContext, 88 reader CompositeReader, 89 ordInParent, docBaseInParent int, 90 children []IndexReaderContext, 91 leaves *list.List) *CompositeReaderContext { 92 ans := &CompositeReaderContext{} 93 ans.IndexReaderContextImpl = newIndexReaderContext(parent, ordInParent, docBaseInParent) 94 ans.children = children 95 ans.leaves = leaves 96 ans.reader = reader 97 return ans 98 } 99 100 func (ctx *CompositeReaderContext) Leaves() []*AtomicReaderContext { 101 assert2(ctx.isTopLevel, "This is not a top-level context.") 102 assert(ctx.leaves != nil) 103 ans := make([]*AtomicReaderContext, 0, ctx.leaves.Len()) 104 for e := ctx.leaves.Front(); e != nil; e = e.Next() { 105 ans = append(ans, e.Value.(*AtomicReaderContext)) 106 } 107 return ans 108 } 109 110 func (ctx *CompositeReaderContext) Children() []IndexReaderContext { 111 return ctx.children 112 } 113 114 func (ctx *CompositeReaderContext) Reader() IndexReader { 115 return ctx.reader 116 } 117 118 func (ctx *CompositeReaderContext) String() string { 119 return fmt.Sprintf("CompositeReaderContext{%v %v %v}", 120 ctx.IndexReaderContextImpl, ctx.children, ctx.reader) 121 } 122 123 type CompositeReaderContextBuilder struct { 124 reader CompositeReader 125 leaves *list.List 126 leafDocBase int 127 } 128 129 func newCompositeReaderContextBuilder(r CompositeReader) CompositeReaderContextBuilder { 130 return CompositeReaderContextBuilder{reader: r, leaves: list.New()} 131 } 132 133 func (b CompositeReaderContextBuilder) build() *CompositeReaderContext { 134 return b.build4(nil, b.reader, 0, 0).(*CompositeReaderContext) 135 } 136 137 func (b CompositeReaderContextBuilder) build4(parent *CompositeReaderContext, 138 reader IndexReader, ord, docBase int) IndexReaderContext { 139 // log.Printf("Building context from %v(parent: %v, %v-%v)", reader, parent, ord, docBase) 140 if ar, ok := reader.(AtomicReader); ok { 141 // log.Print("AtomicReader is detected.") 142 atomic := newAtomicReaderContext(parent, ar, ord, docBase, b.leaves.Len(), b.leafDocBase) 143 b.leaves.PushBack(atomic) 144 b.leafDocBase += reader.MaxDoc() 145 return atomic 146 } 147 // log.Print("CompositeReader is detected: ", reader) 148 cr := reader.(CompositeReader) 149 sequentialSubReaders := cr.getSequentialSubReaders() 150 // log.Printf("Found %v sub readers.", len(sequentialSubReaders)) 151 children := make([]IndexReaderContext, len(sequentialSubReaders)) 152 var newParent *CompositeReaderContext 153 if parent == nil { 154 newParent = newCompositeReaderContext3(cr, children, b.leaves) 155 } else { 156 newParent = newCompositeReaderContext5(parent, cr, ord, docBase, children) 157 } 158 newDocBase := 0 159 for i, r := range sequentialSubReaders { 160 children[i] = b.build4(newParent, r, i, newDocBase) 161 newDocBase = r.MaxDoc() 162 } 163 // assert newDocBase == cr.maxDoc() 164 return newParent 165 } 166 167 var ( 168 EMPTY_ARRAY = []ReaderSlice{} 169 ) 170 171 type ReaderSlice struct { 172 start, length, readerIndex int 173 } 174 175 func (rs ReaderSlice) String() string { 176 return fmt.Sprintf("slice start=%v length=%v readerIndex=%v", rs.start, rs.length, rs.readerIndex) 177 } 178 179 type BaseCompositeReaderSPI interface { 180 IndexReaderImplSPI 181 CompositeReaderSPI 182 } 183 184 type BaseCompositeReader struct { 185 *CompositeReaderImpl 186 subReaders []IndexReader 187 starts []int 188 maxDoc int 189 numDocs int 190 191 subReadersList []IndexReader 192 } 193 194 func newBaseCompositeReader(spi BaseCompositeReaderSPI, readers []IndexReader) *BaseCompositeReader { 195 // log.Printf("Initializing BaseCompositeReader with %v IndexReaders", len(readers)) 196 ans := &BaseCompositeReader{} 197 ans.CompositeReaderImpl = newCompositeReader(spi, spi) 198 ans.subReaders = readers 199 ans.subReadersList = make([]IndexReader, len(readers)) 200 copy(ans.subReadersList, readers) 201 ans.starts = make([]int, len(readers)+1) // build starts array 202 var maxDoc, numDocs int 203 for i, r := range readers { 204 ans.starts[i] = maxDoc 205 maxDoc += r.MaxDoc() // compute maxDocs 206 if maxDoc < 0 || maxDoc > actualMaxDocs { // overflow 207 panic(fmt.Sprintf( 208 "Too many documents, composite IndexReaders cannot exceed %v", 209 actualMaxDocs)) 210 } 211 numDocs += r.NumDocs() // compute numDocs 212 // log.Printf("Obtained %v docs (max %v)", numDocs, maxDoc) 213 r.registerParentReader(ans) 214 } 215 ans.starts[len(readers)] = maxDoc 216 ans.maxDoc = maxDoc 217 ans.numDocs = numDocs 218 // log.Print("Success") 219 return ans 220 } 221 222 func (r *BaseCompositeReader) TermVectors(docID int) error { 223 panic("not implemented yet") 224 // r.ensureOpen() 225 // i := readerIndex(docID) 226 // return r.subReaders[i].TermVectors(docID - starts[i]) 227 } 228 229 func (r *BaseCompositeReader) NumDocs() int { 230 // Don't call ensureOpen() here (it could affect performance) 231 return r.numDocs 232 } 233 234 func (r *BaseCompositeReader) MaxDoc() int { 235 // Don't call ensureOpen() here (it could affect performance) 236 return r.maxDoc 237 } 238 239 func (r *BaseCompositeReader) VisitDocument(docID int, visitor StoredFieldVisitor) error { 240 r.ensureOpen() 241 i := r.readerIndex(docID) // find subreader num 242 return r.subReaders[i].VisitDocument(docID-r.starts[i], visitor) 243 } 244 245 func (r *BaseCompositeReader) DocFreq(term *Term) (int, error) { 246 panic("not implemented yet") 247 } 248 249 func (r *BaseCompositeReader) TotalTermFreq(term *Term) int64 { 250 panic("not implemented yet") 251 } 252 253 func (r *BaseCompositeReader) SumDocFreq(field string) int64 { 254 panic("not implemented yet") 255 } 256 257 func (r *BaseCompositeReader) DocCount(field string) int { 258 panic("not implemented yet") 259 } 260 261 func (r *BaseCompositeReader) SumTotalTermFreq(field string) int64 { 262 panic("not implemented yet") 263 } 264 265 func (r *BaseCompositeReader) readerIndex(docID int) int { 266 if docID < 0 || docID >= r.maxDoc { 267 panic(fmt.Sprintf("docID must be [0, %v] (got docID=%v)", r.maxDoc, docID)) 268 } 269 return subIndex(docID, r.starts) 270 } 271 272 func (r *BaseCompositeReader) readerBase(readerIndex int) int { 273 panic("not implemented yet") 274 } 275 276 func (r *BaseCompositeReader) getSequentialSubReaders() []IndexReader { 277 // log.Printf("Found %v sub readers.", len(r.subReadersList)) 278 return r.subReadersList 279 }