github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/flush.go (about) 1 package index 2 3 import ( 4 "github.com/balzaczyy/golucene/core/util" 5 "sync" 6 ) 7 8 /* 9 FlushPlicy controls when segments are flushed from a RAM resident 10 internal data-structure to the IndexWriter's Directory. 11 12 Segments are traditionally flushed by: 13 1. RAM consumption - configured via IndexWriterConfig.SetRAMBufferSizeMB() 14 2. Number of RAM resident documents - configured via IndexWriterConfig.SetMaxBufferedDocs() 15 16 The policy also applies pending delete operations (by term and/or 17 query), given the threshold set in IndexcWriterConfig.SetMaxBufferedDeleteTerms(). 18 19 IndexWriter consults the provided FlushPolicy to control the flushing 20 process. The policy is informed for each added or updated document as 21 well as for each delete term. Based on the FlushPolicy, the 22 information provided via ThreadState and DocumentsWriterFlushControl, 23 the FlushPolicy decides if a DocumentsWriterPerThread needs flushing 24 and mark it as flush-pending via DocumentsWriterFlushControl.SetFLushingPending(), 25 or if deletes need to be applied. 26 */ 27 type FlushPolicy interface { 28 // Called for each delete term. If this is a delte triggered due to 29 // an update the given ThreadState is non-nil. 30 // 31 // Note: this method is called synchronized on the given 32 // DocumentsWriterFlushControl and it is guaranteed that the 33 // calling goroutine holds the lock on the given ThreadState 34 onDelete(*DocumentsWriterFlushControl, *ThreadState) 35 // Called for each document update on the given ThreadState's DWPT 36 // 37 // Note: this method is called synchronized on the given DWFC and 38 // it is guaranteed that the calling thread holds the lock on the 39 // given ThreadState 40 onUpdate(*DocumentsWriterFlushControl, *ThreadState) 41 // Called for each document addition on the given ThreadState's DWPT. 42 // 43 // Note: this method is synchronized by the given DWFC and it is 44 // guaranteed that the calling thread holds the lock on the given 45 // ThreadState 46 onInsert(*DocumentsWriterFlushControl, *ThreadState) 47 // Called by DocumentsWriter to initialize the FlushPolicy 48 init(indexWriterConfig LiveIndexWriterConfig) 49 } 50 51 type FlushPolicyImplSPI interface { 52 onInsert(*DocumentsWriterFlushControl, *ThreadState) 53 onDelete(*DocumentsWriterFlushControl, *ThreadState) 54 } 55 56 type FlushPolicyImpl struct { 57 sync.Locker 58 spi FlushPolicyImplSPI 59 indexWriterConfig LiveIndexWriterConfig 60 infoStream util.InfoStream 61 } 62 63 func newFlushPolicyImpl(spi FlushPolicyImplSPI) *FlushPolicyImpl { 64 return &FlushPolicyImpl{ 65 Locker: &sync.Mutex{}, 66 spi: spi, 67 } 68 } 69 70 func (fp *FlushPolicyImpl) onUpdate(control *DocumentsWriterFlushControl, state *ThreadState) { 71 fp.spi.onInsert(control, state) 72 fp.spi.onDelete(control, state) 73 } 74 75 func (fp *FlushPolicyImpl) init(indexWriterConfig LiveIndexWriterConfig) { 76 fp.Lock() // synchronized 77 defer fp.Unlock() 78 fp.indexWriterConfig = indexWriterConfig 79 fp.infoStream = indexWriterConfig.InfoStream() 80 } 81 82 /* 83 Returns the current most RAM consuming non-pending ThreadState with 84 at least one indexed document. 85 86 This method will never return nil 87 */ 88 func (p *FlushPolicyImpl) findLargestNonPendingWriter(control *DocumentsWriterFlushControl, 89 perThreadState *ThreadState) *ThreadState { 90 assert(perThreadState.dwpt.numDocsInRAM > 0) 91 maxRamSoFar := perThreadState.bytesUsed 92 // the dwpt which needs to be flushed eventually 93 maxRamUsingThreadState := perThreadState 94 assert2(!perThreadState.flushPending, "DWPT should have flushed") 95 count := 0 96 control.perThreadPool.foreach(func(next *ThreadState) { 97 if !next.flushPending { 98 if nextRam := next.bytesUsed; nextRam > 0 && next.dwpt.numDocsInRAM > 0 { 99 if p.infoStream.IsEnabled("FP") { 100 p.infoStream.Message("FP", "thread state has %v bytes; docInRAM=%v", 101 nextRam, next.dwpt.numDocsInRAM) 102 } 103 count++ 104 if nextRam > maxRamSoFar { 105 maxRamSoFar = nextRam 106 maxRamUsingThreadState = next 107 } 108 } 109 } 110 }) 111 if p.infoStream.IsEnabled("FP") { 112 p.infoStream.Message("FP", "%v in-use non-flusing threads states", count) 113 } 114 p.message("set largest ram consuming thread pending on lower watermark") 115 return maxRamUsingThreadState 116 } 117 118 func (p *FlushPolicyImpl) message(s string) { 119 if p.infoStream.IsEnabled("FP") { 120 p.infoStream.Message("FP", s) 121 } 122 } 123 124 // index/FlushByRamOrCountsPolicy.java 125 126 /* 127 Default FlushPolicy implementation that flushes new segments based on 128 RAM used and document count depending on the IndexWriter's 129 IndexWriterConfig. It also applies pending deletes based on the 130 number of buffered delete terms. 131 132 1. onDelete() - applies pending delete operations based on the global 133 number of buffered delete terms iff MaxBufferedDeleteTerms() is 134 enabled 135 2. onInsert() - flushes either on the number of documents per 136 DocumentsWriterPerThread (NumDocsInRAM()) or on the global active 137 memory consumption in the current indexing session iff 138 MaxBufferedDocs() or RAMBufferSizeMB() is enabled respectively 139 3. onUpdate() - calls onInsert() and onDelete() in order 140 141 All IndexWriterConfig settings are used to mark DocumentsWriterPerThread 142 as flush pending during indexing with respect to their live updates. 143 144 If SetRAMBufferSizeMB() is enabled, the largest ram consuming 145 DocumentsWriterPerThread will be marked as pending iff the global 146 active RAM consumption is >= the configured max RAM buffer. 147 */ 148 type FlushByRamOrCountsPolicy struct { 149 *FlushPolicyImpl 150 } 151 152 func newFlushByRamOrCountsPolicy() *FlushByRamOrCountsPolicy { 153 ans := new(FlushByRamOrCountsPolicy) 154 ans.FlushPolicyImpl = newFlushPolicyImpl(ans) 155 return ans 156 } 157 158 func (p *FlushByRamOrCountsPolicy) onDelete(control *DocumentsWriterFlushControl, state *ThreadState) { 159 panic("not implemented yet") 160 } 161 162 func (p *FlushByRamOrCountsPolicy) onInsert(control *DocumentsWriterFlushControl, state *ThreadState) { 163 if p.flushOnDocCount() && state.dwpt.numDocsInRAM >= p.indexWriterConfig.MaxBufferedDocs() { 164 // flush this state by num docs 165 control.setFlushPending(state) 166 } else if p.flushOnRAM() { // flush by RAM 167 limit := int64(p.indexWriterConfig.RAMBufferSizeMB() * 1024 * 1024) 168 totalRam := control._activeBytes + control.deleteBytesUsed() // safe w/o sync 169 if totalRam >= limit { 170 if p.infoStream.IsEnabled("FP") { 171 p.infoStream.Message("FP", 172 "trigger flush: activeBytes=%v deleteBytes=%v vs limit=%v", 173 control._activeBytes, control.deleteBytesUsed(), limit) 174 } 175 p.markLargestWriterPending(control, state, totalRam) 176 } 177 } 178 } 179 180 /* Marks the mos tram consuming active DWPT flush pending */ 181 func (p *FlushByRamOrCountsPolicy) markLargestWriterPending(control *DocumentsWriterFlushControl, 182 perThreadState *ThreadState, currentBytesPerThread int64) { 183 control.setFlushPending(p.findLargestNonPendingWriter(control, perThreadState)) 184 } 185 186 /* Returns true if this FLushPolicy flushes on IndexWriterConfig.MaxBufferedDocs(), otherwise false */ 187 func (p *FlushByRamOrCountsPolicy) flushOnDocCount() bool { 188 return p.indexWriterConfig.MaxBufferedDocs() != DISABLE_AUTO_FLUSH 189 } 190 191 /* Returns true if this FlushPolicy flushes on IndexWriterConfig.RAMBufferSizeMB(), otherwise false */ 192 func (p *FlushByRamOrCountsPolicy) flushOnRAM() bool { 193 return p.indexWriterConfig.RAMBufferSizeMB() != DISABLE_AUTO_FLUSH 194 }