github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/concurrentMergeScheduler.go (about) 1 package index 2 3 import ( 4 "fmt" 5 "log" 6 "sync" 7 "sync/atomic" 8 "time" 9 ) 10 11 // index/ConcurrentMergeScheduler.java 12 13 type MergeJob struct { 14 start time.Time 15 writer *IndexWriter 16 merge *OneMerge 17 } 18 19 /* 20 Default maxThreadCount. We default to 1: tests on spinning-magnet 21 drives showed slower indexing performance if more than one merge 22 routine runs at once (though on an SSD it was faster) 23 */ 24 const DEFAULT_MAX_ROUTINE_COUNT = 1 25 26 // Default maxMergeCount. 27 const DEFAULT_MAX_MERGE_COUNT = 2 28 29 /* 30 A MergeScheduler that runs each merge using a separate goroutine. 31 32 Specify the max number of goroutines that may run at once, and the 33 maximum number of simultaneous merges with SetMaxMergesAndRoutines(). 34 35 If the number of merges exceeds the max number of threads then the 36 largest merges are paused until one of the smaller merges completes. 37 38 If more than MaxMergeCount() merges are requested then this class 39 will forcefully throttle the incoming goroutines by pausing until one 40 or more merges complete. 41 */ 42 type ConcurrentMergeScheduler struct { 43 sync.Locker 44 45 // Max number of merge routines allowed to be running at once. When 46 // there are more merges then this, we forcefully pause the larger 47 // ones, letting the smaller ones run, up until maxMergeCount 48 // merges at which point we forcefully pause incoming routines 49 // (that presumably are the ones causing so much merging). 50 maxRoutineCount int 51 52 // Max number of merges we accept before forcefully throttling the 53 // incoming routines 54 maxMergeCount int 55 56 // IndexWriter that owns this instance. 57 writer *IndexWriter 58 59 // How many merges have kicked off (this is used to name them). 60 mergeThreadCount int32 // atomic 61 62 suppressErrors bool 63 64 chRequest chan *MergeJob 65 chSync chan *sync.WaitGroup 66 concurrentMergeCount int32 // atomic 67 numMergeRoutines int32 // atomic 68 } 69 70 func NewConcurrentMergeScheduler() *ConcurrentMergeScheduler { 71 cms := &ConcurrentMergeScheduler{ 72 Locker: &sync.Mutex{}, 73 chRequest: make(chan *MergeJob), 74 chSync: make(chan *sync.WaitGroup), 75 } 76 cms.SetMaxMergesAndRoutines(DEFAULT_MAX_MERGE_COUNT, DEFAULT_MAX_ROUTINE_COUNT) 77 return cms 78 } 79 80 /* 81 Daemon worker that accepts and processes merge job. 82 83 GoLucene assumes each merge is pre-sorted according to its merge size 84 before acquired from IndexWriter. It makes use of pre-allocated 85 go routines, instead of MergeThread to do the real merge work, 86 witout explicit synchronizations and waitings. 87 88 Note, however, change of merge count won't pause/resume workers. 89 */ 90 func (cms *ConcurrentMergeScheduler) worker(id int) { 91 atomic.AddInt32(&cms.numMergeRoutines, 1) 92 fmt.Printf("CMS Worker %v is started.\n", id) 93 var isRunning = true 94 var wg *sync.WaitGroup 95 for isRunning && id < cms.maxRoutineCount { 96 select { 97 case job := <-cms.chRequest: 98 cms.process(job) 99 case wg = <-cms.chSync: 100 isRunning = false 101 defer wg.Done() 102 } 103 } 104 fmt.Printf("CMS Worker %v is stopped.\n", id) 105 atomic.AddInt32(&cms.numMergeRoutines, -1) 106 } 107 108 func (cms *ConcurrentMergeScheduler) process(job *MergeJob) { 109 atomic.AddInt32(&cms.concurrentMergeCount, 1) 110 defer func() { 111 atomic.AddInt32(&cms.concurrentMergeCount, -1) 112 }() 113 114 if cms.verbose() { 115 elapsed := time.Now().Sub(job.start) 116 cms.message(" stalled for %v", elapsed) 117 cms.message(" consider merge %v", job.writer.readerPool.segmentsToString(job.merge.segments)) 118 // OK to spawn a new merge routine to handle this merge 119 cms.message(" launch new thread [%v]", atomic.AddInt32(&cms.mergeThreadCount, 1)) 120 cms.message(" merge thread: start") 121 } 122 123 err := job.writer.merge(job.merge) 124 if err != nil { 125 // Ignore the error if it was due to abort: 126 if _, ok := err.(MergeAbortedError); !ok && !cms.suppressErrors { 127 // suppressErrors is normally only set during testing. 128 cms.handleMergeError(err) 129 } 130 } 131 } 132 133 // Sets the maximum number of merge goroutines and simultaneous 134 // merges allowed. 135 func (cms *ConcurrentMergeScheduler) SetMaxMergesAndRoutines(maxMergeCount, maxRoutineCount int) { 136 assert2(maxRoutineCount >= 1, "maxRoutineCount should be at least 1") 137 assert2(maxMergeCount >= 1, "maxMergeCount should be at least 1") 138 assert2(maxRoutineCount <= maxMergeCount, fmt.Sprintf( 139 "maxRoutineCount should be <= maxMergeCount (= %v)", maxMergeCount)) 140 141 oldCount := cms.maxRoutineCount 142 cms.maxRoutineCount = maxRoutineCount 143 cms.maxMergeCount = maxMergeCount 144 145 cms.Lock() 146 defer cms.Unlock() 147 for i := oldCount; i < maxRoutineCount; i++ { 148 go cms.worker(i) 149 } 150 } 151 152 /* 153 Returns true if verbosing is enabled. This method is usually used in 154 conjunction with message(), like that: 155 156 if cms.verbose() { 157 cms.message("your message") 158 } 159 */ 160 func (cms *ConcurrentMergeScheduler) verbose() bool { 161 return cms.writer != nil && cms.writer.infoStream.IsEnabled("CMS") 162 } 163 164 /* 165 Outputs the given message - this method assumes verbose() was called 166 and returned true. 167 */ 168 func (cms *ConcurrentMergeScheduler) message(format string, args ...interface{}) { 169 cms.writer.infoStream.Message("CMS", format, args...) 170 } 171 172 func (cms *ConcurrentMergeScheduler) Close() error { 173 cms.sync() 174 return nil 175 } 176 177 /* 178 Wait for any running merge threads to finish. This call is not 179 Interruptible as used by Close() 180 */ 181 func (cms *ConcurrentMergeScheduler) sync() { 182 cms.Lock() 183 defer cms.Unlock() 184 185 wg := new(sync.WaitGroup) 186 // no need to synchronize on numMergeRoutines 187 for i, limit := 0, int(cms.numMergeRoutines); i < limit; i++ { 188 wg.Add(1) 189 cms.chSync <- wg 190 } 191 wg.Wait() 192 } 193 194 func (cms *ConcurrentMergeScheduler) Merge(writer *IndexWriter, 195 trigger MergeTrigger, newMergesFound bool) error { 196 cms.Lock() // synchronized 197 defer cms.Unlock() 198 199 // assert !Thread.holdsLock(writer) 200 cms.writer = writer 201 202 // First, quickly run through the newly proposed merges 203 // and add any orthogonal merges (ie a merge not 204 // involving segments already pending to be merged) to 205 // the queue. If we are way behind on merging, many of 206 // these newly proposed merges will likely already be 207 // registered. 208 if cms.verbose() { 209 cms.message("now merge") 210 cms.message(" index: %v", writer.segString()) 211 } 212 213 // Iterate, pulling from the IndexWriter's queue of 214 // pending merges, until it's empty: 215 for merge := writer.nextMerge(); merge != nil; merge = writer.nextMerge() { 216 if atomic.LoadInt32(&cms.concurrentMergeCount) >= int32(cms.maxMergeCount) { 217 // This means merging has fallen too far behind: we 218 // have already created maxMergeCount threads, and 219 // now there's at least one more merge pending. 220 // Note that only maxThreadCount of 221 // those created merge threads will actually be 222 // running; the rest will be paused (see 223 // updateMergeThreads). We stall this producer 224 // thread to prevent creation of new segments, 225 // until merging has caught up: 226 if cms.verbose() { 227 cms.message(" too many merges; stalling...") 228 } 229 } 230 cms.chRequest <- &MergeJob{time.Now(), writer, merge} 231 } 232 if cms.verbose() { 233 cms.message(" no more merges pending; now return") 234 } 235 return nil 236 } 237 238 /* 239 Called when an error is hit in a background merge thread 240 */ 241 func (cms *ConcurrentMergeScheduler) handleMergeError(err error) { 242 // When an exception is hit during merge, IndexWriter 243 // removes any partial files and then allows another 244 // merge to run. If whatever caused the error is not 245 // transient then the exception will keep happening, 246 // so, we sleep here to avoid saturating CPU in such 247 // cases: 248 time.Sleep(250 * time.Millisecond) 249 // Lucene Java throw Unchecked exception in a separate thread. 250 // GoLucene just dump the error in console. 251 log.Printf("Merge error: %v", err) 252 } 253 254 func (cms *ConcurrentMergeScheduler) String() string { 255 panic("not implemented yet") 256 }