github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/concurrentMergeScheduler.go

package index

import (
	"fmt"
	"log"
	"sync"
	"sync/atomic"
	"time"
)

// index/ConcurrentMergeScheduler.java

type MergeJob struct {
	start  time.Time
	writer *IndexWriter
	merge  *OneMerge
}

/*
Default maxRoutineCount. We default to 1: tests on spinning-magnet
drives showed slower indexing performance if more than one merge
routine runs at once (though on an SSD it was faster).
*/
const DEFAULT_MAX_ROUTINE_COUNT = 1

// Default maxMergeCount.
const DEFAULT_MAX_MERGE_COUNT = 2

/*
A MergeScheduler that runs each merge using a separate goroutine.

Specify the max number of goroutines that may run at once, and the
maximum number of simultaneous merges, with SetMaxMergesAndRoutines().

If the number of merges exceeds the max number of goroutines, then the
largest merges are paused until one of the smaller merges completes.

If more than MaxMergeCount() merges are requested, then this class
will forcefully throttle the incoming goroutines by pausing until one
or more merges complete.
*/
type ConcurrentMergeScheduler struct {
	sync.Locker

	// Max number of merge routines allowed to be running at once. When
	// there are more merges than this, we forcefully pause the larger
	// ones, letting the smaller ones run, up until maxMergeCount
	// merges, at which point we forcefully pause incoming routines
	// (which presumably are the ones causing so much merging).
	maxRoutineCount int

	// Max number of merges we accept before forcefully throttling the
	// incoming routines.
	maxMergeCount int

	// IndexWriter that owns this instance.
	writer *IndexWriter

	// How many merges have kicked off (this is used to name them).
	mergeThreadCount int32 // atomic

	suppressErrors bool

	chRequest            chan *MergeJob
	chSync               chan *sync.WaitGroup
	concurrentMergeCount int32 // atomic
	numMergeRoutines     int32 // atomic
}

func NewConcurrentMergeScheduler() *ConcurrentMergeScheduler {
	cms := &ConcurrentMergeScheduler{
		Locker:    &sync.Mutex{},
		chRequest: make(chan *MergeJob),
		chSync:    make(chan *sync.WaitGroup),
	}
	cms.SetMaxMergesAndRoutines(DEFAULT_MAX_MERGE_COUNT, DEFAULT_MAX_ROUTINE_COUNT)
	return cms
}
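
// A minimal usage sketch (hypothetical wiring; exactly how a
// MergeScheduler is attached to an IndexWriter depends on the
// surrounding golucene configuration API):
//
//	cms := NewConcurrentMergeScheduler()
//	cms.SetMaxMergesAndRoutines(4, 2) // accept up to 4 merges, run 2 at once
//	// ... hand cms to the IndexWriter configuration ...
//	defer cms.Close() // blocks until running merges finish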

/*
Daemon worker that accepts and processes merge jobs.

GoLucene assumes each merge is pre-sorted according to its merge size
before it is acquired from the IndexWriter. It uses pre-allocated
goroutines, instead of MergeThread, to do the real merge work,
without explicit synchronization and waiting.

Note, however, that changing the merge count won't pause/resume workers.
*/
func (cms *ConcurrentMergeScheduler) worker(id int) {
	atomic.AddInt32(&cms.numMergeRoutines, 1)
	fmt.Printf("CMS Worker %v is started.\n", id)
	var isRunning = true
	var wg *sync.WaitGroup
	// The id check lets a worker retire lazily if maxRoutineCount has
	// been lowered below its own id since it was spawned.
	for isRunning && id < cms.maxRoutineCount {
		select {
		case job := <-cms.chRequest:
			cms.process(job)
		case wg = <-cms.chSync:
			isRunning = false
			defer wg.Done()
		}
	}
	fmt.Printf("CMS Worker %v is stopped.\n", id)
	atomic.AddInt32(&cms.numMergeRoutines, -1)
}

func (cms *ConcurrentMergeScheduler) process(job *MergeJob) {
	atomic.AddInt32(&cms.concurrentMergeCount, 1)
	defer func() {
		atomic.AddInt32(&cms.concurrentMergeCount, -1)
	}()

	if cms.verbose() {
		elapsed := time.Since(job.start)
		cms.message("  stalled for %v", elapsed)
		cms.message("  consider merge %v", job.writer.readerPool.segmentsToString(job.merge.segments))
		// No new routine is actually spawned here; the counter is only
		// used to number the merge for logging.
		cms.message("    launch new thread [%v]", atomic.AddInt32(&cms.mergeThreadCount, 1))
		cms.message("  merge thread: start")
	}

	err := job.writer.merge(job.merge)
	if err != nil {
		// Ignore the error if it was due to abort:
		if _, ok := err.(MergeAbortedError); !ok && !cms.suppressErrors {
			// suppressErrors is normally only set during testing.
			cms.handleMergeError(err)
		}
	}
}

// Sets the maximum number of merge goroutines and simultaneous
// merges allowed.
func (cms *ConcurrentMergeScheduler) SetMaxMergesAndRoutines(maxMergeCount, maxRoutineCount int) {
	assert2(maxRoutineCount >= 1, "maxRoutineCount should be at least 1")
	assert2(maxMergeCount >= 1, "maxMergeCount should be at least 1")
	assert2(maxRoutineCount <= maxMergeCount, fmt.Sprintf(
		"maxRoutineCount should be <= maxMergeCount (= %v)", maxMergeCount))

	cms.Lock()
	defer cms.Unlock()

	// Read and update the counts under the lock so that concurrent
	// callers cannot spawn duplicate workers.
	oldCount := cms.maxRoutineCount
	cms.maxRoutineCount = maxRoutineCount
	cms.maxMergeCount = maxMergeCount

	for i := oldCount; i < maxRoutineCount; i++ {
		go cms.worker(i)
	}
}
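
// Note that growth and shrinkage are asymmetric here: raising
// maxRoutineCount spawns workers immediately, while lowering it only
// takes effect as each surplus worker re-checks id < maxRoutineCount
// after its current job. A sketch of growing the default pool
// (illustrative values):
//
//	cms.SetMaxMergesAndRoutines(8, 4) // workers 1..3 join the initial worker 0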

/*
Returns true if verbose logging is enabled. This method is usually
used in conjunction with message(), like so:

	if cms.verbose() {
		cms.message("your message")
	}
*/
func (cms *ConcurrentMergeScheduler) verbose() bool {
	return cms.writer != nil && cms.writer.infoStream.IsEnabled("CMS")
}

/*
Outputs the given message; this method assumes verbose() was called
and returned true.
*/
func (cms *ConcurrentMergeScheduler) message(format string, args ...interface{}) {
	cms.writer.infoStream.Message("CMS", format, args...)
}

func (cms *ConcurrentMergeScheduler) Close() error {
	cms.sync()
	return nil
}

/*
Wait for any running merge threads to finish. This call is not
interruptible, as used by Close().
*/
func (cms *ConcurrentMergeScheduler) sync() {
	cms.Lock()
	defer cms.Unlock()

	wg := new(sync.WaitGroup)
	// numMergeRoutines is written atomically by the workers, so load it
	// atomically too; holding the lock keeps new workers from spawning.
	for i, limit := 0, int(atomic.LoadInt32(&cms.numMergeRoutines)); i < limit; i++ {
		wg.Add(1)
		cms.chSync <- wg
	}
	wg.Wait()
}

func (cms *ConcurrentMergeScheduler) Merge(writer *IndexWriter,
	trigger MergeTrigger, newMergesFound bool) error {
	cms.Lock() // synchronized
	defer cms.Unlock()

	// assert !Thread.holdsLock(writer)
	cms.writer = writer

	// First, quickly run through the newly proposed merges
	// and add any orthogonal merges (ie a merge not
	// involving segments already pending to be merged) to
	// the queue.  If we are way behind on merging, many of
	// these newly proposed merges will likely already be
	// registered.
	if cms.verbose() {
		cms.message("now merge")
		cms.message("  index: %v", writer.segString())
	}

	// Iterate, pulling from the IndexWriter's queue of
	// pending merges, until it's empty:
	for merge := writer.nextMerge(); merge != nil; merge = writer.nextMerge() {
		if atomic.LoadInt32(&cms.concurrentMergeCount) >= int32(cms.maxMergeCount) {
			// This means merging has fallen too far behind: there are
			// already maxMergeCount merges in flight, and now there's
			// at least one more merge pending. We stall this producer
			// routine to prevent creation of new segments until
			// merging has caught up; the blocking send on the
			// unbuffered chRequest below is what actually stalls it.
			if cms.verbose() {
				cms.message("    too many merges; stalling...")
			}
		}
		cms.chRequest <- &MergeJob{time.Now(), writer, merge}
	}
	if cms.verbose() {
		cms.message("  no more merges pending; now return")
	}
	return nil
}
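
// Backpressure sketch: because chRequest is unbuffered, the send in the
// loop above blocks whenever every worker is busy, which is what
// throttles the producer. With the defaults (1 routine, 2 merges), a
// hypothetical burst of pending merges is drained strictly one at a
// time while Merge() waits on the channel send.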

/*
Called when an error is hit in a background merge routine.
*/
func (cms *ConcurrentMergeScheduler) handleMergeError(err error) {
	// When an exception is hit during merge, IndexWriter
	// removes any partial files and then allows another
	// merge to run.  If whatever caused the error is not
	// transient then the exception will keep happening,
	// so, we sleep here to avoid saturating CPU in such
	// cases:
	time.Sleep(250 * time.Millisecond)
	// Lucene Java throws an unchecked exception in a separate thread;
	// GoLucene just dumps the error to the console.
	log.Printf("Merge error: %v", err)
}

func (cms *ConcurrentMergeScheduler) String() string {
	panic("not implemented yet")
}