github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/threadPool.go (about)

     1  package index
     2  
     3  import (
     4  	"container/list"
     5  	"fmt"
     6  	"sync"
     7  )
     8  
     9  // index/DocumentsWriterPerThreadPool.java
    10  
/*
ThreadState references and guards a DocumentsWriterPerThread instance
that is used during indexing to build an in-memory index segment.
ThreadState also holds all flush-related per-thread data controlled
by DocumentsWriterFlushControl.

A ThreadState, its methods and its members should only be accessed by
one goroutine at a time. In this port a ThreadState is obtained from
DocumentsWriterPerThreadPool (lockAny() or lock()) and must be handed
back with release() once the caller is done with it; release() plays
the role of ThreadState.unlock() in Lucene Java.
*/
type ThreadState struct {
	id   int // index of this state in the pool's threadStates slice
	dwpt *DocumentsWriterPerThread
	// TODO this should really be part of DocumentsWriterFlushControl
	// write access guarded by DocumentsWriterFlushControl
	flushPending bool // volatile in Lucene Java; a plain field here
	// TODO this should really be part of DocumentsWriterFlushControl
	// write access guarded by DocumentsWriterFlushControl
	bytesUsed int64
	isActive  bool // true after newThreadState, false after deactivate
}
    32  
    33  func newThreadState(id int) *ThreadState {
    34  	return &ThreadState{id: id, isActive: true}
    35  }
    36  
    37  func (ts *ThreadState) deactivate() {
    38  	ts.isActive = false
    39  	ts.reset()
    40  }
    41  
    42  func (ts *ThreadState) reset() {
    43  	ts.dwpt = nil
    44  	ts.bytesUsed = 0
    45  	ts.flushPending = false
    46  }
    47  
/*
DocumentsWriterPerThreadPool controls ThreadState instances and their
goroutine assignment during indexing. Each ThreadState holds a
reference to a DocumentsWriterPerThread that, once the ThreadState is
obtained from the pool, is used exclusively by the obtaining goroutine
for indexing a single document. Each indexing goroutine must obtain
such a ThreadState to make progress. Depending on the
DocumentsWriterPerThreadPool implementation, ThreadState assignments
might differ from document to document.

Once a DocumentsWriterPerThread is selected for flush, the pool reuses
the flushing DocumentsWriterPerThread's ThreadState with a new
DocumentsWriterPerThread instance.

A goroutine is different from a Java thread. So instead of thread
affinity, channels and concurrently running goroutines are used to
hold individual DocumentsWriterPerThread instances and states.

Fields:

  - Locker: mutex (a *sync.Mutex, see the constructor) taken by the
    pool methods that touch threadStates, listeners, freeList and
    lockedList.
  - threadStates: every ThreadState created so far, indexed by its id;
    the slice capacity is the maximum number of states.
  - listeners: per-id list of channels on which goroutines wait for
    that specific state (see lock and release); entries are created
    lazily.
  - freeList: ids of the states that are currently free.
  - lockedList: ids recorded when a state is locked via lock or created
    via newThreadState.
  - hasMoreStates: condition signalled by release and waited on by
    lockAny; it uses its own mutex, not Locker.
*/
type DocumentsWriterPerThreadPool struct {
	sync.Locker
	threadStates  []*ThreadState
	listeners     []*list.List
	freeList      *list.List
	lockedList    *list.List
	hasMoreStates *sync.Cond
}
    74  
    75  func NewDocumentsWriterPerThreadPool(maxNumThreadStates int) *DocumentsWriterPerThreadPool {
    76  	assert2(maxNumThreadStates >= 1, fmt.Sprintf("maxNumThreadStates must be >= 1 but was: %v", maxNumThreadStates))
    77  	return &DocumentsWriterPerThreadPool{
    78  		Locker:        &sync.Mutex{},
    79  		threadStates:  make([]*ThreadState, 0, maxNumThreadStates),
    80  		listeners:     make([]*list.List, maxNumThreadStates),
    81  		freeList:      list.New(),
    82  		lockedList:    list.New(),
    83  		hasMoreStates: sync.NewCond(&sync.Mutex{}),
    84  	}
    85  }
    86  
    87  func (tp *DocumentsWriterPerThreadPool) numActiveThreadState() int {
    88  	return len(tp.threadStates)
    89  }
    90  
    91  func (tp *DocumentsWriterPerThreadPool) reset(threadState *ThreadState, closed bool) *DocumentsWriterPerThread {
    92  	dwpt := threadState.dwpt
    93  	if !closed {
    94  		threadState.reset()
    95  	} else {
    96  		threadState.deactivate()
    97  	}
    98  	return dwpt
    99  }
   100  
   101  /*
   102  It's unfortunately that Go doesn't support 'Thread Affinity'. Default
   103  strategy is LIFO.
   104  */
   105  func (tp *DocumentsWriterPerThreadPool) lockAny() (res *ThreadState) {
   106  	for res == nil {
   107  		if res = tp.findNextAvailableThreadState(); res == nil {
   108  			// ThreadState is already locked before return by this method:
   109  			if res = tp.newThreadState(); res == nil {
   110  				// Wait until a thread state freez up:
   111  				func() {
   112  					tp.hasMoreStates.L.Lock()
   113  					defer tp.hasMoreStates.L.Unlock()
   114  
   115  					tp.hasMoreStates.Wait()
   116  				}()
   117  			}
   118  		}
   119  	}
   120  	return
   121  }
   122  
   123  func (tp *DocumentsWriterPerThreadPool) lock(id int, wait bool) *ThreadState {
   124  	tp.Lock()
   125  	defer tp.Unlock()
   126  
   127  	for e := tp.freeList.Front(); e != nil; e = e.Next() {
   128  		if tid := e.Value.(int); tid == id {
   129  			tp.freeList.Remove(e)
   130  			tp.lockedList.PushBack(id)
   131  			return tp.threadStates[tid]
   132  		}
   133  	}
   134  
   135  	if !wait {
   136  		return nil
   137  	}
   138  	waitingList := tp.listeners[id]
   139  	if waitingList == nil {
   140  		waitingList = list.New()
   141  		tp.listeners[id] = waitingList
   142  	}
   143  	ch := make(chan *ThreadState)
   144  	waitingList.PushBack(ch)
   145  	return <-ch // block until reserved thread state is released
   146  }
   147  
   148  func (tp *DocumentsWriterPerThreadPool) findNextAvailableThreadState() *ThreadState {
   149  	tp.Lock()
   150  	defer tp.Unlock()
   151  
   152  	if tp.freeList.Len() > 0 {
   153  		// Important that we are LIFO here! This way if number of
   154  		// concurrent indexing threads was once high, but has now
   155  		// reduced, we only use a limited number of thread states:
   156  		e := tp.freeList.Back()
   157  		id := e.Value.(int)
   158  		tp.freeList.Remove(e)
   159  
   160  		if tp.threadStates[id].dwpt == nil {
   161  			// This thread-state is not initialized, e.g. it was just
   162  			// flushed. See if we can instead find another free thread
   163  			// state that already has docs indexed. This way if incoming
   164  			// thread concurrentcy has decreased, we don't leave docs
   165  			// indefinitely buffered, tying up RAM. This will instead get
   166  			// thread states flushed, freein up RAM for larger segment
   167  			// flushes:
   168  			for e = tp.freeList.Front(); e != nil; e = e.Next() {
   169  				if id2 := e.Value.(int); tp.threadStates[id2].dwpt != nil {
   170  					// Use this one instead, and swap it with the
   171  					// un-initialized one:
   172  					tp.freeList.PushFront(id)
   173  					id = id2
   174  					tp.freeList.Remove(e)
   175  					break
   176  				}
   177  			}
   178  		}
   179  		return tp.threadStates[id]
   180  	}
   181  	return nil
   182  }
   183  
   184  /*
   185  Returns a new ThreadState iff any new state is available otherwise
   186  nil.
   187  
   188  NOTE: the returned ThreadState is already locked iff non-nil.
   189  */
   190  func (tp *DocumentsWriterPerThreadPool) newThreadState() (ts *ThreadState) {
   191  	tp.Lock()
   192  	defer tp.Unlock()
   193  
   194  	// Create a new empty thread state if possible
   195  	if len(tp.threadStates) < cap(tp.threadStates) {
   196  		ts = newThreadState(len(tp.threadStates))
   197  		tp.threadStates = append(tp.threadStates, ts)
   198  		tp.lockedList.PushBack(ts.id)
   199  	}
   200  	return
   201  }
   202  
   203  func (tp *DocumentsWriterPerThreadPool) foreach(f func(state *ThreadState)) {
   204  	for i, limit := 0, len(tp.threadStates); i < limit; i++ {
   205  		ts := tp.lock(i, true)
   206  		assert(ts != nil)
   207  		f(ts)
   208  		tp.release(ts)
   209  	}
   210  }
   211  
   212  func (tp *DocumentsWriterPerThreadPool) find(f func(state *ThreadState) interface{}) interface{} {
   213  	for i, limit := 0, len(tp.threadStates); i < limit; i++ {
   214  		if ts := tp.lock(i, false); ts != nil {
   215  			res := f(ts)
   216  			tp.release(ts)
   217  			if res != nil {
   218  				return res
   219  			}
   220  		}
   221  	}
   222  	return nil
   223  }
   224  
   225  /*
   226  Release the ThreadState back to the pool. Equals to
   227  ThreadState.Unlock() in Lucene Java.
   228  */
   229  func (tp *DocumentsWriterPerThreadPool) release(ts *ThreadState) {
   230  	tp.Lock()
   231  	defer tp.Unlock()
   232  
   233  	if waitingList := tp.listeners[ts.id]; waitingList != nil && waitingList.Len() > 0 {
   234  		// this thread state is reserved
   235  		e := waitingList.Front()
   236  		waitingList.Remove(e)
   237  		// re-allocate to external handler
   238  		e.Value.(chan *ThreadState) <- ts
   239  		return
   240  	}
   241  
   242  	// push the thread state back to
   243  	tp.freeList.PushBack(ts.id)
   244  	tp.hasMoreStates.Signal()
   245  }